C++ API Reference for Intel® Data Analytics Acceleration Library 2020 Update 1

file_data_source.h
1 /* file: file_data_source.h */
2 /*******************************************************************************
3 * Copyright 2014-2020 Intel Corporation
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *******************************************************************************/
17 
18 /*
19 //++
20 // Implementation of the file data source class.
21 //--
22 */
23 
24 #ifndef __FILE_DATA_SOURCE_H__
25 #define __FILE_DATA_SOURCE_H__
26 
27 #include <cstdio>
28 #include "services/daal_memory.h"
29 #include "data_management/data_source/data_source.h"
30 #include "data_management/data_source/csv_data_source.h"
31 #include "data_management/data/data_dictionary.h"
32 #include "data_management/data/numeric_table.h"
33 #include "data_management/data/homogen_numeric_table.h"
34 
35 namespace daal
36 {
37 namespace data_management
38 {
39 
40 namespace interface1
41 {
52 template< typename FeatureManager, typename SummaryStatisticsType = DAAL_SUMMARY_STATISTICS_TYPE>
53 class FileDataSource : public CsvDataSource<FeatureManager, SummaryStatisticsType>
54 {
55 private:
56  typedef CsvDataSource<FeatureManager, SummaryStatisticsType> super;
57 
58 protected:
59  using super::_rawLineBuffer;
60  using super::_rawLineBufferLen;
61  using super::_rawLineLength;
62  using super::_status;
63 
64 public:
74  FileDataSource(const std::string &fileName,
75  DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable = DataSource::notAllocateNumericTable,
76  DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext = DataSource::notDictionaryFromContext,
77  size_t initialMaxRows = 10) :
78  super(doAllocateNumericTable, doCreateDictionaryFromContext, initialMaxRows)
79  {
80  _status |= initialize(fileName);
81  }
82 
89  FileDataSource(const std::string &fileName,
90  CsvDataSourceOptions options,
91  size_t initialMaxRows = 10) :
92  super(options, initialMaxRows)
93  {
94  _status |= initialize(fileName);
95  }
96 
97  virtual ~FileDataSource()
98  {
99  if (_file)
100  fclose(_file);
101  daal::services::daal_free( _fileBuffer );
102  }
103 
104 public:
105  services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
106  {
107  services::Status s = super::createDictionaryFromContext();
108  fseek(_file, 0, SEEK_SET);
109  _fileBufferPos = _fileBufferLen;
110  return s;
111  }
112 
113  DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
114  {
115  return (iseof() ? DataSourceIface::endOfData : DataSourceIface::readyForLoad);
116  }
117 
118 protected:
119  bool iseof() const DAAL_C11_OVERRIDE
120  {
121  return (_fileBufferPos == _readedFromFileLen && feof(_file));
122  }
123 
124  bool readLine(char *buffer, int count, int& pos)
125  {
126  bool bRes = true;
127  pos = 0;
128  while (pos + 1 < count)
129  {
130  if (_fileBufferPos < _readedFromFileLen)
131  {
132  if (_fileBuffer[_fileBufferPos] == '\0')
133  {
134  return false;
135  }
136  buffer[pos] = _fileBuffer[_fileBufferPos];
137  ++pos;
138  ++_fileBufferPos;
139  if (buffer[pos - 1] == '\n')
140  break;
141  }
142  else
143  {
144  if (iseof ())
145  break;
146  _fileBufferPos = 0;
147  _readedFromFileLen = (int)fread(_fileBuffer, 1, _fileBufferLen, _file);
148  if (ferror(_file))
149  {
150  bRes = false;
151  break;
152  }
153  }
154  }
155  buffer[pos] = '\0';
156  return bRes;
157  }
158 
159  services::Status readLine() DAAL_C11_OVERRIDE
160  {
161  _rawLineLength = 0;
162  while(!iseof())
163  {
164  int readLen = 0;
165  if(!readLine(_rawLineBuffer + _rawLineLength, _rawLineBufferLen - _rawLineLength, readLen))
166  {
167  return services::Status(services::ErrorOnFileRead);
168  }
169 
170  if (readLen <= 0)
171  {
172  _rawLineLength = 0;
173  break;
174  }
175  _rawLineLength += readLen;
176  if (_rawLineBuffer[_rawLineLength - 1] == '\n' || _rawLineBuffer[_rawLineLength - 1] == '\r')
177  {
178  while (_rawLineLength > 0 && (_rawLineBuffer[_rawLineLength - 1] == '\n' || _rawLineBuffer[_rawLineLength - 1] == '\r'))
179  {
180  _rawLineLength--;
181  }
182  _rawLineBuffer[_rawLineLength] = '\0';
183  break;
184  }
185  if(!super::enlargeBuffer())
186  return services::Status(services::ErrorMemoryAllocationFailed);
187  }
188  return services::Status();
189  }
190 
191 private:
192  services::Status initialize(const std::string &fileName)
193  {
194  _file = NULL;
195  _fileName = fileName;
196  _fileBufferLen = (int)INITIAL_FILE_BUFFER_LENGTH;
197  _fileBufferPos = _fileBufferLen;
198  _fileBuffer = NULL;
199  _readedFromFileLen = 0;
200  if (fileName.find('\0') != std::string::npos)
201  {
202  return services::throwIfPossible(services::ErrorNullByteInjection);
203  }
204  #if (defined(_MSC_VER)&&(_MSC_VER >= 1400))
205  errno_t error;
206  error = fopen_s( &_file, fileName.c_str(), "r" );
207  if (error != 0 || !_file)
208  {
209  return services::throwIfPossible(services::ErrorOnFileOpen);
210  }
211  #else
212  _file = fopen( (char*)(fileName.c_str()), "r" );
213  if (!_file)
214  {
215  return services::throwIfPossible(services::ErrorOnFileOpen);
216  }
217  #endif
218  _fileBuffer = (char *)daal::services::daal_malloc(_fileBufferLen);
219  if (!_fileBuffer)
220  {
221  fclose(_file);
222  _file = NULL;
223  return services::throwIfPossible(services::ErrorMemoryAllocationFailed);
224  }
225  return services::Status();
226  }
227 
228 protected:
229  std::string _fileName;
230 
231  FILE *_file;
232 
233  char *_fileBuffer;
234  int _fileBufferLen;
235  int _fileBufferPos;
236  int _readedFromFileLen;
237 
238 private:
239  static const size_t INITIAL_FILE_BUFFER_LENGTH = 1048576;
240 };
243 } // namespace interface1
244 
245 using interface1::FileDataSource;
246 
247 } // namespace data_management
248 } // namespace daal
249 
250 #endif
daal::data_management::interface1::DataSourceIface::NumericTableAllocationFlag
NumericTableAllocationFlag
Specifies whether a Numeric Table is allocated inside of the Data Source object.
Definition: data_source.h:80
daal::services::ErrorOnFileOpen
Definition: error_indexes.h:392
daal::data_management::interface1::CsvDataSource
Specifies methods to access data stored in files.
Definition: csv_data_source.h:100
daal::data_management::interface1::FileDataSource
Specifies methods to access data stored in files.
Definition: file_data_source.h:53
daal::data_management::interface1::CsvDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: csv_data_source.h:327
daal::data_management::interface1::DataSourceIface::notAllocateNumericTable
Definition: data_source.h:82
daal::data_management::interface1::DataSourceIface::notDictionaryFromContext
Definition: data_source.h:72
daal::data_management::interface1::DataSourceIface::readyForLoad
Definition: data_source.h:60
daal::data_management::interface1::FileDataSource::FileDataSource
FileDataSource(const std::string &fileName, CsvDataSourceOptions options, size_t initialMaxRows=10)
Definition: file_data_source.h:89
daal::data_management::interface1::FileDataSource::FileDataSource
FileDataSource(const std::string &fileName, DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable=DataSource::notAllocateNumericTable, DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext=DataSource::notDictionaryFromContext, size_t initialMaxRows=10)
Definition: file_data_source.h:74
daal::services::ErrorNullByteInjection
Definition: error_indexes.h:394
daal::data_management::interface1::DataSourceIface::DataSourceStatus
DataSourceStatus
Specifies the status of the Data Source.
Definition: data_source.h:58
daal::data_management::interface1::CsvDataSourceOptions
Options of CSV data source.
Definition: csv_data_source.h:50
daal::data_management::interface1::FileDataSource::getStatus
DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
Definition: file_data_source.h:113
daal::data_management::interface1::DataSourceIface::endOfData
Definition: data_source.h:62
daal::data_management::interface1::DataSourceIface::doAllocateNumericTable
Definition: data_source.h:83
daal::services::ErrorOnFileRead
Definition: error_indexes.h:393
daal::data_management::interface1::FileDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: file_data_source.h:105
daal::data_management::interface1::DataSourceIface::DictionaryCreationFlag
DictionaryCreationFlag
Specifies whether a Data Dictionary is created from the context of a Data Source. ...
Definition: data_source.h:70
daal::services::daal_malloc
DAAL_EXPORT void * daal_malloc(size_t size, size_t alignment=DAAL_MALLOC_DEFAULT_ALIGNMENT)
daal::services::daal_free
DAAL_EXPORT void daal_free(void *ptr)
daal::services::ErrorMemoryAllocationFailed
Definition: error_indexes.h:150
daal::data_management::interface1::DataSourceTemplate
Implements the abstract DataSourceIface interface.
Definition: data_source.h:464

For more complete information about compiler optimizations, see our Optimization Notice.