C++ API Reference for Intel® Data Analytics Acceleration Library 2020 Update 1

string_data_source.h
1 /* file: string_data_source.h */
2 /*******************************************************************************
3 * Copyright 2014-2020 Intel Corporation
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *******************************************************************************/
17 
18 /*
19 //++
20 // Implementation of the string data source class.
21 //--
22 */
23 
24 #ifndef __STRING_DATA_SOURCE_H__
25 #define __STRING_DATA_SOURCE_H__
26 
27 #include "services/daal_memory.h"
28 #include "data_management/data_source/data_source.h"
29 #include "data_management/data_source/csv_data_source.h"
30 #include "data_management/data/data_dictionary.h"
31 #include "data_management/data/numeric_table.h"
32 #include "data_management/data/homogen_numeric_table.h"
33 
34 namespace daal
35 {
36 namespace data_management
37 {
38 
39 namespace interface1
40 {
50 template<typename FeatureManager, typename SummaryStatisticsType = DAAL_SUMMARY_STATISTICS_TYPE>
51 class StringDataSource : public CsvDataSource<FeatureManager, SummaryStatisticsType>
52 {
53 private:
54  typedef CsvDataSource<FeatureManager, SummaryStatisticsType> super;
55  typedef data_management::HomogenNumericTable<DAAL_DATA_TYPE> DefaultNumericTableType;
56 
57 protected:
58  using super::_rawLineBuffer;
59  using super::_rawLineBufferLen;
60  using super::_rawLineLength;
61  using super::_status;
62 
63 public:
73  StringDataSource(const byte *data,
74  DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable = DataSource::notAllocateNumericTable,
75  DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext = DataSource::notDictionaryFromContext,
76  size_t initialMaxRows = 10):
77  super(doAllocateNumericTable, doCreateDictionaryFromContext, initialMaxRows),
78  _contextDictFlag(false)
79  {
80  setData(data);
81  }
82 
87  void setData( const byte *data )
88  {
89  if( !data )
90  {
91  _status.add(services::throwIfPossible(services::Status(services::ErrorNullPtr)));
92  return;
93  }
94  _stringBufferPos = 0;
95  _stringBuffer = (char *)data;
96  }
97 
102  const byte *getData()
103  {
104  return (const byte *)(_stringBuffer);
105  }
106 
110  void resetData()
111  {
112  _stringBufferPos = 0;
113  }
114 
115 public:
116  services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
117  {
118  services::Status s = super::createDictionaryFromContext();
119  _stringBufferPos = 0;
120  return s;
121  }
122 
123  DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
124  {
125  return (iseof() ? DataSourceIface::endOfData : DataSourceIface::readyForLoad);
126  }
127 
128 protected:
129  bool iseof() const DAAL_C11_OVERRIDE
130  {
131  return (_stringBuffer[_stringBufferPos] == '\0');
132  }
133 
134  int readLine(char *buffer, int count)
135  {
136  int pos = 0;
137  for(;pos<count-1;pos++)
138  {
139  buffer[pos] = _stringBuffer[_stringBufferPos+pos];
140 
141  if( buffer[pos]=='\0' || buffer[pos]=='\n' )
142  {
143  break;
144  }
145  }
146  if(buffer[pos]=='\n')
147  {
148  pos++;
149  }
150  _stringBufferPos += pos;
151  buffer[pos] = '\0';
152  return pos;
153  }
154 
155  services::Status readLine() DAAL_C11_OVERRIDE
156  {
157  _rawLineLength = 0;
158  while(!iseof())
159  {
160  const int readLen = readLine (_rawLineBuffer + _rawLineLength, (int)(_rawLineBufferLen - _rawLineLength));
161  if (readLen <= 0)
162  {
163  _rawLineLength = 0;
164  return services::Status();
165  }
166  _rawLineLength += readLen;
167  if (_rawLineBuffer[_rawLineLength - 1] == '\n' || _rawLineBuffer[_rawLineLength - 1] == '\r')
168  {
169  while (_rawLineLength > 0 && (_rawLineBuffer[_rawLineLength - 1] == '\n' || _rawLineBuffer[_rawLineLength - 1] == '\r'))
170  {
171  _rawLineLength--;
172  }
173  _rawLineBuffer[_rawLineLength] = '\0';
174  return services::Status();
175  }
176  if(!super::enlargeBuffer())
177  return services::Status(services::ErrorMemoryAllocationFailed);
178  }
179  return services::Status();
180  }
181 
182 private:
183  char *_stringBuffer;
184  size_t _stringBufferPos;
185 
186  bool _contextDictFlag;
187 };
189 } // namespace interface1
190 using interface1::StringDataSource;
191 
192 }
193 }
194 #endif
daal::data_management::interface1::DataSourceIface::NumericTableAllocationFlag
NumericTableAllocationFlag
Specifies whether a Numeric Table is allocated inside of the Data Source object.
Definition: data_source.h:80
daal::data_management::interface1::CsvDataSource
Specifies methods to access data stored in files.
Definition: csv_data_source.h:100
daal::data_management::interface1::CsvDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: csv_data_source.h:327
daal::algorithms::association_rules::data
Definition: apriori_types.h:83
daal::data_management::interface1::DataSourceIface::notAllocateNumericTable
Definition: data_source.h:82
daal::data_management::interface1::DataSourceIface::notDictionaryFromContext
Definition: data_source.h:72
daal::data_management::interface1::StringDataSource::getStatus
DataSourceIface::DataSourceStatus getStatus() DAAL_C11_OVERRIDE
Definition: string_data_source.h:123
daal::services::ErrorNullPtr
Definition: error_indexes.h:141
daal::data_management::interface1::DataSourceIface::readyForLoad
Definition: data_source.h:60
daal::data_management::interface1::StringDataSource
Specifies methods to access data stored in byte arrays in the C-string format.
Definition: string_data_source.h:51
daal::data_management::interface1::StringDataSource::StringDataSource
StringDataSource(const byte *data, DataSourceIface::NumericTableAllocationFlag doAllocateNumericTable=DataSource::notAllocateNumericTable, DataSourceIface::DictionaryCreationFlag doCreateDictionaryFromContext=DataSource::notDictionaryFromContext, size_t initialMaxRows=10)
Definition: string_data_source.h:73
daal::data_management::interface1::StringDataSource::createDictionaryFromContext
services::Status createDictionaryFromContext() DAAL_C11_OVERRIDE
Definition: string_data_source.h:116
daal::data_management::interface1::DataSourceIface::DataSourceStatus
DataSourceStatus
Specifies the status of the Data Source.
Definition: data_source.h:58
daal::data_management::interface1::DataSourceIface::endOfData
Definition: data_source.h:62
daal::data_management::interface1::DataSourceIface::doAllocateNumericTable
Definition: data_source.h:83
daal::data_management::interface1::StringDataSource::resetData
void resetData()
Definition: string_data_source.h:110
daal::data_management::interface1::DataSourceIface::DictionaryCreationFlag
DictionaryCreationFlag
Specifies whether a Data Dictionary is created from the context of a Data Source. ...
Definition: data_source.h:70
daal::services::ErrorMemoryAllocationFailed
Definition: error_indexes.h:150
daal::data_management::interface1::StringDataSource::getData
const byte * getData()
Definition: string_data_source.h:102
daal::data_management::interface1::StringDataSource::setData
void setData(const byte *data)
Definition: string_data_source.h:87
daal::data_management::interface1::DataSourceTemplate
Implements the abstract DataSourceIface interface.
Definition: data_source.h:464

For more complete information about compiler optimizations, see our Optimization Notice.