C++ API Reference for Intel® Data Analytics Acceleration Library 2020 Update 1

csv/internal/default_modifiers.h
1 /* file: default_modifiers.h */
2 /*******************************************************************************
3 * Copyright 2014-2020 Intel Corporation
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *******************************************************************************/
17 
18 #ifndef __DATA_SOURCE_MODIFIERS_CSV_DEFAULT_MODIFIERS_H__
19 #define __DATA_SOURCE_MODIFIERS_CSV_DEFAULT_MODIFIERS_H__
20 
21 #include "services/daal_shared_ptr.h"
22 #include "services/internal/collection.h"
23 
24 #include "data_management/features/defines.h"
25 #include "data_management/data_source/modifiers/csv/modifier.h"
26 
27 namespace daal
28 {
29 namespace data_management
30 {
31 namespace modifiers
32 {
33 namespace csv
34 {
35 namespace internal
36 {
37 
42 class FeatureModifierPrimitive : public Base
43 {
44 public:
45  virtual void initialize(Config &context, size_t index) { }
46  virtual DAAL_DATA_TYPE apply(Context &context, size_t index) = 0;
47  virtual void finalize(Config &context, size_t index) { }
48 };
49 
54 class DefaultFeatureModifierPrimitive : public FeatureModifierPrimitive
55 {
56 public:
57  virtual DAAL_DATA_TYPE apply(Context &context, size_t index) DAAL_C11_OVERRIDE
58  {
59  return (DAAL_DATA_TYPE)0;
60  }
61 };
62 
67 class ContinuousFeatureModifierPrimitive : public FeatureModifierPrimitive
68 {
69 public:
70  virtual void initialize(Config &config, size_t index) DAAL_C11_OVERRIDE
71  {
72  config.setOutputFeatureType(index, features::DAAL_CONTINUOUS);
73  }
74 
75  virtual DAAL_DATA_TYPE apply(Context &context, size_t index) DAAL_C11_OVERRIDE
76  {
77  return context.getTokenAs<DAAL_DATA_TYPE>(index);
78  }
79 };
80 
85 class CategoricalFeatureModifierPrimitive : public FeatureModifierPrimitive
86 {
87 public:
88  CategoricalFeatureModifierPrimitive() :
89  _catDict(new CategoricalFeatureDictionary()) { }
90 
91  virtual void initialize(Config &config, size_t index) DAAL_C11_OVERRIDE
92  {
93  config.setOutputFeatureType(index, features::DAAL_CATEGORICAL);
94  }
95 
96  virtual DAAL_DATA_TYPE apply(Context &context, size_t index) DAAL_C11_OVERRIDE
97  {
98  const services::StringView token = context.getToken(index);
99  const std::string sToken(token.begin(), token.end());
100  const CategoricalFeatureDictionary::iterator it = _catDict->find(sToken);
101 
102  if (it != _catDict->end())
103  {
104  it->second.second++;
105  return (DAAL_DATA_TYPE)it->second.first;
106  }
107  else
108  {
109  const int itemIndex = (int)(_catDict->size());
110  const std::pair<int, int> indexPair(itemIndex, 1);
111  (*_catDict)[sToken] = indexPair;
112  return (DAAL_DATA_TYPE)itemIndex;
113  }
114  }
115 
116  virtual void finalize(Config &config, size_t index) DAAL_C11_OVERRIDE
117  {
118  const size_t numberOfCategories = _catDict->size();
119  config.setNumberOfCategories(index, numberOfCategories);
120  config.setCategoricalDictionary(index, _catDict);
121  }
122 
123 private:
124  CategoricalFeatureDictionaryPtr _catDict;
125 };
126 
131 class ContinuousFeatureModifier : public FeatureModifier
132 {
133 public:
134  virtual void initialize(Config &config) DAAL_C11_OVERRIDE
135  {
136  FeatureModifier::initialize(config);
137 
138  const size_t numberOfFeatures = config.getNumberOfInputFeatures();
139  for (size_t i = 0; i < numberOfFeatures; i++)
140  {
141  config.setOutputFeatureType(i, features::DAAL_CONTINUOUS);
142  }
143  }
144 
145  virtual void apply(Context &context) DAAL_C11_OVERRIDE
146  {
147  services::BufferView<DAAL_DATA_TYPE> outputBuffer = context.getOutputBuffer();
148  for (size_t i = 0; i < outputBuffer.size(); i++)
149  {
150  outputBuffer[i] = context.getTokenAs<DAAL_DATA_TYPE>(i);
151  }
152  }
153 };
154 
159 class CategoricalFeatureModifier : public FeatureModifier
160 {
161 public:
162  virtual void initialize(Config &config) DAAL_C11_OVERRIDE
163  {
164  FeatureModifier::initialize(config);
165 
166  const size_t numberOfInputFeatures = config.getNumberOfInputFeatures();
167  _primitives = services::Collection<CategoricalFeatureModifierPrimitive>(numberOfInputFeatures);
168  if ( !_primitives.data() )
169  {
170  services::throwIfPossible(services::ErrorMemoryAllocationFailed);
171  return;
172  }
173 
174  for (size_t i = 0; i < numberOfInputFeatures; i++)
175  {
176  _primitives[i].initialize(config, i);
177  }
178  }
179 
180  virtual void apply(Context &context) DAAL_C11_OVERRIDE
181  {
182  services::BufferView<DAAL_DATA_TYPE> outputBuffer = context.getOutputBuffer();
183  for (size_t i = 0; i < outputBuffer.size(); i++)
184  {
185  outputBuffer[i] = _primitives[i].apply(context, i);
186  }
187  }
188 
189  virtual void finalize(Config &config) DAAL_C11_OVERRIDE
190  {
191  FeatureModifier::finalize(config);
192 
193  const size_t numberOfOutputFeatures = config.getNumberOfInputFeatures();
194  for (size_t i = 0; i < numberOfOutputFeatures; i++)
195  {
196  _primitives[i].finalize(config, i);
197  }
198  }
199 
200 private:
201  services::Collection<CategoricalFeatureModifierPrimitive> _primitives;
202 };
203 
208 class AutomaticFeatureModifier : public FeatureModifier
209 {
210 public:
211  virtual void initialize(Config &config) DAAL_C11_OVERRIDE
212  {
213  FeatureModifier::initialize(config);
214 
215  const size_t numberOfInputFeatures = config.getNumberOfInputFeatures();
216  for (size_t i = 0; i < numberOfInputFeatures; i++)
217  {
218  FeatureModifierPrimitive *primitive =
219  createPrimitive(config.getInputFeatureDetectedType(i));
220 
221  if ( !_primitives.push_back(primitive) )
222  {
223  services::throwIfPossible(services::ErrorMemoryAllocationFailed);
224  return;
225  }
226 
227  primitive->initialize(config, i);
228  }
229  }
230 
231  virtual void apply(Context &context) DAAL_C11_OVERRIDE
232  {
233  services::BufferView<DAAL_DATA_TYPE> outputBuffer = context.getOutputBuffer();
234  for (size_t i = 0; i < outputBuffer.size(); i++)
235  {
236  outputBuffer[i] = _primitives[i].apply(context, i);
237  }
238  }
239 
240  virtual void finalize(Config &config) DAAL_C11_OVERRIDE
241  {
242  FeatureModifier::finalize(config);
243 
244  const size_t numberOfOutputFeatures = config.getNumberOfInputFeatures();
245  for (size_t i = 0; i < numberOfOutputFeatures; i++)
246  {
247  _primitives[i].finalize(config, i);
248  }
249  }
250 
251 private:
252  FeatureModifierPrimitive *createPrimitive(features::FeatureType featureType)
253  {
254  switch (featureType)
255  {
256  case features::DAAL_CONTINUOUS:
257  return new ContinuousFeatureModifierPrimitive();
258 
259  case features::DAAL_ORDINAL:
260  case features::DAAL_CATEGORICAL:
261  return new CategoricalFeatureModifierPrimitive();
262  }
263  return new DefaultFeatureModifierPrimitive();
264  }
265 
266 private:
267  services::internal::ObjectPtrCollection<FeatureModifierPrimitive> _primitives;
268 };
269 typedef services::SharedPtr<AutomaticFeatureModifier> AutomaticFeatureModifierPtr;
270 
271 } // namespace internal
272 } // namespace csv
273 } // namespace modifiers
274 } // namespace data_management
275 } // namespace daal
276 
277 #endif
daal::services::internal::ObjectPtrCollection
Class that implements functionality of collection container and holds pointers to objects of specifie...
Definition: internal/collection.h:39
daal::Base
Base class for Intel(R) Data Analytics Acceleration Library objects
Definition: base.h:41
daal::data_management::modifiers::csv::internal::DefaultFeatureModifierPrimitive
Default implementation of primitive feature modifier.
Definition: csv/internal/default_modifiers.h:54
daal::data_management::modifiers::csv::internal::ContinuousFeatureModifierPrimitive
Primitive feature modifier that parses tokens as continuous features.
Definition: csv/internal/default_modifiers.h:67
daal::data_management::modifiers::csv::internal::CategoricalFeatureModifier
Feature modifier that parses tokens as categorical features.
Definition: csv/internal/default_modifiers.h:159
daal::data_management::modifiers::csv::internal::ContinuousFeatureModifier
Feature modifier that parses tokens as continuous features.
Definition: csv/internal/default_modifiers.h:131
daal::services::ErrorMemoryAllocationFailed
Definition: error_indexes.h:150
daal::data_management::modifiers::csv::internal::FeatureModifierPrimitive
Primitive modifier that is applicable to a single column.
Definition: csv/internal/default_modifiers.h:42
daal::data_management::modifiers::csv::internal::AutomaticFeatureModifier
Feature modifier that determines suitable feature type and parses tokens according to determined type...
Definition: csv/internal/default_modifiers.h:208
daal::data_management::modifiers::csv::internal::CategoricalFeatureModifierPrimitive
Primitive feature modifier that parses tokens as categorical features.
Definition: csv/internal/default_modifiers.h:85

For more complete information about compiler optimizations, see our Optimization Notice.