C++ API Reference for Intel® Data Analytics Acceleration Library 2020 Update 1

data_source_dictionary.h
1 /* file: data_source_dictionary.h */
2 /*******************************************************************************
3 * Copyright 2014-2020 Intel Corporation
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *******************************************************************************/
17 
18 /*
19 //++
20 // Implementation of a data source dictionary.
21 //--
22 */
23 
24 #ifndef __DATA_SOURCE_DICTIONARY_H__
25 #define __DATA_SOURCE_DICTIONARY_H__
26 
27 #include <map>
28 #include <string>
29 
30 #include "services/internal/buffer.h"
31 #include "data_management/features/defines.h"
32 #include "data_management/data/data_dictionary.h"
33 
34 namespace daal
35 {
36 namespace data_management
37 {
38 namespace interface1
39 {
40 
/**
 *  \brief Dictionary of a categorical feature: a std::map keyed by std::string
 *         whose mapped value is a std::pair<int, int>.
 *
 *  NOTE(review): the meaning of the two integers in the pair is not visible in this
 *  file - confirm against the code that fills the dictionary.
 */
46 class CategoricalFeatureDictionary : public std::map<std::string, std::pair<int, int> > { };
/** \brief Shared pointer to a CategoricalFeatureDictionary */
47 typedef services::SharedPtr<CategoricalFeatureDictionary> CategoricalFeatureDictionaryPtr;
48 
53 class DataSourceFeature : public SerializationIface
54 {
55 public:
56  NumericTableFeature ntFeature;
57  size_t name_length;
58  char *name;
59 
60  CategoricalFeatureDictionary *cat_dict;
61 
62 public:
66  DataSourceFeature() :
67  name(NULL),
68  name_length(0),
69  cat_dict(NULL) { }
70 
74  DataSourceFeature(const DataSourceFeature &other)
75  {
76  assign(other);
77  }
78 
82  DataSourceFeature &operator= (const DataSourceFeature &other)
83  {
84  return assign(other);
85  }
86 
88  virtual ~DataSourceFeature()
89  {
90  if (_catDictPtr.get() != cat_dict)
91  {
92  delete cat_dict;
93  }
94  }
95 
99  services::String getFeatureName() const
100  {
101  return services::String(name);
102  }
103 
108  CategoricalFeatureDictionary *getCategoricalDictionary()
109  {
110  if (!cat_dict)
111  {
112  cat_dict = new CategoricalFeatureDictionary();
113  _catDictPtr = CategoricalFeatureDictionaryPtr(cat_dict);
114  }
115 
116  return cat_dict;
117  }
118 
119  void setCategoricalDictionary(const CategoricalFeatureDictionaryPtr &dictionary)
120  {
121  if (_catDictPtr.get() != cat_dict)
122  {
123  delete cat_dict;
124  cat_dict = NULL;
125  }
126 
127  _catDictPtr = dictionary;
128  cat_dict = dictionary.get();
129  }
130 
135  void setFeatureName(const services::String &featureName)
136  {
137  _name = featureName;
138  synchRawAndStringNames();
139  }
140 
145  template<typename T>
146  void setType()
147  {
148  ntFeature.setType<T>();
149  }
150 
152  services::Status serializeImpl(InputDataArchive *arch) DAAL_C11_OVERRIDE
153  {
154  return serialImpl<InputDataArchive, false>(arch);
155  }
156 
158  services::Status deserializeImpl(const OutputDataArchive *arch) DAAL_C11_OVERRIDE
159  {
160  return serialImpl<const OutputDataArchive, true>(arch);
161  }
162 
164  template<typename Archive, bool onDeserialize>
165  services::Status serialImpl( Archive *arch )
166  {
167  services::Status status;
168 
169  arch->setObj(&ntFeature);
170  arch->set(name_length);
171 
172  if (onDeserialize)
173  {
174  if (name_length > 0)
175  {
176  _name = services::String(name_length);
177  synchRawAndStringNames();
178  }
179  }
180 
181  arch->set(name, name_length);
182 
183  const int categoricalFeatureDictionaryFlag = (cat_dict != 0);
184  arch->set(categoricalFeatureDictionaryFlag);
185 
186  if (categoricalFeatureDictionaryFlag)
187  {
188  if (onDeserialize)
189  {
190  /* Make sure that dictionary is allocated */
191  getCategoricalDictionary();
192  /* Make sure that dictionary is empty */
193  cat_dict->empty();
194  }
195 
196  size_t size = cat_dict->size();
197  arch->set(size);
198 
199  if (onDeserialize)
200  {
201  const size_t initialBuffSize = 10;
202  services::internal::Buffer<char> buff(initialBuffSize, &status);
203  DAAL_CHECK_STATUS_VAR(status);
204 
205  for (size_t i = 0; i < size; i++)
206  {
207  size_t catNameLen = 0;
208  int catV1 = 0;
209  int catV2 = 0;
210 
211  arch->set(catNameLen);
212  if (catNameLen > buff.size())
213  {
214  DAAL_CHECK_STATUS( status, buff.reallocate(catNameLen) );
215  }
216  arch->set(buff.data(), catNameLen);
217  arch->set(catV1);
218  arch->set(catV2);
219 
220  (*cat_dict)[ std::string(buff.data(), catNameLen) ] = std::pair<int,int>(catV1, catV2);
221  }
222  }
223  else
224  {
225  typedef CategoricalFeatureDictionary::iterator it_type;
226 
227  for (it_type it=cat_dict->begin(); it != cat_dict->end(); it++)
228  {
229  const std::string & catName = it->first;
230  size_t catNameLen = catName.size();
231  int catV1 = it->second.first;
232  int catV2 = it->second.second;
233 
234  arch->set(catNameLen);
235  arch->set(catName.c_str(), catNameLen);
236  arch->set(catV1);
237  arch->set(catV2);
238  }
239  }
240  }
241  else
242  {
243  cat_dict = NULL;
244  _catDictPtr = CategoricalFeatureDictionaryPtr();
245  }
246 
247  return status;
248  }
249 
250  virtual int getSerializationTag() const DAAL_C11_OVERRIDE
251  {
252  return SERIALIZATION_DATAFEATURE_NT_ID;
253  }
254 
255  features::IndexNumType getIndexType() const
256  {
257  return ntFeature.indexType;
258  }
259 
260 private:
261  DataSourceFeature &assign(const DataSourceFeature& other)
262  {
263  _name = other._name;
264  _catDictPtr = other._catDictPtr;
265  ntFeature = other.ntFeature;
266  cat_dict = other.cat_dict;
267 
268  if (other.name == other._name.c_str())
269  {
270  synchRawAndStringNames();
271  }
272  else
273  {
274  name = other.name;
275  name_length = other.name_length;
276  }
277 
278  return *this;
279  }
280 
281  void synchRawAndStringNames()
282  {
283  name_length = _name.length();
284  name = const_cast<char *>(_name.c_str());
285  }
286 
287 private:
288  services::String _name;
289  CategoricalFeatureDictionaryPtr _catDictPtr;
290 };
291 
/** \brief Dictionary specialization whose features are DataSourceFeature objects,
 *         tagged with SERIALIZATION_DATADICTIONARY_DS_ID */
292 typedef Dictionary<DataSourceFeature, SERIALIZATION_DATADICTIONARY_DS_ID> DataSourceDictionary;
/** \brief Shared pointer to a DataSourceDictionary */
293 typedef services::SharedPtr<DataSourceDictionary> DataSourceDictionaryPtr;
296 } // namespace interface1
297 
298 using interface1::CategoricalFeatureDictionary;
299 using interface1::CategoricalFeatureDictionaryPtr;
300 using interface1::DataSourceFeature;
301 using interface1::DataSourceDictionary;
302 using interface1::DataSourceDictionaryPtr;
303 
304 } // namespace data_management
305 } // namespace daal
306 
307 #endif
daal::data_management::interface1::CategoricalFeatureDictionary
Definition: data_source_dictionary.h:46
daal::data_management::interface1::DataSourceFeature::DataSourceFeature
DataSourceFeature()
Definition: data_source_dictionary.h:66
daal::data_management::interface1::DataSourceFeature::getSerializationTag
virtual int getSerializationTag() const DAAL_C11_OVERRIDE
Definition: data_source_dictionary.h:250
daal::data_management::interface1::NumericTableFeature::setType
void setType()
Definition: data_dictionary.h:96
daal::data_management::interface1::DataSourceFeature::setFeatureName
void setFeatureName(const services::String &featureName)
Definition: data_source_dictionary.h:135
daal::data_management::interface1::SerializationIface
Abstract interface class that defines the interface for serialization and deserialization.
Definition: data_serialize.h:52
daal::data_management::interface1::DataSourceFeature::operator=
DataSourceFeature & operator=(const DataSourceFeature &other)
Definition: data_source_dictionary.h:82
daal::data_management::interface1::DataSourceFeature::getFeatureName
services::String getFeatureName() const
Definition: data_source_dictionary.h:99
daal::data_management::interface1::DataSourceFeature::DataSourceFeature
DataSourceFeature(const DataSourceFeature &other)
Definition: data_source_dictionary.h:74
daal::data_management::interface1::DataSourceFeature::setType
void setType()
Definition: data_source_dictionary.h:146
daal::data_management::interface1::NumericTableFeature
Data structure describes the Numeric Table feature.
Definition: data_dictionary.h:53
daal::data_management::interface1::InputDataArchive
Provides methods to create an archive data object (serialized) and access this object.
Definition: data_archive.h:725
daal::data_management::interface1::DataSourceFeature
Data structure that describes the Data Source feature.
Definition: data_source_dictionary.h:53
daal::data_management::interface1::DataSourceFeature::getCategoricalDictionary
CategoricalFeatureDictionary * getCategoricalDictionary()
Definition: data_source_dictionary.h:108

For more complete information about compiler optimizations, see our Optimization Notice.