KSquare Utilities
KKMLL::FeatureEncoder2 Class Reference

#include <FeatureEncoder2.h>

Classes

struct  FeatureVar2
 
class  FeatureVar2List
 

Public Types

typedef FeatureEncoder2 * FeatureEncoder2Ptr
 
enum  FeWhatToDo { FeWhatToDo::FeAsIs, FeWhatToDo::FeBinary, FeWhatToDo::FeScale }
 
typedef FeWhatToDo * FeWhatToDoPtr
 

Public Member Functions

 FeatureEncoder2 (const ModelParam &_param, FileDescPtr _fileDesc)
 Constructs a Feature Encoder object. More...
 
 FeatureEncoder2 (const FeatureEncoder2 &_encoder)
 
 ~FeatureEncoder2 ()
 Frees any memory allocated by, and owned by, the FeatureEncoder2. More...
 
kkint32 CodedNumOfFeatures () const
 
FileDescPtr CreateEncodedFileDesc (ostream *o, RunLog &log) const
 
FeatureVectorPtr EncodeAExample (FeatureVectorPtr src) const
 
FeatureVectorListPtr EncodeAllExamples (const FeatureVectorListPtr srcData)
 
FeatureVectorListPtr EncodedFeatureVectorList (const FeatureVectorList &srcData) const
 
kkint32 MemoryConsumedEstimated () const
 
kkint32 NumEncodedFeatures () const
 
void ReadXML (istream &i)
 
void WriteXML (istream &o)
 

Detailed Description

This will eventually replace "FeatureEncoder" when we shift over
to the new paradigm where all Machine Learning Algorithms will be
derived from 'Model'.

Definition at line 27 of file FeatureEncoder2.h.

Member Typedef Documentation

Member Enumeration Documentation

Constructor & Destructor Documentation

FeatureEncoder2::FeatureEncoder2 ( const ModelParam &  _param,
FileDescPtr  _fileDesc 
)

Constructs a Feature Encoder object.

Parameters
[in] _param
[in] _fileDesc
[in] _log  A log-file stream. All important events will be output to this stream.

Definition at line 33 of file FeatureEncoder2.cpp.

References KKMLL::FileDesc::AttributeVector(), KKMLL::FileDesc::CardinalityVector(), KKMLL::ModelParam::EncodingMethod(), FeAsIs, KKMLL::FileDesc::NewContinuousDataOnly(), KKMLL::ModelParam::NoEncoding, KKMLL::FeatureNumList::NumOfFeatures(), KKMLL::FeatureNumList::operator[](), KKMLL::ModelParam::Scaled, and KKMLL::ModelParam::SelectedFeatures().

Referenced by KKMLL::Model::TrainModel().

35  :
36  attributeVector (_fileDesc->AttributeVector ()),
37  cardinalityDest (NULL),
38  cardinalityVector (_fileDesc->CardinalityVector ()),
39  codedNumOfFeatures (0),
40  destFeatureNums (NULL),
41  destWhatToDo (NULL),
42  encodedFileDesc (NULL),
44  fileDesc (_fileDesc),
45  numOfFeatures (0),
46  srcFeatureNums (NULL),
47  param (_param)
48 
49 {
50  FeatureNumListConstPtr selectedFeatures = param.SelectedFeatures ();
51  numOfFeatures = param.SelectedFeatures ()->NumOfFeatures ();
52 
53  encodingMethod = param.EncodingMethod ();
54 
55  srcFeatureNums = new kkuint16 [numOfFeatures];
56  cardinalityDest = new kkint32 [numOfFeatures];
57  destFeatureNums = new kkint32 [numOfFeatures];
58  destWhatToDo = new FeWhatToDo[numOfFeatures];
59 
60  VectorKKStr destFieldNames;
61 
62  kkint32 x;
63 
64  for (x = 0; x < numOfFeatures; x++)
65  {
66  kkuint16 srcFeatureNum = (*selectedFeatures)[x];
67  srcFeatureNums [x] = srcFeatureNum;
68  destFeatureNums [x] = codedNumOfFeatures;
69  cardinalityDest [x] = 1;
70  destWhatToDo [x] = FeWhatToDo::FeAsIs;
71 
72  Attribute srcAttribute = (fileDesc->Attributes ())[srcFeatureNum];
73 
74  switch (encodingMethod)
75  {
77  if ((attributeVector[srcFeatureNum] == AttributeType::Nominal) ||
78  (attributeVector[srcFeatureNum] == AttributeType::Symbolic)
79  )
80  {
81  destWhatToDo [x] = FeWhatToDo::FeBinary;
82  cardinalityDest [x] = cardinalityVector[srcFeatureNums [x]];
83  codedNumOfFeatures += cardinalityDest[x];
84  for (kkint32 zed = 0; zed < cardinalityDest[x]; zed++)
85  {
86  KKStr fieldName = srcAttribute.Name () + "_" + srcAttribute.GetNominalValue (zed);
87  destFieldNames.push_back (fieldName);
88  }
89  }
90  else
91  {
92  codedNumOfFeatures++;
93  destWhatToDo [x] = FeWhatToDo::FeAsIs;
94  destFieldNames.push_back (srcAttribute.Name ());
95  }
96  break;
97 
98 
100  codedNumOfFeatures++;
101  if ((attributeVector[srcFeatureNums[x]] == AttributeType::Nominal) ||
102  (attributeVector[srcFeatureNums[x]] == AttributeType::Symbolic)
103  )
104  destWhatToDo [x] = FeWhatToDo::FeScale;
105  else
106  destWhatToDo [x] = FeWhatToDo::FeAsIs;
107 
108  destFieldNames.push_back (srcAttribute.Name ());
109  break;
110 
111 
113  default:
114  codedNumOfFeatures++;
115  destWhatToDo [x] = FeWhatToDo::FeAsIs;
116  destFieldNames.push_back (srcAttribute.Name ());
117  break;
118  }
119  }
120 
121  encodedFileDesc = FileDesc::NewContinuousDataOnly (destFieldNames);
122 }
const VectorInt32 & CardinalityVector() const
Definition: FileDesc.h:116
__int32 kkint32
Definition: KKBaseTypes.h:88
const KKStr & GetNominalValue(kkint32 code) const
Returns the nominal value for the given ordinal value.
Definition: Attribute.cpp:143
unsigned __int16 kkuint16
16 bit unsigned integer.
Definition: KKBaseTypes.h:86
const AttributeTypeVector & AttributeVector() const
Definition: FileDesc.h:115
virtual EncodingMethodType EncodingMethod() const
Definition: ModelParam.h:111
describes a single Feature, Type and possible values.
Definition: Attribute.h:74
virtual FeatureNumListConstPtr SelectedFeatures() const
Definition: ModelParam.h:116
static FileDescPtr NewContinuousDataOnly(VectorKKStr &_fieldNames)
Creates a simple FileDesc that consists of continuous data only.
Definition: FileDesc.cpp:116
FeatureNumListConst * FeatureNumListConstPtr
const KKStr & Name() const
Definition: Attribute.h:122
const KKMLL::AttributeList & Attributes() const
Definition: FileDesc.h:114
FeatureEncoder2::FeatureEncoder2 ( const FeatureEncoder2 &  _encoder)

Definition at line 127 of file FeatureEncoder2.cpp.

Referenced by KKMLL::Model::Model().

127  :
128  attributeVector (_encoder.attributeVector),
129  cardinalityDest (NULL),
130  cardinalityVector (_encoder.cardinalityVector),
131  codedNumOfFeatures (_encoder.codedNumOfFeatures),
132  destFeatureNums (NULL),
133  destWhatToDo (NULL),
134  encodedFileDesc (_encoder.encodedFileDesc),
135  encodingMethod (_encoder.encodingMethod),
136  fileDesc (_encoder.fileDesc),
137  numOfFeatures (_encoder.numOfFeatures),
138  srcFeatureNums (NULL),
139  param (_encoder.param)
140 {
141  cardinalityDest = new kkint32[numOfFeatures];
142  destFeatureNums = new kkint32[numOfFeatures];
143  destWhatToDo = new FeWhatToDo[numOfFeatures];
144  srcFeatureNums = new kkuint16[numOfFeatures];
145 
146  kkint32 x;
147  for (x = 0; x < numOfFeatures; x++)
148  {
149  srcFeatureNums [x] = _encoder.srcFeatureNums [x];
150  destFeatureNums [x] = _encoder.destFeatureNums[x];
151  cardinalityDest [x] = _encoder.cardinalityDest[x];
152  destWhatToDo [x] = _encoder.destWhatToDo [x];
153  srcFeatureNums [x] = _encoder.srcFeatureNums [x];
154  }
155 }
__int32 kkint32
Definition: KKBaseTypes.h:88
unsigned __int16 kkuint16
16 bit unsigned integer.
Definition: KKBaseTypes.h:86
FeatureEncoder2::~FeatureEncoder2 ( )

Frees any memory allocated by, and owned by, the FeatureEncoder2.

Definition at line 162 of file FeatureEncoder2.cpp.

163 {
164  delete srcFeatureNums;
165  delete destFeatureNums;
166  delete cardinalityDest;
167  delete destWhatToDo;
168 }

Member Function Documentation

kkint32 KKMLL::FeatureEncoder2::CodedNumOfFeatures ( ) const
inline

Definition at line 55 of file FeatureEncoder2.h.

55 {return codedNumOfFeatures;}
FileDescPtr FeatureEncoder2::CreateEncodedFileDesc ( ostream *  o,
RunLog &  log 
) const

If 'o' is not NULL, a table showing the assignments from old to new fields is written to it.

Definition at line 195 of file FeatureEncoder2.cpp.

References KKMLL::FileDesc::AddAAttribute(), KKB::KKStr::Concat(), FeAsIs, FeBinary, FeScale, KKMLL::FileDesc::FieldName(), KKMLL::FileDesc::FileDesc(), KKMLL::FileDesc::GetExistingFileDesc(), KKMLL::FileDesc::GetNominalValue(), KKMLL::Numeric, KKB::KKStr::operator+(), KKB::osWaitForEnter(), KKB::StrFormatInt(), and KKMLL::FileDesc::TypeStr().

198 {
199  log.Level (40) << "FeatureEncoder2::CreateEncodedFileDesc" << endl;
200  FileDescPtr newFileDesc = new FileDesc ();
201 
202  if (o)
203  {
204  *o << endl
205  << "Orig" << "\t" << "Orig" << "\t" << "Field" << "\t" << "Encoded" << "\t" << "Encoded" << endl;
206  *o << "FieldNum" << "\t" << "FieldName" << "\t" << "Type" << "\t" << "FieldNum" << "\t" << "FieldName" << endl;
207  }
208 
209  kkint32 x;
210 
211  bool alreadyExist;
212 
213  for (x = 0; x < numOfFeatures; x++)
214  {
215  kkuint16 srcFeatureNum = srcFeatureNums[x];
216  kkint32 y = destFeatureNums[x];
217 
218  if (y >= codedNumOfFeatures)
219  {
220  log.Level(-1)
221  << endl
222  << "FeatureEncoder2::CreateEncodedFileDesc ***ERROR***" << endl
223  << " overriding number of encoded features. This should never be able to happen." << endl
224  << " Something is wrong with object." << endl
225  << endl;
226  osWaitForEnter ();
227  exit (-1);
228  }
229 
230  KKStr origFieldDesc = StrFormatInt (srcFeatureNum, "zz0") + "\t" +
231  fileDesc->FieldName (srcFeatureNum) + "\t" +
232  fileDesc->TypeStr (srcFeatureNum);
233 
234  switch (destWhatToDo[x])
235  {
236  case FeWhatToDo::FeAsIs:
237  {
238  newFileDesc->AddAAttribute (fileDesc->FieldName (x), AttributeType::Numeric, alreadyExist);
239  if (o)
240  {
241  *o << origFieldDesc << "\t"
242  << y << "\t"
243  << fileDesc->FieldName (x)
244  << endl;
245  }
246  }
247  break;
248 
250  {
251  for (kkint32 z = 0; z < cardinalityDest[x]; z++)
252  {
253  KKStr nominalValue = fileDesc->GetNominalValue (srcFeatureNums[x], z);
254  KKStr encodedName = fileDesc->FieldName (x) + "_" + nominalValue;
255  newFileDesc->AddAAttribute (encodedName, AttributeType::Numeric, alreadyExist);
256  if (o)
257  {
258  *o << origFieldDesc << "\t"
259  << y << "\t"
260  << encodedName
261  << endl;
262  }
263 
264  y++;
265  }
266  }
267 
268  break;
269 
270  case FeWhatToDo::FeScale:
271  {
272  newFileDesc->AddAAttribute (fileDesc->FieldName (x), AttributeType::Numeric, alreadyExist);
273  if (o)
274  {
275  *o << origFieldDesc << "\t"
276  << y << "\t"
277  << fileDesc->FieldName (x)
278  << endl;
279  }
280  }
281  break;
282  }
283  }
284 
285  newFileDesc = FileDesc::GetExistingFileDesc (newFileDesc);
286 
287  return newFileDesc;
288 } /* CreateEncodedFileDesc */
KKStr TypeStr(kkint32 fieldNum) const
Definition: FileDesc.cpp:378
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
Provides a detailed description of the attributes of a dataset.
Definition: FileDesc.h:72
static FileDescPtr GetExistingFileDesc(FileDescPtr fileDesc)
Returns a pointer to an existing instance of 'fileDesc' if it exists, otherwise will use one being pa...
Definition: FileDesc.cpp:555
__int32 kkint32
Definition: KKBaseTypes.h:88
void AddAAttribute(const KKB::KKStr &_name, KKMLL::AttributeType _type, bool &alreadyExists)
Definition: FileDesc.cpp:169
void osWaitForEnter()
unsigned __int16 kkuint16
16 bit unsigned integer.
Definition: KKBaseTypes.h:86
const KKStr & FieldName(kkint32 fieldNum) const
Definition: FileDesc.cpp:387
RunLog & Level(kkint32 _level)
Definition: RunLog.cpp:220
const KKStr & GetNominalValue(kkint32 fieldNum, kkint32 code) const
Definition: FileDesc.cpp:395
KKStr StrFormatInt(kkint32 val, const char *mask)
Definition: KKStr.cpp:5004
FeatureVectorPtr FeatureEncoder2::EncodeAExample ( FeatureVectorPtr  src) const

Definition at line 294 of file FeatureEncoder2.cpp.

References KKMLL::FeatureVector::AddFeatureData(), FeAsIs, KKMLL::FeatureVector::FeatureData(), KKMLL::FeatureVector::FeatureVector(), FeBinary, FeScale, KKMLL::FeatureVector::MLClass(), KKMLL::FeatureVector::PredictedClass(), and KKMLL::FeatureVector::TrainWeight().

Referenced by EncodeAllExamples(), EncodedFeatureVectorList(), and KKMLL::Model::PrepExampleForPrediction().

295 {
296  FeatureVectorPtr encodedImage = new FeatureVector (codedNumOfFeatures);
297  encodedImage->MLClass (src->MLClass ());
298  encodedImage->PredictedClass (src->PredictedClass ());
299  encodedImage->TrainWeight (src->TrainWeight ());
300 
301  const float* featureData = src->FeatureData ();
302  kkint32 x;
303 
304  for (x = 0; x < numOfFeatures; x++)
305  {
306  float featureVal = featureData [srcFeatureNums[x]];
307  kkint32 y = destFeatureNums[x];
308 
309  switch (destWhatToDo[x])
310  {
311  case FeWhatToDo::FeAsIs:
312  {
313  encodedImage->AddFeatureData (y, featureVal);
314  }
315  break;
316 
318  {
319  for (kkint32 z = 0; z < cardinalityDest[x]; z++)
320  {
321  float bVal = ((kkint32)featureVal == z);
322  encodedImage->AddFeatureData (y, bVal);
323  y++;
324  }
325  }
326 
327  break;
328 
329  case FeWhatToDo::FeScale:
330  {
331  encodedImage->AddFeatureData (y, (featureVal / (float)cardinalityDest[x]));
332  }
333  break;
334  }
335  }
336 
337  return encodedImage;
338 } /* EncodeAExample */
__int32 kkint32
Definition: KKBaseTypes.h:88
void TrainWeight(float _trainWeight)
Assign a specific example a higher weight for training purposes.
void AddFeatureData(kkint32 _featureNum, float _featureData)
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
Definition: FeatureVector.h:74
void FeatureData(kkint32 _featureNum, float _featureValue)
Assign a value to a specific feature number for the feature vector.
void PredictedClass(MLClassPtr _predictedClass)
Definition: FeatureVector.h:78
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
FeatureVectorListPtr FeatureEncoder2::EncodeAllExamples ( const FeatureVectorListPtr  srcData)

Definition at line 344 of file FeatureEncoder2.cpp.

References EncodeAExample(), KKMLL::FeatureVectorList::FeatureVectorList(), and KKMLL::FeatureVectorList::PushOnBack().

Referenced by KKMLL::Model::TrainModel().

345 {
346  FeatureVectorListPtr encodedExamples = new FeatureVectorList (encodedFileDesc,
347  true // Will own the contents
348  );
349 
351 
352  for (idx = srcData->begin (); idx != srcData->end (); idx++)
353  {
354  const FeatureVectorPtr srcExample = *idx;
355  FeatureVectorPtr encodedExample = EncodeAExample (srcExample);
356  encodedExamples->PushOnBack (encodedExample);
357  }
358 
359  return encodedExamples;
360 } /* EncodeAllExamples */
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
std::vector< FeatureVector * >::const_iterator const_iterator
Definition: KKQueue.h:89
Container class for FeatureVector derived objects.
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
FeatureVectorPtr EncodeAExample(FeatureVectorPtr src) const
FeatureVectorListPtr FeatureEncoder2::EncodedFeatureVectorList ( const FeatureVectorList &  srcData) const

Definition at line 365 of file FeatureEncoder2.cpp.

References KKMLL::FeatureVectorList::AllFieldsAreNumeric(), KKMLL::FeatureVectorList::DuplicateListAndContents(), EncodeAExample(), KKMLL::FeatureVectorList::FeatureVectorList(), KKMLL::FeatureVector::MLClass(), and KKMLL::FeatureVectorList::PushOnBack().

366 {
367  if (srcData.AllFieldsAreNumeric ())
368  return srcData.DuplicateListAndContents ();
369 
370  FeatureVectorListPtr encodedFeatureVectorList = new FeatureVectorList (encodedFileDesc, true);
371 
373  for (idx = srcData.begin (); idx != srcData.end (); idx++)
374  {
375  FeatureVectorPtr srcExample = *idx;
376  FeatureVectorPtr encodedFeatureVector = EncodeAExample (srcExample);
377  encodedFeatureVector->MLClass (srcExample->MLClass ());
378  encodedFeatureVectorList->PushOnBack (encodedFeatureVector);
379  }
380 
381  return encodedFeatureVectorList;
382 } /* EncodedFeatureVectorList */
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
bool AllFieldsAreNumeric() const
Returns true if all fields are numeric, no nominal fields.
std::vector< FeatureVector * >::const_iterator const_iterator
Definition: KKQueue.h:89
virtual FeatureVectorListPtr DuplicateListAndContents() const
Creates a duplicate of list and also duplicates its contents.
Container class for FeatureVector derived objects.
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
Definition: FeatureVector.h:74
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
FeatureVectorPtr EncodeAExample(FeatureVectorPtr src) const
kkint32 FeatureEncoder2::MemoryConsumedEstimated ( ) const

Definition at line 171 of file FeatureEncoder2.cpp.

Referenced by KKMLL::Model::MemoryConsumedEstimated().

172 {
173  kkint32 memoryConsumedEstimated = sizeof (FeatureEncoder2)
174  + attributeVector.size () * sizeof (AttributeType)
175  + cardinalityVector.size () * sizeof (kkint32);
176 
177  if (cardinalityDest) memoryConsumedEstimated += 2 * numOfFeatures * sizeof (kkint32); // For 'cardinalityDest', 'destFeatureNums'
178  if (destFeatureNums) memoryConsumedEstimated += numOfFeatures * sizeof (kkint32);
179  if (destWhatToDo) memoryConsumedEstimated += numOfFeatures * sizeof (FeWhatToDo);
180  if (srcFeatureNums) memoryConsumedEstimated += numOfFeatures * sizeof (kkuint16);
181 
182  return memoryConsumedEstimated;
183 }
FeatureEncoder2(const ModelParam &_param, FileDescPtr _fileDesc)
Constructs a Feature Encoder object.
__int32 kkint32
Definition: KKBaseTypes.h:88
unsigned __int16 kkuint16
16 bit unsigned integer.
Definition: KKBaseTypes.h:86
AttributeType
Definition: Attribute.h:36
kkint32 FeatureEncoder2::NumEncodedFeatures ( ) const

Definition at line 187 of file FeatureEncoder2.cpp.

References KKMLL::FileDesc::NumOfFields().

188 {
189  return encodedFileDesc->NumOfFields ();
190 }
kkuint32 NumOfFields() const
Definition: FileDesc.h:197
void KKMLL::FeatureEncoder2::ReadXML ( istream &  i)
void KKMLL::FeatureEncoder2::WriteXML ( istream &  o)

The documentation for this class was generated from the following files: