KSquare Utilities
KKMLL::FeatureEncoder Class Reference

#include <FeatureEncoder.h>

Public Member Functions

 FeatureEncoder ()
 
 FeatureEncoder (FileDescPtr _fileDesc, MLClassPtr _class1, MLClassPtr _class2, const FeatureNumList &_selectedFeatures, SVM_EncodingMethod _encodingMethod, double _c_Param)
 Constructs a Feature Encoder object. More...
 
 ~FeatureEncoder ()
 Frees any memory allocated by, and owned by the FeatureEncoder. More...
 
MLClassPtr Class1 () const
 
MLClassPtr Class2 () const
 
kkint32 CodedNumOfFeatures () const
 
void CompressExamples (FeatureVectorListPtr srcExamples, FeatureVectorListPtr compressedExamples, ClassAssignments &assignments)
 Left over from BitReduction days; removed all code except that which processed the NO bit reduction option. More...
 
FeatureVectorListPtr CreateEncodedFeatureVector (FeatureVectorList &srcData)
 
FileDescPtr CreateEncodedFileDesc (ostream *o)
 
XSpacePtr EncodeAExample (FeatureVectorPtr example)
 Converts a single example into the svm_problem format. More...
 
void EncodeAExample (FeatureVectorPtr example, svm_node *xSpace, kkint32 &xSpaceUsed)
 Converts a single example into the svm_problem format. More...
 
FeatureVectorPtr EncodeAExample (FileDescPtr encodedFileDesc, FeatureVectorPtr src)
 
FeatureVectorListPtr EncodeAllExamples (const FeatureVectorListPtr srcData)
 
void EncodeIntoSparseMatrix (FeatureVectorListPtr src, ClassAssignments &assignments, XSpacePtr &xSpace, kkint32 &totalxSpaceUsed, struct svm_problem &prob, RunLog &log)
 Compresses 'src' examples, allocating new 'xSpace' data structure. More...
 
kkint32 MemoryConsumedEstimated () const
 
virtual void ReadXML (XmlStream &s, XmlTagConstPtr tag, VolConstBool &cancelFlag, RunLog &log)
 
virtual void WriteXML (const KKStr &varName, ostream &o) const
 
kkint32 XSpaceNeededPerExample ()
 

Detailed Description

Definition at line 44 of file FeatureEncoder.h.

Constructor & Destructor Documentation

FeatureEncoder::FeatureEncoder ( )

Definition at line 31 of file FeatureEncoder.cpp.

References KKMLL::NoEncoding.

31  :
32  cardinalityDest (NULL),
33  class1 (NULL),
34  class2 (NULL),
35  codedNumOfFeatures (0),
36  c_Param (1.0),
37  destFeatureNums (NULL),
38  destFileDesc (NULL),
39  destWhatToDo (NULL),
40  encodingMethod (SVM_EncodingMethod::NoEncoding),
41  fileDesc (NULL),
42  numEncodedFeatures (0),
43  numOfFeatures (0),
44  selectedFeatures (),
45  srcFeatureNums (NULL),
46  xSpaceNeededPerExample (0)
47 {
48 }
FeatureEncoder::FeatureEncoder ( FileDescPtr  _fileDesc,
MLClassPtr  _class1,
MLClassPtr  _class2,
const FeatureNumList & _selectedFeatures,
SVM_EncodingMethod  _encodingMethod,
double  _c_Param 
)

Constructs a Feature Encoder object.

Parameters
[in] _fileDesc
[in] _class1
[in] _class2
[in] _log A log-file stream. All important events will be output to this stream.

Definition at line 59 of file FeatureEncoder.cpp.

References KKMLL::Binary, KKMLL::Attribute::Cardinality(), KKB::KKStr::Concat(), KKMLL::FeAsIs, KKMLL::FeatureNumList::FeatureNumList(), KKMLL::FeBinary, KKMLL::FeScale, KKMLL::FileDesc::GetAAttribute(), KKMLL::Attribute::GetNominalValue(), KKMLL::Attribute::Name(), KKMLL::FileDesc::NewContinuousDataOnly(), KKMLL::NoEncoding, KKMLL::Nominal, KKMLL::FeatureNumList::NumOfFeatures(), KKB::KKStr::operator+(), KKMLL::FeatureNumList::operator[](), KKMLL::Scaled, KKMLL::Symbolic, and KKMLL::Attribute::Type().

Referenced by KKMLL::SVMModel::ReadXML().

65  :
66 
67  cardinalityDest (NULL),
68  class1 (_class1),
69  class2 (_class2),
70  codedNumOfFeatures (0),
71  c_Param (_c_Param),
72  destFeatureNums (NULL),
73  destFileDesc (NULL),
74  destWhatToDo (NULL),
75  encodingMethod (_encodingMethod),
76  fileDesc (_fileDesc),
77  numEncodedFeatures (0),
78  numOfFeatures (0),
79  selectedFeatures (_selectedFeatures),
80  srcFeatureNums (NULL),
81  xSpaceNeededPerExample (0)
82 {
83  numOfFeatures = selectedFeatures.NumOfFeatures ();
84 
85  xSpaceNeededPerExample = 0;
86  srcFeatureNums = new kkint32[numOfFeatures];
87  cardinalityDest = new kkint32[numOfFeatures];
88  destFeatureNums = new kkint32[numOfFeatures];
89  destWhatToDo = new FeWhatToDo[numOfFeatures];
90 
91  VectorKKStr destFieldNames;
92 
93  kkint32 x;
94 
95  for (x = 0; x < numOfFeatures; x++)
96  {
97  kkint32 srcFeatureNum = selectedFeatures[x];
98  srcFeatureNums [x] = srcFeatureNum;
99  destFeatureNums [x] = xSpaceNeededPerExample;
100  cardinalityDest [x] = 1;
101  destWhatToDo [x] = FeWhatToDo::FeAsIs;
102 
103  const Attribute& attribute = fileDesc->GetAAttribute (srcFeatureNum);
104  AttributeType attributeType = attribute.Type ();
105  kkint32 cardinality = attribute.Cardinality ();
106 
107  switch (encodingMethod)
108  {
110  if ((attributeType == AttributeType::Nominal) || (attributeType == AttributeType::Symbolic))
111  {
112  destWhatToDo [x] = FeWhatToDo::FeBinary;
113  cardinalityDest [x] = cardinality;
114  xSpaceNeededPerExample += cardinalityDest[x];
115  numEncodedFeatures += cardinalityDest[x];
116  for (kkint32 zed = 0; zed < cardinalityDest[x]; zed++)
117  {
118  KKStr fieldName = attribute.Name () + "_" + attribute.GetNominalValue (zed);
119  destFieldNames.push_back (fieldName);
120  }
121  }
122  else
123  {
124  xSpaceNeededPerExample++;
125  numEncodedFeatures++;
126  destWhatToDo [x] = FeWhatToDo::FeAsIs;
127  destFieldNames.push_back (attribute.Name ());
128  }
129  break;
130 
131 
133  xSpaceNeededPerExample++;
134  numEncodedFeatures++;
135  if ((attributeType == AttributeType::Nominal) ||
136  (attributeType == AttributeType::Symbolic)
137  )
138  destWhatToDo [x] = FeWhatToDo::FeScale;
139  else
140  destWhatToDo [x] = FeWhatToDo::FeAsIs;
141 
142  destFieldNames.push_back (attribute.Name ());
143  break;
144 
145 
147  default:
148  xSpaceNeededPerExample++;
149  numEncodedFeatures++;
150  destWhatToDo [x] = FeWhatToDo::FeAsIs;
151  destFieldNames.push_back (attribute.Name ());
152  break;
153  }
154  }
155 
156  codedNumOfFeatures = xSpaceNeededPerExample;
157 
158  destFileDesc = FileDesc::NewContinuousDataOnly (destFieldNames);
159 
160  xSpaceNeededPerExample++; // Add one more for the terminating (-1)
161 }
__int32 kkint32
Definition: KKBaseTypes.h:88
const KKStr & GetNominalValue(kkint32 code) const
Returns the nominal value for the given ordinal value.
Definition: Attribute.cpp:143
describes a single Feature, Type and possible values.
Definition: Attribute.h:74
kkint32 NumOfFeatures() const
AttributeType
Definition: Attribute.h:36
AttributeType Type() const
Definition: Attribute.h:133
static FileDescPtr NewContinuousDataOnly(VectorKKStr &_fieldNames)
Creates a simple FileDesc that consists of continuous data only.
Definition: FileDesc.cpp:116
const KKStr & Name() const
Definition: Attribute.h:122
kkint32 Cardinality() const
Returns back the cardinality of the attribute; the number of possible values it can take...
Definition: Attribute.cpp:173
const KKMLL::Attribute & GetAAttribute(kkint32 fieldNum) const
Definition: FileDesc.cpp:210
FeatureEncoder::~FeatureEncoder ( )

Frees any memory allocated by, and owned by the FeatureEncoder.

Definition at line 172 of file FeatureEncoder.cpp.

173 {
174  delete srcFeatureNums;
175  delete destFeatureNums;
176  delete cardinalityDest;
177  delete destWhatToDo;
178 }

Member Function Documentation

MLClassPtr KKMLL::FeatureEncoder::Class1 ( ) const
inline

Definition at line 64 of file FeatureEncoder.h.

Referenced by KKMLL::SVMModel::DistanceFromDecisionBoundary().

64 {return class1;}
MLClassPtr KKMLL::FeatureEncoder::Class2 ( ) const
inline

Definition at line 65 of file FeatureEncoder.h.

Referenced by KKMLL::SVMModel::DistanceFromDecisionBoundary().

65 {return class2;}
kkint32 KKMLL::FeatureEncoder::CodedNumOfFeatures ( ) const
inline

Definition at line 61 of file FeatureEncoder.h.

61 {return codedNumOfFeatures;}
void FeatureEncoder::CompressExamples ( FeatureVectorListPtr  srcExamples,
FeatureVectorListPtr  compressedExamples,
ClassAssignments & assignments
)

Left over from BitReduction days; removed all code except that which processed the NO bit reduction option.

Parameters
[in] examples_list The list of examples you want to attempt to reduce
[out] compressed_examples_list The reduced list of examples

Definition at line 637 of file FeatureEncoder.cpp.

References KKMLL::FeatureVectorList::AddQueue(), and KKB::osGetSystemTimeUsed().

641 {
642  double time_before, time_after;
643  time_before = osGetSystemTimeUsed ();
644  compressedExamples->AddQueue (*srcExamples);
645  time_after = osGetSystemTimeUsed ();
646  compressedExamples->Owner (false);
647  return;
648 } /* CompressExamples */
void AddQueue(const FeatureVectorList &examplesToAdd)
Add the contents of 'examplesToAdd' to the end of this list.
bool Owner() const
Definition: KKQueue.h:305
double osGetSystemTimeUsed()
Returns the number of CPU seconds used by current process.
FeatureVectorListPtr FeatureEncoder::CreateEncodedFeatureVector ( FeatureVectorList & srcData)

Definition at line 653 of file FeatureEncoder.cpp.

References KKMLL::FeatureVector::AddFeatureData(), KKMLL::FeatureVectorList::AllFieldsAreNumeric(), KKMLL::FeatureVectorList::DuplicateListAndContents(), EncodeAExample(), KKMLL::FeatureVector::FeatureVector(), KKMLL::FeatureVectorList::FeatureVectorList(), SVM233::svm_node::index, KKMLL::FeatureVector::MLClass(), KKMLL::FeatureVectorList::PushOnBack(), and SVM233::svm_node::value.

654 {
655  if (srcData.AllFieldsAreNumeric ())
656  return srcData.DuplicateListAndContents ();
657 
658  FeatureVectorListPtr encodedFeatureVectorList = new FeatureVectorList (destFileDesc, true);
659 
661  for (idx = srcData.begin (); idx != srcData.end (); idx++)
662  {
663  FeatureVectorPtr srcExample = *idx;
664  XSpacePtr encodedData = EncodeAExample (srcExample);
665 
666  kkint32 zed = 0;
667  FeatureVectorPtr encodedFeatureVector = new FeatureVector (codedNumOfFeatures);
668  while (encodedData[zed].index != -1)
669  {
670  encodedFeatureVector->AddFeatureData (encodedData[zed].index, (float)encodedData[zed].value);
671  zed++;
672  }
673 
674  encodedFeatureVector->MLClass (srcExample->MLClass ());
675  encodedFeatureVectorList->PushOnBack (encodedFeatureVector);
676 
677  delete encodedData;
678  encodedData = NULL;
679  }
680 
681  return encodedFeatureVectorList;
682 } /* CreateEncodedFeatureVector */
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
std::vector< FeatureVector * >::iterator iterator
Definition: KKQueue.h:88
__int32 kkint32
Definition: KKBaseTypes.h:88
bool AllFieldsAreNumeric() const
Returns true if all fields are numeric, no nominal fields.
virtual FeatureVectorListPtr DuplicateListAndContents() const
Creates a duplicate of the list and also duplicates its contents.
void AddFeatureData(kkint32 _featureNum, float _featureData)
Container class for FeatureVector derived objects.
XSpacePtr EncodeAExample(FeatureVectorPtr example)
Converts a single example into the svm_problem format.
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
Definition: FeatureVector.h:74
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
FileDescPtr FeatureEncoder::CreateEncodedFileDesc ( ostream *  o)

Definition at line 201 of file FeatureEncoder.cpp.

References KKMLL::FileDesc::AddAAttribute(), KKB::KKStr::Concat(), KKMLL::FeAsIs, KKMLL::FeBinary, KKMLL::FeScale, KKMLL::FileDesc::FieldName(), KKMLL::FileDesc::FileDesc(), KKMLL::FileDesc::GetNominalValue(), KKB::KKException::KKException(), KKB::KKStr::KKStr(), KKMLL::Numeric, KKB::KKStr::operator+(), KKB::StrFormatInt(), and KKMLL::FileDesc::TypeStr().

Referenced by EncodeAllExamples().

202 {
203  FileDescPtr newFileDesc = new FileDesc ();
204 
205  if (o)
206  {
207  *o << endl
208  << "Orig" << "\t" << "Orig" << "\t" << "Field" << "\t" << "Encoded" << "\t" << "Encoded" << endl;
209  *o << "FieldNum" << "\t" << "FieldName" << "\t" << "Type" << "\t" << "FieldNum" << "\t" << "FieldName" << endl;
210  }
211 
212  kkint32 x;
213 
214  bool alreadyExist;
215 
216  for (x = 0; x < numOfFeatures; x++)
217  {
218  kkint32 srcFeatureNum = srcFeatureNums[x];
219  kkint32 y = destFeatureNums[x];
220 
221  if (y >= numEncodedFeatures)
222  {
223  KKStr errMsg (128);
224  errMsg << "FeatureEncoder::CreateEncodedFileDesc numEncodedFeatures [" << numEncodedFeatures << "] exceeded.";
225  cerr << endl
226  << "FeatureEncoder::CreateEncodedFileDesc *** ERROR ***" << endl
227  << " " << errMsg << endl
228  << endl;
229  throw KKException (errMsg);
230  exit (-1);
231  }
232 
233  KKStr origFieldDesc = StrFormatInt (srcFeatureNum, "zz0") + "\t" +
234  fileDesc->FieldName (srcFeatureNum) + "\t" +
235  fileDesc->TypeStr (srcFeatureNum);
236 
237 
238  switch (destWhatToDo[x])
239  {
240  case FeWhatToDo::FeAsIs:
241  {
242  newFileDesc->AddAAttribute (fileDesc->FieldName (x), AttributeType::Numeric, alreadyExist);
243  if (o)
244  {
245  *o << origFieldDesc << "\t"
246  << y << "\t"
247  << fileDesc->FieldName (x)
248  << endl;
249  }
250  }
251  break;
252 
254  {
255  for (kkint32 z = 0; z < cardinalityDest[x]; z++)
256  {
257  KKStr nominalValue = fileDesc->GetNominalValue (srcFeatureNums[x], z);
258  KKStr encodedName = fileDesc->FieldName (x) + "_" + nominalValue;
259  newFileDesc->AddAAttribute (encodedName, AttributeType::Numeric, alreadyExist);
260  if (o)
261  {
262  *o << origFieldDesc << "\t"
263  << y << "\t"
264  << encodedName
265  << endl;
266  }
267 
268  y++;
269  }
270  }
271 
272  break;
273 
274  case FeWhatToDo::FeScale:
275  {
276  newFileDesc->AddAAttribute (fileDesc->FieldName (x), AttributeType::Numeric, alreadyExist);
277  if (o)
278  {
279  *o << origFieldDesc << "\t"
280  << y << "\t"
281  << fileDesc->FieldName (x)
282  << endl;
283  }
284  }
285  break;
286  }
287  }
288 
289  return newFileDesc;
290 } /* CreateEncodedFileDesc */
KKStr TypeStr(kkint32 fieldNum) const
Definition: FileDesc.cpp:378
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
Provides a detailed description of the attributes of a dataset.
Definition: FileDesc.h:72
__int32 kkint32
Definition: KKBaseTypes.h:88
void AddAAttribute(const KKB::KKStr &_name, KKMLL::AttributeType _type, bool &alreadyExists)
Definition: FileDesc.cpp:169
const KKStr & FieldName(kkint32 fieldNum) const
Definition: FileDesc.cpp:387
const KKStr & GetNominalValue(kkint32 fieldNum, kkint32 code) const
Definition: FileDesc.cpp:395
KKStr StrFormatInt(kkint32 val, const char *mask)
Definition: KKStr.cpp:5004
XSpacePtr FeatureEncoder::EncodeAExample ( FeatureVectorPtr  example)

Converts a single example into the svm_problem format.

Parameters
[in] example The example that we're converting

Definition at line 301 of file FeatureEncoder.cpp.

References EncodeAExample().

Referenced by CreateEncodedFeatureVector(), and KKMLL::SVMModel::ProbabilitiesByClass().

302 {
303  // XSpacePtr xSpace = (struct svm_node*)malloc (xSpaceNeededPerExample * sizeof (struct svm_node));
304  XSpacePtr xSpace = new svm_node[xSpaceNeededPerExample];
305  kkint32 xSpaceUsed = 0;
306  EncodeAExample (example, xSpace, xSpaceUsed);
307  return xSpace;
308 } /* EncodeAExample */
__int32 kkint32
Definition: KKBaseTypes.h:88
XSpacePtr EncodeAExample(FeatureVectorPtr example)
Converts a single example into the svm_problem format.
void FeatureEncoder::EncodeAExample ( FeatureVectorPtr  example,
svm_node * xSpace,
kkint32 & xSpaceUsed
)

Converts a single example into the svm_problem format.

Parameters
[in] example The example that we're converting
[in] xSpace The row in the svm_problem structure where the converted data will be stored

Definition at line 392 of file FeatureEncoder.cpp.

References KKB::KKStr::Concat(), KKMLL::FeAsIs, KKMLL::FeatureVector::FeatureData(), KKMLL::FeBinary, KKMLL::FeScale, SVM233::svm_node::index, KKB::KKException::KKException(), KKB::KKStr::KKStr(), and SVM233::svm_node::value.

Referenced by KKMLL::SVMModel::DistanceFromDecisionBoundary(), EncodeAExample(), EncodeIntoSparseMatrix(), KKMLL::SVMModel::FindWorstSupportVectors(), and KKMLL::SVMModel::FindWorstSupportVectors2().

396 {
397  const float* featureData = example->FeatureData ();
398  kkint32 x;
399 
400  xSpaceUsed = 0;
401 
402  for (x = 0; x < numOfFeatures; x++)
403  {
404  float featureVal = featureData [srcFeatureNums[x]];
405  kkint32 y = destFeatureNums[x];
406 
407  if (y >= xSpaceNeededPerExample)
408  {
409  KKStr errMsg (128);
410  errMsg << "FeatureEncoder::EncodeAExample ***ERROR*** xSpaceNeededPerExample[" << xSpaceNeededPerExample << "].";
411  cerr << endl
412  << "FeatureEncoder::EncodeAExample *** ERROR ***" << endl
413  << " " << errMsg << endl
414  << endl;
415  throw KKException (errMsg);
416  }
417 
418  switch (destWhatToDo[x])
419  {
420  case FeWhatToDo::FeAsIs:
421  {
422  if (featureVal != 0.0)
423  {
424  xSpace[xSpaceUsed].index = y;
425  xSpace[xSpaceUsed].value = featureVal;
426  xSpaceUsed++;
427  }
428  }
429  break;
430 
432  {
433  for (kkint32 z = 0; z < cardinalityDest[x]; z++)
434  {
435  float bVal = ((kkint32)featureVal == z);
436  if (bVal != 0.0)
437  {
438  xSpace[xSpaceUsed].index = y;
439  xSpace[xSpaceUsed].value = bVal;
440  xSpaceUsed++;
441  }
442  y++;
443  }
444  }
445 
446  break;
447 
448  case FeWhatToDo::FeScale:
449  {
450  if (featureVal != (float)0.0)
451  {
452  xSpace[xSpaceUsed].index = y;
453  xSpace[xSpaceUsed].value = featureVal / (float)cardinalityDest[x];
454  xSpaceUsed++;
455  }
456  }
457  break;
458  }
459  }
460 
461  xSpace[xSpaceUsed].index = -1;
462  xSpace[xSpaceUsed].value = -1;
463  xSpaceUsed++;
464 } /* EncodeAExample */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
double value
Definition: svm.h:39
kkint16 index
Definition: svm.h:38
void FeatureData(kkint32 _featureNum, float _featureValue)
Assign a value to a specific feature number for the feature vector.
FeatureVectorPtr FeatureEncoder::EncodeAExample ( FileDescPtr  encodedFileDesc,
FeatureVectorPtr  src 
)

Definition at line 313 of file FeatureEncoder.cpp.

References KKMLL::FeatureVector::AddFeatureData(), KKMLL::FeAsIs, KKMLL::FeatureVector::FeatureData(), KKMLL::FeatureVector::FeatureVector(), KKMLL::FeBinary, KKMLL::FeScale, KKMLL::FeatureVector::MLClass(), KKMLL::FeatureVector::PredictedClass(), and KKMLL::FeatureVector::TrainWeight().

Referenced by EncodeAllExamples().

316 {
317  FeatureVectorPtr encodedExample = new FeatureVector (numEncodedFeatures);
318  encodedExample->MLClass (src->MLClass ());
319  encodedExample->PredictedClass (src->PredictedClass ());
320  //encodedExample->Version (src->Version ());
321  encodedExample->TrainWeight (src->TrainWeight ());
322 
323  const float* featureData = src->FeatureData ();
324  kkint32 x;
325 
326  for (x = 0; x < numOfFeatures; x++)
327  {
328  float featureVal = featureData [srcFeatureNums[x]];
329  kkint32 y = destFeatureNums[x];
330 
331  switch (destWhatToDo[x])
332  {
333  case FeWhatToDo::FeAsIs:
334  {
335  encodedExample->AddFeatureData (y, featureVal);
336  }
337  break;
338 
340  {
341  for (kkint32 z = 0; z < cardinalityDest[x]; z++)
342  {
343  float bVal = ((kkint32)featureVal == z);
344  encodedExample->AddFeatureData (y, bVal);
345  y++;
346  }
347  }
348 
349  break;
350 
351  case FeWhatToDo::FeScale:
352  {
353  encodedExample->AddFeatureData (y, (featureVal / (float)cardinalityDest[x]));
354  }
355  break;
356  }
357  }
358 
359  return encodedExample;
360 } /* EncodeAExample */
__int32 kkint32
Definition: KKBaseTypes.h:88
void TrainWeight(float _trainWeight)
Assign a specific example a higher weight for training purposes.
void AddFeatureData(kkint32 _featureNum, float _featureData)
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
Definition: FeatureVector.h:74
void FeatureData(kkint32 _featureNum, float _featureValue)
Assign a value to a specific feature number for the feature vector.
void PredictedClass(MLClassPtr _predictedClass)
Definition: FeatureVector.h:78
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
FeatureVectorListPtr FeatureEncoder::EncodeAllExamples ( const FeatureVectorListPtr  srcData)

Definition at line 366 of file FeatureEncoder.cpp.

References CreateEncodedFileDesc(), EncodeAExample(), KKMLL::FeatureVectorList::FeatureVectorList(), and KKMLL::FeatureVectorList::PushOnBack().

367 {
368  FileDescPtr encodedFileDesc = CreateEncodedFileDesc (NULL);
369 
370  FeatureVectorListPtr encodedExamples = new FeatureVectorList (encodedFileDesc, true);
371 
373 
374  for (idx = srcData->begin (); idx != srcData->end (); idx++)
375  {
376  const FeatureVectorPtr srcExample = *idx;
377  FeatureVectorPtr encodedExample = EncodeAExample (encodedFileDesc, srcExample);
378  encodedExamples->PushOnBack (encodedExample);
379  }
380 
381  return encodedExamples;
382 } /* EncodeAllExamples */
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
Provides a detailed description of the attributes of a dataset.
Definition: FileDesc.h:72
std::vector< FeatureVector * >::const_iterator const_iterator
Definition: KKQueue.h:89
FileDescPtr CreateEncodedFileDesc(ostream *o)
Container class for FeatureVector derived objects.
XSpacePtr EncodeAExample(FeatureVectorPtr example)
Converts a single example into the svm_problem format.
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
void FeatureEncoder::EncodeIntoSparseMatrix ( FeatureVectorListPtr  src,
ClassAssignments & assignments,
XSpacePtr & xSpace,
kkint32 & totalxSpaceUsed,
struct svm_problem & prob,
RunLog & log
)

Compresses 'src' examples, allocating new 'xSpace' data structure.

Parameters
[in] src Examples that are to be compressed
[in] assignments Class Assignments
[in] xSpace Will allocate enough xSpace nodes and place compressed results in this structure.
[out] totalxSpaceUsed Number of nodes used in xSpace
Parameters
[out] prob Data Structure that is used by SVMLib
[in] log

Definition at line 514 of file FeatureEncoder.cpp.

References EncodeAExample(), KKMLL::ClassAssignments::GetNumForClass(), SVM233::svm_problem::index, SVM233::svm_problem::l, KKMLL::FeatureVector::MLClass(), KKMLL::FeatureVector::TrainWeight(), SVM233::svm_problem::W, SVM233::svm_problem::x, and SVM233::svm_problem::y.

522 {
523  FeatureVectorListPtr compressedExamples = NULL;
524  FeatureVectorListPtr examplesToUseFoXSpace = NULL;
525  kkint32 xSpaceUsed = 0;
526 
527  totalxSpaceUsed = 0;
528 
529  examplesToUseFoXSpace = src;
530 
531  kkint32 numOfExamples = examplesToUseFoXSpace->QueueSize ();
532  //kkint32 elements = numOfExamples * xSpaceNeededPerExample;
533 
534  prob.l = numOfExamples;
535  prob.y = (double*)malloc (prob.l * sizeof (double));
536  prob.x = (struct svm_node **) malloc (prob.l * sizeof (struct svm_node*));
537  prob.index = new kkint32[prob.l];
538  prob.exampleNames.clear ();
539 
540  kkint32 numNeededXspaceNodes = DetermineNumberOfNeededXspaceNodes (examplesToUseFoXSpace);
541 
542  kkint32 totalBytesForxSpaceNeeded = (numNeededXspaceNodes + 10) * sizeof (struct svm_node); // I added '10' to elements because I am paranoid
543 
544  xSpace = (struct svm_node*) malloc (totalBytesForxSpaceNeeded);
545  if (xSpace == NULL)
546  {
547  log.Level (-1) << endl << endl << endl
548  << " FeatureEncoder::Compress *** Failed to allocates space for 'xSpace' ****" << endl
549  << endl
550  << " Space needed [" << totalBytesForxSpaceNeeded << "]" << endl
551  << " Num of Examples [" << numOfExamples << "]" << endl
552  << " Num XSpaceNodesNeeded [" << numNeededXspaceNodes << "]" << endl
553  << endl;
554  // we sill have to allocate space for each individual training example separately.
555  //throw "FeatureEncoder::Compress Allocation of memory for xSpace Failed.";
556  }
557 
558  prob.W = NULL;
559 
560  kkint32 i = 0;
561 
562  FeatureVectorPtr example = NULL;
563  MLClassPtr lastMlClass = NULL;
564  kkint16 lastClassNum = -1;
565 
566  kkint32 bytesOfxSpacePerExample = xSpaceNeededPerExample * sizeof (struct svm_node);
567 
568  for (i = 0; i < prob.l; i++)
569  {
570  if (totalxSpaceUsed > numNeededXspaceNodes)
571  {
572  log.Level (-1) << endl << endl
573  << "FeatureEncoder::Compress ***ERROR*** We have exceeded the number of XSpace nodes allocated." << endl
574  << endl;
575  }
576 
577  example = examplesToUseFoXSpace->IdxToPtr (i);
578 
579  if (example->MLClass () != lastMlClass)
580  {
581  lastMlClass = example->MLClass ();
582  lastClassNum = assignments.GetNumForClass (lastMlClass);
583  }
584 
585  prob.y[i] = lastClassNum;
586  prob.index[i] = i;
587  prob.exampleNames.push_back (osGetRootName (example->ExampleFileName ()));
588 
589  if (prob.W)
590  {
591  prob.W[i] = example->TrainWeight () * c_Param;
592  if (example->TrainWeight () <= 0.0f)
593  {
594  log.Level (-1) << endl
595  << "FeatureEncoder::EncodeIntoSparseMatrix ***ERROR*** Example[" << example->ExampleFileName () << "]" << endl
596  << " has a TrainWeight value of 0 or less defaulting to 1.0" << endl
597  << endl;
598  prob.W[i] = 1.0 * c_Param;
599  }
600  }
601 
602  if (xSpace == NULL)
603  {
604  struct svm_node* xSpaceThisExample = (struct svm_node*) malloc (bytesOfxSpacePerExample);
605  prob.x[i] = xSpaceThisExample;
606  EncodeAExample (example, prob.x[i], xSpaceUsed);
607  if (xSpaceUsed < xSpaceNeededPerExample)
608  {
609  kkint32 bytesNeededForThisExample = xSpaceUsed * sizeof (struct svm_node);
610  struct svm_node* smallerXSpaceThisExample = (struct svm_node*) malloc (bytesNeededForThisExample);
611  memcpy (smallerXSpaceThisExample, xSpaceThisExample, bytesNeededForThisExample);
612  free (xSpaceThisExample);
613  prob.x[i] = smallerXSpaceThisExample;
614  }
615  }
616  else
617  {
618  prob.x[i] = &xSpace[totalxSpaceUsed];
619  EncodeAExample (example, prob.x[i], xSpaceUsed);
620  }
621  totalxSpaceUsed += xSpaceUsed;
622  }
623 
624  delete compressedExamples;
625  return;
626 } /* Compress */
__int16 kkint16
16 bit signed integer.
Definition: KKBaseTypes.h:85
void ExampleFileName(const KKStr &_exampleFileName)
Name of source of feature vector, ex: file name of image that the feature vector was computed from...
Definition: FeatureVector.h:75
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
kkint16 GetNumForClass(MLClassPtr mlClass) const
Represents a "Class" in the Machine Learning Sense.
Definition: MLClass.h:52
EntryPtr IdxToPtr(kkuint32 idx) const
Definition: KKQueue.h:732
void TrainWeight(float _trainWeight)
Assign a specific example a higher weight for training purposes.
kkint32 l
Definition: svm.h:48
RunLog & Level(kkint32 _level)
Definition: RunLog.cpp:220
Container class for FeatureVector derived objects.
struct svm_node ** x
Definition: svm.h:52
double * W
Definition: svm.h:53
XSpacePtr EncodeAExample(FeatureVectorPtr example)
Converts a single example into the svm_problem format.
double * y
Definition: svm.h:49
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
Definition: FeatureVector.h:74
VectorKKStr exampleNames
Definition: svm.h:50
kkint32 * index
Definition: svm.h:51
kkint32 QueueSize() const
Definition: KKQueue.h:313
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
KKStr osGetRootName(const KKStr &fullFileName)
kkint32 FeatureEncoder::MemoryConsumedEstimated ( ) const

Definition at line 182 of file FeatureEncoder.cpp.

References KKMLL::FeatureNumList::MemoryConsumedEstimated().

Referenced by KKMLL::SVMModel::MemoryConsumedEstimated().

183 {
184  kkint32 memoryConsumedEstimated = sizeof (FeatureEncoder)
185  + selectedFeatures.MemoryConsumedEstimated ()
186  + numOfFeatures * sizeof (kkint32);
187 
188  if (cardinalityDest)
189  memoryConsumedEstimated += 3 * sizeof (kkint32) * numOfFeatures; // cardinalityDest + destFeatureNums + srcFeatureNums
190 
191  // We do not own 'destFileDesc' and 'fileDesc'
192  if (destWhatToDo)
193  memoryConsumedEstimated += sizeof (FeWhatToDo) * numOfFeatures;
194 
195  return memoryConsumedEstimated;
196 } /* MemoryConsumedEstimated */
__int32 kkint32
Definition: KKBaseTypes.h:88
kkint32 MemoryConsumedEstimated() const
void FeatureEncoder::ReadXML ( XmlStream & s,
XmlTagConstPtr  tag,
VolConstBool & cancelFlag,
RunLog & log
)
virtual

Definition at line 737 of file FeatureEncoder.cpp.

References KKB::KKStr::Concat(), KKMLL::MLClass::CreateNewMLClass(), KKMLL::EncodingMethodFromStr(), KKB::KKStr::EqualIgnoreCase(), KKB::XmlStream::GetNextToken(), KKB::XmlElement::ToDouble(), KKB::XmlElement::ToInt32(), KKB::XmlToken::tokElement, KKB::XmlToken::TokenType(), KKB::XmlElement::ToKKStr(), KKMLL::XmlElementFileDesc::Value(), and KKB::XmlElement::VarName().

742 {
743  XmlTokenPtr t = s.GetNextToken (cancelFlag, log);
744  while (t && (!cancelFlag))
745  {
746  if (t->TokenType () == XmlToken::TokenTypes::tokElement)
747  {
748  XmlElementPtr e = dynamic_cast<XmlElementPtr> (t);
749  if (e)
750  {
751  KKStr varName = e->VarName ();
752 
753  if (varName.EqualIgnoreCase ("CodedNumOfFeatures"))
754  codedNumOfFeatures= e->ToInt32 ();
755 
756  else if (varName.EqualIgnoreCase ("C_Param"))
757  c_Param = e->ToDouble ();
758 
759  else if (varName.EqualIgnoreCase ("NumEncodedFeatures"))
760  numEncodedFeatures = e->ToInt32 ();
761 
762  else if (varName.EqualIgnoreCase ("NumOfFeatures"))
763  numOfFeatures = e->ToInt32 ();
764 
765  else if (varName.EqualIgnoreCase ("xSpaceNeededPerExample"))
766  xSpaceNeededPerExample = e->ToInt32 ();
767 
768  else if (typeid (*e) == typeid (XmlElementArrayInt32))
769  {
770  XmlElementArrayInt32Ptr xmlArray = dynamic_cast<XmlElementArrayInt32Ptr> (e);
771  kkuint32 count = xmlArray->Count ();
772  if (count != numOfFeatures)
773  {
774  log.Level (-1) << endl
775  << "FeatureEncoder::ReadXML ***ERROR*** Variable[" << varName << "] Invalid Length[" << count << "] Expected[" << numOfFeatures << "]" << endl
776  << endl;
777  }
778  else
779  {
780  if (varName.EqualIgnoreCase ("CardinalityDest"))
781  {
782  delete cardinalityDest;
783  cardinalityDest = xmlArray->TakeOwnership ();
784  }
785 
786  else if (varName.EqualIgnoreCase ("DestFeatureNums"))
787  {
788  delete destFeatureNums;
789  destFeatureNums = xmlArray->TakeOwnership ();
790  }
791 
792  else if (varName.EqualIgnoreCase ("SrcFeatureNums"))
793  {
794  delete srcFeatureNums;
795  srcFeatureNums = xmlArray->TakeOwnership ();
796  }
797  }
798  }
799 
800  else if (varName.EqualIgnoreCase ("Class1"))
801  class1 = MLClass::CreateNewMLClass (e->ToKKStr (), -1);
802 
803  else if (varName.EqualIgnoreCase ("Class2"))
804  class2 = MLClass::CreateNewMLClass (e->ToKKStr (), -1);
805 
806  else if (varName.EqualIgnoreCase ("FileDesc") && (typeid (*e) == typeid (XmlElementFileDesc)))
807  fileDesc = dynamic_cast<XmlElementFileDescPtr> (e)->Value ();
808 
809  else if (varName.EqualIgnoreCase ("DestFileDesc") && (typeid (*e) == typeid (XmlElementFileDesc)))
810  destFileDesc = dynamic_cast<XmlElementFileDescPtr> (e)->Value ();
811 
812  else if (varName.EqualIgnoreCase ("DestWhatToDo") && (typeid (*e) == typeid (XmlElementVectorInt32)))
813  {
814  XmlElementVectorInt32Ptr xmlVect = dynamic_cast<XmlElementVectorInt32Ptr> (e);
815  if (xmlVect && xmlVect->Value ())
816  {
817  const VectorInt32& v = *(xmlVect->Value ());
818  if (v.size () != numOfFeatures)
819  {
820  log.Level (-1) << endl
821  << "FeatureEncoder::ReadXML ***ERROR*** Variable[" << varName << "] Invalid Size[" << v.size () << "] Expected[" << numOfFeatures << "]." << endl
822  << endl;
823  }
824  else
825  {
826  delete destWhatToDo;
827  destWhatToDo = new FeWhatToDo[v.size ()];
828  for (kkuint32 x = 0; x < v.size (); ++x)
829  destWhatToDo[x] = (FeWhatToDo)v[x];
830  }
831  }
832  }
833 
834  else if (varName.EqualIgnoreCase ("EncodingMethod"))
835  encodingMethod = EncodingMethodFromStr (e->ToKKStr ());
836 
837  else
838  {
839  log.Level (-1) << "XmlElementTrainingClassList ***ERROR*** Un-expected Section Element[" << e->SectionName () << "]" << endl;
840  }
841  }
842  }
843 
844  delete t;
845  t = s.GetNextToken (cancelFlag, log);
846  }
847  delete t;
848  t = NULL;
849 } /* ReadXML */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
virtual double ToDouble() const
Definition: XmlStream.h:315
virtual KKStr ToKKStr() const
Definition: XmlStream.h:314
SVM_EncodingMethod EncodingMethodFromStr(const KKStr &encodingMethodStr)
Definition: SVMparam.cpp:899
bool EqualIgnoreCase(const KKStr &s2) const
Definition: KKStr.cpp:1250
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
RunLog & Level(kkint32 _level)
Definition: RunLog.cpp:220
virtual kkint32 ToInt32() const
Definition: XmlStream.h:317
std::vector< kkint32 > VectorInt32
Vector of signed 32 bit integers.
Definition: KKBaseTypes.h:144
std::vector< kkint32 > *const Value() const
Definition: XmlStream.h:926
virtual const KKStr & VarName() const
Definition: XmlStream.cpp:794
virtual const KKStr & SectionName() const
Definition: XmlStream.cpp:785
static MLClassPtr CreateNewMLClass(const KKStr &_name, kkint32 _classId=-1)
Static method used to create a new instance of a MLClass object.
Definition: MLClass.cpp:100
kkuint32 Count() const
Definition: XmlStream.h:915
virtual TokenTypes TokenType()=0
virtual XmlTokenPtr GetNextToken(VolConstBool &cancelFlag, RunLog &log)
Definition: XmlStream.cpp:116
void FeatureEncoder::WriteXML ( const KKStr varName,
ostream &  o 
) const
virtual

Definition at line 687 of file FeatureEncoder.cpp.

References KKB::XmlTag::AddAtribute(), KKB::KKStr::Empty(), KKMLL::EncodingMethodToStr(), KKMLL::MLClass::Name(), KKB::XmlTag::tagEnd, KKB::XmlTag::tagStart, KKMLL::FeatureNumList::WriteXML(), KKB::XmlTag::WriteXML(), KKMLL::FileDesc::WriteXML(), KKB::KKStr::WriteXML(), and KKB::XmlTag::XmlTag().

690 {
    // Writes this FeatureEncoder's state to 'o' as a series of named XML elements enclosed by a
    // start tag and a matching end tag.  When 'varName' is not empty it is attached to the start
    // tag as the "VarName" attribute.
    //
    // NOTE(review): both tags use the section name "TrainingClassList" although this method belongs
    // to FeatureEncoder - this looks like a copy/paste leftover.  Confirm against whatever
    // reads/dispatches this section before renaming it, since the name is part of the persisted format.
691  XmlTag tagStart ("TrainingClassList", XmlTag::TagTypes::tagStart);
692  if (!varName.Empty ())
693  tagStart.AddAtribute ("VarName", varName);
694 
695  tagStart.WriteXML (o);
696  o << endl;
697 
    // Scalar members; each one is written under its own element name.
698  XmlElementInt32::WriteXML (codedNumOfFeatures, "CodedNumOfFeatures", o);
699  XmlElementDouble::WriteXML (c_Param, "c_Param", o);
700  XmlElementInt32::WriteXML (numEncodedFeatures, "NumEncodedFeatures", o);
701  XmlElementInt32::WriteXML (numOfFeatures, "NumOfFeatures", o);
702  XmlElementInt32::WriteXML (xSpaceNeededPerExample, "xSpaceNeededPerExample", o);
703 
    // Pointer members are written only when they are non-null.
    // NOTE(review): 'numOfFeatures' is passed as the element count for cardinalityDest,
    // destFeatureNums and srcFeatureNums - presumably each array holds at least that many
    // entries; verify against the code that allocates them.
704  if (cardinalityDest)
705  XmlElementArrayInt32::WriteXML (numOfFeatures, cardinalityDest, "CardinalityDest", o);
706 
    // Only the class names are written, not the MLClass objects themselves.
707  if (class1) class1->Name ().WriteXML ("Class1", o);
708  if (class2) class2->Name ().WriteXML ("Class2", o);
709  if (destFeatureNums)
710  XmlElementArrayInt32::WriteXML (numOfFeatures, destFeatureNums, "DestFeatureNums", o);
711 
712  if (fileDesc) fileDesc->WriteXML ("FileDesc", o);
713  if (destFileDesc) destFileDesc->WriteXML ("DestFileDesc", o);
714 
    // destWhatToDo is copied into a VectorInt32, casting each of the first 'numOfFeatures'
    // entries to kkint32, and written as one vector element.  ReadXML accepts "DestWhatToDo"
    // only when the vector's size equals numOfFeatures.
715  if (destWhatToDo)
716  {
717  VectorInt32 v;
718  for (kkint32 x = 0; x < numOfFeatures; ++x)
719  v.push_back ((kkint32)(destWhatToDo[x]));
720  XmlElementVectorInt32::WriteXML (v, "DestWhatToDo", o);
721  }
722 
    // The encoding method is stored as the string returned by EncodingMethodToStr.
723  EncodingMethodToStr (encodingMethod).WriteXML ("EncodingMethod", o);
724 
725  selectedFeatures.WriteXML ("selectedFeatures", o);
726 
727  if (srcFeatureNums)
728  XmlElementArrayInt32::WriteXML (numOfFeatures, srcFeatureNums, "SrcFeatureNums", o);
729 
    // Close the section with an end tag that uses the same section name as the start tag.
730  XmlTag tagEnd ("TrainingClassList", XmlTag::TagTypes::tagEnd);
731  tagEnd.WriteXML (o);
732  o << endl;
733 }
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
KKStr EncodingMethodToStr(SVM_EncodingMethod encodingMethod)
bool Empty() const
Definition: KKStr.h:241
std::vector< kkint32 > VectorInt32
Vector of signed 32 bit integers.
Definition: KKBaseTypes.h:144
void WriteXML(const KKStr &varName, std::ostream &o) const
Definition: KKStr.cpp:4420
void WriteXML(const KKStr &varName, std::ostream &o) const
Definition: FileDesc.cpp:875
void WriteXML(const KKStr &varName, std::ostream &o) const
kkint32 KKMLL::FeatureEncoder::XSpaceNeededPerExample ( )
inline

Definition at line 128 of file FeatureEncoder.h.

128 {return xSpaceNeededPerExample;}  // Accessor: returns the xSpaceNeededPerExample member as-is.

The documentation for this class was generated from the following files: