KSquare Utilities
FeatureEncoder.cpp
Go to the documentation of this file.
1 #include "FirstIncludes.h"
2 
3 #include <stdio.h>
4 #include <string>
5 #include <iostream>
6 #include <fstream>
7 #include <math.h>
8 #include <vector>
9 #include <sstream>
10 #include <string.h>
11 #include <string>
12 #include <iomanip>
13 #include "MemoryDebug.h"
14 using namespace std;
15 
16 
17 #include "KKBaseTypes.h"
18 #include "OSservices.h"
19 #include "RunLog.h"
20 using namespace KKB;
21 
22 
23 #include "FeatureEncoder.h"
24 #include "BinaryClassParms.h"
25 #include "FeatureNumList.h"
26 #include "FeatureVector.h"
27 #include "SvmWrapper.h"
28 using namespace KKMLL;
29 
30 
// NOTE(review): the line that introduces this member-initializer list (original line 31, presumably the
// default constructor header) is not present in this listing -- confirm against the real file.
//
// Puts the encoder into an empty state: every pointer member is NULL, every count is 0, 'c_Param' is 1.0
// and 'encodingMethod' is NoEncoding.  The body is empty, so none of the per-feature arrays are allocated.
32  cardinalityDest (NULL),
33  class1 (NULL),
34  class2 (NULL),
35  codedNumOfFeatures (0),
36  c_Param (1.0),
37  destFeatureNums (NULL),
38  destFileDesc (NULL),
39  destWhatToDo (NULL),
40  encodingMethod (SVM_EncodingMethod::NoEncoding),
41  fileDesc (NULL),
42  numEncodedFeatures (0),
43  numOfFeatures (0),
44  selectedFeatures (),
45  srcFeatureNums (NULL),
46  xSpaceNeededPerExample (0)
47 {
48 }
49 
50 
51 
52 /**
53  * @brief Constructs a Feature Encoder object and works out where each selected feature lands in the encoded space.
54  * @param[in] _fileDesc Describes the source data; each selected feature's attribute (name, type, cardinality) is looked up in it.
55  * @param[in] _class1 Stored in 'class1'; not otherwise used by this constructor.
56  * @param[in] _class2 Stored in 'class2'; not otherwise used by this constructor.
57  * @param[in] _selectedFeatures Source feature numbers to encode; copied into 'selectedFeatures'.
 * @param[in] _encodingMethod Selects how nominal/symbolic features are laid out in the encoded space.
 * @param[in] _c_Param Stored in 'c_Param'; not otherwise used by this constructor.
58  */
// NOTE(review): the first line of the constructor header (original line 59, carrying the '_fileDesc'
// parameter) is missing from this listing; the parameter list below starts at the second parameter.
60  MLClassPtr _class1,
61  MLClassPtr _class2,
62  const FeatureNumList& _selectedFeatures,
63  SVM_EncodingMethod _encodingMethod,
64  double _c_Param
65  ):
66 
67  cardinalityDest (NULL),
68  class1 (_class1),
69  class2 (_class2),
70  codedNumOfFeatures (0),
71  c_Param (_c_Param),
72  destFeatureNums (NULL),
73  destFileDesc (NULL),
74  destWhatToDo (NULL),
75  encodingMethod (_encodingMethod),
76  fileDesc (_fileDesc),
77  numEncodedFeatures (0),
78  numOfFeatures (0),
79  selectedFeatures (_selectedFeatures),
80  srcFeatureNums (NULL),
81  xSpaceNeededPerExample (0)
82 {
83  numOfFeatures = selectedFeatures.NumOfFeatures ();
84 
// Four parallel arrays, one entry per selected feature.  They are allocated with 'new[]'.
85  xSpaceNeededPerExample = 0;
86  srcFeatureNums = new kkint32[numOfFeatures];
87  cardinalityDest = new kkint32[numOfFeatures];
88  destFeatureNums = new kkint32[numOfFeatures];
89  destWhatToDo = new FeWhatToDo[numOfFeatures];
90 
// Names of the encoded fields, in encoded order; used below to build 'destFileDesc'.
91  VectorKKStr destFieldNames;
92 
93  kkint32 x;
94 
95  for (x = 0; x < numOfFeatures; x++)
96  {
// Defaults for this feature: it starts at the next free encoded slot, occupies one slot and is copied as is.
97  kkint32 srcFeatureNum = selectedFeatures[x];
98  srcFeatureNums [x] = srcFeatureNum;
99  destFeatureNums [x] = xSpaceNeededPerExample;
100  cardinalityDest [x] = 1;
101  destWhatToDo [x] = FeWhatToDo::FeAsIs;
102 
103  const Attribute& attribute = fileDesc->GetAAttribute (srcFeatureNum);
104  AttributeType attributeType = attribute.Type ();
105  kkint32 cardinality = attribute.Cardinality ();
106 
107  switch (encodingMethod)
108  {
// NOTE(review): the 'case' label for this branch (original line 109) is missing from this listing.
// In this branch a nominal/symbolic feature is expanded into one encoded slot per nominal value (FeBinary),
// each named "<attribute name>_<nominal value>"; any other feature takes a single slot unchanged.
110  if ((attributeType == AttributeType::Nominal) || (attributeType == AttributeType::Symbolic))
111  {
112  destWhatToDo [x] = FeWhatToDo::FeBinary;
113  cardinalityDest [x] = cardinality;
114  xSpaceNeededPerExample += cardinalityDest[x];
115  numEncodedFeatures += cardinalityDest[x];
116  for (kkint32 zed = 0; zed < cardinalityDest[x]; zed++)
117  {
118  KKStr fieldName = attribute.Name () + "_" + attribute.GetNominalValue (zed);
119  destFieldNames.push_back (fieldName);
120  }
121  }
122  else
123  {
124  xSpaceNeededPerExample++;
125  numEncodedFeatures++;
126  destWhatToDo [x] = FeWhatToDo::FeAsIs;
127  destFieldNames.push_back (attribute.Name ());
128  }
129  break;
130 
131 
// NOTE(review): the 'case' label for this branch (original line 132) is missing from this listing.
// Every feature takes exactly one encoded slot here; nominal/symbolic ones are marked FeScale.
// NOTE(review): 'cardinalityDest[x]' is left at 1 in this branch while the FeScale encoders divide the
// feature value by 'cardinalityDest[x]' -- confirm whether 'cardinality' was meant to be stored here.
133  xSpaceNeededPerExample++;
134  numEncodedFeatures++;
135  if ((attributeType == AttributeType::Nominal) ||
136  (attributeType == AttributeType::Symbolic)
137  )
138  destWhatToDo [x] = FeWhatToDo::FeScale;
139  else
140  destWhatToDo [x] = FeWhatToDo::FeAsIs;
141 
142  destFieldNames.push_back (attribute.Name ());
143  break;
144 
145 
// NOTE(review): original line 146 (possibly another 'case' label sharing this branch) is missing from this listing.
// Fallback: one encoded slot per feature, value copied unchanged.
147  default:
148  xSpaceNeededPerExample++;
149  numEncodedFeatures++;
150  destWhatToDo [x] = FeWhatToDo::FeAsIs;
151  destFieldNames.push_back (attribute.Name ());
152  break;
153  }
154  }
155 
// At this point 'xSpaceNeededPerExample' equals the total number of encoded slots.
156  codedNumOfFeatures = xSpaceNeededPerExample;
157 
158  destFileDesc = FileDesc::NewContinuousDataOnly (destFieldNames);
159 
160  xSpaceNeededPerExample++; // Add one more for the terminating (-1)
161 }
162 
163 
164 
165 
166 
167 
168 
169 /**
170  * @brief Frees any memory allocated by, and owned by the FeatureEncoder
171  */
173 {
174  delete srcFeatureNums;
175  delete destFeatureNums;
176  delete cardinalityDest;
177  delete destWhatToDo;
178 }
179 
180 
181 
183 {
184  kkint32 memoryConsumedEstimated = sizeof (FeatureEncoder)
185  + selectedFeatures.MemoryConsumedEstimated ()
186  + numOfFeatures * sizeof (kkint32);
187 
188  if (cardinalityDest)
189  memoryConsumedEstimated += 3 * sizeof (kkint32) * numOfFeatures; // cardinalityDest + destFeatureNums + srcFeatureNums
190 
191  // We do not own 'destFileDesc' and 'fileDesc'
192  if (destWhatToDo)
193  memoryConsumedEstimated += sizeof (FeWhatToDo) * numOfFeatures;
194 
195  return memoryConsumedEstimated;
196 } /* MemoryConsumedEstimated */
197 
198 
199 
200 
202 {
203  FileDescPtr newFileDesc = new FileDesc ();
204 
205  if (o)
206  {
207  *o << endl
208  << "Orig" << "\t" << "Orig" << "\t" << "Field" << "\t" << "Encoded" << "\t" << "Encoded" << endl;
209  *o << "FieldNum" << "\t" << "FieldName" << "\t" << "Type" << "\t" << "FieldNum" << "\t" << "FieldName" << endl;
210  }
211 
212  kkint32 x;
213 
214  bool alreadyExist;
215 
216  for (x = 0; x < numOfFeatures; x++)
217  {
218  kkint32 srcFeatureNum = srcFeatureNums[x];
219  kkint32 y = destFeatureNums[x];
220 
221  if (y >= numEncodedFeatures)
222  {
223  KKStr errMsg (128);
224  errMsg << "FeatureEncoder::CreateEncodedFileDesc numEncodedFeatures [" << numEncodedFeatures << "] exceeded.";
225  cerr << endl
226  << "FeatureEncoder::CreateEncodedFileDesc *** ERROR ***" << endl
227  << " " << errMsg << endl
228  << endl;
229  throw KKException (errMsg);
230  exit (-1);
231  }
232 
233  KKStr origFieldDesc = StrFormatInt (srcFeatureNum, "zz0") + "\t" +
234  fileDesc->FieldName (srcFeatureNum) + "\t" +
235  fileDesc->TypeStr (srcFeatureNum);
236 
237 
238  switch (destWhatToDo[x])
239  {
240  case FeWhatToDo::FeAsIs:
241  {
242  newFileDesc->AddAAttribute (fileDesc->FieldName (x), AttributeType::Numeric, alreadyExist);
243  if (o)
244  {
245  *o << origFieldDesc << "\t"
246  << y << "\t"
247  << fileDesc->FieldName (x)
248  << endl;
249  }
250  }
251  break;
252 
254  {
255  for (kkint32 z = 0; z < cardinalityDest[x]; z++)
256  {
257  KKStr nominalValue = fileDesc->GetNominalValue (srcFeatureNums[x], z);
258  KKStr encodedName = fileDesc->FieldName (x) + "_" + nominalValue;
259  newFileDesc->AddAAttribute (encodedName, AttributeType::Numeric, alreadyExist);
260  if (o)
261  {
262  *o << origFieldDesc << "\t"
263  << y << "\t"
264  << encodedName
265  << endl;
266  }
267 
268  y++;
269  }
270  }
271 
272  break;
273 
274  case FeWhatToDo::FeScale:
275  {
276  newFileDesc->AddAAttribute (fileDesc->FieldName (x), AttributeType::Numeric, alreadyExist);
277  if (o)
278  {
279  *o << origFieldDesc << "\t"
280  << y << "\t"
281  << fileDesc->FieldName (x)
282  << endl;
283  }
284  }
285  break;
286  }
287  }
288 
289  return newFileDesc;
290 } /* CreateEncodedFileDesc */
291 
292 
293 
294 
295 
296 
297 /**
298  * @brief Converts a single example into the svm_problem format
299  * @param[in] example That we're converting
300  */
301 XSpacePtr FeatureEncoder::EncodeAExample (FeatureVectorPtr example)
302 {
303  // XSpacePtr xSpace = (struct svm_node*)malloc (xSpaceNeededPerExample * sizeof (struct svm_node));
304  XSpacePtr xSpace = new svm_node[xSpaceNeededPerExample];
305  kkint32 xSpaceUsed = 0;
306  EncodeAExample (example, xSpace, xSpaceUsed);
307  return xSpace;
308 } /* EncodeAExample */
309 
310 
311 
312 
313 FeatureVectorPtr FeatureEncoder::EncodeAExample (FileDescPtr encodedFileDesc,
314  FeatureVectorPtr src
315  )
316 {
317  FeatureVectorPtr encodedExample = new FeatureVector (numEncodedFeatures);
318  encodedExample->MLClass (src->MLClass ());
319  encodedExample->PredictedClass (src->PredictedClass ());
320  //encodedExample->Version (src->Version ());
321  encodedExample->TrainWeight (src->TrainWeight ());
322 
323  const float* featureData = src->FeatureData ();
324  kkint32 x;
325 
326  for (x = 0; x < numOfFeatures; x++)
327  {
328  float featureVal = featureData [srcFeatureNums[x]];
329  kkint32 y = destFeatureNums[x];
330 
331  switch (destWhatToDo[x])
332  {
333  case FeWhatToDo::FeAsIs:
334  {
335  encodedExample->AddFeatureData (y, featureVal);
336  }
337  break;
338 
340  {
341  for (kkint32 z = 0; z < cardinalityDest[x]; z++)
342  {
343  float bVal = ((kkint32)featureVal == z);
344  encodedExample->AddFeatureData (y, bVal);
345  y++;
346  }
347  }
348 
349  break;
350 
351  case FeWhatToDo::FeScale:
352  {
353  encodedExample->AddFeatureData (y, (featureVal / (float)cardinalityDest[x]));
354  }
355  break;
356  }
357  }
358 
359  return encodedExample;
360 } /* EncodeAExample */
361 
362 
363 
364 
365 
366 FeatureVectorListPtr FeatureEncoder::EncodeAllExamples (const FeatureVectorListPtr srcData)
367 {
368  FileDescPtr encodedFileDesc = CreateEncodedFileDesc (NULL);
369 
370  FeatureVectorListPtr encodedExamples = new FeatureVectorList (encodedFileDesc, true);
371 
372  FeatureVectorList::const_iterator idx;
373 
374  for (idx = srcData->begin (); idx != srcData->end (); idx++)
375  {
376  const FeatureVectorPtr srcExample = *idx;
377  FeatureVectorPtr encodedExample = EncodeAExample (encodedFileDesc, srcExample);
378  encodedExamples->PushOnBack (encodedExample);
379  }
380 
381  return encodedExamples;
382 } /* EncodeAllExamples */
383 
384 
385 
386 
387 /**
388  * @brief Converts a single example into the svm_problem format.
389  * @param[in] The example That we're converting
390  * @param[in] The row kkint32 he svm_problem structure that the converted data will be stored
391  */
392 void FeatureEncoder::EncodeAExample (FeatureVectorPtr example,
393  svm_node* xSpace,
394  kkint32& xSpaceUsed
395  )
396 {
397  const float* featureData = example->FeatureData ();
398  kkint32 x;
399 
400  xSpaceUsed = 0;
401 
402  for (x = 0; x < numOfFeatures; x++)
403  {
404  float featureVal = featureData [srcFeatureNums[x]];
405  kkint32 y = destFeatureNums[x];
406 
407  if (y >= xSpaceNeededPerExample)
408  {
409  KKStr errMsg (128);
410  errMsg << "FeatureEncoder::EncodeAExample ***ERROR*** xSpaceNeededPerExample[" << xSpaceNeededPerExample << "].";
411  cerr << endl
412  << "FeatureEncoder::EncodeAExample *** ERROR ***" << endl
413  << " " << errMsg << endl
414  << endl;
415  throw KKException (errMsg);
416  }
417 
418  switch (destWhatToDo[x])
419  {
420  case FeWhatToDo::FeAsIs:
421  {
422  if (featureVal != 0.0)
423  {
424  xSpace[xSpaceUsed].index = y;
425  xSpace[xSpaceUsed].value = featureVal;
426  xSpaceUsed++;
427  }
428  }
429  break;
430 
432  {
433  for (kkint32 z = 0; z < cardinalityDest[x]; z++)
434  {
435  float bVal = ((kkint32)featureVal == z);
436  if (bVal != 0.0)
437  {
438  xSpace[xSpaceUsed].index = y;
439  xSpace[xSpaceUsed].value = bVal;
440  xSpaceUsed++;
441  }
442  y++;
443  }
444  }
445 
446  break;
447 
448  case FeWhatToDo::FeScale:
449  {
450  if (featureVal != (float)0.0)
451  {
452  xSpace[xSpaceUsed].index = y;
453  xSpace[xSpaceUsed].value = featureVal / (float)cardinalityDest[x];
454  xSpaceUsed++;
455  }
456  }
457  break;
458  }
459  }
460 
461  xSpace[xSpaceUsed].index = -1;
462  xSpace[xSpaceUsed].value = -1;
463  xSpaceUsed++;
464 } /* EncodeAExample */
465 
466 
467 
468 kkint32 FeatureEncoder::DetermineNumberOfNeededXspaceNodes (FeatureVectorListPtr src) const
469 {
470  kkint32 xSpaceNodesNeeded = 0;
471  FeatureVectorList::const_iterator idx;
472  for (idx = src->begin (); idx != src->end (); ++idx)
473  {
474  FeatureVectorPtr fv = *idx;
475  const float* featureData = fv->FeatureData ();
476 
477  for (kkint32 x = 0; x < numOfFeatures; x++)
478  {
479  float featureVal = featureData [srcFeatureNums[x]];
480  kkint32 y = destFeatureNums[x];
481 
482  switch (destWhatToDo[x])
483  {
484  case FeWhatToDo::FeAsIs:
485  if (featureVal != 0.0)
486  xSpaceNodesNeeded++;
487  break;
488 
490  for (kkint32 z = 0; z < cardinalityDest[x]; z++)
491  {
492  float bVal = ((kkint32)featureVal == z);
493  if (bVal != 0.0)
494  xSpaceNodesNeeded++;
495  y++;
496  }
497  break;
498 
499  case FeWhatToDo::FeScale:
500  if (featureVal != (float)0.0)
501  xSpaceNodesNeeded++;
502  break;
503  }
504  }
505  xSpaceNodesNeeded++;
506  }
507 
508  return xSpaceNodesNeeded;
509 } /* DetermineNumberOfNeededXspaceNodes */
510 
511 
512 
514  (FeatureVectorListPtr src,
515  ClassAssignments& assignments,
516  XSpacePtr& xSpace,
517  kkint32& totalxSpaceUsed,
518  struct svm_problem& prob,
519  RunLog& log
520  )
521 
522 {
523  FeatureVectorListPtr compressedExamples = NULL;
524  FeatureVectorListPtr examplesToUseFoXSpace = NULL;
525  kkint32 xSpaceUsed = 0;
526 
527  totalxSpaceUsed = 0;
528 
529  examplesToUseFoXSpace = src;
530 
531  kkint32 numOfExamples = examplesToUseFoXSpace->QueueSize ();
532  //kkint32 elements = numOfExamples * xSpaceNeededPerExample;
533 
534  prob.l = numOfExamples;
535  prob.y = (double*)malloc (prob.l * sizeof (double));
536  prob.x = (struct svm_node **) malloc (prob.l * sizeof (struct svm_node*));
537  prob.index = new kkint32[prob.l];
538  prob.exampleNames.clear ();
539 
540  kkint32 numNeededXspaceNodes = DetermineNumberOfNeededXspaceNodes (examplesToUseFoXSpace);
541 
542  kkint32 totalBytesForxSpaceNeeded = (numNeededXspaceNodes + 10) * sizeof (struct svm_node); // I added '10' to elements because I am paranoid
543 
544  xSpace = (struct svm_node*) malloc (totalBytesForxSpaceNeeded);
545  if (xSpace == NULL)
546  {
547  log.Level (-1) << endl << endl << endl
548  << " FeatureEncoder::Compress *** Failed to allocates space for 'xSpace' ****" << endl
549  << endl
550  << " Space needed [" << totalBytesForxSpaceNeeded << "]" << endl
551  << " Num of Examples [" << numOfExamples << "]" << endl
552  << " Num XSpaceNodesNeeded [" << numNeededXspaceNodes << "]" << endl
553  << endl;
554  // we sill have to allocate space for each individual training example separately.
555  //throw "FeatureEncoder::Compress Allocation of memory for xSpace Failed.";
556  }
557 
558  prob.W = NULL;
559 
560  kkint32 i = 0;
561 
562  FeatureVectorPtr example = NULL;
563  MLClassPtr lastMlClass = NULL;
564  kkint16 lastClassNum = -1;
565 
566  kkint32 bytesOfxSpacePerExample = xSpaceNeededPerExample * sizeof (struct svm_node);
567 
568  for (i = 0; i < prob.l; i++)
569  {
570  if (totalxSpaceUsed > numNeededXspaceNodes)
571  {
572  log.Level (-1) << endl << endl
573  << "FeatureEncoder::Compress ***ERROR*** We have exceeded the number of XSpace nodes allocated." << endl
574  << endl;
575  }
576 
577  example = examplesToUseFoXSpace->IdxToPtr (i);
578 
579  if (example->MLClass () != lastMlClass)
580  {
581  lastMlClass = example->MLClass ();
582  lastClassNum = assignments.GetNumForClass (lastMlClass);
583  }
584 
585  prob.y[i] = lastClassNum;
586  prob.index[i] = i;
587  prob.exampleNames.push_back (osGetRootName (example->ExampleFileName ()));
588 
589  if (prob.W)
590  {
591  prob.W[i] = example->TrainWeight () * c_Param;
592  if (example->TrainWeight () <= 0.0f)
593  {
594  log.Level (-1) << endl
595  << "FeatureEncoder::EncodeIntoSparseMatrix ***ERROR*** Example[" << example->ExampleFileName () << "]" << endl
596  << " has a TrainWeight value of 0 or less defaulting to 1.0" << endl
597  << endl;
598  prob.W[i] = 1.0 * c_Param;
599  }
600  }
601 
602  if (xSpace == NULL)
603  {
604  struct svm_node* xSpaceThisExample = (struct svm_node*) malloc (bytesOfxSpacePerExample);
605  prob.x[i] = xSpaceThisExample;
606  EncodeAExample (example, prob.x[i], xSpaceUsed);
607  if (xSpaceUsed < xSpaceNeededPerExample)
608  {
609  kkint32 bytesNeededForThisExample = xSpaceUsed * sizeof (struct svm_node);
610  struct svm_node* smallerXSpaceThisExample = (struct svm_node*) malloc (bytesNeededForThisExample);
611  memcpy (smallerXSpaceThisExample, xSpaceThisExample, bytesNeededForThisExample);
612  free (xSpaceThisExample);
613  prob.x[i] = smallerXSpaceThisExample;
614  }
615  }
616  else
617  {
618  prob.x[i] = &xSpace[totalxSpaceUsed];
619  EncodeAExample (example, prob.x[i], xSpaceUsed);
620  }
621  totalxSpaceUsed += xSpaceUsed;
622  }
623 
624  delete compressedExamples;
625  return;
626 } /* Compress */
627 
628 
629 
630 
631 
632 /**
633  * @brief Left over from BitReduction days; removed all code except that which processed the NO bit reduction option.
634  * @param[in] examples_list The list of examples you want to attempt to reduce
635  * @param[out] compressed_examples_list The reduced list of examples
636  */
637 void FeatureEncoder::CompressExamples (FeatureVectorListPtr srcExamples,
638  FeatureVectorListPtr compressedExamples,
639  ClassAssignments& assignments
640  )
641 {
642  double time_before, time_after;
643  time_before = osGetSystemTimeUsed ();
644  compressedExamples->AddQueue (*srcExamples);
645  time_after = osGetSystemTimeUsed ();
646  compressedExamples->Owner (false);
647  return;
648 } /* CompressExamples */
649 
650 
651 
652 
654 {
655  if (srcData.AllFieldsAreNumeric ())
656  return srcData.DuplicateListAndContents ();
657 
658  FeatureVectorListPtr encodedFeatureVectorList = new FeatureVectorList (destFileDesc, true);
659 
660  FeatureVectorList::iterator idx;
661  for (idx = srcData.begin (); idx != srcData.end (); idx++)
662  {
663  FeatureVectorPtr srcExample = *idx;
664  XSpacePtr encodedData = EncodeAExample (srcExample);
665 
666  kkint32 zed = 0;
667  FeatureVectorPtr encodedFeatureVector = new FeatureVector (codedNumOfFeatures);
668  while (encodedData[zed].index != -1)
669  {
670  encodedFeatureVector->AddFeatureData (encodedData[zed].index, (float)encodedData[zed].value);
671  zed++;
672  }
673 
674  encodedFeatureVector->MLClass (srcExample->MLClass ());
675  encodedFeatureVectorList->PushOnBack (encodedFeatureVector);
676 
677  delete encodedData;
678  encodedData = NULL;
679  }
680 
681  return encodedFeatureVectorList;
682 } /* CreateEncodedFeatureVector */
683 
684 
685 
686 
687 void FeatureEncoder::WriteXML (const KKStr& varName,
688  ostream& o
689  ) const
690 {
691  XmlTag tagStart ("TrainingClassList", XmlTag::TagTypes::tagStart);
692  if (!varName.Empty ())
693  tagStart.AddAtribute ("VarName", varName);
694 
695  tagStart.WriteXML (o);
696  o << endl;
697 
698  XmlElementInt32::WriteXML (codedNumOfFeatures, "CodedNumOfFeatures", o);
699  XmlElementDouble::WriteXML (c_Param, "c_Param", o);
700  XmlElementInt32::WriteXML (numEncodedFeatures, "NumEncodedFeatures", o);
701  XmlElementInt32::WriteXML (numOfFeatures, "NumOfFeatures", o);
702  XmlElementInt32::WriteXML (xSpaceNeededPerExample, "xSpaceNeededPerExample", o);
703 
704  if (cardinalityDest)
705  XmlElementArrayInt32::WriteXML (numOfFeatures, cardinalityDest, "CardinalityDest", o);
706 
707  if (class1) class1->Name ().WriteXML ("Class1", o);
708  if (class2) class2->Name ().WriteXML ("Class2", o);
709  if (destFeatureNums)
710  XmlElementArrayInt32::WriteXML (numOfFeatures, destFeatureNums, "DestFeatureNums", o);
711 
712  if (fileDesc) fileDesc->WriteXML ("FileDesc", o);
713  if (destFileDesc) destFileDesc->WriteXML ("DestFileDesc", o);
714 
715  if (destWhatToDo)
716  {
717  VectorInt32 v;
718  for (kkint32 x = 0; x < numOfFeatures; ++x)
719  v.push_back ((kkint32)(destWhatToDo[x]));
720  XmlElementVectorInt32::WriteXML (v, "DestWhatToDo", o);
721  }
722 
723  EncodingMethodToStr (encodingMethod).WriteXML ("EncodingMethod", o);
724 
725  selectedFeatures.WriteXML ("selectedFeatures", o);
726 
727  if (srcFeatureNums)
728  XmlElementArrayInt32::WriteXML (numOfFeatures, srcFeatureNums, "SrcFeatureNums", o);
729 
730  XmlTag tagEnd ("TrainingClassList", XmlTag::TagTypes::tagEnd);
731  tagEnd.WriteXML (o);
732  o << endl;
733 }
734 
735 
736 
738  XmlTagConstPtr tag,
739  VolConstBool& cancelFlag,
740  RunLog& log
741  )
742 {
743  XmlTokenPtr t = s.GetNextToken (cancelFlag, log);
744  while (t && (!cancelFlag))
745  {
747  {
748  XmlElementPtr e = dynamic_cast<XmlElementPtr> (t);
749  if (e)
750  {
751  KKStr varName = e->VarName ();
752 
753  if (varName.EqualIgnoreCase ("CodedNumOfFeatures"))
754  codedNumOfFeatures= e->ToInt32 ();
755 
756  else if (varName.EqualIgnoreCase ("C_Param"))
757  c_Param = e->ToDouble ();
758 
759  else if (varName.EqualIgnoreCase ("NumEncodedFeatures"))
760  numEncodedFeatures = e->ToInt32 ();
761 
762  else if (varName.EqualIgnoreCase ("NumOfFeatures"))
763  numOfFeatures = e->ToInt32 ();
764 
765  else if (varName.EqualIgnoreCase ("xSpaceNeededPerExample"))
766  xSpaceNeededPerExample = e->ToInt32 ();
767 
768  else if (typeid (*e) == typeid (XmlElementArrayInt32))
769  {
770  XmlElementArrayInt32Ptr xmlArray = dynamic_cast<XmlElementArrayInt32Ptr> (e);
771  kkuint32 count = xmlArray->Count ();
772  if (count != numOfFeatures)
773  {
774  log.Level (-1) << endl
775  << "FeatureEncoder::ReadXML ***ERROR*** Variable[" << varName << "] Invalid Length[" << count << "] Expected[" << numOfFeatures << "]" << endl
776  << endl;
777  }
778  else
779  {
780  if (varName.EqualIgnoreCase ("CardinalityDest"))
781  {
782  delete cardinalityDest;
783  cardinalityDest = xmlArray->TakeOwnership ();
784  }
785 
786  else if (varName.EqualIgnoreCase ("DestFeatureNums"))
787  {
788  delete destFeatureNums;
789  destFeatureNums = xmlArray->TakeOwnership ();
790  }
791 
792  else if (varName.EqualIgnoreCase ("SrcFeatureNums"))
793  {
794  delete srcFeatureNums;
795  srcFeatureNums = xmlArray->TakeOwnership ();
796  }
797  }
798  }
799 
800  else if (varName.EqualIgnoreCase ("Class1"))
802 
803  else if (varName.EqualIgnoreCase ("Class2"))
805 
806  else if (varName.EqualIgnoreCase ("FileDesc") && (typeid (*e) == typeid (XmlElementFileDesc)))
807  fileDesc = dynamic_cast<XmlElementFileDescPtr> (e)->Value ();
808 
809  else if (varName.EqualIgnoreCase ("DestFileDesc") && (typeid (*e) == typeid (XmlElementFileDesc)))
810  destFileDesc = dynamic_cast<XmlElementFileDescPtr> (e)->Value ();
811 
812  else if (varName.EqualIgnoreCase ("DestWhatToDo") && (typeid (*e) == typeid (XmlElementVectorInt32)))
813  {
814  XmlElementVectorInt32Ptr xmlVect = dynamic_cast<XmlElementVectorInt32Ptr> (e);
815  if (xmlVect && xmlVect->Value ())
816  {
817  const VectorInt32& v = *(xmlVect->Value ());
818  if (v.size () != numOfFeatures)
819  {
820  log.Level (-1) << endl
821  << "FeatureEncoder::ReadXML ***ERROR*** Variable[" << varName << "] Invalid Size[" << v.size () << "] Expected[" << numOfFeatures << "]." << endl
822  << endl;
823  }
824  else
825  {
826  delete destWhatToDo;
827  destWhatToDo = new FeWhatToDo[v.size ()];
828  for (kkuint32 x = 0; x < v.size (); ++x)
829  destWhatToDo[x] = (FeWhatToDo)v[x];
830  }
831  }
832  }
833 
834  else if (varName.EqualIgnoreCase ("EncodingMethod"))
835  encodingMethod = EncodingMethodFromStr (e->ToKKStr ());
836 
837  else
838  {
839  log.Level (-1) << "XmlElementTrainingClassList ***ERROR*** Un-expected Section Element[" << e->SectionName () << "]" << endl;
840  }
841  }
842  }
843 
844  delete t;
845  t = s.GetNextToken (cancelFlag, log);
846  }
847  delete t;
848  t = NULL;
849 } /* ReadXML */
__int16 kkint16
16 bit signed integer.
Definition: KKBaseTypes.h:85
virtual void ReadXML(XmlStream &s, XmlTagConstPtr tag, VolConstBool &cancelFlag, RunLog &log)
KKStr(kkint32 size)
Creates a KKStr object that pre-allocates space for 'size' characters.
Definition: KKStr.cpp:655
XmlTag(const KKStr &_name, TagTypes _tagType)
Definition: XmlStream.cpp:586
KKStr TypeStr(kkint32 fieldNum) const
Definition: FileDesc.cpp:378
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
MLClass * MLClassPtr
Definition: MLClass.h:46
virtual double ToDouble() const
Definition: XmlStream.h:315
void AddQueue(const FeatureVectorList &examplesToAdd)
Add the contents of 'examplesToAdd' to the end of this list.
Provides a detailed description of the attributes of a dataset.
Definition: FileDesc.h:72
bool EqualIgnoreCase(const char *s2) const
Definition: KKStr.cpp:1257
FeatureVectorListPtr CreateEncodedFeatureVector(FeatureVectorList &srcData)
__int32 kkint32
Definition: KKBaseTypes.h:88
const KKStr & GetNominalValue(kkint32 code) const
Returns the nominal value for the given ordinal value.
Definition: Attribute.cpp:143
bool AllFieldsAreNumeric() const
Returns true if all fields are numeric, no nominal fields.
void AddAAttribute(const KKB::KKStr &_name, KKMLL::AttributeType _type, bool &alreadyExists)
Definition: FileDesc.cpp:169
const float * FeatureData() const
Returns as a pointer to the feature data itself.
virtual KKStr ToKKStr() const
Definition: XmlStream.h:314
Keeps track of selected features.
FeatureVector(kkint32 _numOfFeatures)
KKStr EncodingMethodToStr(SVM_EncodingMethod encodingMethod)
kkint16 GetNumForClass(MLClassPtr mlClass) const
Represents a "Class" in the Machine Learning Sense.
Definition: MLClass.h:52
SVM_EncodingMethod EncodingMethodFromStr(const KKStr &encodingMethodStr)
Definition: SVMparam.cpp:899
virtual FeatureVectorListPtr DuplicateListAndContents() const
Creates a duplicate of the list and also duplicates its contents.
void TrainWeight(float _trainWeight)
Assign a specific example a higher weight for training purposes.
kkint32 l
Definition: svm.h:48
SVM_EncodingMethod
Definition: SVMparam.h:46
KKStr operator+(const char *right) const
Definition: KKStr.cpp:3986
~FeatureEncoder()
Frees any memory allocated by, and owned by the FeatureEncoder.
XmlToken * XmlTokenPtr
Definition: XmlStream.h:18
const KKStr & FieldName(kkint32 fieldNum) const
Definition: FileDesc.cpp:387
FileDescPtr CreateEncodedFileDesc(ostream *o)
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
void AddFeatureData(kkint32 _featureNum, float _featureData)
MLClassPtr PredictedClass() const
FeatureNumList(const FeatureNumList &featureNumList)
Copy constructor.
FeatureVectorListPtr EncodeAllExamples(const FeatureVectorListPtr srcData)
FeatureVectorList(FileDescPtr _fileDesc, bool _owner)
Will create a new empty list of FeatureVector's.
describes a single Feature, Type and possible values.
Definition: Attribute.h:74
double value
Definition: svm.h:39
Container class for FeatureVector derived objects.
virtual kkint32 ToInt32() const
Definition: XmlStream.h:317
KKTHread * KKTHreadPtr
kkuint16 operator[](kkint32 idx) const
Returns back the selected feature.
kkint16 index
Definition: svm.h:38
const KKStr & GetNominalValue(kkint32 fieldNum, kkint32 code) const
Definition: FileDesc.cpp:395
virtual void WriteXML(const KKStr &varName, ostream &o) const
XmlElement * XmlElementPtr
Definition: XmlStream.h:21
void AddAtribute(const KKStr &attributeName, const KKStr &attributeValue)
Definition: XmlStream.cpp:602
kkint32 NumOfFeatures() const
struct svm_node ** x
Definition: svm.h:52
bool Empty() const
Definition: KKStr.h:241
XmlTag const * XmlTagConstPtr
Definition: KKStr.h:45
double * W
Definition: svm.h:53
Manages the reading and writing of objects in a simple XML format. For a class to be supported by Xml...
Definition: XmlStream.h:46
XSpacePtr EncodeAExample(FeatureVectorPtr example)
Converts a single example into the svm_problem format.
Binds MLClass objects to the appropriate number that the Learning Algorithm expects.
double * y
Definition: svm.h:49
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
std::vector< kkint32 > VectorInt32
Vector of signed 32 bit integers.
Definition: KKBaseTypes.h:144
AttributeType
Definition: Attribute.h:36
void EncodeIntoSparseMatrix(FeatureVectorListPtr src, ClassAssignments &assignments, XSpacePtr &xSpace, kkint32 &totalxSpaceUsed, struct svm_problem &prob, RunLog &log)
Compresses 'src' examples, allocating new 'xSpace' data structure.
MLClassPtr MLClass() const
Class that this example is assigned to.
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
Definition: FeatureVector.h:74
FileDesc * FileDescPtr
virtual const KKStr & VarName() const
Definition: XmlStream.cpp:794
KKStr StrFormatInt(kkint32 val, const char *mask)
Definition: KKStr.cpp:5004
const KKStr & Name() const
Definition: MLClass.h:154
AttributeType Type() const
Definition: Attribute.h:133
void WriteXML(const KKStr &varName, std::ostream &o) const
Definition: KKStr.cpp:4420
KKStr operator+(const KKStr &right) const
Definition: KKStr.cpp:3998
kkint32 * index
Definition: svm.h:51
static FileDescPtr NewContinuousDataOnly(VectorKKStr &_fieldNames)
Creates a simple FileDesc that consists of continuous data only.
Definition: FileDesc.cpp:116
static MLClassPtr CreateNewMLClass(const KKStr &_name, kkint32 _classId=-1)
Static method used to create a new instance of a MLClass object.
Definition: MLClass.cpp:100
FeatureVectorPtr EncodeAExample(FileDescPtr encodedFileDesc, FeatureVectorPtr src)
double osGetSystemTimeUsed()
Returns the number of CPU seconds used by current process.
void CompressExamples(FeatureVectorListPtr srcExamples, FeatureVectorListPtr compressedExamples, ClassAssignments &assignments)
Left over from BitReduction days; removed all code except that which processed the NO bit reduction o...
void PredictedClass(MLClassPtr _predictedClass)
Definition: FeatureVector.h:78
void WriteXML(std::ostream &o)
Definition: XmlStream.cpp:723
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
FeatureEncoder(FileDescPtr _fileDesc, MLClassPtr _class1, MLClassPtr _class2, const FeatureNumList &_selectedFeatures, SVM_EncodingMethod _encodingMethod, double _c_Param)
Constructs a Feature Encoder object.
float TrainWeight() const
const KKStr & Name() const
Definition: Attribute.h:122
void EncodeAExample(FeatureVectorPtr example, svm_node *xSpace, kkint32 &xSpaceUsed)
Converts a single example into the svm_problem format.
virtual TokenTypes TokenType()=0
FileDescPtr Value() const
Definition: FileDesc.cpp:947
virtual XmlTokenPtr GetNextToken(VolConstBool &cancelFlag, RunLog &log)
Definition: XmlStream.cpp:116
KKException(const KKStr &_exceptionStr)
Definition: KKException.cpp:45
kkint32 Cardinality() const
Returns back the cardinality of the attribute; the number of possible values it can take...
Definition: Attribute.cpp:173
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
kkint32 MemoryConsumedEstimated() const
void WriteXML(const KKStr &varName, std::ostream &o) const
Definition: FileDesc.cpp:875
kkint32 MemoryConsumedEstimated() const
void WriteXML(const KKStr &varName, std::ostream &o) const
const KKMLL::Attribute & GetAAttribute(kkint32 fieldNum) const
Definition: FileDesc.cpp:210
XmlElementFileDesc * XmlElementFileDescPtr
Definition: FileDesc.h:337
volatile const bool VolConstBool
Definition: KKBaseTypes.h:163