KSquare Utilities
FeatureVector.h
Go to the documentation of this file.
1 #if !defined(_FEATUREVECTOR_)
2 #define _FEATUREVECTOR_
3 
4 
5 /**
6  *@class KKMLL::FeatureVector
7  *@brief Represents a Feature Vector of a single example, labeled or unlabeled
8  *@author Kurt Kramer
9  *@details Used for the representation of a Single example. You create an instance of
10  * this object for each single feature vector. You can subclass from this Class
11  * to make a specialized FeatureVector as in the PostLarvaeFV class. Besides
12  * keeping track of feature data this class will also track other fields such as
13  * ExampleFileName which should indicate where the FeatureVector was derived from,
14  * probability, breakTie, and others.
15  *@see FeatureVectorList
16  *@see PostLarvaeFV
17  *@see FeatureFileIO
18  */
19 
20 
21 #include "KKStr.h"
22 #include "KKQueue.h"
23 #include "RunLog.h"
24 
25 #include "Attribute.h"
26 #include "ClassStatistic.h"
27 #include "FeatureFileIO.h"
28 #include "FeatureNumList.h"
29 #include "FileDesc.h"
30 #include "MLClass.h"
31 
32 
33 namespace KKMLL
34 {
35 
36 
38  class FeatureFileIO;
40  #endif
41 
42 #if !defined(_ClassProbList_Defined_)
43  class ClassProbList;
45 #endif
46 
47 
48  /**
49  *@class FeatureVector
50  *@brief Represents a Feature Vector of a single example, labeled or unlabeled
51  *@details Used for the representation of a Single example. You create an instance of this object for each single feature
52  * vector. You can subclass from this Class to make a specialized FeatureVector as in the PostLarvaeFV class. Besides
53  * keeping track of feature data this class will also track other fields such as ExampleFileName which should indicate
54  *where the FeatureVector was derived from, probability, breakTie, and others.
55  *@see FeatureVectorList
56  *@see PostLarvaeFV
57  *@see FeatureFileIO
58  */
60  {
61  public:
62  typedef float FVFloat;
64 
65  FeatureVector (kkint32 _numOfFeatures);
66 
67  FeatureVector (const FeatureVector& _example);
68 
69  virtual ~FeatureVector ();
70 
71  virtual FeatureVectorPtr Duplicate () const;
72 
73  void BreakTie (float _breakTie) {breakTie = _breakTie;} /**< @brief Update the BreakTie value. */
74  void MLClass (MLClassPtr _mlClass) {mlClass = _mlClass;} /**< @brief Assign a class to this example. */
75  void ExampleFileName (const KKStr& _exampleFileName) {exampleFileName = _exampleFileName;} /**< @brief Name of source of feature vector, ex: file name of image that the feature vector was computed from. */
76  void MissingData (bool _missingData) {missingData = _missingData;} /**< @brief True indicates that not all the feature data was present when this example was loaded from a data file. */
77  void OrigSize (float _origSize) {origSize = _origSize;} /**< @brief The value of Feature[0] before normalization. */
78  void PredictedClass (MLClassPtr _predictedClass) {predictedClass = _predictedClass;}
79  void Probability (float _probability) {probability = _probability;} /**< @brief Assign a prediction probability to this example. */
80  void Version (kkint16 _version) {version = _version;}
81 
82  /**
83  *@brief Assign a value to a specific feature number for the feature vector.
84  *@details This method will validate that '_featureNum' is not out of range (0 - 'numOfFeatures').
85  * This will prevent the caller from corrupting memory.
86  *@param[in] _featureNum Feature Number to assign '_featureValue' to.
87  *@param[in] _featureValue Value to assign to feature '_featureNum'.
88  */
89  void FeatureData (kkint32 _featureNum,
90  float _featureValue
91  );
92 
93 
94  /** @brief Returns the total of all Feature Attributes for this feature vector. */
95  float TotalOfFeatureData () const;
96 
97  virtual
99 
100  /**
101  * @brief Assign a specific example a higher weight for training purposes.
102  * @details The SVM will multiply the cost parameter by this amount when training the classifier
103  * for this specific example.
104  */
105  void TrainWeight (float _trainWeight) {trainWeight = _trainWeight;}
106 
107 
108  /** @brief Indicates whether an expert has validated the class assignment. */
109  void Validated (bool _validated) {validated = _validated;}
110 
111 
112  float BreakTie () const {return breakTie;} /**< @brief The difference in probability between the two most likely classes. */
113  const KKStr& ClassName () const; /**< @brief Name of class that this example is assigned to. */
114  MLClassPtr MLClass () const {return mlClass;} /**< @brief Class that this example is assigned to. */
115  const KKStr& MLClassName () const; /**< @brief Name of class that this example is assigned to. */
116  const KKStr& ExampleFileName () const {return exampleFileName;} /**< @brief Name of file that this FeatureVector was computed from. */
117  bool MissingData () const {return missingData;} /**< @brief True indicates that one or more features were missing. */
118  kkint32 NumOfFeatures () const {return numOfFeatures;} /**< @brief Number of features in this FeatureVector. */
119  float OrigSize () const {return origSize;} /**< @brief The value of Feature[0] before normalization. */
120  MLClassPtr PredictedClass () const {return predictedClass;}
121  const KKStr& PredictedClassName () const;
122  float Probability () const {return probability;} /**< @brief The probability assigned by classifier to the predicted class. */
123  float TrainWeight () const {return trainWeight;}
124  bool Validated () const {return validated;}
125  kkint16 Version () const {return version;}
126 
127  float FeatureData (kkint32 featureNum) const; /**< @returns The value of 'featureNum' */
128  const float* FeatureData () const {return featureData;} /**< @brief Returns as a pointer to the feature data itself. */
129  float* FeatureDataAlter () {return featureData;} /**< @brief Same as 'FeatureData()' except you can modify the data. */
130 
131  const float* FeatureDataConst () const {return featureData;}
132  bool FeatureDataValid ();
133 
134  void ResetNumOfFeatures (kkint32 newNumOfFeatures); /**< Used to reallocate memory for feature data. */
135 
136  void AddFeatureData (kkint32 _featureNum, /**< Indicates which feature number to update. */
137  float _featureData /**< New value to assign to '_featureNum'. */
138  );
139 
140  bool operator== (FeatureVector &other_example) const;
141 
142 
143  /** @brief Used by container classes such as 'FeatureVectorList'. This way they can determine real underlying class. */
144  //virtual const char* UnderlyingClass () const {return "FeatureVector";}
145 
146 
147  protected:
148  void AllocateFeatureDataArray ();
149 
150  float* featureData;
152 
153 
154  private:
155  float breakTie; /**< @brief The difference in probability between the two most likely
156  * classes as per the classifier.
157  */
158  MLClassPtr mlClass;
159  KKStr exampleFileName;
160  bool missingData; /**< @brief Indicates that some features were flagged as missing in
161  * data file.
162  */
163  float origSize;
164  MLClassPtr predictedClass; /**< @brief Represents the class that the Classifier assigned to this
165  * image; added to aid in the grading function.
166  */
167 
168  float probability; /**< @brief Probability assigned by classifier to predicted Class. */
169 
170  float trainWeight; /**< @brief Weight to assign to this training image during Training.
171  *@details Will default to 1.0. During the SVM training process the
172  * Cost parameter will be multiplied by this amount.
173  */
174 
175  bool validated; /**< @brief If true then the 'mlClass' entry has been validated by
176  * an expert; was introduced when the DataBase was implemented.
177  */
178 
179  kkint16 version; /**< This is the same versionNumber as in FeatureVectorList
180  * It is related to the Feature calculation routine. This
181  * will assist in us changing the feature calculations in the
182  * future and objects and methods having a means of
183  * knowing if the features are similar.
184  */
185 
186  }; /* FeatureVector */
187 
188 
190 
191  #define _FeatureVector_Defined_
192 
193 
194  class FeatureVectorComparison;
195 
196 
197 
198  /**
199  *@class FeatureVectorList
200  *@brief Container class for FeatureVector derived objects.
201  *@details Supports various functions with respect to maintaining a list of FeatureVector's. These
202  * include randomizing their order, creating a stratified list by class, extracting a list
203  * of classes, sorting by various criteria.
204  */
206  {
207  public:
208  typedef float FVFloat;
209 
211 
212  /**
213  * When you use this constructor you need to immediately provide the appropriate FileDesc instance by
214  * calling the "ResetFileDesc" method.
215  */
217 
218 
219  /**
220  *@brief Will create a new empty list of FeatureVector's.
221  *@param[in] _fileDesc Describes the feature data such as number of features and their attributes.
222  *@param[in] _owner True indicates that this list will own its contents and when this list is deleted it
223  * will call the destructor for all its contents.
224  *@note This constructor does not take a log parameter.
225  */
226  FeatureVectorList (FileDescPtr _fileDesc,
227  bool _owner
228  );
229 
230  private:
231  /**
232  *@brief Will create a duplicate List of examples, in the same order.
233  *@details If the source 'examples' owns its entries, then new duplicate entries will be created, and the
234  * new 'FeatureVectorList' will own them, otherwise will only get pointers to existing instances in 'examples'.
235  */
236  FeatureVectorList (FeatureVectorList& examples);
237 
238 
239  public:
240  /**
241  *@brief Create a duplicate list, depending on the '_owner' parameter may also duplicate the contents.
242  *@details If '_owner' = true will create new instances of contents and own them. If 'owner' = false, will
243  * copy over pointers to existing instances.
244  *@param[in] examples Existing list of examples that we are going to copy.
245  *@param[in] _owner If set to true will make a duplicate of the FeatureVectors in 'examples' and own them;
246  * otherwise will just point to the same existing examples and not own them.
247  */
248  FeatureVectorList (const FeatureVectorList& examples,
249  bool _owner
250  );
251 
252 
253  /**
254  *@brief Will create a list consisting of the subset of examples in '_examples' whose classes are in '_mlClasses'.
255  *@details Will not own the contents; it will just point to the existing examples that were in '_examples'.
256  *@param[in] _mlClasses List of classes that we want to include.
257  *@param[in] _examples Source of feature Vectors to extract from.
258  *@note This constructor does not take a log parameter.
259  */
260  FeatureVectorList (MLClassList& _mlClasses,
261  FeatureVectorList& _examples
262  );
263 
264 
265  /**
266  *@enum IFL_SortOrder
267  *@brief Represents the different orders that a list of FeatureVector instances in a FeatureVectorList object can be in.
268  */
269  enum class IFL_SortOrder
270  {IFL_UnSorted,
271  IFL_ByName,
276  };
277 
278 
279  virtual ~FeatureVectorList ();
280 
281 
282  // Access methods.
283  IFL_SortOrder CurSortOrder () const {return curSortOrder;}
284  kkint32 FeatureCount () const {return numOfFeatures;}
285  const FileDescPtr FileDesc () const {return fileDesc;}
286  const KKStr& FileName () const {return fileName;}
287  kkint32 NumOfFeatures () const {return numOfFeatures;}
288  kkint16 Version () const {return version;}
289  //virtual const char* UnderlyingClass () const {return "FeatureVectorList";}
290 
291 
292  void FileName (const KKStr& _fileName) {fileName = _fileName;}
293  void Version (kkint16 _version) {version = _version;}
294 
295  void AddSingleExample (FeatureVectorPtr _imageFeatures); /**< @brief Same as PushOnBack */
296 
297  void AddQueue (const FeatureVectorList& examplesToAdd); /**< @brief Add the contents of 'examplesToAdd' to the end of this list. */
298 
299  FeatureNumList AllFeatures (); /**< @brief Will return a FeatureNumList instance with all features selected. */
300 
301  void AppendToFile (KKStr _fileName,
302  FeatureFileIOPtr _driver,
303  const FeatureNumList& _selFeatures
304  );
305 
306  /**
307  *@brief Will search for the example with the same name as '_imageFileName'.
308  *@details If the list is already sorted in name order will use a Binary Search otherwise a linear search.
309  * The method 'SortByImageFileName' will set a flag 'curSortOrder' indicating if the examples are sorted.
310  * The idea is that if you know that you will be doing many searches then for performance reasons you
311  * should call 'SortByImageFileName' first. The methods 'PushOnBack', 'PushOnFront', and 'AddSingleExample'
312  * will reset 'curSortOrder' to unsorted.
313  */
314  FeatureVectorPtr BinarySearchByName (const KKStr& _imageFileName) const;
315 
316 
317  void CalcStatsForFeatureNum (kkint32 _featureNum,
318  kkint32& _count,
319  float& _total,
320  float& _mean,
321  float& _var,
322  float& _stdDev
323  );
324 
325 
326  KKStr ClassStatisticsStr () const;
327 
329 
331 
332 
333  /**
334  *@brief Will create a list of FeatureVectors where the class assignment will reflect the specified Hierarchy level specified by 'level'.
335  *@details The hierarchy of a given class will be indicated by underscore characters in the class name.
336  *@code
337  * ex: Level 1: gelatinous
338  * Level 2: gelatinous_hydromedusae
339  * Level 3: gelatinous_hydromedusae_solmundella
340  * If the 'level' parameter is set to 1 then all FeatureVectors whose class name starts with 'gelatinous' will be grouped
341  * together under the class name 'gelatinous_hydromedusae'.
342  *@endcode
343  */
345 
346 
347  /**
348  *@brief Creates a duplicate of list using the same container.
349  */
350  virtual
351  FeatureVectorListPtr Duplicate (bool _owner) const;
352 
353 
354  /**
355  *@brief Creates a duplicate of the list and also duplicates its contents.
356  *@return Duplicated list with hard-copy of its contents.
357  */
358  virtual
360 
361  KKStrListPtr ExtractDuplicatesByExampleFileName ();
362 
363 
364  /**
365  *@brief Returns: a list of 'FeatureVector' objects that have duplicate root file names.
366  *@details The returned list will not own these items. All instances of the duplicate objects will be returned.
367  * Ex: if three instances have the same ExampleFileName all three will be returned.
368  */
370 
371 
373  kkint32 _maxToExtract = -1,
374  float _minSize = -1.0f
375  ) const;
376 
377  FeatureVectorListPtr ExtractExamplesForClassList (MLClassListPtr classes);
378 
380 
381 
382  /**
383  *@brief Will return a random sampling by class of our FeatureVector's; with a minimum per class of 'minClassCount'.
384  *@param[in] percentage The percentage between 0.0 and 100.0 of each class to randomly sample.
385  *@param[in] minClassCount The minimum per class to keep.
386  */
387  FeatureVectorListPtr ExtractRandomSampling (float percentage, /**< A percentage between 0.0 and 100.0 */
388  kkint32 minClassCount
389  );
390 
391  /**
392  *@brief Will create a list of FeatureVectors where the class assignment will reflect the specified Hierarchy level specified by 'level'.
393  *@details The hierarchy of a given class will be indicated by underscore characters in the class name.
394  *@code
395  * ex: Level 1: gelatinous
396  * Level 2: gelatinous_hydromedusae
397  * Level 3: gelatinous_hydromedusae_solmundella
398  * If the 'level' parameter is set to 1 then all FeatureVectors whose class name starts with 'gelatinous' will be
399  * grouped together under the class name 'gelatinous_hydromedusae'.
400  *@endcode
401  *@bug This method appears to be a duplicate of 'CreateListForAGivenLevel'; We should verify this and get rid of one of them.
402  */
404 
405  MLClassListPtr ExtractListOfClasses () const;
406 
407  bool AllFieldsAreNumeric () const; /**< @brief Returns true if all fields are numeric, no nominal fields. */
408  KKMLL::AttributeType FeatureType (kkint32 featureNum) const; /**< @brief Returns the type of attribute for specified 'featureNum'. @see FileDesc */
409  KKStr FeatureTypeStr (kkint32 featureNum) const;
410  kkint32 FeatureCardinality (kkint32 featureNum) const; /**< @brief Returns the number of values defined for a Nominal Field. @see FileDesc::Cardinality */
411  const KKStr& FieldName (kkint32 featureNum) const; /**< @brief Returns name of Attribute Field. */
412 
413  ClassStatisticListPtr GetClassStatistics () const; /**< @brief Returns the number of FeatureVectors per class @see ClassStatisticList */
415 
416  kkint32 GetClassCount (MLClassPtr c) const; /**< @brief Returns number of examples for a specific Class (MLClass). */
417 
418  //RunLog& Log () {return log;}
419 
420 
421  /**
422  * @brief Returns a pointer to the FeatureVector which has '_imageFileName'
423  *@details If the list is currently sorted by ExampleFileName (curSortOrder == IFL_ByName) then a Binary Search is performed
424  * otherwise a sequential search is performed.
425  */
426  FeatureVectorPtr LookUpByImageFileName (const KKStr& _imageFileName) const;
427 
428 
429  /**
430  *@brief Returns a pointer to the FeatureVector whose ExampleFileName root name equals '_rootName'.
431  *@details If the list is currently sorted by ExampleFileName (curSortOrder == IFL_ByRootName) then a Binary Search is performed
432  * otherwise a sequential search is performed. The parameter '_rootName' is assumed to be just the root name of the file;
433  * that is, you used osGetRootName to get the root part.
434  */
435  FeatureVectorPtr LookUpByRootName (const KKStr& _rootName);
436 
437  // void Sort (FeatureVectorComparison comparison);
438 
439  float MajorityClassFraction () const; /**< Returns the fraction that the majority class makes up in this list. */
440 
441  /**
442  *@brief Creates an instance of an empty FeatureVectorList.
443  */
444  virtual
445  FeatureVectorListPtr ManufactureEmptyList (bool _owner) const;
446 
447  virtual
449 
450  bool MissingData () const; /**< Returns true if 1 or more entries have missing data. */
451 
452  kkint32 NumEntriesOfAGivenClass (MLClassPtr mlClass) const {return GetClassCount (mlClass);}
453 
454  /**
455  *@brief Using list of ImageFileNames in a file('fileName') create a new FeatureVectorList instance with examples in order based
456  * off contents of file. If error occurs will return NULL.
457  *@param[in] fileName Name of file that contains a list of ExampleFileName's with one entry per line.
458  *@returns A new list of FeatureVector instances in the order dictated by 'fileName'.
459  */
461  RunLog& log
462  );
463 
464  void RemoveDuplicateEntries (bool allowDupsInSameClass,
465  RunLog& runLog
466  );
467 
468  /**
469  *@brief Will save into a file the current ordering of FeatureVector instances in list.
470  *@details This file can then be used at a later time to reproduce the exact same ordering of FeatureVector objects from a file.
471  *@see OrderUsingNamesFromAFile
472  *@param[in] fileName Name of file where ImageFileNames will be written to.
473  *@param[out] successful Indicates if list is successfully written.
474  */
475  void SaveOrderingOfImages (const KKStr& fileName,
476  bool& successful
477  );
478 
480  RunLog& log
481  );
482 
483  void PrintClassStatistics (std::ostream& o) const;
484 
485  void PrintClassStatisticsHTML (std::ostream& o) const;
486 
487  void PrintFeatureStatisticsByClass (std::ostream& o) const;
488 
489  void PushOnBack (FeatureVectorPtr image); /**< @brief Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order. */
490 
491  void PushOnFront (FeatureVectorPtr image); /**< @brief Overloading the PushOnFront function in KKQueue so we can monitor the Version and Sort Order. */
492 
493  void ResetNumOfFeaturs (kkint32 newNumOfFeatures);
494 
495  void ResetFileDesc (FileDescPtr newFileDesc); /**< You would use this if you recalculate all the data to a newer version of the file. */
496 
497  void ReSyncSymbolicData (FileDescPtr newFileDesc);
498 
499  void RemoveEntriesWithMissingFeatures (RunLog& log); /**< Will delete entries from list that have missing data. */
500 
501 
502  /**
503  * @details
504  * Determines if the other FeatureVectorList has the same underlying layout; that is, each
505  * field is of the same type and meaning. This way we can determine if one list contains
506  * Apples while the other contains Oranges.
507  */
508  bool SameExceptForSymbolicData (const FeatureVectorList& otherData,
509  RunLog& log
510  ) const;
511 
512 
514  RunLog& log
515  );
516 
517 
518  FeatureVectorListPtr StratifyAmoungstClasses (MLClassListPtr mlClasses,
519  kkint32 maxImagesPerClass,
520  kkint32 numOfFolds,
521  RunLog& log
522  );
523 
524  void SortByClass (bool reversedOrder = false);
525 
526  void SortByImageFileName (bool reversedOrder = false);
527 
528  void SortByProbability (bool reversedOrder = false);
529 
530  void SortByBreakTie (bool reversedOrder = false);
531 
532  void SortByRootName (bool reversedOrder = false);
533 
534  private:
535  class BreakTieComparison;
537  class ClassNameComparrison;
541  class ProbabilityComparison;
543  class RootNameComparrison;
545 
546 
547  void ValidateFileDescAndFieldNum (kkint32 fieldNum,
548  const char* funcDesc
549  ) const;
550 
551 
552 
553  /**
554  * @brief Keeps track of the current order of FeatureVector entries in the list.
555  * @details This helps functions such as LookUpByImageFileName to work more efficiently. If in ExampleFileName order
556  * it can then perform a binary search rather than a seq. scan. This field is updated by the different sort
557  * routines, and by the methods that allow you to add an entry.
558  */
559  IFL_SortOrder curSortOrder;
560 
561  FileDescPtr fileDesc;
562 
563  KKStr fileName;
564 
565  kkint32 numOfFeatures;
566 
567  kkint16 version; /**< Represents the version of the Feature data; whenever I update the
568  * way Features are calculated I increment the VersionNum in the respective
569  * "FeatureVectorProducer" derived class. This way if we load an older
570  * FeatureData file we can be aware of this. Methods like FeatureDataReSink
571  * will force the recalculation of Feature data if not up-to-date. Also
572  * works in coordination with the version field in the PostLarvaeFV object.
573  * A value of 0 indicates that we do not know what Version the feature data
574  * is. This can happen when not all the PostLarvaeFV objects in the list have
575  * the same version number.
576  */
577 
578  }; /* FeatureVectorList */
579 
580 
581  #define _FeatureVectorList_Defined_
582 
583 
585 
586 } /* namespace KKMLL */
587 
588 #endif
__int16 kkint16
16 bit signed integer.
Definition: KKBaseTypes.h:85
void ExampleFileName(const KKStr &_exampleFileName)
Name of source of feature vector, ex: file name of image that the feature vector was computed from...
Definition: FeatureVector.h:75
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
void Validated(bool _validated)
Indicates whether an expert has validated the class assignment.
void PushOnFront(FeatureVectorPtr image)
Overloading the PushOnFront function in KKQueue so we can monitor the Version and Sort Order...
void AddQueue(const FeatureVectorList &examplesToAdd)
Add the contents of 'examplesToAdd' to the end of this list.
void ResetNumOfFeatures(kkint32 newNumOfFeatures)
KKMLL::AttributeTypeVector CreateAttributeTypeTable() const
FeatureVectorListPtr StratifyAmoungstClasses(kkint32 numOfFolds, RunLog &log)
void BreakTie(float _breakTie)
Update the BreakTie value.
Definition: FeatureVector.h:73
KKStrListPtr ExtractDuplicatesByExampleFileName()
void SortByBreakTie(bool reversedOrder=false)
void AddSingleExample(FeatureVectorPtr _imageFeatures)
Same as PushOnBack.
__int32 kkint32
Definition: KKBaseTypes.h:88
FeatureVector * FeatureVectorPtr
Definition: Model.h:44
bool AllFieldsAreNumeric() const
Returns true if all fields are numeric, no nominal fields.
FeatureVectorList(const FeatureVectorList &examples, bool _owner)
Create a duplicate list, depending on the '_owner' parameter may also duplicate the contents...
virtual FeatureVectorListPtr Duplicate(bool _owner) const
Creates a duplicate of list using the same container.
const float * FeatureData() const
Returns as a pointer to the feature data itself.
kkint32 GetClassCount(MLClassPtr c) const
Returns number of examples for a specific Class (MLClass).
FeatureVectorPtr BinarySearchByName(const KKStr &_imageFileName) const
Will search for the example with the same name as '_imageFileName'.
float FeatureData(kkint32 featureNum) const
virtual kkint32 MemoryConsumedEstimated() const
ClassProbListPtr GetClassDistribution() const
Keeps track of selected features.
bool MissingData() const
True indicates that one or more features were missing.
FeatureVector(kkint32 _numOfFeatures)
kkint32 NumOfFeatures() const
Number of features in this FeatureVector.
float OrigSize() const
The value of Feature[0] before normalization.
MLClassListPtr ExtractListOfClasses() const
FeatureNumList AllFeatures()
Will return a FeatureNumList instance with all features selected.
const FileDescPtr FileDesc() const
void PrintClassStatisticsHTML(std::ostream &o) const
float Probability() const
The probability assigned by classifier to the predicted class.
const KKStr & MLClassName() const
Name of class that this example is assigned to.
void RemoveEntriesWithMissingFeatures(RunLog &log)
virtual FeatureVectorListPtr DuplicateListAndContents() const
Creates a duplicate of list and also duplicates it contents.
void TrainWeight(float _trainWeight)
Assign a specific example a higher weight for training purposes.
void SortByProbability(bool reversedOrder=false)
KKMLL::AttributeType FeatureType(kkint32 featureNum) const
Returns the type of attribute for specified 'featureNum'.
virtual FeatureVectorListPtr ManufactureEmptyList(bool _owner) const
Creates an instance of a Empty FeatureVectorList.
void SortByImageFileName(bool reversedOrder=false)
#define _FeatureFileIO_Defined_
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
VectorDouble ExtractMeanFeatureValues()
void AddFeatureData(kkint32 _featureNum, float _featureData)
MLClassPtr PredictedClass() const
float BreakTie() const
The difference in probability between the two most likely classes.
FeatureVectorList(FileDescPtr _fileDesc, bool _owner)
Will create a new empty list of FeatureVector's.
virtual FeatureVectorPtr Duplicate() const
FeatureVector(const FeatureVector &_example)
Container class for FeatureVector derived objects.
void PrintFeatureStatisticsByClass(std::ostream &o) const
void CalcStatsForFeatureNum(kkint32 _featureNum, kkint32 &_count, float &_total, float &_mean, float &_var, float &_stdDev)
const float * FeatureDataConst() const
kkint16 Version() const
void AppendToFile(KKStr _fileName, FeatureFileIOPtr _driver, const FeatureNumList &_selFeatures)
kkint32 FeatureCount() const
void AllocateFeatureDataArray()
Used by container classes such as 'FeatureVectorList'. This way they can determine real underlying cl...
bool Validated() const
FeatureFileIO * FeatureFileIOPtr
Definition: FileDesc.h:45
FeatureVectorListPtr ExtractExamplesForAGivenClass(MLClassPtr _mlClass, kkint32 _maxToExtract=-1, float _minSize=-1.0f) const
kkint32 NumOfFeatures() const
FeatureVector * FeatureVectorPtr
Definition: FeatureVector.h:63
void SortByRootName(bool reversedOrder=false)
vector< kkint32 > CreateCardinalityTable() const
FeatureVectorList(MLClassList &_mlClasses, FeatureVectorList &_examples)
Will create a list consisting of the subset of examples in '_examples' which are members of Images...
void ResetFileDesc(FileDescPtr newFileDesc)
kkint16 Version() const
void SaveOrderingOfImages(const KKStr &fileName, bool &successful)
Will save into a file the current ordering of FeatureVector instances in list.
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
ClassProbList * ClassProbListPtr
Definition: Classifier2.h:30
ClassStatisticListPtr GetClassStatistics() const
Returns the number of FeatureVectors per class.
AttributeType
Definition: Attribute.h:36
MLClassPtr MLClass() const
Class that this example is assigned to.
FeatureVectorListPtr OrderUsingNamesFromAFile(const KKStr &fileName, RunLog &log)
Using list of ImageFileNames in a file('fileName') create a new FeatureVectorList instance with examp...
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
Definition: FeatureVector.h:74
void FileName(const KKStr &_fileName)
FeatureVectorListPtr StratifyAmoungstClasses(MLClassListPtr mlClasses, kkint32 maxImagesPerClass, kkint32 numOfFolds, RunLog &log)
bool SameExceptForSymbolicData(const FeatureVectorList &otherData, RunLog &log) const
FileDesc * FileDescPtr
void FeatureData(kkint32 _featureNum, float _featureValue)
Assign a value to a specific feature number for the feature vector.
float TotalOfFeatureData() const
Returns the total of all Feature Attributes for this feature vector.
ClassStatisticList * ClassStatisticListPtr
FeatureVectorListPtr ExtractRandomSampling(float percentage, kkint32 minClassCount)
Will return a random sampling by class of our FeatureVector's; with a minimum per class of 'minClassC...
void Probability(float _probability)
Assign a prediction probability to this example.
Definition: FeatureVector.h:79
FeatureVectorPtr LookUpByRootName(const KKStr &_rootName)
Returns a pointer to the FeatureVector whose ExampleFileName rootname = _rootName...
bool operator==(FeatureVector &other_example) const
IFL_SortOrder
Represents the different orders that a list of FeatureVector instances in a FeatureVectorList object ...
FeatureVectorListPtr ExtractExamplesForHierarchyLevel(kkuint32 level)
Will create a list of FeatureVectors where the class assignment will reflect the specified Hierarchy ...
FeatureVectorListPtr ExtractDuplicatesByRootImageFileName()
Returns: a list of 'FeatureVector' objects that have duplicate root file names.
void Version(kkint16 _version)
Definition: FeatureVector.h:80
float MajorityClassFraction() const
void ReSyncSymbolicData(FileDescPtr newFileDesc)
const KKStr & FileName() const
const KKStr & FieldName(kkint32 featureNum) const
Returns name of Attribute Field.
void OrigSize(float _origSize)
The value of Feature[0] before normalization.
Definition: FeatureVector.h:77
void PredictedClass(MLClassPtr _predictedClass)
Definition: FeatureVector.h:78
FeatureVectorListPtr CreateListForAGivenLevel(kkint32 level)
Will create a list of FeatureVectors where the class assignment will reflect the specified Hierarchy ...
KKStr & operator=(const KKStr &src)
Definition: KKStr.cpp:1390
IFL_SortOrder CurSortOrder() const
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
kkint32 FeatureCardinality(kkint32 featureNum) const
Returns the number of values defined for a Nominal Field.
const KKStr & PredictedClassName() const
float TrainWeight() const
void RemoveDuplicateEntries(bool allowDupsInSameClass, RunLog &runLog)
const KKStr & ClassName() const
Name of class that this example is assigned to.
KKStr ClassStatisticsStr() const
void SynchronizeSymbolicData(FeatureVectorList &otherData, RunLog &log)
Maintains a list of MLClass instances.
Definition: MLClass.h:233
virtual kkint32 MemoryConsumedEstimated() const
FeatureVectorList * FeatureVectorListPtr
Definition: Model.h:46
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
FeatureVectorPtr LookUpByImageFileName(const KKStr &_imageFileName) const
Returns a pointer to the FeatureVector which has '_imageFileName'.
kkint32 NumEntriesOfAGivenClass(MLClassPtr mlClass) const
FeatureVectorListPtr ExtractExamplesForClassList(MLClassListPtr classes)
float * FeatureDataAlter()
Same as 'FeatureData()' except you can modify the data.
void MissingData(bool _missingData)
True indicates that not all the feature data was present when this example was loaded from a data fil...
Definition: FeatureVector.h:76
void Version(kkint16 _version)
void SortByClass(bool reversedOrder=false)
KKStr FeatureTypeStr(kkint32 featureNum) const
std::vector< double > VectorDouble
Vector of doubles.
Definition: KKBaseTypes.h:148
FeatureVectorList * FeatureVectorListPtr
const KKStr & ExampleFileName() const
Name of file that this FeatureVector was computed from.
void ResetNumOfFeaturs(kkint32 newNumOfFeatures)
void PrintClassStatistics(std::ostream &o) const