KSquare Utilities
SVMModel.h
Go to the documentation of this file.
1 #ifndef _SVMMODEL_
2 #define _SVMMODEL_
3 //***********************************************************************
4 //* SVMModel *
5 //* *
6 //* Represents a training model for svmlib. Uses Example Feature data *
7 //* MLClasses, libsvm parameters, and included features to build *
8 //* a svm training model. You can save this model to disk for later *
9 //* use without having to train again. An application can create *
10 //* several instances of this model for different phases of a *
11 //* classification architecture. *
12 //***********************************************************************
13 
14 
15 #include "KKStr.h"
16 #include "ClassAssignments.h"
17 #include "FileDesc.h"
18 #include "MLClass.h"
19 #include "FeatureVector.h"
20 #include "svm.h"
21 #include "SVMparam.h"
22 
23 
24 
25 namespace KKMLL
26 {
27 
28 #ifndef _FEATURENUMLIST_
29 class FeatureNumList;
33 #endif
34 
35 
36 #ifndef _FEATUREENCODER_
39 #endif
40 
41 
42 
43 #ifndef _RUNLOG_
44 class RunLog;
45 typedef RunLog* RunLogPtr;
46 #endif
47 
48 
49 #ifndef _FILEDESC_
50 class FileDesc;
51 typedef FileDesc* FileDescPtr;
52 #endif
53 
54 
55 
56 
57 
58 typedef struct svm_node* XSpacePtr;
59 
60 
61 
63  {
64  public:
66  double _probability
67  ):
68  name (_name), probability (_probability)
69  {}
70 
72  double probability;
73  };
74 
75 
76 
77  class SVMModel
78  {
79  public:
80  /**
81  *@brief Default constructor used by XmlElementSVMModel to create and load a new instance from a XML Stream.
82  */
83  SVMModel ();
84 
85 
86  /**
87  *@brief Constructor that will create a svmlib training model using the
88  * features and classes for training purposes.
89  *@param[in] _svmParam Specifies the parameters to be used for training. These
90  * are the same parameters that you would specify in the command line
91  * to svm_train. Plus the feature numbers to be used.
92  *@param[in] _examples Training data for the classifier.
93 *@param[in] _assignments List of classes and their associated number to be
94 * used by the SVM. You can merge 1 or more classes by assigning them
95 * the same number. This number will be used by the SVM. When
96 * predictions are done by the SVM it returns a number; _assignments will
97 * then be used to map back to the correct class.
98  *@param[in] _fileDesc File-Description that describes the training data.
99  *@param[out] _log Log file to log messages to.
100  */
101  SVMModel (const SVMparam& _svmParam,
102  FeatureVectorList& _examples,
103  ClassAssignments& _assignments,
104  FileDescPtr _fileDesc,
105  RunLog& _log
106  );
107 
108 
109  /**
110  *@brief Frees any memory allocated by, and owned by the SVMModel
111  */
112  virtual ~SVMModel ();
113 
114  virtual void CancelFlag (bool _cancelFlag);
115 
117 
119 
121  MLClassPtr class1,
122  MLClassPtr class2
123  ) const;
124 
125  //MLClassList& MLClasses () {return mlClasses;}
126  const ClassAssignments& Assignments () const {return assignments;}
127 
128  //kkint32 DuplicateDataCount () const {return duplicateCount;}
129 
131 
132  virtual
133  bool NormalizeNominalAttributes (); /**< Return true, if nominal fields need to be normalized. */
134 
135  kkint32 NumOfClasses () const {return numOfClasses;}
136 
137  kkint32 NumOfSupportVectors () const;
138 
139  void SupportVectorStatistics (kkint32& numSVs,
140  kkint32& totalNumSVs
141  );
142 
143  SVMparam const * SVMParameters () const {return svmParam;}
144 
145  const KKStr& RootFileName () const {return rootFileName;}
146 
147 
149 
150  double TrainingTime () const {return trainingTime;}
151 
152  bool ValidModel () const {return validModel;}
153 
154 
156  MLClassPtr class1,
157  MLClassPtr class2
158  );
159 
160 
161  MLClassPtr Predict (FeatureVectorPtr example);
162 
163 
164  /**
165  *@brief Will predict the two most likely classes of 'example'.
166  *@param[in] example The Example to predict on.
167 *@param[in] knownClass Class that we already know the example to be; can be pointing to NULL indicating that you do not know.
168  *@param[out] predClass1 Most likely class; depending on classifier parameters this could be by number of votes or probability.
169  *@param[out] predClass2 Second most likely class.
170  *@param[out] predClass1Votes Number votes 'predClass1' received.
171  *@param[out] predClass2Votes Number votes 'predClass2' received.
172  *@param[out] probOfKnownClass Probability of 'knownClass' if specified.
173  *@param[out] predClass1Prob Probability of 'predClass1' if specified.
174  *@param[out] predClass2Prob Probability of 'predClass2' if specified.
175  *@param[out] numOfWinners Number of classes that had the same number of votes as the winning class.
176  *@param[out] knownClassOneOfTheWinners Will return true if the known class was one of the classes that had the highest number of votes.
177  *@param[out] breakTie The difference in probability between the classes with the two highest probabilities.
178  */
179  void Predict (FeatureVectorPtr example,
180  MLClassPtr knownClass,
181  MLClassPtr& predClass1,
182  MLClassPtr& predClass2,
183  kkint32& predClass1Votes,
184  kkint32& predClass2Votes,
185  double& probOfKnownClass,
186  double& predClass1Prob,
187  double& predClass2Prob,
188  kkint32& numOfWinners,
189  bool& knownClassOneOfTheWinners,
190  double& breakTie
191  );
192 
193 
194  /**
195  *@brief Returns the distance from the decision border of the SVM.
196  */
197  void PredictRaw (FeatureVectorPtr example,
198  MLClassPtr & predClass,
199  double& dist
200  );
201 
202  /**
203  *@brief Will get the probabilities assigned to each class.
204  *@param[in] example unknown example that we want to get predicted probabilities for.
205  *@param[in] _mlClasses List classes that caller is aware of. This should be the same list that
206  * was used when constructing this SVMModel object. The list must be the
207  * same but not necessarily in the same order as when SVMModel was 1st
208  * constructed.
209  *@param[out] _votes An array that must be as big as the number of classes in _mlClasses.
210  *@param[out] _probabilities An array that must be as big as the number of classes in _mlClasses.
211  * The probability of class in _mlClasses[x] will be returned in _probabilities[x].
212  *@param[in] _log Logging file.
213  */
215  const MLClassList& _mlClasses,
216  kkint32* _votes,
217  double* _probabilities,
218  RunLog& _log
219  );
220 
221 
222 
223  /**
224  *@brief For a given two class pair return the names of the 'numToFind' worst S/V's.
225  *@details This method will iterate through all the S/V's removing them one at a
226  * time and re-compute the decision boundary and probability. It will then
227 * return the S/V's that, when removed, improve the probability in 'c1's
228 * favor the most.
229  *@param[in] example The Example that was classified incorrectly.
230  *@param[in] numToFind The number of the worst examples you are looking for.
231 *@param[in] c1 Class that the 'example' parameter should have been classed as.
232  *@param[in] c2 Class that it was classified as.
233  */
236  MLClassPtr c1,
237  MLClassPtr c2
238  );
239 
240 
241  /**
242  *@brief For a given two class pair return the names of the 'numToFind' worst S/V's.
243  *@details This method will iterate through all the S/V's removing them one at a
244  * time and retraining a new SVM and then comparing with the new prediction results.
245  *@param[in] example The Example that was classified incorrectly.
246  *@param[in] numToFind The number of the worst examples you are looking for.
247 *@param[in] c1 Class that the 'example' parameter should have been classed as.
248  *@param[in] c2 Class that it was classified as.
249  */
252  MLClassPtr c1,
253  MLClassPtr c2
254  );
255 
256 
257  std::vector<KKStr> SupportVectorNames () const;
258 
259 
261  MLClassPtr c2
262  ) const;
263 
264 
265  /**
266  *@brief Will return the probabilities for all pairs of the classes listed in 'classes'.
267  *@param[in] classes The Classes that you wish to get class pair probabilities for; the order will
268  * dictate how the two dimensional matrix 'crossProbTable' will be populated.
269  *@param[out] crossProbTable Will contain the probabilities of all the class pairs that can be formed from
270 * the list of classes in 'classes'. 'crossProbTable' will be a two dimensional square matrix
271 * whose size is dictated by the number of classes in 'classes'. Ex: Entry[3][2] will
272 * contain the probability between classes[3] and classes[2].
273  *@param[in] log Logging file.
274  */
275  void RetrieveCrossProbTable (MLClassList& classes,
276  double** crossProbTable, // two dimension matrix that needs to be classes.QueueSize () squared.
277  RunLog& log
278  );
279 
280 
281  virtual void ReadXML (XmlStream& s,
282  XmlTagConstPtr tag,
283  VolConstBool& cancelFlag,
284  RunLog& log
285  );
286 
287 
288  virtual void WriteXML (const KKStr& varName,
289  std::ostream& o
290  ) const;
291 
292 
293  private:
294  typedef struct SvmModel233** ModelPtr;
295 
296  FeatureVectorListPtr* BreakDownExamplesByClass (FeatureVectorListPtr examples);
297 
298 
299  void DeleteModels ();
300  void DeleteXSpaces ();
301 
302  void AllocateModels ();
303  void AllocateXSpaces ();
304 
305  void BuildClassIdxTable ();
306 
307 
308  void BuildCrossClassProbTable ();
309 
310 
311  /**
312  *@brief Constructs svm_problem structure from the examples passed to it.
313  *@details This is called once for each logical class that is going to be built in
314  * the One-vs-All. For each class there will be a binary SVM where one class
315  * is pitted against all other classes. Using the 'classesThisAssignment'
316  * parameter several classes can be grouped together as one logical class.
317  *@param[in] examples The examples to build the svm_problem(s) with
318  *@param[out] prob The svm_problem structure that will be constructed
319  *@param[in] xSpace A list of pointers to the memory allocated for all of the
320  * svm_node structures used in building the svm_problem.
321  *@param[in] classesThisAssignment The list of classes that are to be treated
322  * as class '0' all other classes are treated as class '1'.
323  *@param[in] featureEncoder Used to encode the feature data in 'examples' into
324  * the format expected by libSVM.
325  *@param[in] allClasses List of all classes; The ones that are contained in
326  * 'classesThisAssignment' will be the '0' class or the "One" in "One-Vs-All"
327  * while classes that are not in 'classesThisAssignment' will be coded as
328  * '1' or the "All" in "One-Vs-All".
329  */
330  void BuildProblemOneVsAll (FeatureVectorList& examples,
331  struct svm_problem& prob,
332  XSpacePtr& xSpace,
333  const MLClassList& classesThisAssignment,
334  FeatureEncoderPtr featureEncoder,
335  MLClassList& allClasses,
336  ClassAssignmentsPtr& classAssignments,
337  RunLog& log
338  );
339 
340 
341 /**
342 *@brief Constructs the svm_problem for a two class SVM.
343 *@param[in] class1Examples Examples for 1st class.
344 *@param[in] class2Examples Examples for 2nd class.
345 *@param[in] _twoClassParms Parameters for the specific two classes in question; passed as a
346 * reference to the pointer.
347 *@param[out] _encoder Based off parameters a FeatureEncoder will be built and
348 * returned to caller. Caller will get ownership and be responsible
349 * for deleting it.
350 *@param[out] prob The resultant two class svm_problem that will be built; caller
351 * will get ownership.
352 *@param[out] xSpace A list of pointers to the S/V's that the built 'prob'
353 * will be referring to.
354 *@param[in] class1 The class that 'class1Examples' represents.
355 *@param[in] class2 The class that 'class2Examples' represents.
356 *@param[in] log Logging file.
357 */
358 void BuildProblemBinaryCombos (FeatureVectorListPtr class1Examples,
359 FeatureVectorListPtr class2Examples,
360 BinaryClassParmsPtr& _twoClassParms,
361 FeatureEncoderPtr& _encoder,
362 struct svm_problem& prob,
363 XSpacePtr& xSpace,
364 MLClassPtr class1,
365 MLClassPtr class2,
366 RunLog& log
367 );
368 
369 
370  void PredictProbabilitiesByBinaryCombos (FeatureVectorPtr example,
371  const MLClassList& _mlClasses,
372  kkint32* _votes,
373  double* _probabilities,
374  RunLog& _log
375  );
376 
377 
378 
379  /**
380  *@brief calculates the number of features that will be present after encoding, and allocates
381  * predictedXSpace to accommodate the size.
382  */
383  void CalculatePredictXSpaceNeeded (RunLog& log);
384 
385 
386 
387  /**
388  *@brief Builds a BinaryCombo svm model
389  *@param[in] examples The examples to use when training the new model
390  */
391  void ConstructBinaryCombosModel (FeatureVectorListPtr examples,
392  RunLog& log
393  );
394 
395 
396  /**
397 *@brief Builds a OneVsAll svm model to be used by libSVM
398 *@param[in] examples The examples to use when training the new model.
399 *@param[out] prob Data structure used by libSVM.
400  */
401  void ConstructOneVsAllModel (FeatureVectorListPtr examples,
402  svm_problem& prob,
403  RunLog& log
404  );
405 
406 
407  /**
408  *@brief Builds a OneVsOne svm model
409  *@param[in] examples The examples to use when training the new model
410 *@param[out] prob Data structure used by libSVM.
411  */
412  void ConstructOneVsOneModel (FeatureVectorListPtr examples,
413  svm_problem& prob,
414  RunLog& log
415  );
416 
417 
418 /**
419 *@brief Converts a single example into the svm_problem format, using the method specified
420 * by the EncodingMethod() value returned by svmParam
421 *@param[in] example The example that we're converting.
422 *@param[out] row Pointer to the svm_node storage that the converted data will be stored in.
423 */
424 kkint32 EncodeExample (FeatureVectorPtr example,
425 svm_node* row
426 );
427 
428 
429  static
430  bool GreaterThan (kkint32 leftVotes,
431  double leftProb,
432  kkint32 rightVotes,
433  double rightProb
434  );
435 
436 
437  static
438  void GreaterVotes (bool useProbability,
439  kkint32 numClasses,
440  kkint32* votes,
441  kkint32& numOfWinners,
442  double* probabilities,
443  kkint32& pred1Idx,
444  kkint32& pred2Idx
445  );
446 
447 
448  void InializeProbClassPairs ();
449 
450 
451  void SetSelectedFeatures (FeatureNumListConst& _selectedFeatures,
452  RunLog& _log
453  );
454 
455  void SetSelectedFeatures (FeatureNumListConstPtr _selectedFeatures,
456  RunLog& _log
457  );
458 
459 
460  void PredictOneVsAll (XSpacePtr xSpace,
461  MLClassPtr knownClass,
462  MLClassPtr &predClass1,
463  MLClassPtr& predClass2,
464  double& probOfKnownClass,
465  double& predClass1Prob,
466  double& predClass2Prob,
467  kkint32& numOfWinners,
468  bool& knownClassOneOfTheWinners,
469  double& breakTie
470  );
471 
472 
473 
474  void PredictByBinaryCombos (FeatureVectorPtr example,
475  MLClassPtr knownClass,
476  MLClassPtr &predClass1,
477  MLClassPtr& predClass2,
478  kkint32& predClass1Votes,
479  kkint32& predClass2Votes,
480  double& probOfKnownClass,
481  double& predClass1Prob,
482  double& predClass2Prob,
483  double& breakTie,
484  kkint32& numOfWinners,
485  bool& knownClassOneOfTheWinners
486  );
487 
488 
489  ClassAssignments assignments;
490 
491  FeatureEncoderPtr* binaryFeatureEncoders;
492 
493  BinaryClassParmsPtr* binaryParameters; /**< only used when doing Classification with diff Feature
494  * Selection by 2 class combo's
495  */
496  volatile bool cancelFlag;
497 
498  VectorInt32 cardinality_table;
499 
500  MLClassPtr* classIdxTable; /**< Supports reverse class lookUp, indexed by ClassAssignments number,
501  * works with assignments.
502  */
503 
504  double** crossClassProbTable; /**< Probabilities between Binary Classes From last Prediction */
505 
506  kkint32 crossClassProbTableSize; /**< Dimension of each side of 'crossClassProbTable' */
507 
508  FeatureEncoderPtr featureEncoder; /**< used when doing OneVsOne or OnevsAll processing
509  * When doing binary feature selection will use
510  * binaryFeatureEncoders.
511  */
512  FileDescPtr fileDesc;
513 
514  ModelPtr* models;
515 
516  kkint32 numOfClasses; /**< Number of Classes defined in crossClassProbTable. */
517  kkint32 numOfModels;
518 
519  VectorShort oneVsAllAssignment;
520  ClassAssignmentsPtr* oneVsAllClassAssignments;
521 
522  kkuint32 predictXSpaceWorstCase;
523 
524  XSpacePtr predictXSpace; /**< Used by Predict OneVsOne, to avoid deleting and reallocating every call. */
525 
526  double* probabilities;
527 
528  KKStr rootFileName; /**< This is the root name to be used by all component
529  * objects; such as SvmModel233, mlClasses, and
530  * svmParam(including selected features). Each one
531  * will have the same rootName with a different Suffix
532  * mlClasses "<rootName>.example_classes"
533  * svmParam "<rootName>.svm_parm"
534  * model "<rootName>"
535  */
536 
537  FeatureNumListPtr selectedFeatures;
538 
539  SVMparamPtr svmParam;
540 
541  double trainingTime;
542 
543  KKMLL::AttributeTypeVector type_table;
544 
545  bool validModel;
546 
547  kkint32* votes;
548 
549  XSpacePtr* xSpaces; /**< There will be one xSpace structure for each libSVM classifier that has
550  * to be built; for a total of 'numOfModels'. This will be the input to
551  * the trainer for each one.
552  */
553 
554  kkint32 xSpacesTotalAllocated;
555  };
556 
558 
559 
560 
563 
564 
565 
566 } /* namespace KKMLL */
567 
568 
569 
570 #endif
double probability
Definition: SVMModel.h:72
kkint32 MemoryConsumedEstimated() const
Definition: SVMModel.cpp:437
ClassAssignments * ClassAssignmentsPtr
SVMModel * SVMModelPtr
Definition: SVMModel.h:557
SVM_SelectionMethod
Definition: SVMparam.h:34
__int32 kkint32
Definition: KKBaseTypes.h:88
FeatureVector * FeatureVectorPtr
Definition: Model.h:44
void SupportVectorStatistics(kkint32 &numSVs, kkint32 &totalNumSVs)
Definition: SVMModel.cpp:1328
Keeps track of selected features.
virtual void CancelFlag(bool _cancelFlag)
Definition: SVMModel.cpp:515
FeatureNumListConstPtr GetFeatureNums() const
Definition: SVMModel.cpp:701
std::vector< ProbNamePair > FindWorstSupportVectors2(FeatureVectorPtr example, kkint32 numToFind, MLClassPtr c1, MLClassPtr c2)
For a given two class pair return the names of the 'numToFind' worst S/V's.
Definition: SVMModel.cpp:1745
ProbNamePair(KKStr _name, double _probability)
Definition: SVMModel.h:65
SVM_SelectionMethod SelectionMethod() const
Definition: SVMModel.h:148
FeatureNumList const FeatureNumListConst
MLClassPtr Predict(FeatureVectorPtr example)
Definition: SVMModel.cpp:1131
const ClassAssignments & Assignments() const
Definition: SVMModel.h:126
SVMModel()
Default constructor used by XmlElementSVMModel to create and load a new instance from a XML Stream...
Definition: SVMModel.cpp:169
SVMparam * SVMparamPtr
Definition: SVMparam.h:286
FeatureNumListConstPtr GetFeatureNums(FileDescPtr fileDesc) const
Definition: SVMModel.cpp:708
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
Container class for FeatureVector derived objects.
kkint32 NumOfClasses() const
Definition: SVMModel.h:135
struct svm_node * XSpacePtr
Definition: SVMModel.h:58
FeatureNumList * FeatureNumListPtr
KKStr(const KKStr &str)
Copy Constructor.
Definition: KKStr.cpp:561
virtual ~SVMModel()
Frees any memory allocated by, and owned by the SVMModel.
Definition: SVMModel.cpp:303
const KKStr & RootFileName() const
Definition: SVMModel.h:145
XmlTag const * XmlTagConstPtr
Definition: KKStr.h:45
Manages the reading and writing of objects in a simple XML format. For a class to be supported by Xml...
Definition: XmlStream.h:46
XmlElementTemplate< SVMModel > XmlElementSVMModel
Definition: SVMModel.h:561
std::vector< KKStr > SupportVectorNames() const
Definition: SVMModel.cpp:1618
double DistanceFromDecisionBoundary(FeatureVectorPtr example, MLClassPtr class1, MLClassPtr class2)
Definition: SVMModel.cpp:728
Binds MLClass objects to the appropriate number that the Learning Algorithm expects.
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
std::vector< kkint32 > VectorInt32
Vector of signed 32 bit integers.
Definition: KKBaseTypes.h:144
This class encapsulates all the information necessary to build a SVMModel class.
Definition: SVMparam.h:74
void RetrieveCrossProbTable(MLClassList &classes, double **crossProbTable, RunLog &log)
Will return the probabilities for all pairs of the classes listed in 'classes'.
Definition: SVMModel.cpp:2279
FileDesc * FileDescPtr
virtual void WriteXML(const KKStr &varName, std::ostream &o) const
Definition: SVMModel.cpp:2354
void ProbabilitiesByClass(FeatureVectorPtr example, const MLClassList &_mlClasses, kkint32 *_votes, double *_probabilities, RunLog &_log)
Will get the probabilities assigned to each class.
Definition: SVMModel.cpp:1354
void Predict(FeatureVectorPtr example, MLClassPtr knownClass, MLClassPtr &predClass1, MLClassPtr &predClass2, kkint32 &predClass1Votes, kkint32 &predClass2Votes, double &probOfKnownClass, double &predClass1Prob, double &predClass2Prob, kkint32 &numOfWinners, bool &knownClassOneOfTheWinners, double &breakTie)
Will predict the two most likely classes of 'example'.
Definition: SVMModel.cpp:792
SVM_SelectionMethod SelectionMethod() const
Definition: SVMparam.h:169
FeatureEncoder * FeatureEncoderPtr
Definition: SVMModel.h:37
XmlElementSVMModel * XmlElementSVMModelPtr
Definition: SVMModel.h:562
FeatureNumListConst * FeatureNumListConstPtr
double TrainingTime() const
Definition: SVMModel.h:150
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
SVMModel(const SVMparam &_svmParam, FeatureVectorList &_examples, ClassAssignments &_assignments, FileDescPtr _fileDesc, RunLog &_log)
Constructor that will create a svmlib training model using the features and classes for training purp...
Definition: SVMModel.cpp:203
void PredictRaw(FeatureVectorPtr example, MLClassPtr &predClass, double &dist)
Returns the distance from the decision border of the SVM.
Definition: SVMModel.cpp:921
std::vector< short > VectorShort
Definition: KKBaseTypes.h:142
virtual void ReadXML(XmlStream &s, XmlTagConstPtr tag, VolConstBool &cancelFlag, RunLog &log)
Definition: SVMModel.cpp:2421
FeatureNumListConstPtr GetFeatureNums(FileDescPtr fileDesc, MLClassPtr class1, MLClassPtr class2) const
Definition: SVMModel.cpp:715
Maintains a list of MLClass instances.
Definition: MLClass.h:233
std::vector< ProbNamePair > FindWorstSupportVectors(FeatureVectorPtr example, kkint32 numToFind, MLClassPtr c1, MLClassPtr c2)
For a given two class pair return the names of the 'numToFind' worst S/V's.
Definition: SVMModel.cpp:1653
FeatureVectorList * FeatureVectorListPtr
Definition: Model.h:46
virtual bool NormalizeNominalAttributes()
Definition: SVMModel.cpp:2246
bool ValidModel() const
Definition: SVMModel.h:152
kkint32 NumOfSupportVectors() const
Definition: SVMModel.cpp:1304
SVMparam const * SVMParameters() const
Definition: SVMModel.h:143
std::vector< KKStr > SupportVectorNames(MLClassPtr c1, MLClassPtr c2) const
Definition: SVMModel.cpp:1576
volatile const bool VolConstBool
Definition: KKBaseTypes.h:163