KSquare Utilities
Model.h
Go to the documentation of this file.
1 #ifndef _MODEL_
2 #define _MODEL_
3 /**
4  @class KKMLL::Model
5  @brief Base class to all Learning Algorithms.
6  @author Kurt Kramer
7  @details
8  Base class to be used by all Learning Algorithm Models. The idea is that all learning algorithms
9  follow the same basic flow. The two obvious functions that they all support are Training from
10  supplied labeled examples (List of FeatureVector objects) and Prediction of an unlabeled example.
11  */
12 
13 #include "DateTime.h"
14 #include "KKBaseTypes.h"
15 #include "KKStr.h"
16 #include "RunLog.h"
17 
18 #include "ModelParam.h"
19 
20 namespace KKMLL
21 {
22  #if !defined(_CLASSPROB_)
23  class ClassProb;
24  typedef ClassProb* ClassProbPtr;
25  class ClassProbList;
27  #endif
28 
29  #if !defined(_FEATUREENCODER2_)
32  #endif
33 
34 
36  class FeatureNumList;
38  typedef FeatureNumList const FeatureNumListConst;
40  #endif
41 
42 
43  #if !defined(_FEATUREVECTOR_)
48  #endif
49 
50 
51  #if !defined(_FileDesc_Defined_)
52  class FileDesc;
53  typedef FileDesc* FileDescPtr;
54  #endif
55 
56 
57  #if !defined(_MLCLASS_)
58  class MLClass;
59  typedef MLClass* MLClassPtr;
60  class MLClassList;
61  typedef MLClassList* MLClassListPtr;
62  class MLClassIndexList;
63  typedef MLClassIndexList* MLClassIndexListPtr;
64  #endif
65 
66 
67  #if !defined(_NORMALIZATIONPARMS_)
70  #endif
71 
72 
73 
74  #if !defined(_FactoryFVProducer_Defined_)
77  #endif
78 
79 
80 
81 
82  class Model
83  {
84  public:
85  typedef Model* ModelPtr;
86 
87  enum class ModelTypes: int
88  {
89  Null = 0,
90  OldSVM = 1,
91  SvmBase = 2,
92  KNN = 3,
93  UsfCasCor = 4,
94  Dual = 5
95  };
96 
97  static KKStr ModelTypeToStr (ModelTypes _modelingType);
98  static ModelTypes ModelTypeFromStr (const KKStr& _modelingTypeStr);
99 
100 
101  Model ();
102 
103  /**
104  *@brief Use this when you are planning on creating an empty model without parameters.
105  */
106  Model (FactoryFVProducerPtr _factoryFVProducer);
107 
108 
109  /**
110  *@brief Construct an instance of 'Model' using the parameters specified in '_param'.
111  *@param[in] _name Name of training model.
112  *@param[in] _param Parameters for learning algorithm; we will create a duplicate copy.
113  *@param[in] _fileDesc Description of the dataset that will be used to train the classifier and examples that will be classified.
114  *@param[in] _cancelFlag Will monitor; if at any point it turns true this instance is to terminate and return to caller.
115  *@param[in,out] _log Logging file.
116  */
117  Model (const KKStr& _name,
118  const ModelParam& _param, // Create new model from
119  FactoryFVProducerPtr _factoryFVProducer
120  );
121 
122 
123  /**
124  *@brief Copy Constructor.
125  */
126  Model (const Model& _madel);
127 
128  virtual ~Model ();
129 
130 
131 
132  /**
133  *@brief A factory method that will instantiate the appropriate class of training model based off '_modelType'.
134  *@details This method is used to construct a model that is going to be built from training data.
135  *@param[in] _modelType Type of model to be created; ex: ModelTypes::OldSVM, ModelTypes::SvmBase, or ModelTypes::KNN.
136  *@param[in] _name
137  *@param[in] _param Parameters used to drive the creating of the model.
138  *@param[in] _fileDesc Description of the dataset that will be used to train the classifier and examples that will be classified.
139  *@param[in] _cancelFlag Will monitor; if at any point it turns true this instance is to terminate and return to caller.
140  *@param[in,out] _log Logging file.
141  */
142  static
143  ModelPtr CreateAModel (ModelTypes _modelType,
144  const KKStr& _name,
145  const ModelParam& _param, /**< Will make a duplicate copy of */
146  FactoryFVProducerPtr _factoryFVProducer,
147  VolConstBool& _cancelFlag,
148  RunLog& _log
149  );
150 
151 
152  virtual
153  ModelPtr Duplicate () const = 0;
154 
155 
156  // Access Methods
157  bool AlreadyNormalized () const {return alreadyNormalized;}
158 
159  virtual
160  KKStr Description () const; /**< Return short user readable description of model. */
161 
162  const FeatureEncoder2& Encoder () const;
163 
165 
166  virtual FeatureNumListConstPtr GetFeatureNums () const;
167 
168  virtual kkint32 MemoryConsumedEstimated () const;
169 
170  MLClassListPtr MLClasses () const {return classes;}
171 
172  MLClassListPtr MLClassesNewInstance () const; /**< Returns a new instance of 'classes' by calling copy constructor. */
173 
174  virtual ModelTypes ModelType () const = 0;
175 
176  virtual KKStr ModelTypeStr () const {return ModelTypeToStr (ModelType ());}
177 
178  const KKStr& Name () const {return name;}
179  void Name (const KKStr& _name) {name = _name;}
180 
181  virtual bool NormalizeNominalAttributes () const; /**< Return true, if nominal fields need to be normalized. */
182 
183  ModelParamPtr Param () const {return param;}
184 
185  virtual FeatureNumListConstPtr SelectedFeatures () const;
186 
187  const KKStr& RootFileName () const {return rootFileName;}
188 
189  const KKB::DateTime& TimeSaved () const {return timeSaved;}
190 
191  double TrainingTime () const {return trainingTime;}
192 
193  double TrianingPrepTime () const {return trianingPrepTime;} /**< Time in seconds spent preparing training data in Model::TrainModel. */
194 
195  bool ValidModel () const {return validModel;}
196 
197 
198  // Access Update Methods
199  void RootFileName (const KKStr& _rootFileName) {rootFileName = _rootFileName;}
200 
201 
202 
203  /**
204  @brief Derived classes call this method to start the clock for 'trainingTime'.
205  */
206  void TrainingTimeStart ();
207 
208  /**
209  @brief Derived classes call this method to stop the clock for 'trainingTime'.
210  */
211  void TrainingTimeEnd ();
212 
213 
214 
215  /**
216  *@brief Every prediction method in every class that is inherited from this class should call
217  * this method before performing their prediction. It performs such things as Normalization and
218  * Feature Encoding.
219  *@param[in] fv Feature vector of example that needs to be prepared.
220  *@param[out] newExampleCreated Indicates if either Feature Encoding and/or Normalization needed
221  * to be done. If neither then the original instance is returned. If Yes then
222  * a new instance which the caller will have to delete will be returned.
223  */
224  virtual
226  bool& newExampleCreated
227  );
228 
229 
230  virtual void PredictRaw (FeatureVectorPtr example,
231  MLClassPtr & predClass,
232  double& dist
233  )
234  {
235  predClass = NULL;
236  dist = 0.0;
237  }
238 
239  //*********************************************************************
240  //* Routines that should be implemented by descendant classes. *
241  //*********************************************************************
242 
243  virtual
244  MLClassPtr Predict (FeatureVectorPtr example,
245  RunLog& log
246  ) = 0;
247 
248  virtual
249  void Predict (FeatureVectorPtr example,
250  MLClassPtr knownClass,
251  MLClassPtr& predClass1,
252  MLClassPtr& predClass2,
253  kkint32& predClass1Votes,
254  kkint32& predClass2Votes,
255  double& probOfKnownClass,
256  double& predClass1Prob,
257  double& predClass2Prob,
258  kkint32& numOfWinners,
259  bool& knownClassOneOfTheWinners,
260  double& breakTie,
261  RunLog& log
262  ) = 0;
263 
264 
265  virtual
267  RunLog& log
268  ) = 0;
269 
270 
271  /**@brief Only applied to ModelDual classifier. */
272  virtual
274  KKStr& classifier1Desc,
275  KKStr& classifier2Desc,
276  ClassProbListPtr& classifier1Results,
277  ClassProbListPtr& classifier2Results,
278  RunLog& log
279  );
280 
281 
282  virtual
284  const MLClassList& _mlClasses,
285  kkint32* _votes,
286  double* _probabilities,
287  RunLog& _log
288  ) = 0;
289 
290  /**
291  *@brief Derives predicted probabilities by class.
292  *@details Will get the probabilities assigned to each class by the classifier. The
293  * '_mlClasses' parameter dictates the order of the classes. That is the
294  * probabilities for any given index in '_probabilities' will be for the class
295  * specified in the same index in '_mlClasses'.
296  *@param[in] _example FeatureVector object to calculate predicted probabilities for.
297  *@param[in] _mlClasses List of classes that caller is aware of. This should be the
298  * same list that was used when constructing this Model object. The list must
299  * be the same but not necessarily in the same order as when Model was 1st
300  * constructed. The ordering of this list will dictate the order that '_probabilities'
301  * will be populated.
302  *@param[out] _probabilities An array that must be as big as the number of classes in
303  * '_mlClasses'. The probability of class in '_mlClasses[x]' will be
304  * returned in probabilities[x].
305  */
306  virtual
307  void ProbabilitiesByClass (FeatureVectorPtr _example,
308  const MLClassList& _mlClasses,
309  double* _probabilities,
310  RunLog& _log
311  ) = 0;
312 
313 
314  virtual
315  void RetrieveCrossProbTable (MLClassList& classes,
316  double** crossProbTable, /**< two dimension matrix that needs to be classes.QueueSize () squared. */
317  RunLog& log
318  );
319 
320  /**
321  *@brief Performs operations such as FeatureEncoding, and Normalization. The actual training
322  * of models occurs in the specific derived implementation of 'Model'.
323  *@param[in] _trainExamples Training data that classifier will be built from. If the examples need to be
324  * normalized or encoded and we are not taking ownership then a duplicate list of
325  * examples will be created that this method and class will be free to modify.
326  *@param[in] _alreadyNormalized Indicates if contents of '_trainExamples' are normalized already; if not
327  * they will be normalized.
328  *@param[in] _takeOwnership This instance of Model will take ownership of '_examples' and is free to
329  * modify its contents.
330  */
331  virtual
332  void TrainModel (FeatureVectorListPtr _trainExamples,
333  bool _alreadyNormalized,
334  bool _takeOwnership,
335  VolConstBool& _cancelFlag,
336  RunLog& _log
337  );
338 
339 
340  /**
341  *@brief To be implemented by derived classes; the parent classes fields will be updated by the
342  * derived class calling ReadXMLModelToken.
343  */
344  virtual void ReadXML (XmlStream& s,
345  XmlTagConstPtr tag,
346  VolConstBool& cancelFlag,
347  RunLog& log
348  ) = 0;
349 
350 
351  virtual void WriteXML (const KKStr& varName,
352  std::ostream& o
353  ) const = 0;
354 
355 
356  /**
357  *@brief The "WriteXML" method in Derived classes call this method to include the parents classes fields in the XML data.
358  */
359  void WriteModelXMLFields (std::ostream& o) const;
360 
361  protected:
362  void AddErrorMsg (const KKStr& errMsg,
363  kkint32 lineNum
364  );
365 
367 
368 
369  void DeAllocateSpace ();
370 
371 
373  double* probabilities,
374  double minProbability
375  );
376 
377  /** @brief Will process any tokens that belong to 'ModelParam' and return NULL; ones that do not will be passed back. */
379  RunLog& log
380  );
381 
382  void ReadXMLModelPost (RunLog& log);
383 
384  void ReduceTrainExamples (RunLog& log);
385 
386 
387 
389 
390  MLClassListPtr classes;
391 
392  MLClassIndexListPtr classesIndex;
393 
394  double* classProbs;
395 
396  double** crossClassProbTable; /*!< Probabilities between Binary Classes From last Prediction */
397 
399 
401 
403 
405 
407 
409 
410  kkint32 numOfClasses; /**< Number of Classes defined in crossClassProbTable. */
411 
412  ModelParamPtr param; /**< Will own this instance, */
413 
414  KKStr rootFileName; /**< This is the root name to be used by all component objects; such as svm_model,
415  * mlClasses, and svmParam(including selected features). Each one will have the
416  * same rootName with a different suffix.
417  *@code
418  * mlClasses "<rootName>.classes"
419  * svmParam "<rootName>.svm_parm"
420  * model "<rootName>"
421  *@endcode
422  */
423 
425 
427 
429 
430  bool weOwnTrainExamples; /**< Indicates if we own the 'trainExamples'. This does not mean that we own its
431  * contents. That is determined by 'trainExamples->Owner ()'.
432  */
433 
434 
435  private:
436  double trianingPrepTime; /**< Time that it takes to perform normalization, and encoding */
437  double trainingTime;
438  double trainingTimeStart; /**< Time that the clock for 'trainingTime' was started. */
439  KKStr name;
440  KKB::DateTime timeSaved; /**< Date and Time that this model was saved. */
441  }; /* Model */
442 
443  typedef Model::ModelPtr ModelPtr;
444 
445 #define _Model_Defined_
446 
447 
448 
449  /**
450  *@brief The base class to be used for the manufacturing of "Model" derived classes.
451  */
453  {
454  public:
456  XmlStream& s,
457  RunLog& log
458  ):
459  XmlElement (tag, s, log)
460  {}
461 
462  virtual ~XmlElementModel ()
463  {
464  delete value;
465  value = NULL;
466  }
467 
468  ModelPtr Value () const {return value;}
469 
470  ModelPtr TakeOwnership ()
471  {
472  ModelPtr v = value;
473  value = NULL;
474  return v;
475  }
476 
477  protected:
478  ModelPtr value;
479  };
481 
482 
483 
484  template<class ModelType>
486  {
487  public:
489  XmlStream& s,
490  VolConstBool& cancelFlag,
491  RunLog& log
492  ):
494  {
495  value = new ModelType();
497  }
498 
500  {
501  }
502 
503  ModelType* Value () const {return dynamic_cast<ModelType*>(value);}
504 
505 
506  ModelType* TakeOwnership () {return dynamic_cast<ModelType*> (XmlElementModel::TakeOwnership ());}
507  }; /* XmlElementModelTemplate */
508 
509 
510 
511 } /* namespace KKMLL */
512 
513 #endif
const KKStr & Name() const
Definition: Model.h:178
ModelType * Value() const
Definition: Model.h:503
void DeAllocateSpace()
Definition: Model.cpp:383
Base class to all Learning Algorithms.
Definition: Model.h:82
XmlTag * XmlTagPtr
Definition: Atom.h:31
virtual void Predict(FeatureVectorPtr example, MLClassPtr knownClass, MLClassPtr &predClass1, MLClassPtr &predClass2, kkint32 &predClass1Votes, kkint32 &predClass2Votes, double &probOfKnownClass, double &predClass1Prob, double &predClass2Prob, kkint32 &numOfWinners, bool &knownClassOneOfTheWinners, double &breakTie, RunLog &log)=0
kkint32 * votes
Definition: Model.h:428
Provides a detailed description of the attributes of a dataset.
Definition: FileDesc.h:72
bool alreadyNormalized
Definition: Model.h:388
static ModelPtr CreateAModel(ModelTypes _modelType, const KKStr &_name, const ModelParam &_param, FactoryFVProducerPtr _factoryFVProducer, VolConstBool &_cancelFlag, RunLog &_log)
A factory method that will instantiate the appropriate class of training model based off '_modelType'...
Definition: Model.cpp:287
void Name(const KKStr &_name)
Definition: Model.h:179
__int32 kkint32
Definition: KKBaseTypes.h:88
ModelPtr TakeOwnership()
Definition: Model.h:470
Maintains a list of classes and their associated integer index.
Definition: MLClass.h:459
FeatureVector * FeatureVectorPtr
Definition: Model.h:44
static KKStr ModelTypeToStr(ModelTypes _modelingType)
Definition: Model.cpp:256
XmlElement(XmlTagPtr _nameTag, XmlStream &s, RunLog &log)
Definition: XmlStream.cpp:758
virtual void WriteXML(const KKStr &varName, std::ostream &o) const =0
NormalizationParmsPtr normParms
Definition: Model.h:408
NormalizationParms * NormalizationParmsPtr
Definition: Model.h:68
XmlTokenPtr ReadXMLModelToken(XmlTokenPtr t, RunLog &log)
Will process any tokens that belong to 'ModelParam' and return NULL ones that are not will be passed ...
Definition: Model.cpp:884
ModelParamPtr param
Definition: Model.h:412
virtual FeatureNumListConstPtr GetFeatureNums() const
Definition: Model.cpp:415
FeatureVectorListPtr trainExamples
Definition: Model.h:424
XmlElementModelTemplate(XmlTagPtr tag, XmlStream &s, VolConstBool &cancelFlag, RunLog &log)
Definition: Model.h:488
Represents a "Class" in the Machine Learning Sense.
Definition: MLClass.h:52
void AddErrorMsg(const KKStr &errMsg, kkint32 lineNum)
Definition: Model.cpp:239
virtual ~XmlElementModelTemplate()
Definition: Model.h:499
KKStr rootFileName
Definition: Model.h:414
void WriteModelXMLFields(std::ostream &o) const
The "WriteXML" method in Derived classes call this method to include the parents classes fields in th...
Definition: Model.cpp:854
FactoryFVProducerPtr factoryFVProducer
Definition: Model.h:404
double TrianingPrepTime() const
Definition: Model.h:193
double * classProbs
Definition: Model.h:394
virtual void PredictRaw(FeatureVectorPtr example, MLClassPtr &predClass, double &dist)
Definition: Model.h:230
static ModelTypes ModelTypeFromStr(const KKStr &_modelingTypeStr)
Definition: Model.cpp:271
XmlToken * XmlTokenPtr
Definition: XmlStream.h:18
kkint32 numOfClasses
Definition: Model.h:410
XmlElementModel(XmlTagPtr tag, XmlStream &s, RunLog &log)
Definition: Model.h:455
virtual ModelPtr Duplicate() const =0
virtual FeatureVectorPtr PrepExampleForPrediction(FeatureVectorPtr fv, bool &newExampleCreated)
Every prediction method in every class that is inherited from this class should call this method befo...
Definition: Model.cpp:574
const KKB::DateTime & TimeSaved() const
Definition: Model.h:189
Container class for FeatureVector derived objects.
KKTHread * KKTHreadPtr
ModelPtr Value() const
Definition: Model.h:468
Model * ModelPtr
Definition: Model.h:85
const KKStr & RootFileName() const
Definition: Model.h:187
virtual void ProbabilitiesByClass(FeatureVectorPtr example, const MLClassList &_mlClasses, kkint32 *_votes, double *_probabilities, RunLog &_log)=0
Used to record probability for a specified class; and a list of classes.
Definition: ClassProb.h:25
bool AlreadyNormalized() const
Definition: Model.h:157
XmlTag const * XmlTagConstPtr
Definition: KKStr.h:45
#define _FeatureNumList_Defined_
bool validModel
Definition: Model.h:426
Manages the reading and writing of objects in a simple XML format. For a class to be supported by Xml...
Definition: XmlStream.h:46
FactoryFVProducerPtr FactoryFVProducer() const
Definition: Model.h:164
void TrainingTimeEnd()
Derived classes call this method to stop the clock for 'trainingTime'.
Definition: Model.cpp:452
virtual kkint32 MemoryConsumedEstimated() const
Definition: Model.cpp:208
ModelParamPtr Param() const
Definition: Model.h:183
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
ClassProbList * ClassProbListPtr
Definition: Classifier2.h:30
The base class to be used for the manufacturing of "Model" derived classes.
Definition: Model.h:452
virtual bool NormalizeNominalAttributes() const
Definition: Model.cpp:423
virtual void RetrieveCrossProbTable(MLClassList &classes, double **crossProbTable, RunLog &log)
Definition: Model.cpp:755
FileDesc * FileDescPtr
void NormalizeProbabilitiesWithAMinumum(kkint32 numClasses, double *probabilities, double minProbability)
Definition: Model.cpp:614
virtual void ProbabilitiesByClassDual(FeatureVectorPtr example, KKStr &classifier1Desc, KKStr &classifier2Desc, ClassProbListPtr &classifier1Results, ClassProbListPtr &classifier2Results, RunLog &log)
Only applied to ModelDual classifier.
Definition: Model.cpp:830
Model(const KKStr &_name, const ModelParam &_param, FactoryFVProducerPtr _factoryFVProducer)
Construct an instance of 'Model' using the parameters specified in '_param'.
Definition: Model.cpp:152
bool ValidModel() const
Definition: Model.h:195
void ReduceTrainExamples(RunLog &log)
Reduces the Training Images down to the size dictated by the 'examplesPerClass' parameter.
Definition: Model.cpp:649
MLClassListPtr MLClasses() const
Definition: Model.h:170
virtual ClassProbListPtr ProbabilitiesByClass(FeatureVectorPtr example, RunLog &log)=0
virtual ModelTypes ModelType() const =0
VectorKKStr errors
Definition: Model.h:402
double ** crossClassProbTable
Definition: Model.h:396
FileDescPtr fileDesc
Definition: Model.h:406
virtual KKStr ModelTypeStr() const
Definition: Model.h:176
virtual ~XmlElementModel()
Definition: Model.h:462
virtual MLClassPtr Predict(FeatureVectorPtr example, RunLog &log)=0
Normalization Parameters; calculation and implementation.
FeatureNumListConst * FeatureNumListConstPtr
KKStr & operator=(const KKStr &src)
Definition: KKStr.cpp:1390
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
virtual void TrainModel(FeatureVectorListPtr _trainExamples, bool _alreadyNormalized, bool _takeOwnership, VolConstBool &_cancelFlag, RunLog &_log)
Performs operations such as FeatureEncoding, and Normalization. The actual training of models occurs ...
Definition: Model.cpp:467
FeatureEncoder2 * FeatureEncoder2Ptr
Definition: Model.h:30
double TrainingTime() const
Definition: Model.h:191
void TrainingTimeStart()
Derived classes call this method to start the clock for 'trainingTime'.
Definition: Model.cpp:445
const FeatureEncoder2 & Encoder() const
Definition: Model.cpp:406
void ReadXMLModelPost(RunLog &log)
Definition: Model.cpp:987
virtual ~Model()
Frees any memory allocated by, and owned by the Model.
Definition: Model.cpp:188
Responsible for creating a FeatureVectorProducer instance.
void RootFileName(const KKStr &_rootFileName)
Definition: Model.h:199
Model(const Model &_madel)
Copy Constructor.
Definition: Model.cpp:69
XmlElementModel * XmlElementModelPtr
Definition: Model.h:480
kkint32 crossClassProbTableSize
Definition: Model.h:398
Maintains a list of MLClass instances.
Definition: MLClass.h:233
FeatureVectorList * FeatureVectorListPtr
Definition: Model.h:46
virtual void ProbabilitiesByClass(FeatureVectorPtr _example, const MLClassList &_mlClasses, double *_probabilities, RunLog &_log)=0
Derives predicted probabilities by class.
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
FeatureEncoder2Ptr encoder
Definition: Model.h:400
bool weOwnTrainExamples
Definition: Model.h:430
virtual void ReadXML(XmlStream &s, XmlTagConstPtr tag, VolConstBool &cancelFlag, RunLog &log)=0
To be implemented by derived classes; the parent classes fields will be updated by the derived class ...
virtual KKStr Description() const
Definition: Model.cpp:248
Abstract Base class for Machine Learning parameters.
Definition: ModelParam.h:35
MLClassIndexListPtr classesIndex
Definition: Model.h:392
FactoryFVProducer * FactoryFVProducerPtr
Definition: Model.h:75
MLClassListPtr classes
Definition: Model.h:390
ModelType * TakeOwnership()
Definition: Model.h:506
Model(FactoryFVProducerPtr _factoryFVProducer)
Use this when you are planning on creating an empty model without parameters.
Definition: Model.cpp:117
void AllocatePredictionVariables()
Definition: Model.cpp:343
MLClassListPtr MLClassesNewInstance() const
Definition: Model.cpp:228
virtual FeatureNumListConstPtr SelectedFeatures() const
Definition: Model.cpp:436
ClassProb * ClassProbPtr
Definition: Classifier2.h:28
volatile const bool VolConstBool
Definition: KKBaseTypes.h:163