KSquare Utilities
TrainingProcess2.h
Go to the documentation of this file.
1 #ifndef _TRAININGPROCESS2_
2 #define _TRAININGPROCESS2_
3 
4 #include <ostream>
5 
6 #include "XmlStream.h"
7 
8 #include "Model.h"
9 #include "SVMModel.h"
11 
12 
13 
14 
15 namespace KKMLL
16 {
17  #if !defined(_FEATUREVECTOR_)
18  class FeatureVector;
20  class FeatureVectorList;
22  #endif
23 
24 
25  #if !defined(FileDesc_Defined_)
26  class FileDesc;
27  typedef FileDesc* FileDescPtr;
28  #endif
29 
30 
31  #if !defined(_MLCLASS_)
32  class MLClass;
33  typedef MLClass* MLClassPtr;
34  class MLClassList;
35  typedef MLClassList* MLClassListPtr;
36  #endif
37 
38 
39  #if !defined(_MODELOLDSVM_)
40  class ModelOldSVM;
41  typedef ModelOldSVM* ModelOldSVMPtr;
42  #endif
43 
49  #endif
50 
53 
54 
55  ///<summary>Manages the creation and loading of training models.</summary>
56  ///<remarks>
57  /// A trained model can either be built from scratch using specified training data or a persistent instance
58  /// can be loaded from an XmlStream. There are several static methods that are best used to manage the various situations
59  /// such as "CreateTrainingProcess", "CreateTrainingProcessForLevel", "CreateTrainingProcessFromTrainingExamples", and
60  /// "LoadExistingTrainingProcess". The result of any of these methods is a TrainingProcess2 instance that you
61  /// use to create a Classifier2 instance.
62  ///
63  /// Supporting Classes:
64  /// - TrainingConfiguration2 Defines the parameters of a classifier such as:
65  /// -# Type of algorithm
66  /// -# List of classes and where their training examples can be loaded from.
67  /// -# Sub-Classifiers; that is, for any given class another "TrainingConfiguration2" can be specified indicating another
68  /// classifier to be used when that class is predicted.
69  /// -# Class Weights
70  ///
71  /// - Model Base class for all algorithms. SupportVectorMachine(SVM), USFCasCor, BFS-SVM, etc ....
72  ///
73  /// - Classifier2 You construct an instance from a 'TrainingProcess2' instance; this is the class that manages predictions.
74  ///
75  /// Sub-Classifiers: For each sub-classifier specified in the "TrainingConfiguration2" another instance of "TrainingProcess2"
76  /// will be created. Data member "subTrainingProcesses" will keep a list of these "TrainingProcess2" instances.
77  ///</remarks>
79  {
80  public:
82 
83  enum class WhenToRebuild
84  {
87  NotValid,
89  };
90 
91 
92 
93  ///<summary>
94  /// Creates a TrainingProcess2 based off a specified configuration; depending on the "whenToRebuild" parameter
95  /// and the current status of the corresponding "save" file will either load an existing trained classifier or build a
96  /// new one from scratch.
97  ///</summary>
98  ///<param name="config"> A previously loaded configuration file that specifies directories where example images for
99  /// each class can be found. Caller will still own config and be responsible for deleting it.</param>
100  ///<param name="checkForDuplicates"> If set to true will look for duplicates in the training data. Two FeatureVectors
101  /// will be considered duplicates if they have the same ExampleFileName or the same Feature Values. If duplicates
102  /// are in the same class then all but one will be removed. If they are in more than one class then they will both
103  /// be removed.</param>
104  ///<param name="whenToRebuild"> Specify when to rebuild the Models; see definition of enumerator WhenToRebuild.</param>
105  ///<param name="saveTrainedModel"> Specifies whether to save the TrainingProcess2 if it needs to be trained. </param>
106  ///<param name="cancelFlag"> Will monitor; if it ever is set to true stop processing at earliest convenience and return to caller. </param>
107  ///<param name="log"> Logging file. </param>
108  static
109  TrainingProcess2Ptr CreateTrainingProcess (TrainingConfiguration2Const* config,
110  bool checkForDuplicates,
111  WhenToRebuild whenToRebuild,
112  bool saveTrainedModel,
113  VolConstBool& cancelFlag,
114  RunLog& log
115  );
116 
117 
118 
119  ///<summary>Build a new model from scratch for the specified class level removing duplicate training examples.</summary>
120  ///<remarks>
121  /// Using the parameter level will construct a classifier that groups classes together by group hierarchy. Underscore
122  /// characters in the class name will be used to differentiate group levels. Ex: Crustacean_Copepod_Calanoid has three
123  /// levels of grouping where Crustacean belongs to level 1, Copepod to level 2, and Calanoid to level 3.
124  ///</remarks>
125  ///<param name="config"> Configuration that will provide parameters such as classes and their related directories where
126  /// training examples are found and sub-classifiers.
127  ///</param>
128  ///<param name="level"> The grouping level to build a classifier for. Ex: if level = 2 is specified, then referring to the
129  /// class name "Crustacean_Copepod_Calanoid" above, all classes that start with "Crustacean_Copepod_" will be combined
130  /// as one logical class.
131  ///</param>
132  ///<param name="cancelFlag"> Will monitor; if it is ever set to true will stop processing at earliest convenience and return
133  /// to caller.
134  ///</param>
135  ///<param name="log"> Logging file.</param>
136  static
137  TrainingProcess2Ptr CreateTrainingProcessForLevel (TrainingConfiguration2Const* config,
138  kkuint32 level,
139  VolConstBool& cancelFlag,
140  RunLog& log
141  );
142 
143 
144  ///<summary> Build a new model from scratch for the specified class level removing duplicate training examples. </summary>
145  ///<remarks>
146  /// Using the parameter level will construct a classifier that groups classes together by group hierarchy. Underscore
147  /// characters in the class name will be used to differentiate group levels. Ex: Crustacean_Copepod_Calanoid has three
148  /// levels of grouping where Crustacean, Copepod, and Calanoid belong to levels 1, 2, 3 respectively.
149  ///</remarks>
150  ///<param name="configFileName"> Name of Configuration file that is to be used to construct instance of TrainingConfiguration2.
151  /// Will provide parameters such as classes and their related directories where training examples are found and sub-classifiers.
152  ///</param>
153  ///<param name="level"> The grouping level to build a classifier for. Ex: if level = 2 is specified, then referring to the class
154  /// name Crustacean_Copepod_Calanoid above, all classes that start with Crustacean_Copepod_ will be combined as one logical class.
155  ///</param>
156  ///<param name="cancelFlag"> Will monitor; if it is ever set to true will stop processing at earliest convenience and return to caller. </param>
157  ///<param name="log">Logging file.</param>
158  static
160  kkuint32 level,
161  VolConstBool& cancelFlag,
162  RunLog& log
163  );
164 
165 
166  /**
167  *@brief Will Construct an instance using provided list of examples rather than loading from training library.
168  *@details Training examples are typically loaded from a training library as specified in the TrainingConfiguration2
169  * structure. Rather than loading and/or computing feature data it will utilize the feature-vectors provided by the
170  * 'trainingExamples' parameter.
171  *@param[in] config A configuration that is already loaded in memory.
172  *@param[in] trainingExamples Training data to train classifier with.
173  *@param[in] takeOwnershipOfTrainingExamples If set to true then we will take ownership of the feature-vectors in
174  * 'trainingExamples'. This means we are free to modify or delete them as needed.
175  * If this flag is set to false will make duplicate copy of the feature-vectors
176  * if it is required to modify them; such as normalize them.
177  *@param[in] featuresAlreadyNormalized If set to true will assume that all features in the training data are normalized.
178  *@param[in] cancelFlag Will monitor; if it is ever set to true will stop processing at earliest convenience
179  * and return to caller.
180  *@param[in] log Logging file.
181  */
182  static
183  TrainingProcess2Ptr CreateTrainingProcessFromTrainingExamples (TrainingConfiguration2Const* config,
184  FeatureVectorListPtr trainingExamples,
185  bool takeOwnershipOfTrainingExamples,
186  bool featuresAlreadyNormalized,
187  VolConstBool& cancelFlag,
188  RunLog& log
189  );
190 
191 
192 
193  /**
194  *@brief Loads an existing TrainingProcess; if one does not exist will return NULL.
195  *@param[in] configRootName Root name of training model;
196  *@param[in] cancelFlag Will monitor; if it is ever set to true will stop processing at earliest convenience
197  * and return to caller.
198  *@param[in] log Logging file.
199  */
200  static
202  VolConstBool& cancelFlag,
203  RunLog& log
204  );
205 
206 
207 
208  /**
209  *@brief The default constructor; used when creating an instance that is being read in
210  * from an XML Stream file. All members will be set to default values; presumably the ReadXML method then populates them (TODO confirm; the original sentence was left unfinished).
211  */
212  TrainingProcess2 ();
213 
214 
215  virtual
216  ~TrainingProcess2 ();
217 
219 
220 
221 
222  /**
223  *@brief Call this method just after you construct a new instance of "TrainingProcess2"
224  *@param[in] _config
225  *@param[in] _whenToRebuild Used for any sub classifiers that this instance of TrainingProcess2 might need to build.
226  *@param[in] _trainingExamples
227  *@param[in] _takeOwnerShipOfTrainingExamples If true this instance of 'TrainingProcess2' will take ownership of '_trainingExamples' and delete it when done with them.
228  *@param[in] _checkForDuplicates If true will remove duplicate examples from '_trainingExamples'.
229  *@param[in] _cancelFlag
230  *@param[in] _log
231  */
232  void BuildTrainingProcess (TrainingConfiguration2Const* _config,
233  WhenToRebuild _whenToRebuild,
234  FeatureVectorListPtr _trainingExamples,
235  bool _takeOwnerShipOfTrainingExamples,
236  bool _checkForDuplicates,
237  VolConstBool& _cancelFlag,
238  RunLog& _log
239  );
240 
241 
242  // Access Members
243  void Abort (bool _abort) {abort = _abort;}
244 
245  bool Abort () const {return abort;}
246  const KKB::DateTime& BuildDateTime () const {return buildDateTime;}
247  TrainingConfiguration2Const* Config () {return config;}
248  const KKStr& ConfigFileName () const {return configFileName;}
250  kkint32 DuplicateCount () const {return duplicateCount;}
251  kkint32 DuplicateDataCount () const {return duplicateDataCount;}
252  bool FeaturesAlreadyNormalized () const {return featuresAlreadyNormalized;}
253  FactoryFVProducerPtr FvFactoryProducer () const {return fvFactoryProducer;}
254  FeatureVectorListPtr Images () {return trainingExamples;}
255  MLClassListPtr MLClasses () const {return mlClasses;}
256  Model::ModelTypes ModelType () const;
257  KKStr ModelTypeStr () const;
258  KKStr ModelDescription () const;
259  SVMModelPtr Model3 ();
260  kkint32 NumOfSupportVectors () const;
261  ModelOldSVMPtr OldSVMModel () const;
262  ClassProbList const * PriorProbability () const {return priorProbability;}
263  ModelParamPtr Parameters () const;
264  TrainingProcess2ListPtr SubTrainingProcesses () const {return subTrainingProcesses;}
265  ModelPtr TrainedModel () const {return model;}
266  double TrainingTime () const;
267 
268 
269 
270  void FeaturesAlreadyNormalized (bool _featuresAlreadyNormalized) {featuresAlreadyNormalized = _featuresAlreadyNormalized;}
271 
272 
273  void CreateModelsFromTrainingData (WhenToRebuild whenToRebuild,
274  VolConstBool& cancelFlag,
275  RunLog& log
276  );
277 
278 
279  /**@brief Extracts the list of classes including ones from Sub-Classifiers */
280  MLClassListPtr ExtractFullHierachyOfClasses () const;
281 
282  static
283  FeatureVectorListPtr ExtractTrainingClassFeatures (TrainingConfiguration2ConstPtr config,
284  KKB::DateTime& latestImageTimeStamp,
285  bool& changesMadeToTrainingLibraries,
286  VolConstBool& cancelFlag,
287  RunLog& log
288  );
289 
290  void LoadPrevTrainedOtherwiseRebuild (bool _forceRebuild,
291  bool _checkForDuplicates
292  );
293 
294  void ReportTraningClassStatistics (std::ostream& report);
295 
296  /**
297  * @brief Saves the built training model into the Save file in Xml Format.
298  */
299  void SaveTrainingProcess (RunLog& log);
300 
301  void SupportVectorStatistics (kkint32& numSVs,
302  kkint32& totalNumSVs
303  );
304 
305  ///<summary>
306  /// Returns back pointer to 1st classifier of Dual Classifier; if not a Dual classifier will return back NULL. Keep in mind
307  /// that you will not own this classifier and that it can be deleted at any time.
308  ///</summary>
310 
311 
312  ///<summary> Returns back pointer to 2nd classifier of Dual Classifier; if not a Dual classifier will return back NULL. </summary>
314 
315  void ValidateConfiguration ();
316 
317  virtual void ReadXML (XmlStream& s,
318  XmlTagConstPtr tag,
319  VolConstBool& cancelFlag,
320  RunLog& log
321  );
322 
323  virtual void WriteXML (const KKStr& varName, std::ostream& o) const;
324 
325 
326  private:
327  void AddImagesToTrainingLibray (FeatureVectorList& trainingExamples,
328  FeatureVectorList& examplesToAdd,
329  RunLog& log
330  );
331 
332  void BuildModel3 ();
333 
334  void CheckForDuplicates (bool allowDupsInSameClass, RunLog& log);
335 
336  void LoadSubClassifiers (WhenToRebuild whenToRebuild,
337  bool checkForDuplicates,
338  VolConstBool& cancelFlag,
339  RunLog& log
340  );
341 
342 
343  static
344  FeatureVectorListPtr ExtractFeatures (TrainingConfiguration2ConstPtr config,
345  MLClassList& mlClasses,
346  const TrainingClassPtr trainingClass,
347  KKB::DateTime& latestTimeStamp,
348  bool& changesMade,
349  VolConstBool& cancelFlag,
350  RunLog& log
351  );
352 
353 
354 
355  //************************************************************
356  // Variables that are Global to TrainingProcess2 application. *
357  //************************************************************
358 
359  bool abort; /**< If problem building a model or loading will be set to True. */
360 
361  KKB::DateTime buildDateTime;
362 
363  TrainingConfiguration2Const* config;
364  TrainingConfiguration2* configOurs; /**< If we own the instance of 'config' we assign to this member as well as 'config'; the
365  * destructor will delete 'configOurs'
366  */
367 
368  KKStr configFileName; /**< The directory path where this file is actually located will be added to this name. */
369 
370  KKStr configFileNameSpecified; /**< This will be the ConfigFileName specified by caller before the directory
371  * that is added for actual location of config file.
372  */
373 
374  kkint32 duplicateCount;
375  kkint32 duplicateDataCount;
376 
377  bool featuresAlreadyNormalized;
378 
379  FileDescPtr fileDesc;
380 
381  FactoryFVProducerPtr fvFactoryProducer;
382 
383  MLClassListPtr mlClasses; /**< List of all classes that are to be processed. There will be one entry for each MLClass,
384  * Including one for noise trainingExamples(unknown trainingExamples).
385  */
386 
387  ModelPtr model;
388 
389  ClassProbListPtr priorProbability; /**< Based on Training example distribution. */
390 
391  std::ostream* report;
392 
393  KKStr savedModelName;
394 
395  TrainingProcess2ListPtr subTrainingProcesses;
396 
397  FeatureVectorListPtr trainingExamples; /**< All Images Loaded. Owns all trainingExamples. All other ImageLists will only point to
398  * these trainingExamples.
399  */
400 
401  bool weOwnMLClasses;
402  bool weOwnTrainingExamples;
403  }; /* TrainingProcess2 */
404 
406 
407 #define _TrainingProcess2_Defined_
408 
410  {
411  public:
412  TrainingProcess2List (bool _owner);
413  virtual ~TrainingProcess2List ();
414 
416 
417  };
418 
420 
421 #define _TrainingProcess2List_Defined_
422 
423 
424  // XmlStream instances.
425 
428 
429 } /* namespace KKMLL */
430 
431 #endif
void SupportVectorStatistics(kkint32 &numSVs, kkint32 &totalNumSVs)
Base class to all Learning Algorithms.
Definition: Model.h:82
VectorKKStr ConfigFileFormatErrors() const
If there is a config file; will return a list of its FormatErrors ().
SVMModel * SVMModelPtr
Definition: SVMModel.h:557
Provides a detailed description of the attributes of a dataset.
Definition: FileDesc.h:72
FactoryFVProducerPtr FvFactoryProducer() const
__int32 kkint32
Definition: KKBaseTypes.h:88
FeatureVectorListPtr Images()
static TrainingProcess2Ptr CreateTrainingProcess(TrainingConfiguration2Const *config, bool checkForDuplicates, WhenToRebuild whenToRebuild, bool saveTrainedModel, VolConstBool &cancelFlag, RunLog &log)
MLClassListPtr ExtractFullHierachyOfClasses() const
Extracts the list of classes including ones from Sub-Classifiers.
void LoadPrevTrainedOtherwiseRebuild(bool _forceRebuild, bool _checkForDuplicates)
virtual void ReadXML(XmlStream &s, XmlTagConstPtr tag, VolConstBool &cancelFlag, RunLog &log)
TrainingProcess2Ptr TrainingProcessRight()
kkint32 MemoryConsumedEstimated() const
ModelPtr TrainedModel() const
ModelParamPtr Parameters() const
TrainingConfiguration2Const * Config()
ClassProbList const * PriorProbability() const
void FeaturesAlreadyNormalized(bool _featuresAlreadyNormalized)
#define _TrainingConfiguration2_Defined_
const KKB::DateTime & BuildDateTime() const
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
virtual void WriteXML(const KKStr &varName, std::ostream &o) const
void BuildTrainingProcess(TrainingConfiguration2Const *_config, WhenToRebuild _whenToRebuild, FeatureVectorListPtr _trainingExamples, bool _takeOwnerShipOfTrainingExamples, bool _checkForDuplicates, VolConstBool &_cancelFlag, RunLog &_log)
Call this method just after you construct a new instance of "TrainingProcess2".
MLClassListPtr MLClasses() const
Container class for FeatureVector derived objects.
kkint32 NumOfSupportVectors() const
KKTHread * KKTHreadPtr
XmlElementTemplate< TrainingProcess2 > XmlElementTrainingProcess2
static TrainingProcess2Ptr CreateTrainingProcessForLevel(TrainingConfiguration2Const *config, kkuint32 level, VolConstBool &cancelFlag, RunLog &log)
static TrainingProcess2Ptr CreateTrainingProcessFromTrainingExamples(TrainingConfiguration2Const *config, FeatureVectorListPtr trainingExamples, bool takeOwnershipOfTrainingExamples, bool featuresAlreadyNormalized, VolConstBool &cancelFlag, RunLog &log)
Will Construct an instance using provided list of examples rather than loading from training library...
static TrainingProcess2Ptr LoadExistingTrainingProcess(const KKStr &configRootName, VolConstBool &cancelFlag, RunLog &log)
Loads an existing TrainingProcess; if one does not exist will return NULL.
XmlTag const * XmlTagConstPtr
Definition: KKStr.h:45
Manages the reading and writing of objects in a simple XML format. For a class to be supported by Xml...
Definition: XmlStream.h:46
void ReportTraningClassStatistics(std::ostream &report)
ModelOldSVMPtr OldSVMModel() const
void SaveTrainingProcess(RunLog &log)
Saves the built training model into the Save file in Xml Format.
kkint32 MemoryConsumedEstimated() const
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
ClassProbList * ClassProbListPtr
Definition: Classifier2.h:30
void Abort(bool _abort)
TrainingProcess2List(bool _owner)
void CreateModelsFromTrainingData(WhenToRebuild whenToRebuild, VolConstBool &cancelFlag, RunLog &log)
TrainingProcess2()
The default constructor; What will be used when creating an instance while reading in from a XML Stre...
kkint32 DuplicateCount() const
FileDesc * FileDescPtr
TrainingProcess2 * TrainingProcess2Ptr
TrainingProcess2 * TrainingProcess2Ptr
Definition: Classifier2.h:62
kkint32 DuplicateDataCount() const
bool FeaturesAlreadyNormalized() const
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
const KKStr & ConfigFileName() const
Model::ModelTypes ModelType() const
TrainingProcess2ListPtr SubTrainingProcesses() const
Maintains a list of MLClass instances.
Definition: MLClass.h:233
static FeatureVectorListPtr ExtractTrainingClassFeatures(TrainingConfiguration2ConstPtr config, KKB::DateTime &latestImageTimeStamp, bool &changesMadeToTrainingLibraries, VolConstBool &cancelFlag, RunLog &log)
FeatureVectorList * FeatureVectorListPtr
Definition: Model.h:46
XmlElementTrainingProcess2 * XmlElementTrainingProcess2Ptr
TrainingProcess2Ptr TrainingProcessLeft()
summary> Returns back pointer to 2nd classifier of Dual Classifier; if not a Dual classifier will ret...
TrainingClass * TrainingClassPtr
TrainingProcess2List * TrainingProcess2ListPtr
Definition: Classifier2.h:68
FactoryFVProducer * FactoryFVProducerPtr
Definition: Model.h:75
static TrainingProcess2Ptr CreateTrainingProcessForLevel(const KKStr &configFileName, kkuint32 level, VolConstBool &cancelFlag, RunLog &log)
volatile const bool VolConstBool
Definition: KKBaseTypes.h:163