KSquare Utilities
CrossValidationMxN.cpp
Go to the documentation of this file.
1 #include "FirstIncludes.h"
2 #include <stdio.h>
3 #include <fstream>
4 #include <iostream>
5 #include <map>
6 #include <vector>
7 #include "MemoryDebug.h"
8 using namespace std;
9 
10 
11 #include "KKBaseTypes.h"
12 #include "OSservices.h"
13 #include "KKStr.h"
14 using namespace KKB;
15 
16 
17 
19 
20 #include "ConfusionMatrix2.h"
21 #include "CrossValidation.h"
22 #include "FeatureVector.h"
23 #include "Orderings.h"
26 using namespace KKMLL;
27 
28 
29 
30 CrossValidationMxN::CrossValidationMxN (TrainingConfiguration2Ptr _config,
31  kkuint32 _numOfOrderings,
32  kkuint32 _numOfFolds,
33  FeatureVectorListPtr _data,
34  bool& _cancelFlag,
35  RunLog& _log
36  ):
37 
38  cancelFlag (_cancelFlag),
39  config (_config),
40  fileDesc (_config->FileDesc ()),
41  meanConfusionMatrix (NULL),
42  numOfFolds (_numOfFolds),
43  numOfOrderings (_numOfOrderings),
44  orderings (NULL),
45  weOwnOrderings (false),
46 
47  trainingTimes (),
48  trainingTimeMean (0.0),
49  trainingTimeStdDev (0.0),
50  testTimes (),
51  testTimeMean (0.0),
52  testTimeStdDev (0.0)
53 
54 {
55  CheckFileDescCopasetic (_log);
56  orderings = new Orderings (_data, numOfOrderings, _numOfFolds, _log);
57  weOwnOrderings = true;
58 }
59 
60 
61 
62 
63 CrossValidationMxN::CrossValidationMxN (TrainingConfiguration2Ptr _config,
64  OrderingsPtr _orderings,
65  bool& _cancelFlag,
66  RunLog& _log
67  ):
68 
69  cancelFlag (_cancelFlag),
70  config (_config),
71  fileDesc (_config->FileDesc ()),
72  meanConfusionMatrix (NULL),
73  numOfFolds (_orderings->NumOfFolds ()),
74  numOfOrderings (_orderings->NumOfOrderings ()),
75  orderings (_orderings),
76  weOwnOrderings (false),
77  trainingTimes (),
78  trainingTimeMean (0.0),
79  trainingTimeStdDev (0.0),
80  testTimes (),
81  testTimeMean (0.0),
82  testTimeStdDev (0.0)
83 {
84  CheckFileDescCopasetic (_log);
85 }
86 
87 
88 
89 
// NOTE(review): the signature line that belongs to this body is absent from this listing (the
// embedded numbering jumps from 89 to 91).  The body looks like the class destructor -- confirm
// against the original file.
//
// Releases the per-run results via CleanUpMemory, then deletes 'orderings' only when
// 'weOwnOrderings' is set; the pointer is reset to NULL either way.
91 {
92  CleanUpMemory ();
93 
94  if (weOwnOrderings)
95  delete orderings;
96  orderings = NULL;
97 }
98 
99 
100 
101 void CrossValidationMxN::CheckFileDescCopasetic (RunLog& log)
102 {
103  if (config->FileDesc () != orderings->FileDesc ())
104  {
105  // The Configuration 'fileDesc' is different than the orderings 'FileDesc'.
106  // This is a VERY VERY bad situation. Processing needs to stop NOW NOW NOW.
107  KKStr errMsg = "CrossValidationMxN ***ERROR*** File Description between config and orderings don't match.";
108  throw KKException (errMsg);
109  }
110 } /* CheckFileDescCopesetic */
111 
112 
113 
114 
115 void CrossValidationMxN::CleanUpMemory ()
116 {
117  accuracies.erase (accuracies.begin (), accuracies.end ());
118  supportPoints.erase (supportPoints.begin (), supportPoints.end ());
119  trainingTimes.erase (trainingTimes.begin (), trainingTimes.end ());
120  testTimes.erase (testTimes.begin (), testTimes.end ());
121 
122  delete meanConfusionMatrix;
123  meanConfusionMatrix = NULL;
124 } /* CleanUpMemory */
125 
126 
127 
128 
// NOTE(review): the signature line that belongs to this body is absent from this listing (the
// embedded numbering jumps from 128 to 130).  The closing comment names it 'RunValidations' and
// the body uses a variable called 'log' -- confirm the exact signature against the original file.
//
// Creates one CrossValidation per ordering, collects its accuracy, support-vector count and
// train/test times, accumulates its confusion matrix into 'meanConfusionMatrix', and finally
// computes the mean/std-dev of each statistic and scales the accumulated matrix by
// 1 / numOfOrderings.
130 {
// Start from a clean slate; results from an earlier run are discarded.
131  CleanUpMemory ();
132 
133  meanConfusionMatrix = new ConfusionMatrix2 (*(orderings->MLClasses ()));
134 
135  kkuint32 cvIDX = 0;
136 
137  MLClassListPtr mlClasses = orderings->MLClasses ();
138 
139  for (cvIDX = 0; cvIDX < numOfOrderings; cvIDX++)
140  {
141  FeatureVectorListPtr data = orderings->Ordering (cvIDX);
142 
143  CrossValidationPtr cv = new CrossValidation (config,
144  data,
145  mlClasses,
146  numOfFolds,
147  false,
148  fileDesc,
149  log,
150  cancelFlag
151  );
152 
// NOTE(review): embedded line 153 is absent from this listing.  As shown, nothing runs 'cv'
// before its results are read below; presumably the missing line is the call that performs the
// cross validation (e.g. cv->RunCrossValidation (log)) -- confirm against the original file.
154 
155  accuracies.push_back (cv->Accuracy ());
156  supportPoints.push_back ((float)cv->NumOfSupportVectors ());
157  trainingTimes.push_back (cv->TrainTimeTotal ());
158  testTimes.push_back (cv->TestTimeTotal ());
159 
160  meanConfusionMatrix->AddIn (*(cv->ConfussionMatrix ()), log);
161 
162  delete cv;
163  }
164 
165  CalcMeanAndStdDev (accuracies, accuracyMean, accuracyStdDev);
166  CalcMeanAndStdDev (supportPoints, supportPointsMean, supportPointsStdDev);
167  CalcMeanAndStdDev (trainingTimes, trainingTimeMean, trainingTimeStdDev);
168  CalcMeanAndStdDev (testTimes, testTimeMean, testTimeStdDev);
169 
// Turns the summed confusion matrix into a per-ordering average.
// NOTE(review): when 'numOfOrderings' is 0 this divides by zero (floating point).
170  double factor = 1.0 / (double)numOfOrderings;
171 
172  meanConfusionMatrix->FactorCounts (factor);
173 } /* RunValidations */
174 
175 
176 
177 
178 /**
179  * @brief Will run M number of Train then Test passes.
180  *
181  * @param[in] numExamplsToUseForTraining The number examples in each ordering(group)
182  * that are to be used for training, the remaining examples will be
183  * used as test data.
184  */
185 void CrossValidationMxN::RunTrainAndTest (kkint32 numExamplsToUseForTraining,
186  RunLog& log
187  )
188 {
189  CleanUpMemory ();
190 
191  meanConfusionMatrix = new ConfusionMatrix2 (*(orderings->MLClasses ()));
192 
193  kkuint32 cvIDX = 0;
194 
195  MLClassListPtr mlClasses = orderings->MLClasses ();
196 
197  for (cvIDX = 0; cvIDX < numOfOrderings; cvIDX++)
198  {
199  FeatureVectorListPtr data = orderings->Ordering (cvIDX);
200 
201  FeatureVectorList trainingData (fileDesc, false);
202  FeatureVectorList testData (fileDesc, false);
203 
204  FeatureVectorList::iterator fvIDX;
205 
206  for (fvIDX = data->begin (); fvIDX != data->end (); fvIDX++)
207  {
208  FeatureVectorPtr example = *fvIDX;
209 
210  if (trainingData.QueueSize () < numExamplsToUseForTraining)
211  trainingData.PushOnBack (example);
212  else
213  testData.PushOnBack (example);
214  }
215 
216  CrossValidationPtr cv = new CrossValidation (config,
217  &trainingData,
218  mlClasses,
219  numOfFolds,
220  false,
221  fileDesc,
222  log,
223  cancelFlag
224  );
225 
226  cv->RunValidationOnly (&testData,
227  NULL, // No McNemars test going to be performed.
228  log
229  );
230 
231  accuracies.push_back (cv->Accuracy ());
232  supportPoints.push_back ((float)cv->NumOfSupportVectors ());
233  trainingTimes.push_back (cv->TrainTimeTotal ());
234  testTimes.push_back (cv->TestTimeTotal ());
235 
236  meanConfusionMatrix->AddIn (*(cv->ConfussionMatrix ()), log);
237 
238  delete cv;
239  }
240 
241  CalcMeanAndStdDev (accuracies, accuracyMean, accuracyStdDev);
242  CalcMeanAndStdDev (supportPoints, supportPointsMean, supportPointsStdDev);
243  CalcMeanAndStdDev (trainingTimes, trainingTimeMean, trainingTimeStdDev);
244  CalcMeanAndStdDev (testTimes, testTimeMean, testTimeStdDev);
245 
246  double factor = 1.0 / (double)numOfOrderings;
247 
248  meanConfusionMatrix->FactorCounts (factor);
249 } /* RunTrainAndTest */
250 
251 
252 
253 const
254 ConfusionMatrix2Ptr CrossValidationMxN::ConfussionMatrix () const
255 {
256  return meanConfusionMatrix;
257 } /* ConfussionMatrix */
Used to maintain multiple orderings of a single list of FeatureVector objects.
Definition: Orderings.h:58
kkuint32 NumOfFolds() const
Definition: Orderings.h:168
Orderings(const FeatureVectorListPtr _data, kkuint32 _numOfOrderings, kkuint32 _numOfFolds, RunLog &_log)
Constructs an Orderings object from a FeatureVectorList object.
Definition: Orderings.cpp:25
__int32 kkint32
Definition: KKBaseTypes.h:88
const ConfusionMatrix2Ptr ConfussionMatrix() const
const ConfusionMatrix2Ptr ConfussionMatrix() const
kkuint32 NumOfOrderings() const
Definition: Orderings.h:169
const MLClassListPtr MLClasses() const
Definition: Orderings.h:166
A class that is meant to manage a n-Fold Cross Validation.
void RunCrossValidation(RunLog &log)
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
FeatureVectorList(FileDescPtr _fileDesc, bool _owner)
Will create a new empty list of FeatureVector objects.
Container class for FeatureVector derived objects.
KKTHread * KKTHreadPtr
CrossValidationMxN(TrainingConfiguration2Ptr _comfig, OrderingsPtr _data, bool &_cancelFlag, RunLog &_log)
void FactorCounts(double factor)
void AddIn(const ConfusionMatrix2 &cm, RunLog &log)
ConfusionMatrix2(const MLClassList &_classes)
const FeatureVectorListPtr Ordering(kkuint32 orderingIdx) const
Definition: Orderings.cpp:614
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
CrossValidationMxN(TrainingConfiguration2Ptr _comfig, kkuint32 _numOfOrderings, kkuint32 _numOfFolds, FeatureVectorListPtr _data, bool &_cancelFlag, RunLog &_log)
void RunTrainAndTest(kkint32 numExamplsToUseForTraining, RunLog &log)
Will run M number of Train then Test passes.
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
Orderings * OrderingsPtr
KKException(const KKStr &_exceptionStr)
Definition: KKException.cpp:45
CrossValidation(TrainingConfiguration2Ptr _config, FeatureVectorListPtr _examples, MLClassListPtr _mlClasses, kkint32 _numOfFolds, bool _featuresAreAlreadyNormalized, FileDescPtr _fileDesc, RunLog &_log, bool &_cancelFlag)
A confusion matrix object that is used to record the results from a CrossValidation (see also: CrossValidation).
void RunValidationOnly(FeatureVectorListPtr validationData, bool *classedCorrectly, RunLog &log)
const FileDescPtr FileDesc() const
Definition: Orderings.h:165