KSquare Utilities
CrossValidationVoting.cpp
Go to the documentation of this file.
#include "FirstIncludes.h"
#include <stdio.h>
#include <iomanip>
#include <string>
#include <iostream>
#include <fstream>
#include <vector>
#include "MemoryDebug.h"
using namespace std;


#include "KKBaseTypes.h"
#include "OSservices.h"
#include "RunLog.h"
using namespace KKB;


#include "CrossValidationVoting.h"
#include "Classifier2.h"
#include "ConfusionMatrix2.h"
#include "FileDesc.h"
#include "MLClass.h"
#include "FeatureVector.h"
#include "TrainingConfiguration2.h"
#include "TrainingProcess2.h"
using namespace KKMLL;
28 
29 
30 
31 CrossValidationVoting::CrossValidationVoting (TrainingConfiguration2ListPtr _configs,
32  FeatureVectorListPtr _examples,
33  MLClassListPtr _mlClasses,
34  kkint32 _numOfFolds,
35  bool _featuresAreAlreadyNormalized,
36  FileDescPtr _fileDesc
37  ):
38 
39  configs (_configs),
40  featuresAreAlreadyNormalized (_featuresAreAlreadyNormalized),
41  fileDesc (_fileDesc),
42  foldAccuracies (NULL),
43  foldCounts (NULL),
44  confusionMatrix (NULL),
45  cmByNumOfConflicts (NULL),
46  examples (NULL),
47  mlClasses (_mlClasses),
48  examplesPerClass (0),
49  maxNumOfConflicts (0),
50  numOfFolds (_numOfFolds),
51  numOfSupportVectors (0),
52  numOfWinnersCounts (NULL),
53  numOfWinnersCorrects (NULL),
54  numOfWinnersOneOfTheWinners (NULL),
55  classificationTime (0.0),
56  trainingTime (0.0)
57 
58 {
59  examples = _examples->ExtractExamplesForClassList (mlClasses);
60  examplesPerClass = 999999;
61 }
62 
63 
64 
66 {
67  DeleteAllocatedMemory ();
68  delete examples; examples = NULL;
69 }
70 
71 
72 
73 
74 void CrossValidationVoting::AllocateMemory (RunLog& log)
75 {
76  maxNumOfConflicts = mlClasses->QueueSize () + 1;
77 
78  confusionMatrix = new ConfusionMatrix2 (*mlClasses);
79  cmByNumOfConflicts = new ConfusionMatrix2Ptr[maxNumOfConflicts];
80 
81  numOfWinnersCounts = new kkint32[maxNumOfConflicts];
82  numOfWinnersCorrects = new kkint32[maxNumOfConflicts];
83  numOfWinnersOneOfTheWinners = new kkint32[maxNumOfConflicts];
84 
85  kkint32 conflictIDX;
86 
87  for (conflictIDX = 0; conflictIDX < maxNumOfConflicts; conflictIDX++)
88  {
89  cmByNumOfConflicts [conflictIDX] = new ConfusionMatrix2 (*mlClasses);
90  numOfWinnersCounts [conflictIDX] = 0;
91  numOfWinnersCorrects [conflictIDX] = 0;
92  numOfWinnersOneOfTheWinners [conflictIDX] = 0;
93  }
94 
95  foldAccuracies = new float[numOfFolds];
96  foldCounts = new kkint32[numOfFolds];
97 
98  kkint32 foldNum;
99 
100  for (foldNum = 0; foldNum < numOfFolds; foldNum++)
101  {
102  foldAccuracies [foldNum] = 0.0;
103  foldCounts [foldNum] = 0;
104  }
105 } /* AllocateMemory */
106 
107 
108 
109 
110 void CrossValidationVoting::DeleteAllocatedMemory ()
111 {
112  maxNumOfConflicts = mlClasses->QueueSize () + 1;
113  if (confusionMatrix)
114  {
115  delete confusionMatrix;
116  confusionMatrix = NULL;
117  }
118 
119  kkint32 conflictIDX;
120 
121  if (cmByNumOfConflicts)
122  {
123  for (conflictIDX = 0; conflictIDX < maxNumOfConflicts; conflictIDX++)
124  {
125  delete cmByNumOfConflicts[conflictIDX];
126  cmByNumOfConflicts[conflictIDX] = NULL;
127  }
128 
129  delete cmByNumOfConflicts;
130  cmByNumOfConflicts = NULL;
131  }
132 
133  delete foldAccuracies; foldAccuracies = NULL;
134  delete foldCounts; foldCounts = NULL;
135 
136  delete numOfWinnersCounts; numOfWinnersCounts = NULL;
137  delete numOfWinnersCorrects; numOfWinnersCorrects = NULL;
138  delete numOfWinnersOneOfTheWinners; numOfWinnersOneOfTheWinners = NULL;
139 
140 } /* DeleteAllocatedMemory */
141 
142 
143 
145 {
146  log.Level (10) << "CrossValidationVoting::RunCrossValidationVoting" << endl;
147 
148  if (numOfFolds < 1)
149  {
150  log.Level (-1) << endl
151  << endl
152  << "CrossValidationVoting::RunCrossValidationVoting **** ERROR ****" << endl
153  << endl
154  << " Invalid numOfFolds[" << numOfFolds << "]." << endl
155  << endl;
156  return;
157  }
158 
159  DeleteAllocatedMemory ();
160  AllocateMemory (log);
161 
162  kkint32 imageCount = examples->QueueSize ();
163  kkint32 numImagesPerFold = (imageCount + numOfFolds - 1) / numOfFolds;
164  kkint32 firstInGroup = 0;
165 
166  kkint32 foldNum;
167 
168  for (foldNum = 0; foldNum < numOfFolds; foldNum++)
169  {
170  kkint32 lastInGroup;
171 
172  // If We are doing the last Fold Make sure that we are including all the examples
173  // that have not been tested.
174  if (foldNum == (numOfFolds - 1))
175  lastInGroup = imageCount;
176  else
177  lastInGroup = firstInGroup + numImagesPerFold - 1;
178 
179 
180  cout << "Fold [" << (foldNum + 1) << "] of [" << numOfFolds << "]" << endl;
181 
182  FeatureVectorListPtr trainingExamples = examples->ManufactureEmptyList (true);
183  FeatureVectorListPtr testImages = examples->ManufactureEmptyList (true);
184 
185  log.Level (30) << "Fold Num[" << foldNum << "] "
186  << "FirstTestImage[" << firstInGroup << "] "
187  << "LastInGroup[" << lastInGroup << "]."
188  << endl;
189 
190  for (kkint32 x = 0; x < imageCount; x++)
191  {
192  FeatureVectorPtr newImage = examples->IdxToPtr (x)->Duplicate ();
193  if ((x >= firstInGroup) && (x <= lastInGroup))
194  testImages->PushOnBack (newImage);
195  else
196  trainingExamples->PushOnBack (newImage);
197  }
198 
199  cout << "Number Of Training Images : " << trainingExamples->QueueSize () << endl;
200  cout << "Number Of Test Images : " << testImages->QueueSize () << endl;
201 
202  CrossValidate (testImages, trainingExamples, foldNum, NULL, log);
203 
204  delete trainingExamples; trainingExamples = NULL;
205  delete testImages; testImages = NULL;
206  firstInGroup = firstInGroup + numImagesPerFold;
207  }
208 } /* RunCrossValidationVoting */
209 
210 
211 
212 
213 
214 void CrossValidationVoting::RunValidationOnly (FeatureVectorListPtr validationData,
215  bool* classedCorrectly,
216  RunLog& log
217  )
218 {
219  log.Level (10) << "CrossValidationVoting::RunValidationOnly" << endl;
220  DeleteAllocatedMemory ();
221  AllocateMemory (log);
222 
223  // We need to get a duplicate copy of each image data because the trainer and classifier
224  // will normalize the data.
225  FeatureVectorListPtr trainingExamples = examples->DuplicateListAndContents ();
226  FeatureVectorListPtr testImages = validationData->DuplicateListAndContents ();
227 
228  CrossValidate (testImages, trainingExamples, 0, classedCorrectly, log);
229 
230  delete trainingExamples;
231  delete testImages;
232 } /* RunValidationOnly */
233 
234 
235 
236 
237 void CrossValidationVoting::CrossValidate (FeatureVectorListPtr testImages,
238  FeatureVectorListPtr trainingExamples,
239  kkint32 foldNum,
240  bool* classedCorrectly,
241  RunLog& log
242  )
243 {
244  log.Level (20) << "CrossValidationVoting::CrossValidate FoldNum[" << foldNum << "]." << endl;
245 
246  kkint32 numOfClasses = mlClasses->QueueSize ();
247 
248  bool cancelFlag = false;
249  KKStr statusMessage;
250 
251  vector<TrainingProcess2Ptr> trainers;
252  vector<Classifier2Ptr> classifiers;
253 
254  kkint32 idx;
255  for (idx = 0; idx < configs->QueueSize (); idx++)
256  {
257  TrainingConfiguration2Ptr config = configs->IdxToPtr (idx);
258 
259  TrainingProcess2Ptr trainer =
261  (config,
262  trainingExamples,
263  false,
264  featuresAreAlreadyNormalized,
265  cancelFlag,
266  log
267  );
268 
269  trainingTime += trainer->TrainingTime ();
270  numOfSupportVectors += trainer->NumOfSupportVectors ();
271 
272  log.Level (20) << "CrossValidate Creating Classification Object" << endl;
273 
274  Classifier2Ptr classifier = new Classifier2 (trainer, log);
275 
276  trainers.push_back (trainer);
277  classifiers.push_back (classifier);
278  }
279 
280  {
281  // Force the creation of a noise class
282  MLClassPtr noiseMLClass = mlClasses->GetNoiseClass ();
283  noiseMLClass = NULL;
284  }
285 
286  FeatureVectorList::iterator imageIDX = testImages->begin ();
287 
288  double probability;
289 
290  kkint32 foldCorrect = 0;
291 
292  kkint32 foldCount = 0;
293 
294  log.Level (20) << "CrossValidate Classifying Test Images." << endl;
295 
296  double startClassificationTime = osGetSystemTimeUsed ();
297 
298  for (imageIDX = testImages->begin (); imageIDX != testImages->end (); imageIDX++)
299  {
300  MLClassPtr knownClass = (*imageIDX)->MLClass ();
301 
302  kkint32 numOfWinners = 0;
303  bool knownClassOneOfTheWinners = false;
304  MLClassPtr predictedClass = NULL;
305  double breakTie = 0.0;
306 
307  Classifier2Ptr classifier = NULL;
308 
309  vector<kkint32> voteTable (numOfClasses, 0);
310  vector<double> probTable (numOfClasses, 0.0f);
311 
312  for (idx = 0; idx < (kkint32)classifiers.size (); idx++)
313  {
314  classifier = classifiers[idx];
315  predictedClass = classifier->ClassifyAExample (*(*imageIDX),
316  probability,
317  numOfWinners,
318  knownClassOneOfTheWinners,
319  breakTie
320  );
321  kkint32 predictedIdx = mlClasses->PtrToIdx (predictedClass);
322 
323  if ((predictedIdx < 0) || (predictedIdx >= mlClasses->QueueSize ()))
324  {
325  // We are screwed, don't know what class was predicted.
326  log.Level (-1) << endl
327  << endl
328  << "CrossValidationVoting::CrossValidate *** ERROR ***" << endl
329  << endl
330  << "UnKnown Class was returned[" << predictedClass->Name () << "]" << endl
331  << "predictedIdx[" << predictedIdx << "]" << endl
332  << endl;
333  osWaitForEnter ();
334  exit (-1);
335  }
336 
337  voteTable[predictedIdx]++;
338 
339  if (probTable[predictedIdx] == 0.0f)
340  probTable[predictedIdx] = probability;
341  else
342  probTable[predictedIdx] *= probability;
343  }
344 
345  {
346  // Normalize Probability
347  kkint32 x = 0;
348  double probTotal = 0.0f;
349  for (x = 0; x < numOfClasses; x++)
350  probTotal += probTable[x];
351 
352  for (x = 0; x < numOfClasses; x++)
353  probTable[x] = probTable[x] / probTotal;
354  }
355 
356 
357  {
358  // Determine winning vote
359  kkint32 highVote = 0;
360  numOfWinners = 0;
361  kkint32 idxWithHighVote = -1;
362  kkint32 winnerIdx = -1;
363  kkint32 x;
364 
365  for (x = 0; x < numOfClasses; x++)
366  {
367  if (voteTable[x] > highVote)
368  {
369  highVote = voteTable[x];
370  numOfWinners = 1;
371  idxWithHighVote = x;
372  winnerIdx = x;
373  }
374  else if (voteTable[x] == highVote)
375  {
376  numOfWinners++;
377  }
378  }
379 
380  if (numOfWinners > 1)
381  {
382  // Select winner by high probability
383  double highProbability = 0.0f;
384 
385  for (x = 0; x < numOfClasses; x++)
386  {
387  if (voteTable[x] >= highVote)
388  {
389  if (probTable[x] > highProbability)
390  {
391  highProbability = probTable[x];
392  winnerIdx = x;
393  }
394  }
395  }
396  }
397 
398  predictedClass = mlClasses->IdxToPtr (winnerIdx);
399  probability = probTable[winnerIdx];
400  }
401 
402  confusionMatrix->Increment (knownClass,
403  predictedClass,
404  (kkint32)(*imageIDX)->OrigSize (),
405  probability,
406  log
407  );
408 
409  cmByNumOfConflicts[numOfWinners]->Increment (knownClass,
410  predictedClass,
411  (kkint32)(*imageIDX)->OrigSize (),
412  probability,
413  log
414  );
415 
416  bool correctClassificationMade = false;
417  numOfWinnersCounts[numOfWinners]++;
418  if (knownClass->UpperName () == predictedClass->UpperName ())
419  {
420  correctClassificationMade = true;
421  numOfWinnersCorrects[numOfWinners]++;
422  foldCorrect++;
423  }
424 
425  if (classedCorrectly)
426  {
427  classedCorrectly[foldCount] = correctClassificationMade;
428  }
429 
430  if (knownClassOneOfTheWinners)
431  numOfWinnersOneOfTheWinners[numOfWinners]++;
432 
433  log.Level (50) << "CrossValidate - Known Class[" << knownClass->Name () << "] "
434  << "Predicted Class[" << predictedClass->Name () << "]."
435  << endl;
436 
437  foldCount++;
438  }
439 
440  double endClassificationTime = osGetSystemTimeUsed ();
441  classificationTime += (endClassificationTime - startClassificationTime);
442 
443  float foldAccuracy = 0.0;
444 
445  if (foldCount > 0)
446  foldAccuracy = 100.0f * (float)foldCorrect / (float)foldCount;
447 
448  foldAccuracies [foldNum] = foldAccuracy;
449  foldCounts [foldNum] = foldCount;
450 
451 
452  for (idx = 0; idx < (kkint32)trainers.size (); idx++)
453  {delete trainers[idx]; trainers[idx] = NULL;}
454 
455  for (idx = 0; idx < (kkint32)classifiers.size (); idx++)
456  {delete classifiers[idx]; classifiers[idx] = NULL;}
457 
458  log.Level (20) << "CrossValidationVoting::CrossValidate - Done." << endl;
459 } /* CrossValidate */
460 
461 
462 
463 
464 
465 
466 
468 {
469  if (confusionMatrix)
470  return (float)confusionMatrix->Accuracy ();
471  else
472  return 0.0;
473 } /* Accuracy */
474 
475 
476 
477 
479 {
480  KKStr foldAccuracyStr (9 * numOfFolds); // Pre Reserving enough space for all Accuracies.
481 
482  for (kkint32 foldNum = 0; foldNum < numOfFolds; foldNum++)
483  {
484  if (foldNum > 0)
485  foldAccuracyStr << "\t";
486  foldAccuracyStr << StrFormatDouble (foldAccuracies[foldNum], "ZZ,ZZ0.00%");
487  }
488 
489  return foldAccuracyStr;
490 } /* FoldAccuracysToStr */
491 
492 
493 
494 
495 
497 {
498  if (!foldAccuracies)
499  return 0.0f;
500 
501  if ((foldNum < 0) || (foldNum >= numOfFolds))
502  return 0.0f;
503 
504  return foldAccuracies[foldNum];
505 } /* FoldAccuracy */
506 
507 
508 
509 
511 {
512  if ((numOfFolds <= 0) || (!foldAccuracies))
513  return "";
514 
515  kkint32 x;
516 
517  KKStr result (numOfFolds + 10);
518 
519  for (x = 0; x < numOfFolds; x++)
520  {
521  if (x > 0)
522  result << ",";
523  result << foldAccuracies[x];
524  }
525 
526  return result;
527 } /* FoldStr */
KKStr(kkint32 size)
Creates a KKStr object that pre-allocates space for 'size' characters.
Definition: KKStr.cpp:655
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
__int32 kkint32
Definition: KKBaseTypes.h:88
MLClassPtr GetNoiseClass() const
Definition: MLClass.cpp:875
virtual FeatureVectorListPtr DuplicateListAndContents() const
Creates a duplicate of list and also duplicates it contents.
virtual FeatureVectorListPtr ManufactureEmptyList(bool _owner) const
Creates an instance of a Empty FeatureVectorList.
kkint32 NumOfSupportVectors() const
KKTHread * KKTHreadPtr
static TrainingProcess2Ptr CreateTrainingProcessFromTrainingExamples(TrainingConfiguration2Const *config, FeatureVectorListPtr trainingExamples, bool takeOwnershipOfTrainingExamples, bool featuresAlreadyNormalized, VolConstBool &cancelFlag, RunLog &log)
Will Construct an instance using provided list of examples rather than loading from training library...
ConfusionMatrix2(const MLClassList &_classes)
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
Classifier2(TrainingProcess2Ptr _trainer, RunLog &_log)
Definition: Classifier2.cpp:42
TrainingProcess2 * TrainingProcess2Ptr
Definition: Classifier2.h:62
bool operator==(const KKStr &right) const
Definition: KKStr.cpp:1550
const KKStr & UpperName() const
Definition: MLClass.h:155
void RunValidationOnly(FeatureVectorListPtr validationData, bool *classedCorrectly, RunLog &log)
KKStr StrFormatDouble(double val, const char *mask)
Definition: KKStr.cpp:4819
double osGetSystemTimeUsed()
Returns the number of CPU seconds used by current process.
CrossValidationVoting(TrainingConfiguration2ListPtr _configs, FeatureVectorListPtr _examples, MLClassListPtr _mlClasses, kkint32 _numOfFolds, bool _featuresAreAlreadyNormalized, FileDescPtr _fileDesc)
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
FeatureVectorListPtr ExtractExamplesForClassList(MLClassListPtr classes)
A confusion matrix object that is used to record the results from a CrossValidation. <see also cref="CrossValidation"
Classifier2 * Classifier2Ptr
Definition: Classifier2.h:284
float FoldAccuracy(kkint32 foldNum) const