28 using namespace KKMLL;
33 FeatureVectorListPtr _examples,
34 MLClassListPtr _mlClasses,
36 bool _featuresAreAlreadyNormalized,
37 FileDescPtr _fileDesc,
42 cancelFlag (_cancelFlag),
44 duplicateTrainDataCount (0),
45 featuresAreAlreadyNormalized (_featuresAreAlreadyNormalized),
49 fvProducerFactory (NULL),
50 confusionMatrix (NULL),
51 cmByNumOfConflicts (NULL),
53 mlClasses (_mlClasses),
55 maxNumOfConflicts (0),
56 numOfFolds (_numOfFolds),
59 numOfWinnersCounts (NULL),
60 numOfWinnersCorrects (NULL),
61 numOfWinnersOneOfTheWinners (NULL),
66 accuracyStdDev (0.0f),
71 supportPointsMean (0.0f),
72 supportPointsStdDev (0.0f),
80 trainTimeStdDev (0.0),
83 weOwnConfusionMatrix (
false)
98 DeleteAllocatedMemory ();
99 delete examples; examples = NULL;
107 maxNumOfConflicts = mlClasses->QueueSize () + 1;
109 weOwnConfusionMatrix =
true;
110 cmByNumOfConflicts =
new ConfusionMatrix2Ptr[maxNumOfConflicts];
112 numOfWinnersCounts =
new kkint32[maxNumOfConflicts];
113 numOfWinnersCorrects =
new kkint32[maxNumOfConflicts];
114 numOfWinnersOneOfTheWinners =
new kkint32[maxNumOfConflicts];
118 for (conflictIDX = 0; conflictIDX < maxNumOfConflicts; conflictIDX++)
121 numOfWinnersCounts [conflictIDX] = 0;
122 numOfWinnersCorrects [conflictIDX] = 0;
123 numOfWinnersOneOfTheWinners [conflictIDX] = 0;
135 if (weOwnConfusionMatrix)
137 delete confusionMatrix;
138 confusionMatrix = NULL;
143 if (cmByNumOfConflicts)
145 for (conflictIDX = 0; conflictIDX < maxNumOfConflicts; conflictIDX++)
147 delete cmByNumOfConflicts[conflictIDX];
148 cmByNumOfConflicts[conflictIDX] = NULL;
151 delete cmByNumOfConflicts;
152 cmByNumOfConflicts = NULL;
158 delete numOfWinnersCounts; numOfWinnersCounts = NULL;
159 delete numOfWinnersCorrects; numOfWinnersCorrects = NULL;
160 delete numOfWinnersOneOfTheWinners; numOfWinnersOneOfTheWinners = NULL;
168 log.Level (10) <<
"CrossValidation::RunCrossValidation numOfFolds[" << numOfFolds <<
"]" << endl;
172 log.Level (-1) << endl
173 <<
"CrossValidation::RunCrossValidation **** ERROR ****" << endl
175 <<
" Invalid numOfFolds[" << numOfFolds <<
"]." << endl
180 DeleteAllocatedMemory ();
183 kkint32 imageCount = examples->QueueSize ();
184 kkint32 numImagesPerFold = (imageCount + numOfFolds - 1) / numOfFolds;
192 for (foldNum = 0; foldNum < numOfFolds; foldNum++)
198 if (foldNum == (numOfFolds - 1))
199 lastInGroup = imageCount;
201 lastInGroup = firstInGroup + numImagesPerFold - 1;
204 log.Level (20) <<
"Fold [" << (foldNum + 1) <<
"] of [" << numOfFolds <<
"]" << endl;
209 log.Level (30) <<
"Fold Num[" << foldNum <<
"] " 210 <<
"FirstTestImage[" << firstInGroup <<
"] " 211 <<
"LastInGroup[" << lastInGroup <<
"]." 214 for (
kkint32 x = 0; (x < imageCount) && (!cancelFlag); x++)
217 if ((x >= firstInGroup) && (x <= lastInGroup))
227 log.Level (20) <<
"Number Of Training Images : " << trainingExamples->QueueSize () << endl;
228 log.Level (20) <<
"Number Of Test Images : " << testImages->QueueSize () << endl;
233 CrossValidate (testImages, trainingExamples, foldNum, NULL, log);
235 delete trainingExamples; trainingExamples = NULL;
236 delete testImages; testImages = NULL;
238 firstInGroup = firstInGroup + numImagesPerFold;
243 avgPredProb = totalPredProb / imageCount;
245 CalcMeanAndStdDev (foldAccuracies, accuracyMean, accuracyStdDev);
246 CalcMeanAndStdDev (supportPoints, supportPointsMean, supportPointsStdDev);
247 CalcMeanAndStdDev (testTimes, testTimeMean, testTimeStdDev);
248 CalcMeanAndStdDev (trainTimes, trainTimeMean, trainTimeStdDev);
257 bool* classedCorrectly,
261 log.Level (10) <<
"CrossValidation::RunValidationOnly" << endl;
262 DeleteAllocatedMemory ();
272 CrossValidate (testImages, trainingExamples, 0, classedCorrectly, log);
274 if (testImages->QueueSize () > 0)
275 avgPredProb = totalPredProb / testImages->QueueSize ();
279 delete trainingExamples; trainingExamples = NULL;
280 delete testImages; testImages = NULL;
285 CalcMeanAndStdDev (foldAccuracies, accuracyMean, accuracyStdDev);
286 CalcMeanAndStdDev (supportPoints, supportPointsMean, supportPointsStdDev);
287 CalcMeanAndStdDev (testTimes, testTimeMean, testTimeStdDev);
288 CalcMeanAndStdDev (trainTimes, trainTimeMean, trainTimeStdDev);
296 FeatureVectorListPtr trainingExamples,
298 bool* classedCorrectly,
302 log.Level (20) <<
"CrossValidation::CrossValidate FoldNum[" << foldNum <<
"]." << endl;
304 bool cancelFlag =
false;
312 featuresAreAlreadyNormalized
, 325 numSVs += foldNumSVs;
326 totalNumSVs += foldTotalNumSVs;
328 log.Level (20) <<
"CrossValidate Creating Classification Object" << endl;
336 log.Level (20) <<
"CrossValidate Classifying Test Images." << endl;
338 double breakTie = 0.0f;
340 MLClassPtr knownClass = NULL;
341 bool knownClassOneOfTheWinners =
false;
343 MLClassPtr predictedClass = NULL;
344 double probability = 0.0f;
346 kkint32 numTestExamples = testImages->QueueSize ();
351 vector<FeatureVectorPtr> exampleHist (numTestExamples);
352 vector<MLClassPtr> knownClassHist (numTestExamples);
353 vector<
bool> knownClassOneOfTheWinnersHist (numTestExamples,
false);
354 vector<kkint32> numOfWinersHist (numTestExamples, 0);
355 vector<MLClassPtr> predictedClassHist (numTestExamples);
356 vector<
double> probabilityHist (numTestExamples, 0.0f);
358 FeatureVectorList::iterator fvIDX;
362 for (fvIDX = testImages->begin (); (fvIDX != testImages->end ()) && (!cancelFlag); fvIDX++)
371 knownClassOneOfTheWinners
, 375 exampleHist [foldCount] = example;
376 knownClassHist [foldCount] = knownClass;
377 predictedClassHist [foldCount] = predictedClass;
378 probabilityHist [foldCount] = probability;
379 numOfWinersHist [foldCount] = numOfWinners;
380 knownClassOneOfTheWinnersHist [foldCount] = knownClassOneOfTheWinners;
386 double testTimeThisFold = (endClassificationTime - startClassificationTime);
387 testTime += testTimeThisFold;
391 for (foldCount = 0; (foldCount < numTestExamples) && (!cancelFlag); foldCount++)
393 example = exampleHist [foldCount];
394 predictedClass = predictedClassHist [foldCount];
395 probability = probabilityHist [foldCount];
396 numOfWinners = numOfWinersHist [foldCount];
397 knownClass = knownClassHist [foldCount];
398 knownClassOneOfTheWinners = knownClassOneOfTheWinnersHist [foldCount];
401 totalPredProb += probability;
418 bool correctClassificationMade =
false;
419 numOfWinnersCounts[numOfWinners]++;
420 if (knownClass == predictedClass)
422 correctClassificationMade =
true;
423 numOfWinnersCorrects[numOfWinners]++;
427 if (classedCorrectly)
429 classedCorrectly[foldCount] = correctClassificationMade;
432 if (knownClassOneOfTheWinners)
433 numOfWinnersOneOfTheWinners[numOfWinners]++;
436 float foldAccuracy = 0.0;
439 foldAccuracy = 100.0f * (
float)foldCorrect / (
float)foldCount;
441 foldAccuracies.push_back (foldAccuracy);
442 foldCounts.push_back (foldCount);
444 supportPoints.push_back ((
float)trainer->NumOfSupportVectors ());
445 trainTimes.push_back (trainer->TrainingTime ());
446 testTimes.push_back (testTimeThisFold);
451 log.Level (20) <<
"CrossValidation::CrossValidate - Done." << endl;
479 KKStr foldAccuracyStr
(9 * numOfFolds
);
481 for (
kkuint32 foldNum = 0; foldNum < foldAccuracies.size (); foldNum++)
484 foldAccuracyStr <<
"\t";
485 foldAccuracyStr << StrFormatDouble (foldAccuracies[foldNum],
"ZZ,ZZ0.00%");
488 return foldAccuracyStr;
497 if ((foldNum < 0) || (foldNum >= (kkint32)foldAccuracies.size ()))
502 return foldAccuracies[foldNum];
510 weOwnConfusionMatrix =
false;
511 return confusionMatrix;
void SupportVectorStatistics(kkint32 &numSVs, kkint32 &totalNumSVs)
KKStr(kkint32 size)
Creates a KKStr object that pre-allocates space for 'size' characters.
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
double TrainingTime() const
ConfusionMatrix2Ptr GiveMeOwnershipOfConfusionMatrix()
FeatureVector * FeatureVectorPtr
MLClassPtr GetNoiseClass() const
float OrigSize() const
The value of Feature[0] before normalization.
A class that is meant to manage an n-Fold Cross Validation.
void RunCrossValidation(RunLog &log)
virtual FeatureVectorListPtr DuplicateListAndContents() const
Creates a duplicate of the list and also duplicates its contents.
virtual FeatureVectorListPtr ManufactureEmptyList(bool _owner) const
Creates an instance of an empty FeatureVectorList.
unsigned __int32 kkuint32
static TrainingProcess2Ptr CreateTrainingProcessFromTrainingExamples(TrainingConfiguration2Const *config, FeatureVectorListPtr trainingExamples, bool takeOwnershipOfTrainingExamples, bool featuresAlreadyNormalized, VolConstBool &cancelFlag, RunLog &log)
Will construct an instance using a provided list of examples rather than loading from the training library...
KKStr FoldAccuracysToStr() const
ConfusionMatrix2(const MLClassList &_classes)
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Classifier2(TrainingProcess2Ptr _trainer, RunLog &_log)
MLClassPtr MLClass() const
Class that this example is assigned to.
TrainingProcess2 * TrainingProcess2Ptr
kkint32 DuplicateDataCount() const
double osGetSystemTimeUsed()
Returns the number of CPU seconds used by current process.
void Increment(MLClassPtr _knownClass, MLClassPtr _predClass, kkint32 _size, double _probability, RunLog &_log)
MLClassPtr ClassifyAExample(FeatureVector &example, double &probability, kkint32 &numOfWinners, bool &knownClassOneOfTheWinners, double &breakTie)
float FoldAccuracy(kkint32 foldNum) const
kkint32 ExamplesPerClass() const
Used for logging messages.
void EncodeProblem(const struct svm_paramater ¶m, struct svm_problem &prob_in, struct svm_problem &prob_out)
CrossValidation(TrainingConfiguration2Ptr _config, FeatureVectorListPtr _examples, MLClassListPtr _mlClasses, kkint32 _numOfFolds, bool _featuresAreAlreadyNormalized, FileDescPtr _fileDesc, RunLog &_log, bool &_cancelFlag)
FeatureVectorListPtr ExtractExamplesForClassList(MLClassListPtr classes)
A confusion matrix object that is used to record the results from a CrossValidation. <see also cref="CrossValidation"/>
void RunValidationOnly(FeatureVectorListPtr validationData, bool *classedCorrectly, RunLog &log)
FactoryFVProducerPtr FvFactoryProducer(RunLog &log) const