26 using namespace KKMLL;
38 predictedClass (NULL),
53 breakTie (_example.breakTie),
54 mlClass (_example.mlClass),
55 exampleFileName
(_example.exampleFileName
),
57 origSize (_example.origSize),
58 predictedClass (_example.predictedClass),
59 probability (_example.probability),
60 trainWeight (_example.trainWeight),
61 validated (_example.validated),
89 return memoryConsumedEstimated;
104 if (newNumOfFeatures < 1)
107 errMsg <<
"FeatureVector::ResetNumOfFeatures ***ERROR*** NewNumOfFeatures[" << newNumOfFeatures <<
"] is invalid.";
108 cerr << endl << errMsg << endl << endl;
113 float* newFeatureData =
new float[newNumOfFeatures];
114 for (x = 0; x < newNumOfFeatures; x++)
119 newFeatureData[x] = 0.0f;
150 <<
"*** ERROR *** FeatureVector::FeatureData(" << featureNum <<
") Index out of bounds." << endl
169 <<
"*** ERROR *** FeatureVector::FeatureData(" << _featureNum <<
") Index out of bounds, no value set." << endl
182 float totalOfFeatureData = 0.0f;
185 return totalOfFeatureData;
195 KKStr imageFeaturesClassNameInvalid =
"*INVALID*";
200 <<
"FeatureVector::ClassName *** ERROR ***" << endl
202 <<
" Attempt to get ClassName, But not pointing to valid MLClass Object" << endl
204 return imageFeaturesClassNameInvalid;
223 errMsg <<
"FeatureVector::AddFeatureData ***ERROR*** FeatureNum[" << _featureNum <<
"] exceeds maximum allowed Feature Number["<<
numOfFeatures <<
"].";
224 cerr << endl << errMsg << endl << endl;
238 for (featureNum = 0; featureNum <
numOfFeatures; featureNum++)
309 fileDesc (_fileDesc),
316 KKStr errMsg =
"FeatureVectorList::FeatureVectorList *** ERROR *** FileDesc == NULL";
317 cerr << endl << errMsg << endl <<endl;
328 KKQueue<FeatureVector> (examples),
331 fileDesc (examples.fileDesc),
332 fileName
(examples.fileName
),
333 numOfFeatures (examples.numOfFeatures),
334 version (examples.version)
353 fileDesc (examples.fileDesc),
354 fileName
(examples.fileName
),
355 numOfFeatures (examples.numOfFeatures),
356 version (examples.version)
378 fileDesc (_examples.fileDesc),
379 fileName
(_examples.fileName
),
380 numOfFeatures (_examples.numOfFeatures),
381 version (_examples.version)
384 for (
auto idx: _examples)
386 if (classIdx->GetClassIndex (idx->MLClass ()) >= 0)
404 FeatureVectorList::const_iterator idx;
405 for (idx = begin (); idx != end (); ++idx)
407 FeatureVectorPtr fv = *idx;
410 return memoryConsumedEstimated;
447 log.Level (50) <<
"FeatureVectorList::RemoveEntriesWithMissingFeatures" << endl;
449 vector<FeatureVectorPtr> entriesToBeDeleted;
451 for (iterator idx = begin (); idx != end (); idx++)
453 FeatureVectorPtr example = *idx;
454 if (example->MissingData ())
455 entriesToBeDeleted.push_back (example);
458 for (
kkint32 x = 0; x < (kkint32)entriesToBeDeleted.size (); x++)
460 FeatureVectorPtr example = entriesToBeDeleted[x];
461 DeleteEntry (example);
479 msg <<
"FeatureVectorList::" << funcName <<
" *** ERROR *** 'fileDesc == NULL'";
486 msg <<
"FeatureVectorList::" << funcName <<
" *** ERROR *** FeatureNum[" << fieldNum <<
"] is out of range.";
502 ValidateFileDescAndFieldNum (featureNum,
"FieldName");
511 ValidateFileDescAndFieldNum (featureNum,
"FeatureType");
520 ValidateFileDescAndFieldNum (featureNum,
"FeatureTypeStr");
528 ValidateFileDescAndFieldNum (featureNum,
"FeatureCardinality");
537 ValidateFileDescAndFieldNum (0,
"CreateAttributeTypeTable");
546 ValidateFileDescAndFieldNum (0,
"CreateCardinalityTable");
563 numOfFeatures = newNumOfFeatures;
565 for (iterator idx = begin (); idx != end (); idx++)
567 FeatureVectorPtr i = *idx;
579 KKStr errMsg =
"FeatureVector::ResetFileDesc ***ERROR*** newFileDesc == NULL.";
580 cerr << endl << errMsg << endl << endl;
584 fileDesc = newFileDesc;
588 for (iterator idx = begin (); idx != end (); idx++)
590 FeatureVectorPtr i = *idx;
603 errMsg <<
"FeatureVectorList::PushOnBack ***ERROR*** Mismatch numOfFeatures[" << numOfFeatures <<
"] example->NumOfFeaturess[" << example
->NumOfFeatures () <<
"]";
604 cerr << endl << errMsg << endl << endl;
608 KKQueue<FeatureVector>::PushOnBack (example);
620 errMsg <<
"FeatureVectorList::PushOnFront ***ERROR*** Mismatch numOfFeatures[" << numOfFeatures <<
"] example->NumOfFeaturess[" << example
->NumOfFeatures () <<
"]";
621 cerr << endl << errMsg << endl << endl;
625 KKQueue<FeatureVector>::PushOnFront (example);
635 map<MLClassPtr,MLClassPtr> ptrIndex;
636 map<MLClassPtr,MLClassPtr>::iterator ptrIndexItr;
637 FeatureVectorList::const_iterator idx;
638 for (idx = begin (); idx != end (); ++idx)
640 FeatureVectorPtr example = *idx;
642 if (newClass == lastClass)
645 lastClass = newClass;
646 ptrIndexItr = ptrIndex.find (newClass);
647 if (ptrIndexItr == ptrIndex.end ())
649 lastClass = newClass;
650 ptrIndex.insert (pair<MLClassPtr,MLClassPtr> (newClass, newClass));
655 for (ptrIndexItr = ptrIndex.begin (); ptrIndexItr != ptrIndex.end (); ++ptrIndexItr)
656 classes->PushOnBack (ptrIndexItr->first);
687 KKStr errMsg =
"FeatureVectorList::AddQueue ***ERROR*** 'examplesToAdd' has different 'NumOfFeatures'.";
688 errMsg << endl <<
" numOfFeatures [" << numOfFeatures <<
"]" << endl
689 <<
" examplesToAdd.numOfFeatures[" << examplesToAdd.numOfFeatures <<
"]";
690 cerr << endl << errMsg << endl << endl;
694 if (QueueSize () < 1)
698 Version (examplesToAdd.Version ());
704 if (examplesToAdd.Version () != Version ())
708 KKQueue<FeatureVector>::AddQueue (examplesToAdd);
717 FeatureVectorList::const_iterator idx;
718 for (idx = begin (); idx != end (); idx++)
720 if ((*idx)->MLClass () == c)
733 FeatureVectorList::const_iterator idx;
734 for (idx = begin (); idx != end (); idx++)
736 const FeatureVectorPtr fv = *idx;
757 FeatureVectorPtr example;
760 if (_maxToExtract < 1)
761 _maxToExtract = QueueSize ();
767 if (!extractedImages)
769 KKStr err =
"***ERROR***, ExtractExamplesForAGivenClass, Could not allocate more space.";
770 cerr << endl << err << endl;
775 for (idx = 0; ((idx < qSize)); idx++)
777 example = IdxToPtr (idx);
778 if (((example
->MLClass () == _mlClass) || (!_mlClass)))
784 if (numExtracted >= _maxToExtract)
790 return extractedImages;
798 for (
auto idx: *classes)
800 FeatureVectorListPtr examplesForClass = ExtractExamplesForAGivenClass (idx);
801 if (examplesForClass)
802 subSetForClassList->AddQueue (*examplesForClass);
804 return subSetForClassList;
814 KKStrListPtr duplicateExamples =
new KKStrList (true);
816 if (QueueSize () < 2)
817 return duplicateExamples;
819 FeatureVectorList::iterator iIDX =
this->begin ();
821 while (iIDX != end ())
823 FeatureVectorPtr example = *iIDX; ++iIDX;
827 KKStr lastFileName = example->ExampleFileName ();
829 if (example->ExampleFileName () == lastFileName)
831 duplicateExamples->PushOnBack (
new KKStr (example->ExampleFileName ()));
843 while ((example != NULL) && (example->ExampleFileName () == lastFileName))
858 return duplicateExamples;
873 <<
"FeatureVectorList::BinarySearchByName **** ERROR ****" << endl
875 <<
" List is Not sorted in ExampleFileName Order" << endl
882 kkint32 high = QueueSize () - 1;
885 FeatureVectorPtr example = NULL;
889 mid = (low + high) / 2;
891 example = IdxToPtr (mid);
918 FeatureVectorPtr example = NULL;
923 <<
"FeatureVectorList::LookUpByRootName ***WARNING*** List is NOT SORTED by RootName." << endl
926 FeatureVectorList::iterator idx;
927 for (idx = begin (); idx != end (); idx++)
938 kkint32 high = QueueSize () - 1;
943 mid = (low + high) / 2;
945 example = IdxToPtr (mid);
949 if (tempName
< _rootName)
954 else if (tempName
> _rootName)
980 FeatureVectorPtr example = NULL;
981 for (
auto tempExample: *
this)
983 if (tempExample->ExampleFileName () == _imageFileName)
984 example = tempExample;
998 log.Level (-1) << endl
999 <<
"FeatureVectorList::OrderUsingNamesFromAFile *** ERROR ***" << endl
1000 <<
" Could not open file[" << fileName <<
"]." << endl
1005 FeatureVectorPtr example = NULL;
1009 while (fgets (buff,
sizeof (buff), in))
1011 KKStr txtLine (buff);
1013 if (txtLine.SubStrPart (0, 1) ==
"//")
1019 KKStr exampleFileName = txtLine.ExtractToken (
"\n\r\t");
1020 if (orderedImages->LookUpByImageFileName (exampleFileName))
1023 log.Level (-1) << endl
1024 <<
"FeatureVectorList::OrderUsingNamesFromAFile *** ERROR ***" << endl
1025 <<
" ExampleFileName[" << exampleFileName <<
"] occurred more than once in file." << endl
1028 delete orderedImages;
1032 example = LookUpByImageFileName (exampleFileName);
1036 log.Level (-1) << endl
1037 <<
"FeatureVectorList::OrderUsingNamesFromAFile *** ERROR ***" << endl
1038 <<
" ExampleFileName[" << exampleFileName <<
"] Not in list." << endl
1041 delete orderedImages;
1045 orderedImages->PushOnBack (example);
1050 return orderedImages;
1062 ofstream o (_fileName.Str ());
1065 _successful =
false;
1069 o <<
"// " <<
"Time Written [" << osGetLocalDateTime () <<
"]" << endl;
1070 o <<
"// " <<
"File Name [" << _fileName <<
"]" << endl;
1071 o <<
"// " <<
"Size [" << QueueSize () <<
"]" << endl;
1075 FeatureVectorList::iterator idx;
1077 for (idx = begin (); idx != end (); idx++)
1078 o << (*idx)->ExampleFileName () << endl;
1098 for (
kkint32 idx = 0; idx < QueueSize (); idx++)
1100 FeatureVectorPtr curImage = IdxToPtr (idx);
1115 KKStr s (stats->QueueSize () * 30 + 100);
1117 s <<
"Total_Images\t" << QueueSize () << endl;
1119 s <<
"Total_Classes\t" << stats->QueueSize () << endl;
1122 s <<
"Class_Name" <<
"\t" <<
"Count" << endl;
1123 ClassStatisticList::iterator statsIDX;
1124 for (statsIDX = stats->begin (); statsIDX != stats->end (); statsIDX++)
1127 s << cs->Name () <<
"\t" << cs->Count () << endl;
1143 FeatureVectorPtr example = NULL;
1145 FeatureVectorList::const_iterator idx;
1147 for (idx = begin (); idx != end (); ++idx)
1153 if (classStatistic == NULL)
1165 return classStatistics;
1176 ClassStatisticList::const_iterator idx;
1177 for (idx = stats->begin (); idx != stats->end (); ++idx)
1185 for (idx = stats->begin (); idx != stats->end (); ++idx)
1195 return distribution;
1205 if (QueueSize () < 2)
1209 return duplicateList;
1218 FeatureVectorList::iterator idx;
1219 idx = workList.begin ();
1221 FeatureVectorPtr lastExample = *idx; ++idx;
1222 FeatureVectorPtr example = *idx; ++idx;
1230 if (rootName
!= lastRootName)
1232 lastRootName
= rootName;
1233 lastExample = example;
1234 if (idx == workList.end ())
1245 while ((example != NULL) && (rootName
== lastRootName))
1248 if (idx == workList.end ())
1262 return duplicateList;
1270 FeatureVectorListPtr* folds,
1271 FeatureVectorListPtr src
1274 src->RandomizeOrder ();
1275 src->RandomizeOrder ();
1277 kkint32 imagesInThisList = src->QueueSize ();
1278 if (maxImagesPerClass > 0)
1279 imagesInThisList = Min (imagesInThisList, maxImagesPerClass);
1283 for (x = 0; x < imagesInThisList; x++)
1285 folds[x % numOfFolds]->AddSingleExample (src->IdxToPtr (x));
1309 <<
"FeatureVectorList::CalcStatsForFeatureNum *** ERROR *** Invalid FeatureNum[" << _featureNum <<
"]" << endl
1310 <<
" FeatureNum [" << _featureNum <<
"]" << endl
1311 <<
" NumOfFeatures[" << NumOfFeatures () <<
"]" << endl
1317 if (QueueSize () == 0)
1323 for (idx = begin (); idx != end (); idx++)
1325 FeatureVectorPtr i = *idx;
1338 _mean = _total / (
float)_count;
1340 float totalSquareDelta = 0.0f;
1341 for (idx = begin (); idx != end (); idx++)
1343 FeatureVectorPtr i = *idx;
1345 totalSquareDelta += delta * delta;
1348 _var = totalSquareDelta / _count;
1349 _stdDev = sqrt (_var);
1361 delete classes; classes = NULL;
1363 return stratifiedExamples;
1376 log.Level (10) <<
"FeatureVectorList::StratifyAmoungstClasses" << endl;
1382 for (x = 0; x < numOfFolds; x++)
1386 MLClassList::iterator icIDX;
1388 for (icIDX = mlClasses->begin (); icIDX != mlClasses->end (); ++icIDX)
1393 if (imagesInClass->QueueSize () < numOfFolds)
1395 log.Level (-1) << endl
1396 <<
"FeatureVectorList::DistributesImagesRandomlyWithInFolds ***ERROR***" << endl
1398 <<
"*** ERROR ***, Not enough examples to split amongst the different folds." << endl
1399 <<
" Class [" << mlClass->Name () <<
"]." << endl
1400 <<
" Number of Images[" << imagesInClass->QueueSize () <<
"]." << endl
1401 <<
" Number of Folds [" << numOfFolds <<
"]." << endl
1406 msg <<
"Not enough Images[" << imagesInClass->QueueSize () <<
"] " 1407 <<
"for Class[" << mlClass->Name () <<
"] " 1408 <<
"to distribute in Folds.";
1410 if (!osIsBackGroundProcess ())
1411 osDisplayWarning (msg);
1414 imagesInClass->RandomizeOrder ();
1415 imagesInClass->RandomizeOrder ();
1418 delete imagesInClass;
1419 imagesInClass = NULL;
1424 for (foldNum = 0; foldNum < numOfFolds; foldNum++)
1426 folds[foldNum]->RandomizeOrder ();
1427 folds[foldNum]->RandomizeOrder ();
1429 delete folds[foldNum];
1430 folds[foldNum] = NULL;
1435 return stratafiedImages;
1452 ClassStatisticList::const_iterator idx;
1453 for (idx = classStats->begin (); idx != classStats->end (); idx++)
1456 if (classStats
->Count () > largestClassSize)
1458 largestClassSize = classStats
->Count ();
1459 largestClassStat = classStats;
1463 float fraction =
float (largestClassSize) /
float (size ());
1477 o <<
"Total_Images\t" << QueueSize () << endl;
1479 o <<
"Total_Classes\t" << stats->QueueSize () << endl;
1482 ClassStatisticList::const_iterator statsIDX;
1485 o <<
"Class_Name" <<
"\t" <<
"Index" <<
"\t" <<
"Count" << endl;
1486 for (statsIDX = stats->begin (); statsIDX != stats->end (); ++statsIDX)
1488 o << (*statsIDX)->Name () <<
"\t" << index <<
"\t" << (*statsIDX)->Count () << endl;
1503 o <<
"<table align=\"center\" border=\"2\" cellpadding=\"4\">" << endl
1504 <<
"<thead>" << endl;
1507 <<
"<th align=\"left\">Class<br />Name</th>" 1508 <<
"<th align=\"center\">Index</th>" 1509 <<
"<th align=\"center\">Count</th>" 1512 <<
"</thead>" << endl;
1514 o <<
"<tbody>" << endl;
1516 ClassStatisticList::iterator statsIDX;
1518 for (statsIDX = stats->begin (); statsIDX != stats->end (); statsIDX++)
1522 <<
"<td align=\"left\">" << statistic.Name () <<
"</td>" 1523 <<
"<td align=\"center\">" << index <<
"</td>" 1524 <<
"<td align=\"center\">" << statistic.Count () <<
"</td>" 1531 <<
"<td align=\"left\">" <<
"Total" <<
"</td>" 1532 <<
"<td align=\"center\">" <<
" " <<
"</td>" 1533 <<
"<td align=\"center\">" << QueueSize () <<
"</td>" 1537 o <<
"</tbody>" << endl;
1538 o <<
"</table>" << endl;
1549 o <<
"Class" <<
"\t" 1550 <<
"ClassIdx" <<
"\t" 1551 <<
"FeatureNum" <<
"\t" 1552 <<
"FieldName" <<
"\t" 1563 MLClassList::const_iterator cIDX;
1567 for (cIDX = mlClasses->begin (); cIDX != mlClasses->end (); cIDX++)
1573 for (featureNum = 0; featureNum < imagesThisClass
->NumOfFeatures (); featureNum++)
1576 float total, mean, var, stdDev;
1578 o << mlClass->Name () <<
"\t" 1580 << featureNum <<
"\t" 1581 << FieldName (featureNum) <<
"\t" 1582 << FeatureTypeStr (featureNum) <<
"\t" 1591 delete imagesThisClass;
1607 FeatureVectorList::const_iterator idx;
1610 for (idx = begin (); idx != end (); idx++)
1612 const FeatureVectorPtr fv = *idx;
1615 for (fn = 0; fn < NumOfFeatures (); fn++)
1616 totals[fn] += fd[fn];
1620 for (fn = 0; fn < NumOfFeatures (); fn++)
1621 means[fn] = totals[fn] / (
double)
this->QueueSize ();
1635 if (percentage <= 0.0f)
1640 if (percentage > 100.0f)
1645 kkint32 newSize = (kkint32)(0.5f + (
float)QueueSize () * percentage / 100.0f);
1650 MLClassList::iterator idx;
1651 for (idx = classes->begin (); idx != classes->end (); idx++)
1655 examplesThisClass->RandomizeOrder ();
1657 kkint32 numExamplesThisClass = Max (minClassCount, ((kkint32)(0.5f + (
float)(examplesThisClass->QueueSize ()) * percentage / 100.0f)));
1658 if (numExamplesThisClass > examplesThisClass->QueueSize ())
1659 numExamplesThisClass = examplesThisClass->QueueSize ();
1660 for (kkint32 zed = 0; zed < numExamplesThisClass; zed++)
1661 randomSampled->PushOnBack (examplesThisClass->IdxToPtr (zed));
1662 delete examplesThisClass; examplesThisClass = NULL;
1665 delete classes; classes = NULL;
1667 randomSampled->RandomizeOrder ();
1668 return randomSampled;
1676 FeatureVectorList::const_iterator idx;
1678 FeatureVectorPtr i = NULL;
1680 for (idx = begin (); idx != end (); idx++)
1700 vector<VectorInt> lookUpTables;
1706 symbolicFields.push_back (fieldNum);
1717 errMsg <<
"FeatureVectorList::ReSyncSymbolicData ***ERROR*** FieldNum[" << fieldNum <<
"] FieldName[" << newFileDesc
->FieldName (fieldNum
) <<
"] Nominal Value[" << nominalValue <<
"] is missing.";
1722 lookUpTable.push_back (newCd);
1725 lookUpTables.push_back (lookUpTable);
1729 FeatureVectorList::iterator idx;
1730 for (idx = begin (); idx != end (); idx++)
1732 FeatureVectorPtr i = *idx;
1736 for (x = 0; x < symbolicFields.size (); x++)
1738 fieldNum = symbolicFields[x];
1740 kkint32 newCode = lookUpTables[x][oldCode];
1745 fileDesc = newFileDesc;
1757 errMsg
= "FeatureVectorList::SynchronizeSymbolicData ***ERROR*** The two datasets have more than SymbolicData differences.";
1758 log.Level (-1) << endl << errMsg << endl << endl;
1775 KKStr fullLevelName =
"";
1777 while ((curLevel < level) && (!nextLevelName
.Empty ()))
1781 fullLevelName <<
"_";
1782 fullLevelName << nextLevelName;
1786 return fullLevelName;
1798 MLClassList::iterator idx;
1799 idx = allClasses->begin ();
1801 while (idx != allClasses->end ())
1803 MLClassPtr curClass = *idx;
1804 KKStr curClassNameForThisLevel = GetClassNameByHierarchyLevel (curClass->Name (), level);
1805 if (curClassNameForThisLevel.Empty ())
1812 MLClassPtr curClassForThisLevel = MLClass::CreateNewMLClass (curClassNameForThisLevel);
1813 MLClassPtr nextClassForThisLevel = curClassForThisLevel;
1815 while ((idx != allClasses->end ()) && (nextClassForThisLevel == curClassForThisLevel))
1818 FeatureVectorListPtr examplesForCurClass = ExtractExamplesForAGivenClass (curClass);
1819 FeatureVectorListPtr reLabeledExamples = examplesForCurClass->DuplicateListAndContents ();
1820 delete examplesForCurClass; examplesForCurClass = NULL;
1822 FeatureVectorList::iterator idx2;
1823 for (idx2 = reLabeledExamples->begin (); idx2 != reLabeledExamples->end (); idx2++)
1825 FeatureVectorPtr fv = *idx2;
1826 fv->MLClass (curClassForThisLevel);
1828 examplesLabeledForAppropriateLevel->AddQueue (*reLabeledExamples);
1829 reLabeledExamples->Owner (
false);
1830 delete reLabeledExamples; reLabeledExamples = NULL;
1835 if (idx != allClasses->end ())
1838 curClassNameForThisLevel = GetClassNameByHierarchyLevel (curClass->Name (), level);
1839 nextClassForThisLevel = MLClass::CreateNewMLClass (curClassNameForThisLevel);
1845 delete allClasses; allClasses = NULL;
1847 return examplesLabeledForAppropriateLevel;
1975 return (root1
< root2);
1993 return root1
> root2;
2080 sort (begin (), end (), c);
2085 sort (begin (), end (), c);
2098 sort (begin (), end (), c);
2103 sort (begin (), end (), c);
2118 sort (begin (), end (), c);
2123 sort (begin (), end (), c);
2137 sort (begin (), end (), c);
2142 sort (begin (), end (), c);
2157 sort (begin (), end (), c);
2162 sort (begin (), end (), c);
bool operator()(FeatureVectorPtr p1, FeatureVectorPtr p2)
KKStr(kkint32 size)
Creates a KKStr object that pre-allocates space for 'size' characters.
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
void PushOnFront(FeatureVectorPtr image)
Overloading the PushOnFront function in KKQueue so we can monitor the Version and Sort Order...
void AddQueue(const FeatureVectorList &examplesToAdd)
Add the contents of 'examplesToAdd' to the end of this list.
void ResetNumOfFeatures(kkint32 newNumOfFeatures)
KKMLL::AttributeTypeVector CreateAttributeTypeTable() const
FeatureVectorListPtr StratifyAmoungstClasses(kkint32 numOfFolds, RunLog &log)
KKStrListPtr ExtractDuplicatesByExampleFileName()
Provides a detailed description of the attributes of a dataset.
void SortByBreakTie(bool reversedOrder=false)
void AddSingleExample(FeatureVectorPtr _imageFeatures)
Same as PushOnBack.
kkint32 MemoryConsumedEstimated() const
Maintains a list of classes and their associated integer index.
MLClassPtr MLClass() const
bool AllFieldsAreNumeric() const
Returns true if all fields are numeric, no nominal fields.
kkuint32 NumOfFields() const
FeatureVectorList(const FeatureVectorList &examples, bool _owner)
Create a duplicate list, depending on the '_owner' parameter may also duplicate the contents...
virtual FeatureVectorListPtr Duplicate(bool _owner) const
Creates a duplicate of the list using the same container.
kkint32 GetClassCount(MLClassPtr c) const
Returns number of examples for a specific Class (MLClass).
FeatureVectorPtr BinarySearchByName(const KKStr &_imageFileName) const
Will search for the example with the same name as '_imageFileName'.
float FeatureData(kkint32 featureNum) const
virtual kkint32 MemoryConsumedEstimated() const
ClassProbListPtr GetClassDistribution() const
Keeps track of selected features.
bool MissingData() const
True indicates that one or more features were missing.
FeatureVector(kkint32 _numOfFeatures)
KKMLL::AttributeType Type(kkint32 fieldNum) const
BreakTieComparisonReversed()
KKStr ExtractToken(const char *delStr="\n\t\r ")
kkint32 NumOfFeatures() const
Number of features in this FeatureVector.
virtual void PushOnBack(ClassProbPtr cp)
virtual ~FeatureVectorList()
DuplicateImages(FeatureVectorListPtr _examples, RunLog &_log)
You would use this instance to search for duplicates in the list of 'examples'.
std::vector< int > VectorInt
float OrigSize() const
The value of Feature[0] before normalization.
MLClassListPtr ExtractListOfClasses() const
bool operator>(const KKStr &right) const
KKStr & operator=(const char *src)
RootNameComparrisonReversed()
void PushOnBack(ClassStatisticPtr stat)
FeatureNumList AllFeatures()
Will return a FeatureNumList instance with all features selected.
const FileDescPtr FileDesc() const
ClassStatisticPtr LookUpByMLClass(MLClassPtr mlClass) const
void PrintClassStatisticsHTML(std::ostream &o) const
float Probability() const
The probability assigned by classifier to the predicted class.
const KKStr & MLClassName() const
Name of class that this example is assigned to.
bool SameExceptForSymbolicData(const FileDesc &otherFd, RunLog &log) const
void RemoveEntriesWithMissingFeatures(RunLog &log)
ClassStatistic * ClassStatisticPtr
virtual FeatureVectorListPtr DuplicateListAndContents() const
Creates a duplicate of the list and also duplicates its contents.
void SortByProbability(bool reversedOrder=false)
KKMLL::AttributeType FeatureType(kkint32 featureNum) const
Returns the type of attribute for specified 'featureNum'.
virtual FeatureVectorListPtr ManufactureEmptyList(bool _owner) const
Creates an instance of an empty FeatureVectorList.
bool operator()(FeatureVectorPtr p1, FeatureVectorPtr p2)
const KKStr & FieldName(kkint32 fieldNum) const
void SortByImageFileName(bool reversedOrder=false)
unsigned __int32 kkuint32
VectorDouble ExtractMeanFeatureValues()
void AddFeatureData(kkint32 _featureNum, float _featureData)
float BreakTie() const
The difference in probability between the two most likely classes.
VectorInt32 CreateCardinalityTable() const
FeatureVectorList(FileDescPtr _fileDesc, bool _owner)
Will create a new empty list of FeatureVector's.
kkint32 LookUpNominalCode(kkint32 fieldNum, const KKStr &nominalValue) const
KKStr & operator=(KKStr &&src)
virtual FeatureVectorPtr Duplicate() const
MLClassIndexList(const MLClassList &_classes)
FeatureVector(const FeatureVector &_example)
Container class for FeatureVector derived objects.
void PrintFeatureStatisticsByClass(std::ostream &o) const
void CalcStatsForFeatureNum(kkint32 _featureNum, kkint32 &_count, float &_total, float &_mean, float &_var, float &_stdDev)
const float * FeatureDataConst() const
const KKStr & GetNominalValue(kkint32 fieldNum, kkint32 code) const
bool AllFieldsAreNumeric() const
Allows the user to quickly determine if there are no nominal fields.
ImageFileNameComparisonReversed()
void AllocateFeatureDataArray()
Used by container classes such as 'FeatureVectorList'. This way they can determine real underlying cl...
KKStr(const KKStr &str)
Copy Constructor.
bool operator()(FeatureVectorPtr p1, FeatureVectorPtr p2)
Used to record probability for a specified class; and a list of classes.
FeatureVectorListPtr ExtractExamplesForAGivenClass(MLClassPtr _mlClass, kkint32 _maxToExtract=-1, float _minSize=-1.0f) const
kkint32 NumOfFeatures() const
void SplitImagesAmongstFolds(kkint32 numOfFolds, kkint32 maxImagesPerClass, FeatureVectorListPtr *folds, FeatureVectorListPtr src)
void SortByRootName(bool reversedOrder=false)
ProbabilityComparisonReversed()
bool operator()(FeatureVectorPtr p1, FeatureVectorPtr p2)
vector< kkint32 > CreateCardinalityTable() const
FeatureVectorList(MLClassList &_mlClasses, FeatureVectorList &_examples)
Will create a list consisting of the subset of examples in '_examples' which are members of Images...
void ResetFileDesc(FileDescPtr newFileDesc)
ClassProbList(bool owner)
void SaveOrderingOfImages(const KKStr &fileName, bool &successful)
Will save into a file the current ordering of FeatureVector instances in list.
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
ClassStatisticListPtr GetClassStatistics() const
Returns the number of FeatureVectors per class.
static const KKStr & EmptyStr()
Static method that returns an Empty String.
MLClassPtr MLClass() const
Class that this example is assigned to.
FeatureVectorListPtr OrderUsingNamesFromAFile(const KKStr &fileName, RunLog &log)
Using list of ImageFileNames in a file('fileName') create a new FeatureVectorList instance with examp...
bool operator()(FeatureVectorPtr p1, FeatureVectorPtr p2)
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
FeatureVectorListPtr StratifyAmoungstClasses(MLClassListPtr mlClasses, kkint32 maxImagesPerClass, kkint32 numOfFolds, RunLog &log)
bool SameExceptForSymbolicData(const FeatureVectorList &otherData, RunLog &log) const
void FeatureData(kkint32 _featureNum, float _featureValue)
Assign a value to a specific feature number for the feature vector.
bool operator!=(const KKStr &right) const
static FeatureNumList AllFeatures(FileDescPtr fileDesc)
Create a FeatureNumList object where all features are selected, except ones that are flagged as Ignor...
ClassStatisticList(bool _owner)
void PurgeDuplicates(FeatureVectorListPtr examples, bool allowDupsInSameClass, std::ostream *report)
Delete duplicate examples from FeatureVectorList structure provided in constructor.
float TotalOfFeatureData() const
Returns the total of all Feature Attributes for this feature vector.
bool operator==(const KKStr &right) const
const KKStr & UpperName() const
ClassStatisticList * ClassStatisticListPtr
const KKStr & Name() const
std::ostream &__cdecl operator<<(std::ostream &os, const KKStr &str)
FeatureVectorListPtr ExtractRandomSampling(float percentage, kkint32 minClassCount)
Will return a random sampling by class of our FeatureVector's; with a minimum per class of 'minClassC...
FeatureVectorPtr LookUpByRootName(const KKStr &_rootName)
Returns a pointer to the FeatureVector whose ExampleFileName rootname = _rootName *...
Used by routines that retrieve Class statistics from FeatureVectorList instances. ...
bool operator==(FeatureVector &other_example) const
IFL_SortOrder
Represents the different orders that a list of FeatureVector instances in a FeatureVectorList object ...
FeatureVectorListPtr ExtractExamplesForHierarchyLevel(kkuint32 level)
Will create a list of FeatureVectors where the class assignment will reflect the specified Hierarchy ...
const char * Str() const
Returns a pointer to an ASCII string.
FeatureVectorListPtr ExtractDuplicatesByRootImageFileName()
Returns: a list of 'FeatureVector' objects that have duplicate root file names.
float MajorityClassFraction() const
KKStr GetClassNameByHierarchyLevel(KKStr className, kkint32 level)
void ReSyncSymbolicData(FileDescPtr newFileDesc)
AttributeTypeVector CreateAttributeTypeTable() const
FILE * osFOPEN(const char *fileName, const char *mode)
const KKStr & FieldName(kkint32 featureNum) const
Returns name of Attribute Field.
ClassStatistic(MLClassPtr _mlClass, kkuint32 _count)
FeatureVectorListPtr CreateListForAGivenLevel(kkint32 level)
Will create a list of FeatureVectors where the class assignment will reflect the specified Hierarchy ...
Detects duplicate images in a given FeatureVectorList object.
KKStr & operator=(const KKStr &src)
Used for logging messages.
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
bool operator<(const KKStr &right) const
kkint32 FeatureCardinality(kkint32 featureNum) const
Returns the number of values defined for a Nominal Field.
MLClassList * MLClassListPtr
const KKStr & PredictedClassName() const
KKStr osGetRootNameWithExtension(const KKStr &fullFileName)
void RemoveDuplicateEntries(bool allowDupsInSameClass, RunLog &runLog)
const KKStr & ClassName() const
Name of class that this example is assigned to.
KKStr ClassStatisticsStr() const
kkint32 Cardinality(kkint32 fieldNum) const
KKException(const KKStr &_exceptionStr)
ClassProb(MLClassPtr _classLabel, double _probability, float _votes)
void SynchronizeSymbolicData(FeatureVectorList &otherData, RunLog &log)
Maintains a list of MLClass instances.
virtual kkint32 MemoryConsumedEstimated() const
bool operator()(FeatureVectorPtr p1, FeatureVectorPtr p2)
Represents a Feature Vector of a single example, labeled or unlabeled.
ClassNameComparrisonReversed()
static FileDescPtr MergeSymbolicFields(const FileDesc &left, const FileDesc &right, RunLog &log)
Merges the Symbolic fields of two different 'FileDesc' instances producing a new instance of 'FileDes...
FeatureVectorPtr LookUpByImageFileName(const KKStr &_imageFileName) const
Returns a pointer to the FeatureVector which has '_imageFileName'.
FeatureVectorListPtr ExtractExamplesForClassList(MLClassListPtr classes)
bool operator()(FeatureVectorPtr p1, FeatureVectorPtr p2)
MLClassPtr MLClassForGivenHierarchialLevel(KKB::kkuint16 level) const
const KKStr & AttributeTypeToStr(AttributeType type)
void SortByClass(bool reversedOrder=false)
KKStr FeatureTypeStr(kkint32 featureNum) const
std::vector< double > VectorDouble
Vector of doubles.
FeatureVectorList * FeatureVectorListPtr
ImageFileNameComparison()
bool operator()(FeatureVectorPtr p1, FeatureVectorPtr p2)
const KKStr & ExampleFileName() const
Name of file that this FeatureVector was computed from.
bool operator()(FeatureVectorPtr p1, FeatureVectorPtr p2)
KKStr osGetRootName(const KKStr &fullFileName)
void ResetNumOfFeaturs(kkint32 newNumOfFeatures)
void osDisplayWarning(KKStr _message)
bool operator()(FeatureVectorPtr p1, FeatureVectorPtr p2)
void PrintClassStatistics(std::ostream &o) const