26 using namespace KKMLL;
56 _log.Level (10) <<
"FeatureFileIOC45::LoadFeatureFile File[" << _fileName <<
"] FileFormat[" << DriverName () <<
"]" << endl;
70 namesFileName
= _fileName
+ ".names";
71 dataFileName
= _fileName;
73 dataFileName
= _fileName
+ ".data";
80 if (extension
== "NAMES")
83 namesFileName
= _fileName;
84 dataFileName
= leadingPart;
87 dataFileName
= leadingPart
+ ".data";
89 dataFileName
= leadingPart
+ ".test";
93 else if ((extension
== "DATA") || (extension
== "TEST"))
95 dataFileName
= _fileName;
96 namesFileName
= leadingPart
+ ".names";
99 namesFileName
= leadingPart
+ ".names";
102 namesFileName
= _fileName
+ ".names";
104 namesFileName
= leadingPart
+ ".names";
111 dataFileName
= _fileName;
112 namesFileName
= _fileName
+ ".names";
117 _changesMade =
false;
119 kkint32 estimatedNumOfDataItems = -1;
123 ifstream namesFile (namesFileName.Str (), ios_base::in);
124 if (!namesFile.is_open ())
126 _log.Level (-1) <<
"FeatureFileIOC45::LoadFeatureFile ***ERROR*** Error Opening File[" << dataFileName <<
"]." << endl;
133 FileDescPtr fileDesc = GetFileDesc (namesFileName, namesFile, &_mlClasses, estimatedNumOfDataItems, errorMessage, _log);
134 if (fileDesc == NULL)
136 _log.Level (-1) << endl << endl
137 <<
"FeatureFileIOC45::LoadFeatureFile ***ERROR*** Loading Feature File[" << namesFileName <<
"]" << endl
148 ifstream dataFile (dataFileName.Str (), ios_base::in);
149 if (!dataFile.is_open ())
151 _log.Level (-1) <<
"FeatureFileIOC45::LoadFeatureFile ***ERROR*** Error Opening File[" << dataFileName <<
"]." << endl;
157 FeatureVectorListPtr examples = LoadFile (dataFileName, fileDesc, _mlClasses, dataFile, _maxCount, _cancelFlag, _changesMade, errorMessage, _log);
158 if (examples == NULL)
203 nextChar = ln
[x + 1
];
205 if (strchr (
" \t", thisChar))
208 while ((x < ln.Len ()) && (strchr (
" \t", nextChar)))
212 nextChar = ln[x + 1];
216 else if (thisChar ==
'\\')
219 if (strchr (
",:?", nextChar))
223 nextChar = ln[x + 1];
249 if (txt
[x - 1
] !=
'\\')
break;
274 kkint32 colPos = C45LocateNextCharacter (attrStr,
':');
278 _log.Level (-1) << endl
279 <<
"FeatureFileIOC45::ProcessC45AttrStr ***ERROR*** Missing Attribute Specification (No Colon)." << endl
280 <<
" attrStr[" << attrStr <<
"]" << endl
288 C45StrPreProcessName (name);
292 _log.Level (-1) << endl
293 <<
"FeatureFileIOC45::ProcessC45AttrStr ***ERROR*** Field Name is Empty" << endl
294 <<
" AttrStr[" << attrStr <<
"]" << endl
307 if (typeStrUpper
== "CONTINUOUS")
312 else if (typeStrUpper
== "IGNORE")
317 else if (typeStrUpper
== "SYMBOLIC")
328 bool alreadyExists =
false;
333 _log.Level (-1) << endl
334 <<
"FeatureFileIOC45::ProcessC45AttrStr *** ERROR *** Field Name Occurs more than once." 335 <<
" AttrStr[" << attrStr <<
"]" << endl
349 kkint32 commaPos = C45LocateNextCharacter (typeStr,
',');
352 nominalValue
= typeStr;
361 C45StrPreProcessName (nominalValue);
366 _log.Level (-1) << endl
367 <<
"FileDesc::AddANominalValue *** ERROR ***" << endl
368 <<
" Blank NominalValue (\"\")" << endl
369 <<
" AttrStr [" << attrStr <<
"]." << endl
378 _log.Level (-1) << endl
379 <<
"FileDesc::AddANominalValue *** ERROR ***" << endl
380 <<
" Nominal Value [" << nominalValue <<
"] occurs more than once." << endl
381 <<
" AttrStr [" << attrStr <<
"]." << endl
400 KKStr& _errorMessage,
407 bool classLineRead =
false;
414 while ((!eof) && (!classLineRead))
416 C45StripComments (ln);
428 for (
kkint32 idx = 0; idx < (kkint32)classNames.size (); idx++)
430 KKStr className = classNames[idx];
431 C45StrPreProcessName (className);
435 classLineRead =
true;
445 _log.Level (-1) << endl
446 <<
"FeatureFileIOC45::GetFileDesc *** ERROR *** No class line in Names File." << endl
448 _errorMessage
= "No class line in Names File.";
460 C45StripComments (ln);
465 bool validStr =
true;
470 while (dotPos < ln
.Len ())
474 if (dotPos >= (ln
.Len () - 1))
477 else if (strchr (
" \t\r\n", ln[dotPos + 1]))
498 ProcessC45AttrStr (fileDesc, attrStr, validStr, _log);
501 _log.Level (-1) << endl
502 <<
"FeatureFileIOC45::GetFileDesc ***ERROR*** Invalid AttributeStr[" << origAttrStr <<
"]." << endl
503 <<
" LineNum[" << lineNum <<
"]" << endl
505 _errorMessage
= "No class line in Names File.";
506 _errorMessage <<
"Invalid AttributeStr[" << origAttrStr <<
"], LineNum[" << lineNum <<
"]";
530 const char* delimiters,
538 const kkint32 maxTokenLen = 1024;
539 char token[maxTokenLen];
542 kkint32 ch = in.get (); eof = in.eof ();
543 while ((!eof) && ((ch ==
' ') || (ch ==
'\r') || (ch ==
'\t')) && (ch !=
'\n'))
544 {ch = in.get (); eof = in.eof ();}
549 if (in.peek () ==
'\r')
557 if (in.peek () ==
'\n')
566 char nextCh = in.peek ();
567 if (strchr (
" \t\r\n|", nextCh))
579 while ((!eof) && (ch !=
'\n') && (ch !=
'\r'))
580 {ch = in.get (); eof = in.eof ();}
585 if ((ch ==
'\n') && (in.peek () ==
'\r'))
588 else if ((ch ==
'\r') && (in.peek () ==
'\n'))
598 while ((!eof) && (!strchr (delimiters, ch)))
600 if ((ch ==
'\n') || (ch ==
'|'))
610 char nextCh = in.get ();
bool nextEOF = in.eof ();
615 nextCh = in.get (); nextEOF = in.eof ();
621 if (strchr (
" \r\t", nextCh))
629 else if ((nextCh ==
'\n') || (nextCh ==
'|'))
652 char nextCh = in.get ();
bool nextEOF = in.eof ();
659 nextCh = in.get (); nextEOF = in.eof ();
663 if (strchr (
",?:", nextCh))
675 else if (strchr (
" \t\r", ch))
680 char nextCh = in.get ();
bool nextEOF = in.eof ();
681 while ((!nextEOF) && (strchr (
" \t\r", nextCh)))
682 {nextCh = in.get (); nextEOF = in.eof ();}
690 nextCh = in.get (); nextEOF = in.eof ();
701 token[tokenLen] = ch;
703 ch = in.get (); eof = in.eof ();
712 if (strchr (
" \r\t", token[tokenLen - 1]) == 0)
732 KKStr& _errorMessage,
736 _log.Level (10) <<
"FeatureFileIOC45::LoadFile FileName[" << _fileName <<
"]" << endl;
750 bool lineIsValid =
true;
752 KKStr imageFileName =
"";
760 KKStr field = C45ReadNextToken (_in,
",", eof, eol);
775 for (fieldNum = 0; fieldNum < numOfFeatures; fieldNum++)
779 _errorMessage <<
"Not all Features were accounted for on Line[" << lineCount <<
"].";
780 _log.Level (-1) << endl << endl
781 <<
"FeatureFileIOC45::LoadFile " << _errorMessage << endl
783 delete examples; examples = NULL;
784 delete example; example = NULL;
788 switch (attributeTable[fieldNum]
->Type ())
812 _errorMessage <<
"Invalid NominalValue[" << field <<
"] on line[" << lineCount <<
"].";
813 _log.Level (-1) << endl << endl
814 <<
"FeatureFileIOC45::LoadFile " << _errorMessage << endl
816 delete examples; examples = NULL;
817 delete example; example = NULL;
840 imageFileName
= field;
845 bool alreadyExists =
false;
856 _log.Level (-1) << endl << endl
857 <<
"FeatureFileIOC45::LoadFile *** Undefined Field Type ***" << endl
863 field
= C45ReadNextToken (_in,
" ,", eof, eol);
869 _errorMessage <<
"Line[" << lineCount <<
"] Missing ClassName.";
870 _log.Level (-1) << endl << endl
871 <<
"FeatureFileIOC45::LoadFile " << _errorMessage << endl
874 delete examples; examples = NULL;
875 delete example; example = NULL;
892 _errorMessage <<
"Line[" << lineCount <<
"] Invalid Class[" << field <<
"]";
893 _log.Level (-1) << endl << endl
894 <<
"FeatureFileIOC45::LoadFile " << _errorMessage << endl
896 delete examples; examples = NULL;
897 delete example; example = NULL;
920 while ((ch !=
'\n') && (ch !=
'\r') && (!_in.eof ()))
928 if ((ch ==
'\n') && (_in.peek () ==
'\r'))
931 else if ((ch ==
'\r') && (_in.peek () ==
'\n'))
936 if ((lineCount % 1000) == 0)
937 cout <<
"Records Loaded " << lineCount << endl;
939 if ((kkint32)examples->size () > _maxCount)
944 delete [] attributeTable;
952 KKStr& namesFileName,
963 namesFileName
= fileName
+ ".names";
964 dataFileName
= fileName
+ ".data";
971 if ((extension
== "NAMES") || (extension
== "NAME"))
973 namesFileName
= fileName;
974 dataFileName
= leedingPart
+ "data";
977 else if ((extension
== "DATA") || (extension
== "TEST"))
981 namesFileName
= leedingPart
+ "names";
982 dataFileName
= fileName;
987 namesFileName
= fileName
+ ".names";
988 dataFileName
= fileName;
1001 for (x = 0; x < oldName
.Len (); x++)
1003 char ch = oldName
[x];
1005 if (strchr (
",:?", ch))
1007 newName.Append (
'\\');
1021 const KKStr& _fileName,
1027 KKStr& _errorMessage,
1031 KKStr namesFileName;
1034 _numExamplesWritten = 0;
1036 C45ConstructFileNameForWritting (_fileName, namesFileName, dataFileName);
1045 ofstream nf (namesFileName.Str ());
1047 for (x = 0; x < classes->QueueSize (); x++)
1051 nf << C45AdjName (classes->IdxToPtr (x)->Name ());
1059 nf << C45AdjName (attr->Name ()) <<
": ";
1065 if (y > 0) nf <<
", ";
1066 nf << C45AdjName (attr->GetNominalValue (y));
1088 nf <<
"ExampleFileName" <<
": " <<
"Symbolic" <<
"." << endl;
1096 ClassStatisticList::iterator idx;
1098 _out <<
"| FileName [" << _fileName <<
"]" << endl;
1099 _out <<
"| DateWritten [" << osGetLocalDateTime () <<
"]" << endl;
1100 _out <<
"| SelectedFeatures [" << _selFeatures.ToString () <<
"]" << endl;
1101 _out <<
"| TotalRecords [" << _data.QueueSize () <<
"]" << endl;
1102 _out <<
"| NumAttributes [" << _selFeatures.NumOfFeatures () <<
"]" << endl;
1103 _out <<
"|" << endl;
1104 _out <<
"| Class Statistics" << endl;
1105 _out <<
"| Name" <<
"\t" <<
"Count" << endl;
1107 for (idx = stats->begin (); idx != stats->end (); idx++)
1110 _out <<
"| " << stat->Name () <<
"\t" << stat->Count () << endl;
1112 _out <<
"|" << endl;
1117 kkint32 origPrecision = (kkint32)_out.precision ();
1120 FeatureVectorPtr example = NULL;
1123 for (idx = 0; (idx < _data.QueueSize ()) && (!_cancelFlag); idx++)
1125 example = _data.IdxToPtr (idx);
1151 _out << example->ExampleFileName () <<
",";
1154 _numExamplesWritten++;
1157 _out.precision (origPrecision);
KKStr(kkint32 size)
Creates a KKStr object that pre-allocates space for 'size' characters.
void ExampleFileName(const KKStr &_exampleFileName)
Name of source of feature vector, ex: file name of image that the feature vector was computed from...
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
VectorKKStr Split(const char *delStr="\n\r\t, ") const
Breaks up the contents of the string into tokens where the characters in 'delStr' act as separators...
void AddANominalValue(const KKStr &nominalValue, bool &alreadyExists)
Adds an allowable Nominal value to the Nominal or Symbolic field that this attribute represents...
Provides a detailed description of the attributes of a dataset.
static FileDescPtr GetExistingFileDesc(FileDescPtr fileDesc)
Returns a pointer to an existing instance of 'fileDesc' if it exists, otherwise will use one being pa...
bool EqualIgnoreCase(const char *s2) const
const KKStr & GetNominalValue(kkint32 code) const
Returns the nominal value for the given ordinal value.
kkuint32 NumOfFields() const
void AddAAttribute(const KKB::KKStr &_name, KKMLL::AttributeType _type, bool &alreadyExists)
float FeatureData(kkint32 featureNum) const
KKStr & TrimRight(const char *whiteSpaceChars="\n\r\t ")
FeatureVector(kkint32 _numOfFeatures)
virtual FeatureVectorListPtr LoadFeatureFile(const KKStr &_fileName, MLClassList &_mlClasses, kkint32 _maxCount, VolConstBool &_cancelFlag, bool &_successful, bool &_changesMade, RunLog &_log)
Loads the contents of a feature data file and returns an ImageFeaturesList container object...
virtual FileDescPtr GetFileDesc(const KKStr &_fileName, istream &_in, MLClassListPtr _classList, kkint32 &_estSize, KKStr &_errorMessage, RunLog &log)
MLClassListPtr ExtractListOfClasses() const
kkint32 GetNominalCode(const KKStr &nominalValue) const
Supports the reading and writing of feature data from C45 formatted feature files. ...
KKStr & operator=(const char *src)
MLClassPtr LookUpUnKnownMLClass()
const FileDescPtr FileDesc() const
FeatureNumList const FeatureNumListConst
bool operator==(const char *rtStr) const
ClassStatistic * ClassStatisticPtr
virtual void SaveFile(FeatureVectorList &_data, const KKStr &_fileName, FeatureNumListConst &_selFeatures, ostream &_out, kkuint32 &_numExamplesWritten, VolConstBool &_cancelFlag, bool &_successful, KKStr &_errorMessage, RunLog &_log)
KKStr operator+(const char *right) const
void AddClasses(const MLClassList &classesToAdd)
void GetLine(std::istream &_in, KKStr &_line, bool &_eof)
unsigned __int32 kkuint32
void AddFeatureData(kkint32 _featureNum, float _featureData)
MLClassPtr LookUpMLClassByName(const KKStr &className)
char operator[](kkuint32 i) const
FeatureVectorList(FileDescPtr _fileDesc, bool _owner)
Will create a new empty list of FeatureVector's.
KKStr & operator=(KKStr &&src)
Container class for FeatureVector derived objects.
kkuint32 Len() const
Returns the number of characters in the string.
kkuint16 operator[](kkint32 idx) const
Returns back the selected feature.
KKStr(const KKStr &str)
Copy Constructor.
kkint32 NumOfFeatures() const
void TrimLeft(const char *whiteSpaceChars="\n\r\t ")
Base class for all FeatureFileIO classes.
KKStr SubStrPart(kkint32 firstChar, kkint32 lastChar) const
Returns a substring consisting of all characters starting at index 'firstChar' and ending at 'lastInd...
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
ClassStatisticListPtr GetClassStatistics() const
Returns the number of FeatureVectors per class.
void Upper()
Converts all characters in string to their Upper case equivalents via 'toupper'.
kkint32 LocateLastOccurrence(char ch) const
Returns index of last occurrence of 'ch' otherwise -1.
void AddANominalValue(const KKStr &nominalValue, bool &alreadyExist, RunLog &log)
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
KKStr StrFormatInt(kkint32 val, const char *mask)
ClassStatisticList * ClassStatisticListPtr
AttributeType Type() const
std::ostream &__cdecl operator<<(std::ostream &os, const KKStr &str)
KKStr operator+(const KKStr &right) const
const KKMLL::AttributePtr * CreateAAttributeTable() const
bool osFileExists(const KKStr &_fileName)
KKStr & operator=(const KKStr &src)
Used for logging messages.
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
MLClassList * MLClassListPtr
virtual MLClassPtr GetMLClassPtr(const KKStr &_name)
Returns a pointer to the instance with '_name'; if none exists, creates one and adds it to the list.
const KKStr & Name() const
const KKStr & ClassName() const
Name of class that this example is assigned to.
Maintains a list of MLClass instances.
kkint32 Cardinality() const
Returns back the cardinality of the attribute; the number of possible values it can take...
Represents a Feature Vector of a single example, labeled or unlabeled.
virtual FeatureVectorListPtr LoadFile(const KKStr &_fileName, const FileDescPtr _fileDesc, MLClassList &_classes, istream &_in, kkint32 _maxCount, VolConstBool &_cancelFlag, bool &_changesMade, KKStr &_errorMessage, RunLog &_log)
KKStr SubStrPart(kkint32 firstChar) const
Returns a substring consisting of all characters starting at index 'firstChar' until the end of the s...
void MissingData(bool _missingData)
True indicates that not all the feature data was present when this example was loaded from a data fil...
FeatureFileIO(const KKStr &_driverName, bool _canRead, bool _canWrite)
KKStr osGetRootName(const KKStr &fullFileName)
volatile const bool VolConstBool