KSquare Utilities
FeatureFileIOUCI.cpp
Go to the documentation of this file.
1 #include "FirstIncludes.h"
2 
3 #include <stdio.h>
4 #include <math.h>
5 #include <ctype.h>
6 #include <time.h>
7 
8 #include <string>
9 #include <iostream>
10 #include <fstream>
11 #include <vector>
12 
13 #include "MemoryDebug.h"
14 
15 using namespace std;
16 
17 #include "KKBaseTypes.h"
18 #include "DateTime.h"
19 #include "OSservices.h"
20 #include "RunLog.h"
21 #include "KKStr.h"
22 using namespace KKB;
23 
24 
25 #include "FeatureFileIOUCI.h"
26 #include "FileDesc.h"
27 #include "MLClass.h"
28 using namespace KKMLL;
29 
30 
31 
33 
34 
36  FeatureFileIO ("UCI", true, true)
37 {
38 }
39 
40 
41 
43 {
44 }
45 
46 
47 
49  istream& _in,
50  MLClassListPtr _classes,
51  kkint32& _estSize,
52  KKStr& _errorMessage,
53  RunLog& _log
54  )
55 {
56  _log.Level (20) << "FeatureFileIOUCI::GetFileDesc FileName[" << _fileName << "]." << endl;
57 
58 
59  // We are just going to read the first few lines to determine number of fields, etc
60 
61  kkint32 numOfFields = 0;
62  kkint32 numFieldsThisLine = 0;
63 
64 
65  KKStr ln;
66  bool eof;
67 
68  _estSize = 0;
69 
70  GetLine (_in, ln, eof);
71  while (!eof)
72  {
73  ln.TrimLeft ();
74  ln.TrimRight ();
75  if ((ln.SubStrPart (0, 1) != "//") && (!ln.Empty ()))
76  {
77  numFieldsThisLine = 0;
78 
79  KKStr className = ln.ExtractToken (" ,\n\r\t");
80  while (!ln.Empty ())
81  {
82  numFieldsThisLine++;
83  className = ln.ExtractToken (" ,\n\r\t");
84  }
85 
86  if (className.Empty ())
87  className = "UnKnown";
88 
89  // make sure that 'className' exists in '_classes'.
90  _classes->GetMLClassPtr (className);
91 
92  numOfFields = Max (numOfFields, numFieldsThisLine);
93  _estSize++;
94  }
95  GetLine (_in, ln, eof);
96  }
97 
98  bool alreadyExists = false;
99  kkint32 fieldNum = 0;
100 
101  FileDescPtr fileDesc = new FileDesc ();
102 
103  for (fieldNum = 0; fieldNum < numOfFields; fieldNum++)
104  {
105  fileDesc->AddAAttribute ("Field_" + StrFormatInt (fieldNum, "ZZZZ0"), AttributeType::Numeric, alreadyExists);
106  }
107 
108  return fileDesc;
109 } /* GetFileDesc */
110 
111 
112 
113 
114 
115 
116 FeatureVectorListPtr FeatureFileIOUCI::LoadFile (const KKStr& _fileName,
117  const FileDescPtr _fileDesc,
118  MLClassList& _classes,
119  istream& _in,
120  kkint32 _maxCount, // Maximum # images to load.
121  VolConstBool& _cancelFlag,
122  bool& _changesMade,
123  KKStr& _errorMessage,
124  RunLog& _log
125  )
126 {
127  _log.Level (20) << "FeatureFileIOUCI::LoadFile FileName[" << _fileName << "]" << endl;
128 
129 
130  KKStr rootName = osGetRootName (_fileName);
131 
132 
133  kkint32 numOfFeatures = _fileDesc->NumOfFields ();
134  kkint32 lineCount = 0;
135 
136  KKStr ln (256);
137  bool eof;
138 
139  FeatureVectorListPtr examples = new FeatureVectorList (_fileDesc, true);
140 
141  GetLine (_in, ln, eof);
142  while (!eof)
143  {
144  ln.TrimLeft ();
145  ln.TrimRight ();
146 
147  if ((ln.SubStrPart (0, 1) != "//") && (!ln.Empty ()))
148  {
149 
150  kkint32 featureNum = 0;
151  FeatureVectorPtr example = new FeatureVector (numOfFeatures);
152 
153  for (featureNum = 0; featureNum < numOfFeatures; featureNum++)
154  {
155  KKStr featureStr = ln.ExtractToken (" ,\n\r\t");
156  example->AddFeatureData (featureNum, (float)atof (featureStr.Str ()));
157  }
158 
159  KKStr className = ln.ExtractToken (" ,\n\r\t");
160  MLClassPtr mlClass = _classes.GetMLClassPtr (className);
161  example->MLClass (mlClass);
162 
163  KKStr imageFileName = rootName + "_" + StrFormatInt (lineCount, "ZZZZZZ0");
164  example->ExampleFileName (imageFileName);
165 
166  examples->PushOnBack (example);
167 
168  lineCount++;
169  }
170  GetLine (_in, ln, eof);
171  }
172 
173  return examples;
174 } /* LoadFile */
175 
176 
177 
178 
179 
180 
182  const KKStr& _fileName,
183  FeatureNumListConst& _selFeatures,
184  ostream& _out,
185  kkuint32& _numExamplesWritten,
186  VolConstBool& _cancelFlag,
187  bool& _successful,
188  KKStr& _errorMessage,
189  RunLog& _log
190  )
191 
192 {
193  FeatureVectorPtr example = NULL;
194 
195  _numExamplesWritten = 0;
196 
197  kkint32 idx;
198  kkint32 x;
199 
200  FileDescPtr fileDesc = _data.FileDesc ();
201 
202  _out << "ExampleFileName";
203  for (x = 0; x < _selFeatures.NumOfFeatures (); x++)
204  {
205  kkint32 featureNum = _selFeatures[x];
206  _out << "," << fileDesc->FieldName (featureNum);
207  }
208  _out << "," << "ClassLabel" << endl;
209 
210  for (idx = 0; idx < _data.QueueSize (); idx++)
211  {
212  example = _data.IdxToPtr (idx);
213 
214  _out << ("Train_" + KKB::osGetRootName (example->ExampleFileName ())) << ",";
215 
216  for (x = 0; x < _selFeatures.NumOfFeatures (); x++)
217  {
218  kkint32 featureNum = _selFeatures[x];
219  _out << example->FeatureData (featureNum) << ",";
220  }
221  _out << example->ClassName ();
222  _out << endl;
223  _numExamplesWritten++;
224  }
225 
226  _successful = true;
227  return;
228 } /* WriteUCIFile */
KKStr(kkint32 size)
Creates a KKStr object that pre-allocates space for 'size' characters.
Definition: KKStr.cpp:655
void ExampleFileName(const KKStr &_exampleFileName)
Name of source of feature vector, ex: file name of image that the feature vector was computed from...
Definition: FeatureVector.h:75
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
MLClass * MLClassPtr
Definition: MLClass.h:46
Provides a detailed description of the attributes of a dataset.
Definition: FileDesc.h:72
__int32 kkint32
Definition: KKBaseTypes.h:88
kkuint32 NumOfFields() const
Definition: FileDesc.h:197
void AddAAttribute(const KKB::KKStr &_name, KKMLL::AttributeType _type, bool &alreadyExists)
Definition: FileDesc.cpp:169
KKStr & TrimRight(const char *whiteSpaceChars="\n\r\t ")
Definition: KKStr.cpp:1695
FeatureVector(kkint32 _numOfFeatures)
KKStr ExtractToken(const char *delStr="\n\t\r ")
Definition: KKStr.cpp:2969
virtual FileDescPtr GetFileDesc(const KKStr &_fileName, istream &_in, MLClassListPtr _classList, kkint32 &_estSize, KKStr &_errorMessage, RunLog &_log)
KKStr & operator=(const char *src)
Definition: KKStr.cpp:1442
const FileDescPtr FileDesc() const
FeatureNumList const FeatureNumListConst
Supports the reading and writing of Feature data from a file format commonly used by many datasets i...
KKStr operator+(const char *right) const
Definition: KKStr.cpp:3986
void GetLine(std::istream &_in, KKStr &_line, bool &_eof)
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
FeatureVectorList(FileDescPtr _fileDesc, bool _owner)
Will create a new empty list of FeatureVector objects.
KKStr & operator=(KKStr &&src)
Definition: KKStr.cpp:1369
bool operator!=(const char *rtStr) const
Definition: KKStr.cpp:1596
Container class for FeatureVector derived objects.
KKTHread * KKTHreadPtr
kkuint16 operator[](kkint32 idx) const
Returns back the selected feature.
KKStr operator+(const char *left, const KKStr &right)
Definition: KKStr.cpp:3976
kkint32 NumOfFeatures() const
void TrimLeft(const char *whiteSpaceChars="\n\r\t ")
Definition: KKStr.cpp:1745
Base class for all FeatureFileIO classes.
Definition: FeatureFileIO.h:48
bool Empty() const
Definition: KKStr.h:241
KKStr SubStrPart(kkint32 firstChar, kkint32 lastChar) const
Returns a substring consisting of all characters starting at index 'firstChar' and ending at 'lastInd...
Definition: KKStr.cpp:2802
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
AttributeType
Definition: Attribute.h:36
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
Definition: FeatureVector.h:74
FileDesc * FileDescPtr
KKStr StrFormatInt(kkint32 val, const char *mask)
Definition: KKStr.cpp:5004
std::ostream &__cdecl operator<<(std::ostream &os, const KKStr &str)
KKStr operator+(const KKStr &right) const
Definition: KKStr.cpp:3998
virtual FeatureVectorListPtr LoadFile(const KKStr &_fileName, const FileDescPtr _fileDesc, MLClassList &_classes, istream &_in, kkint32 _maxCount, VolConstBool &_cancelFlag, bool &_changesMade, KKStr &_errorMessage, RunLog &_log)
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
MLClassList * MLClassListPtr
Definition: MLClass.h:49
virtual void SaveFile(FeatureVectorList &_data, const KKStr &_fileName, FeatureNumListConst &_selFeatures, ostream &_out, kkuint32 &_numExamplesWritten, VolConstBool &_cancelFlag, bool &_successful, KKStr &_errorMessage, RunLog &_log)
virtual MLClassPtr GetMLClassPtr(const KKStr &_name)
Returns a pointer to the instance with '_name'; if none exists, creates one and adds it to the list.
Definition: MLClass.cpp:861
const KKStr & ClassName() const
Name of class that this example is assigned to.
Maintains a list of MLClass instances.
Definition: MLClass.h:233
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
FeatureFileIO(const KKStr &_driverName, bool _canRead, bool _canWrite)
KKStr osGetRootName(const KKStr &fullFileName)
volatile const bool VolConstBool
Definition: KKBaseTypes.h:163