KSquare Utilities
FeatureFileIOColumn.cpp
Go to the documentation of this file.
1 #include "FirstIncludes.h"
2 #include <stdio.h>
3 #include <math.h>
4 #include <ctype.h>
5 #include <time.h>
6 #include <string>
7 #include <iostream>
8 #include <fstream>
9 #include <vector>
10 #include "MemoryDebug.h"
11 using namespace std;
12 
13 
14 #include "KKBaseTypes.h"
15 #include "DateTime.h"
16 #include "OSservices.h"
17 #include "RunLog.h"
18 #include "KKStr.h"
19 using namespace KKB;
20 
21 
23 #include "FileDesc.h"
24 #include "MLClass.h"
25 using namespace KKMLL;
26 
27 
28 
30 
31 
32 
34  FeatureFileIO ("Column", true, true)
35 {
36 }
37 
38 
39 
41 {
42 }
43 
44 
45 
47  istream& _in,
48  MLClassListPtr _classes,
49  kkint32& _estSize,
50  KKStr& _errorMessage,
51  RunLog& _log
52  )
53 {
54  _log.Level (20) << "FeatureFileIOColumn::GetFileDesc FileName[" << _fileName << "]" << endl;
55 
56  bool alreadyExists;
57  bool eof = false;
58  bool eol = false;
59 
60  kkint32 rowNum = 0;
61 
62  _estSize = 0;
63 
64  {
65  // Read first Row to get number of examples.
66  // First row specifies the Class for the corresponding Column.
67  KKStr field;
68  GetToken (_in, " ", field, eof, eol); rowNum++;
69  while ((!eol) && (!eof))
70  {
71  MLClassPtr mlClass = _classes->GetMLClassPtr (field);
72  _estSize++;
73  GetToken (_in, " ", field, eof, eol);
74  }
75  }
76 
77  FileDescPtr fileDesc = new FileDesc ();
78 
79 
80  kkint32 numOfFeatures = 0;
81  while (!eof)
82  {
83  // Read rest of file to get number of features. There will be on row per attribute.
84  KKStr field;
85  GetToken (_in, " ", field, eof, eol);
86  if (!eof)
87  {
88  rowNum++;
89  kkint32 numOfExamples = 0;
90 
91  while ((!eol) && (!eof))
92  {
93  numOfExamples++;
94  GetToken (_in, " ", field, eof, eol);
95  }
96 
97  if (numOfExamples != _estSize)
98  {
99  // We have a bad row of data
100  _errorMessage << "Row[" << rowNum << "] missing columns; Expected[" << _estSize << "] found[" << numOfExamples << "]";
101  _log.Level (-1) << endl << endl << endl
102  << "ReadColumnFile *** Invalid Row in File[" << _fileName << "]" << endl
103  << " Row [" << rowNum << "]" << endl
104  << " Num Examples[" << numOfExamples << "]" << endl
105  << " Not Match Header Row[" << _estSize << "]" << endl
106  << endl;
107 
108  // Can not delete an instance of a 'FileDesc' class once it has been created.
109  // delete fileDesc;
110 
111  return NULL;
112  }
113 
114 
115  fileDesc->AddAAttribute ("Field_" + StrFormatInt (numOfFeatures, "ZZZZ0"), AttributeType::Numeric, alreadyExists);
116  numOfFeatures++;
117  }
118  }
119 
120  return fileDesc;
121 } /* GetFileDesc */
122 
123 
124 
125 
126 
127 FeatureVectorListPtr FeatureFileIOColumn::LoadFile (const KKStr& _fileName,
128  const FileDescPtr _fileDesc,
129  MLClassList& _classes,
130  istream& _in,
131  kkint32 _maxCount, // Maximum # images to load.
132  VolConstBool& _cancelFlag,
133  bool& _changesMade,
134  KKStr& _errorMessage,
135  RunLog& _log
136  )
137 {
138  _log.Level (20) << "FeatureFileIOColumn::LoadFile FileName[" << _fileName << "]" << endl;
139 
140  KKStr rootName = osGetRootName (_fileName);
141 
142  bool eof = false;
143  bool eol = false;
144  MLClassPtr exampleClass = NULL;
145  KKStr field;
146  kkint32 lineCount = 0;
147  kkint32 numOfFeatures = _fileDesc->NumOfFields ();
148 
149  // Each row will represent a specific feature
150  // Will initially create empty examples, then populate
151  // each feature value as row's are read in.
152 
153  FeatureVectorListPtr examples = new FeatureVectorList (_fileDesc, true);
154 
155  {
156  // Read first row to get count of number of examples,
157 
158  lineCount = 0;
159 
160  GetToken (_in, " ", field, eof, eol);
161  while ((!eof) && (!eol))
162  {
163  exampleClass = _classes.GetMLClassPtr (field);
164  FeatureVectorPtr example = new FeatureVector (numOfFeatures);
165  example->MLClass (exampleClass);
166  example->ExampleFileName (rootName + "_" + StrFormatInt (lineCount, "00000"));
167  examples->PushOnBack (example);
168  lineCount++;
169  GetToken (_in, " ", field, eof, eol);
170  }
171  }
172 
173  kkint32 featureNum = 0;
174 
175  while (!eof)
176  {
177  // Process Next Row
178  GetToken (_in, " ", field, eof, eol);
179  if (eof || eol)
180  continue;
181 
182  if (featureNum >= numOfFeatures)
183  {
184  // We have more rows than there are supposed to be features. Something
185  // has gone VERY WRONG. Will abort load.
186  _errorMessage << "Line more rows that there are supposed to be features.";
187  _log.Level (-1) << endl << endl << endl
188  << "FeatureFileIOColumn::LoadFile ***ERROR***" << endl
189  << endl
190  << " FileName [" << _fileName << "]" << endl
191  << " Line Num [" << featureNum << "]" << endl
192  << " Error [" << "Number of rows exceed number of features." << "]" << endl
193  << endl;
194  delete examples;
195  return NULL;
196  }
197 
198 
199  kkint32 lineNum = 0;
200  while ((!eof) && (!eol))
201  {
202  if (lineNum >= lineCount)
203  {
204  // We have more columns in this line than we have examples.
205  // This is not a very good situation. We will need to flag
206  // as error and abort loading.
207  _errorMessage << "Line[" << featureNum << "] More columns than there are supposed to be examples.";
208  _log.Level (-1) << endl << endl << endl
209  << "FeatureFileIOColumn::LoadFile ***ERROR***" << endl
210  << endl
211  << " FileName [" << _fileName << "]" << endl
212  << " Line Num [" << lineNum << "]" << endl
213  << " Error [" << "Number of columns exceeds number of examples defined in first row." << "]" << endl
214  << endl;
215  delete examples;
216  return NULL;
217  }
218 
219  FeatureVectorPtr example = examples->IdxToPtr (lineNum);
220  example->AddFeatureData (featureNum, float (atof (field.Str ())));
221 
222  lineNum++;
223  GetToken (_in, " ", field, eof, eol);
224  }
225 
226  featureNum++;
227  }
228 
229 
230  return examples;
231 } /* LoadFile */
232 
233 
234 
235 
236 
238  const KKStr& _fileName,
239  FeatureNumListConst& _selFeatures,
240  ostream& _out,
241  kkuint32& _numExamplesWritten,
242  VolConstBool& _cancelFlag,
243  bool& _successful,
244  KKStr& _errorMessage,
245  RunLog& _log
246  )
247 {
248  _log.Level (20) << "FeatureFileIOColumn::SaveFile FileName[" << _fileName << "]." << endl;
249  kkint32 p = (kkint32)_out.precision ();
250  _out.precision (9);
251 
252  FileDescPtr fileDesc = _data.FileDesc ();
253 
254  {
255  // Lets first write out the ClassName row
256 
257  kkint32 lineNum = 0;
258 
259  while (lineNum < _data.QueueSize ())
260  {
261  if (lineNum > 0)
262  _out << " ";
263  _out << _data[lineNum].MLClassName ();
264  }
265  _out << endl;
266  }
267 
268  kkuint16 idx = 0;
269  for (idx = 0; idx < _selFeatures.NumSelFeatures (); ++idx)
270  {
271  kkuint16 featureNum = _selFeatures[idx];
272  {
273  FeatureVectorList::const_iterator idx2 = _data.begin ();
274  ++idx2;
275  _out << (*idx2)->FeatureData (featureNum);
276  while (idx2 != _data.end ())
277  {
278  _out << "\t" << (*idx2)->FeatureData (featureNum);
279  ++idx2;
280  }
281  }
282  _out << endl;
283 
284  _numExamplesWritten = (kkuint32)((double)(_data.QueueSize ()) * ((double)featureNum / (double)(fileDesc->NumOfFields ())));
285  }
286 
287  _out.precision (p);
288 
289  _successful = true;
290  return;
291 } /* SaveFile */
void ExampleFileName(const KKStr &_exampleFileName)
Name of source of feature vector, ex: file name of image that the feature vector was computed from...
Definition: FeatureVector.h:75
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
MLClass * MLClassPtr
Definition: MLClass.h:46
void GetToken(std::istream &_in, const char *_delimiters, KKStr &_token, bool &_eof, bool &_eol)
Will retrieve the next token from the input stream.
virtual void SaveFile(FeatureVectorList &_data, const KKStr &_fileName, FeatureNumListConst &_selFeatures, ostream &_out, kkuint32 &_numExamplesWritten, VolConstBool &_cancelFlag, bool &_successful, KKStr &_errorMessage, RunLog &_log)
Provides a detailed description of the attributes of a dataset.
Definition: FileDesc.h:72
__int32 kkint32
Definition: KKBaseTypes.h:88
kkuint32 NumOfFields() const
Definition: FileDesc.h:197
void AddAAttribute(const KKB::KKStr &_name, KKMLL::AttributeType _type, bool &alreadyExists)
Definition: FileDesc.cpp:169
FeatureVector(kkint32 _numOfFeatures)
unsigned __int16 kkuint16
16 bit unsigned integer.
Definition: KKBaseTypes.h:86
const FileDescPtr FileDesc() const
FeatureNumList const FeatureNumListConst
KKStr operator+(const char *right) const
Definition: KKStr.cpp:3986
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
FeatureVectorList(FileDescPtr _fileDesc, bool _owner)
Will create a new empty list of FeatureVector's.
Container class for FeatureVector derived objects.
KKTHread * KKTHreadPtr
kkuint16 operator[](kkint32 idx) const
Returns back the selected feature.
KKStr operator+(const char *left, const KKStr &right)
Definition: KKStr.cpp:3976
Base class for all FeatureFileIO classes.
Definition: FeatureFileIO.h:48
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
virtual FeatureVectorListPtr LoadFile(const KKStr &_fileName, const FileDescPtr _fileDesc, MLClassList &_classes, istream &_in, kkint32 _maxCount, VolConstBool &_cancelFlag, bool &_changesMade, KKStr &_errorMessage, RunLog &_log)
AttributeType
Definition: Attribute.h:36
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
Definition: FeatureVector.h:74
Supports a simple Feature File format where each column represents an example and each row a feature v...
FileDesc * FileDescPtr
KKStr StrFormatInt(kkint32 val, const char *mask)
Definition: KKStr.cpp:5004
KKStr operator+(const KKStr &right) const
Definition: KKStr.cpp:3998
kkint32 NumSelFeatures() const
virtual FileDescPtr GetFileDesc(const KKStr &_fileName, istream &_in, MLClassListPtr _classList, kkint32 &_estSize, KKStr &_errorMessage, RunLog &_log)
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
MLClassList * MLClassListPtr
Definition: MLClass.h:49
virtual MLClassPtr GetMLClassPtr(const KKStr &_name)
return pointer to instance with '_name'; if none exists, create one and add to list.
Definition: MLClass.cpp:861
Maintains a list of MLClass instances.
Definition: MLClass.h:233
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
FeatureFileIO(const KKStr &_driverName, bool _canRead, bool _canWrite)
KKStr osGetRootName(const KKStr &fullFileName)
volatile const bool VolConstBool
Definition: KKBaseTypes.h:163