KSquare Utilities
FeatureFileIODstWeb.cpp
Go to the documentation of this file.
1 #include "FirstIncludes.h"
2 #include <stdio.h>
3 #include <math.h>
4 #include <ctype.h>
5 #include <time.h>
6 #include <string>
7 #include <iostream>
8 #include <fstream>
9 #include <vector>
10 #include "MemoryDebug.h"
11 using namespace std;
12 
13 
14 #include "KKBaseTypes.h"
15 #include "DateTime.h"
16 #include "OSservices.h"
17 #include "RunLog.h"
18 #include "KKStr.h"
19 using namespace KKB;
20 
22 #include "FileDesc.h"
23 #include "MLClass.h"
24 using namespace KKMLL;
25 
26 
27 
29 
30 
31 // Used for DstWeb data set
33 {
34 public:
36  {
37  KKStr aStr = desc.ExtractToken (",\n\r\t");
38 
39  code = desc.ExtractToken (",\n\r\t");
42 
43  KKStr oneStr = desc.ExtractToken (",\n\r\t");
44 
45  title = desc.ExtractToken (",\n\r\t");
46  if (title.FirstChar () == '"')
48  if (title.LastChar () == '"')
50 
51  root = desc.ExtractToken ();
52  if (root.FirstChar () == '"')
54  if (root.LastChar () == '"')
56  }
57 
61 
62 }; /* AttrDescLine */
63 
64 
65 
66 
68 {
69 public:
71  {}
72 
73  bool operator() (AttrDescLinePtr p1,
74  AttrDescLinePtr p2
75  )
76  {
77  return ((p1->code) < (p2->code));
78  }
79 };
80 
81 
82 
83 
85  FeatureFileIO ("DST", true, false) // only read is implemented.
86 {
87 }
88 
89 
90 
92 {
93 }
94 
95 
96 
97 
98 
100  istream& _in,
101  MLClassListPtr _classes,
102  kkint32& _estSize,
103  KKStr& _errorMessage,
104  RunLog& _log
105  )
106 {
107  KKStr line (1024);
108  bool eof;
109  KKStr classNameAttribute;
110 
111  {
112  // Make sure that the True and False _classes exist.
113  MLClassPtr trueClass = _classes->GetMLClassPtr ("True");
114  MLClassPtr falseClass = _classes->GetMLClassPtr ("True");
115  trueClass = NULL;
116  falseClass = NULL;
117  }
118 
119  // We must first determine which Attribute line represents class.
120  // this is done with a line that is added at beginning of file
121  // added by user with text editor. Must have the format of
122  // class = xxxx where xxxx is the attribute.
123 
124  GetLine (_in, line, eof);
125  if (eof)
126  {
127  _log.Level (-1) << endl << endl
128  << "FeatureFileIODstWeb::GetFileDesc ***ERROR*** File is empty." << endl
129  << endl;
130  return NULL;
131  }
132 
133  {
134  line.TrimLeft ();
135  line.TrimRight ();
136 
137  kkint32 equalLoc = line.LocateCharacter ('=');
138  if (equalLoc < 0)
139  {
140  _log.Level (-1) << endl << endl
141  << "FeatureFileIODstWeb::GetFileDesc *** ERROR *** First Line is not Class Identifier." << endl
142  << endl;
143  return NULL;
144  }
145 
146  KKStr leftSide = line.SubStrPart (0, equalLoc - 1);
147  KKStr rightSide = line.SubStrPart (equalLoc + 1);
148 
149  leftSide.Upper ();
150  if (leftSide != "CLASS")
151  {
152  _log.Level (-1) << endl << endl
153  << "FeatureFileIODstWeb::GetFileDesc *** ERROR *** First Line is not Class Identifier." << endl
154  << endl;
155  return NULL;
156  }
157 
158  rightSide.TrimLeft ();
159  rightSide.TrimRight ();
160 
161  classNameAttribute = rightSide;
162  }
163 
164 
165  FileDescPtr fileDesc = new FileDesc ();
166 
167 
168  vector<AttrDescLinePtr> attributes;
169 
170 
171  GetLine (_in, line, eof);
172  while (!eof)
173  {
174  line.TrimLeft ();
175  line.TrimRight ();
176 
177  if (line.FirstChar () != 'A')
178  continue;
179 
180 
181  // We have an attribute Line
182  AttrDescLinePtr a = new AttrDescLine (line);
183  if (a->code == classNameAttribute)
184  {
185  delete a;
186  }
187  else
188  {
189  attributes.push_back (a);
190  }
191  GetLine (_in, line, eof);
192  }
193 
195  sort (attributes.begin (), attributes.end (), c);
196 
197  kkuint32 x;
198  for (x = 0; x < attributes.size (); x++)
199  {
200  bool alreadyExists = false;
201  fileDesc->AddAAttribute (attributes[x]->code, AttributeType::Nominal, alreadyExists);
202  if (alreadyExists)
203  {
204  _log.Level (-1) << endl
205  << endl
206  << "FeatureFileIODstWeb::GetFileDesc *** ERROR *** Attribute Code Occurs more than once" << endl
207  << " code [" << attributes[x]->code << "]." << endl
208  << endl;
209  // Can not delete an instance of a 'FileDesc' class once it has been created.
210  // delete fileDesc;
211  return NULL;
212  }
213 
214  fileDesc->AddANominalValue ("F", alreadyExists, _log);
215  fileDesc->AddANominalValue ("T", alreadyExists, _log);
216  }
217 
218  for (x = 0; x < attributes.size (); x++)
219  delete attributes[x];
220 
221  return fileDesc;
222 } /* ReadDstWebFile */
223 
224 
225 
226 
227 FeatureVectorListPtr FeatureFileIODstWeb::LoadFile (const KKStr& _fileName,
228  FileDescPtr _fileDesc,
229  MLClassList& _classes,
230  istream& _in,
231  kkint32 _maxCount, /**< Maximum # images to load. */
232  VolConstBool& _cancelFlag,
233  bool& _changesMade,
234  KKStr& _errorMessage,
235  RunLog& _log
236  )
237 {
238  _log.Level (20) << "FeatureFileIODstWeb::LoadFile FileName[" << _fileName << "]" << endl;
239 
240  MLClassPtr trueClass = _classes.GetMLClassPtr ("TRUE");
241  MLClassPtr falseClass = _classes.GetMLClassPtr ("FALSE");
242 
243  kkint32 lineCount = 0;
244 
245  kkint32 numOfFeatures = _fileDesc->NumOfFields ();
246 
247  KKStr fileRootName = osGetRootName (_fileName);
248 
249  const
250  AttributePtr* attributeTable = _fileDesc->CreateAAttributeTable (); // Caller will be responsible for deleting
251 
252  KKStr line;
253  bool eof = false;
254 
255  // Skip all leading lines, until we reach a C line.
256  GetLine (_in, line, eof);
257  while (!eof)
258  {
259  if (line.FirstChar () == 'C')
260  break;
261  GetLine (_in, line, eof);
262  }
263 
264  if (eof)
265  {
266  delete attributeTable;
267  _errorMessage = "no 'C' line detected.";
268  return NULL;
269  }
270 
271  FeatureVectorListPtr examples = new FeatureVectorList (_fileDesc, true);
272 
273  KKStr classNameAttributeUpper (_fileDesc->ClassNameAttribute ());
274  classNameAttributeUpper.Upper ();
275 
276  while (!eof)
277  {
278  // We have a new user
279 
280  KKStr cStr = line.ExtractToken (",\n\r\t");
281  KKStr idStr = line.ExtractToken (",\n\r\t");
282  if (idStr.FirstChar () == '"')
283  idStr = idStr.SubStrPart (1);
284  if (idStr.LastChar () == '"')
285  idStr.ChopLastChar ();
286 
287  FeatureVectorPtr example = new FeatureVector (numOfFeatures);
288  example->MLClass (falseClass);
289  example->ExampleFileName (idStr);
290  {
291  // Set all fields to False
292  kkint32 x;
293  for (x = 0; x < numOfFeatures; x++)
294  {
295  kkint32 code = attributeTable[x]->GetNominalCode ("F");
296  example->AddFeatureData (x, (float)code);
297  }
298  }
299 
300  GetLine (_in, line, eof); lineCount++;
301 
302  while ((!eof) && (line.FirstChar () == 'V'))
303  {
304  KKStr vStr = line.ExtractToken (",\n\r\t");
305  KKStr idStr = line.ExtractToken (",\n\r\t");
306  idStr.Upper ();
307  if (idStr == classNameAttributeUpper)
308  {
309  example->MLClass (trueClass);
310  }
311  else
312  {
313  kkint32 fieldNum = _fileDesc->GetFieldNumFromAttributeName (idStr);
314  if (fieldNum < 0)
315  {
316  _errorMessage << "Invalid Attribute[" << idStr + "] Line[" << lineCount << "]";
317  _log.Level (-1) << endl
318  << "FeatureFileIODstWeb::LoadFile ***ERROR***" << endl
319  << endl
320  << " " << _errorMessage << endl
321  << endl;
322  delete examples;
323  delete example;
324  return NULL;
325  }
326 
327  kkint32 code = attributeTable[fieldNum]->GetNominalCode ("T");
328  example->AddFeatureData (fieldNum, (float)code);
329  }
330 
331  GetLine (_in, line, eof); lineCount++;
332  }
333 
334  examples->PushOnBack (example);
335  }
336 
337  delete [] attributeTable;
338 
339  return examples;
340 } /* LoadFile */
341 
342 
343 
344 
345 
347  const KKStr& _fileName,
348  FeatureNumListConst& _selFeatures,
349  ostream& _out,
350  kkuint32& _numExamplesWritten,
351  VolConstBool& _cancelFlag,
352  bool& _successful,
353  KKStr& _errorMessage,
354  RunLog& _log
355  )
356 {
357  _log.Level (-1) << endl << endl
358  << "FeatureFileIODstWeb::SaveFile FileName[" << _fileName << "] ***ERROR***." << endl
359  << endl
360  << " SaveFile not implemented." << endl
361  << endl;
362 
363  _errorMessage = "FeatureFileIODstWeb::SaveFile Not Implemented.";
364  _successful = false;
365  _numExamplesWritten = 0;
366  return;
367 } /* SaveFile */
KKStr(kkint32 size)
Creates a KKStr object that pre-allocates space for 'size' characters.
Definition: KKStr.cpp:655
void ExampleFileName(const KKStr &_exampleFileName)
Name of source of feature vector, ex: file name of image that the feature vector was computed from...
Definition: FeatureVector.h:75
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
MLClass * MLClassPtr
Definition: MLClass.h:46
Provides a detailed description of the attributes of a dataset.
Definition: FileDesc.h:72
__int32 kkint32
Definition: KKBaseTypes.h:88
kkuint32 NumOfFields() const
Definition: FileDesc.h:197
virtual FileDescPtr GetFileDesc(const KKStr &_fileName, istream &_in, MLClassListPtr _classList, kkint32 &_estSize, KKStr &_errorMessage, RunLog &_log)
KKStr & TrimRight(const char *whiteSpaceChars="\n\r\t ")
Definition: KKStr.cpp:1695
FeatureVector(kkint32 _numOfFeatures)
void ChopLastChar()
Definition: KKStr.cpp:1668
KKStr ExtractToken(const char *delStr="\n\t\r ")
Definition: KKStr.cpp:2969
kkint32 GetNominalCode(const KKStr &nominalValue) const
Definition: Attribute.cpp:185
KKStr & operator=(const char *src)
Definition: KKStr.cpp:1442
FeatureNumList const FeatureNumListConst
char FirstChar() const
Definition: KKStr.cpp:1970
KKStr operator+(const char *right) const
Definition: KKStr.cpp:3986
void GetLine(std::istream &_in, KKStr &_line, bool &_eof)
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
void AddFeatureData(kkint32 _featureNum, float _featureData)
FeatureVectorList(FileDescPtr _fileDesc, bool _owner)
Will create a new empty list of FeatureVector's.
KKStr & operator=(KKStr &&src)
Definition: KKStr.cpp:1369
bool operator!=(const char *rtStr) const
Definition: KKStr.cpp:1596
Container class for FeatureVector derived objects.
Attribute * AttributePtr
Definition: Attribute.h:156
char LastChar() const
Definition: KKStr.cpp:2007
KKTHread * KKTHreadPtr
KKStr(const KKStr &str)
Copy Constructor.
Definition: KKStr.cpp:561
void TrimLeft(const char *whiteSpaceChars="\n\r\t ")
Definition: KKStr.cpp:1745
Base class for all FeatureFileIO classes.
Definition: FeatureFileIO.h:48
bool operator()(AttrDescLinePtr p1, AttrDescLinePtr p2)
KKStr SubStrPart(kkint32 firstChar, kkint32 lastChar) const
returns a SubString consisting of all characters starting at index 'firstChar' and ending at 'lastInd...
Definition: KKStr.cpp:2802
virtual void SaveFile(FeatureVectorList &_data, const KKStr &_fileName, FeatureNumListConst &_selFeatures, ostream &_out, kkuint32 &_numExamplesWritten, VolConstBool &_cancelFlag, bool &_successful, KKStr &_errorMessage, RunLog &_log)
const KKStr & ClassNameAttribute() const
Definition: FileDesc.h:118
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
void Upper()
Converts all characters in string to their Upper case equivalents via 'toupper'.
Definition: KKStr.cpp:2461
kkint32 LocateCharacter(char ch) const
Returns index of 1st occurrence of 'ch' otherwise -1.
Definition: KKStr.cpp:2021
void AddANominalValue(const KKStr &nominalValue, bool &alreadyExist, RunLog &log)
Definition: FileDesc.cpp:242
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
Definition: FeatureVector.h:74
FileDesc * FileDescPtr
bool operator==(const KKStr &right) const
Definition: KKStr.cpp:1550
const KKMLL::AttributePtr * CreateAAttributeTable() const
Definition: FileDesc.cpp:408
kkint32 GetFieldNumFromAttributeName(const KKStr &attributeName) const
Definition: FileDesc.cpp:690
KKStr & operator=(const KKStr &src)
Definition: KKStr.cpp:1390
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
bool operator<(const KKStr &right) const
Definition: KKStr.cpp:1635
MLClassList * MLClassListPtr
Definition: MLClass.h:49
virtual MLClassPtr GetMLClassPtr(const KKStr &_name)
return pointer to instance with '_name'; if none exists, create one and add to list.
Definition: MLClass.cpp:861
Maintains a list of MLClass instances.
Definition: MLClass.h:233
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
KKStr SubStrPart(kkint32 firstChar) const
returns a SubString consisting of all characters starting at index 'firstChar' until the end of the s...
Definition: KKStr.cpp:2780
FeatureFileIO(const KKStr &_driverName, bool _canRead, bool _canWrite)
KKStr osGetRootName(const KKStr &fullFileName)
volatile const bool VolConstBool
Definition: KKBaseTypes.h:163