KSquare Utilities
FeatureFileIOSparse.cpp
Go to the documentation of this file.
1 #include "FirstIncludes.h"
2 
3 #include <stdio.h>
4 #include <math.h>
5 #include <ctype.h>
6 #include <limits.h>
7 #include <time.h>
8 
9 #include <string>
10 #include <iostream>
11 #include <fstream>
12 #include <vector>
13 
14 #include "MemoryDebug.h"
15 
16 using namespace std;
17 
18 #include "KKBaseTypes.h"
19 #include "DateTime.h"
20 #include "OSservices.h"
21 #include "RunLog.h"
22 #include "KKStr.h"
23 using namespace KKB;
24 
26 #include "FileDesc.h"
27 #include "MLClass.h"
28 using namespace KKMLL;
29 
30 
31 
33 
34 
// Base-class initialisation: passes "Sparse" as the driver name and 'true' for both the
// '_canRead' and '_canWrite' arguments of the FeatureFileIO constructor.
36  FeatureFileIO ("Sparse", true, true)
37 {
    // Nothing else is set up in the constructor body.
38 }
39 
40 
41 
43 {
44 }
45 
46 
47 
49  istream& _in,
50  MLClassListPtr _classes,
51  kkint32& _estSize,
52  KKStr& _errorMessage,
53  RunLog& _log
54  )
55 {
    /*
     * Scans the whole of '_in' once to work out which feature numbers are used and then
     * builds a FileDesc that covers them.
     *
     * Each line that is neither blank nor a comment (first token starting with "//") is
     * treated as:   className  featureNum:value  featureNum:value ...
     * Only the class name and the 'featureNum' part in front of the ':' are examined here;
     * the values are ignored.
     *
     * Outputs:
     *   _estSize       Reset to 0 and then incremented once per data line.
     *   _classes       Every class name seen is passed to GetMLClassPtr.
     *   _errorMessage  Not written by the code below.
     *   return value   FileDesc allocated with 'new', with SparseMinFeatureNum set to the
     *                  smallest feature number seen and one Numeric attribute named
     *                  "Field_<n>" for every n from the smallest to the largest feature
     *                  number.  NOTE(review): ownership presumably passes to the caller -
     *                  confirm.
     */
56  _log.Level (20) << "FeatureFileIOSparse::GetFileDesc FileName[" << _fileName << "]." << endl;
57  bool eof = false;
58  bool eol = true;
59
60  _estSize = 0;
61
    // Start min/max at opposite extremes so that the first feature number seen replaces both.
62  kkint32 featureNumMin = int32_max;
63  kkint32 featureNumMax = int32_min;
64
65  while (!eof)
66  {
    // The first token of a line is the class name.
67  KKStr className;
68  GetToken (_in, " \t", className, eof, eol);
69  if (eof)
70  break;
71
72  if (eol)
73  {
74  //We have a blank line; we will ignore this line.
75  continue;
76  }
77
78  if (className.SubStrPart (0, 1) == "//")
79  {
80  // We have a comment line. We will skip to end of line
81  while ((!eol) && (!eof))
82  GetToken (_in, " \t", className, eof, eol);
83  continue;
84  }
85
    // Look the class name up in '_classes'.  'mlClass' is not used for anything else in this
    // function.
    // NOTE(review): the same lookup is performed twice in a row; the second call looks
    // redundant.
86  MLClassPtr mlClass = _classes->GetMLClassPtr (className);
87
88  mlClass = _classes->GetMLClassPtr (className);
89
    // Remaining tokens on the line are "featureNum:value" pairs.
    // NOTE(review): this loop tests only 'eol', whereas the matching loop in LoadFile tests
    // both 'eol' and 'eof'.  If GetToken can report 'eof' without 'eol' (for example a last
    // line with no trailing newline) this loop may not terminate - confirm against GetToken.
90  KKStr field;
91  GetToken (_in, " \t", field, eof, eol);
92  while (!eol)
93  {
    // ExtractToken is used to obtain the text in front of the ':'; atoi converts it to the
    // feature number.
94  KKStr featureNumStr = field.ExtractToken (":");
95  kkint32 featureNum = atoi (featureNumStr.Str ());
96
97  if (featureNum > featureNumMax)
98  featureNumMax = featureNum;
99
100  if (featureNum < featureNumMin)
101  featureNumMin = featureNum;
102  GetToken (_in, " \t", field, eof, eol);
103  }
    // One more data line has been seen.
104  _estSize++;
105  }
106
107  FileDescPtr fileDesc = new FileDesc ();
108
    // NOTE(review): when no feature number was seen, 'featureNumMin' is still int32_max here
    // and the loop below adds no attributes - confirm this is acceptable for empty input.
109  fileDesc->SparseMinFeatureNum (featureNumMin);
110
    // Add one Numeric attribute per feature number in the closed range [min, max], including
    // numbers that never appeared in the file.
111  for (kkint32 fieldNum = featureNumMin; fieldNum <= featureNumMax; fieldNum++)
112  {
113  bool alreadyExists = false;
114  fileDesc->AddAAttribute ("Field_" + StrFormatInt (fieldNum, "ZZZZ0"), AttributeType::Numeric, alreadyExists);
115  }
116
117  return fileDesc;
118 } /* GetFileDesc */
119 
120 
121 
122 
123 
124 FeatureVectorListPtr FeatureFileIOSparse::LoadFile (const KKStr& _fileName,
125  const FileDescPtr _fileDesc,
126  MLClassList& _classes,
127  istream& _in,
128  kkint32 _maxCount, // Maximum # images to load.
129  VolConstBool& _cancelFlag,
130  bool& _changesMade,
131  KKStr& _errorMessage,
132  RunLog& _log
133  )
134 {
135  _log.Level (20) << "FeatureFileIOSparse::LoadFile FileName[" << _fileName << "]" << endl;
136 
137  bool eof = false;
138  bool eol = true;
139 
140  KKStr rootName = osGetRootName (_fileName);
141 
142  kkint32 numOfFeatures = _fileDesc->NumOfFields ();
143 
144  kkint32 lineCount = 0;
145 
146  kkint32 minFeatureNum = _fileDesc->SparseMinFeatureNum ();
147  kkint32 maxFeatureNum = minFeatureNum + numOfFeatures - 1;
148 
149  if (_maxCount < 1)
150  _maxCount = int32_max;
151 
152  FeatureVectorListPtr examples = new FeatureVectorList (_fileDesc, true);
153 
154  while ((!eof) && (!_cancelFlag) && ((kkint32)examples->size () < _maxCount))
155  {
156  KKStr className;
157 
158  GetToken (_in, " \t", className, eof, eol);
159  if (eof)
160  break;
161 
162  if (eol)
163  {
164  // We have a blank line;
165  continue;
166  }
167 
168  if (className.SubStrPart (0, 1) == "//")
169  {
170  // We have a coment line. We will skip to end of line
171  while ((!eol) && (!eof))
172  GetToken (_in, " \t", className, eof, eol);
173  continue;
174  }
175 
176  MLClassPtr mlClass = _classes.GetMLClassPtr (className);
177 
178  FeatureVectorPtr example = new FeatureVector (numOfFeatures);
179  example->MLClass (mlClass);
180 
181  KKStr exampleName = rootName + "_" + StrFormatInt (lineCount, "ZZZZZZ0");
182  example->ExampleFileName (exampleName);
183 
184  KKStr field = "";
185  GetToken (_in, " \t", field, eof, eol);
186  while ((!eol) && (!eof))
187  {
188  KKStr featureNumStr = field.ExtractToken (":");
189  kkint32 featureNum = atoi (featureNumStr.Str ());
190 
191  if ((featureNum < minFeatureNum) || (featureNum > maxFeatureNum))
192  {
193  _log << endl << endl
194  << "FeatureFileIOSparse::LoadFile FeatureNum[" << featureNumStr << "] out of range." << endl
195  << " FileName[" << _fileName << "] LineNum[" << lineCount << "]." << endl
196  << endl;
197  _errorMessage << "FeatureNum[" << featureNumStr << "] is out of range.";
198  delete example; example = NULL;
199  delete examples; examples = NULL;
200  return NULL;
201  }
202 
203  featureNum = featureNum - minFeatureNum;
204 
205  float value = (float)atof (field.Str ());
206  example->AddFeatureData (featureNum, value);
207  GetToken (_in, " \t", field, eof, eol);
208  }
209 
210  examples->PushOnBack (example);
211 
212  lineCount++;
213  }
214 
215  return examples;
216 } /* LoadFile */
217 
218 
219 
220 
222  const KKStr& _fileName,
223  FeatureNumListConst& _selFeatures,
224  ostream& _out,
225  kkuint32& _numExamplesWritten,
226  VolConstBool& _cancelFlag,
227  bool& _successful,
228  KKStr& _errorMessage,
229  RunLog& _log
230  )
231 {
    /*
     * Writes the examples in '_data' to '_out' in sparse format, one example per line:
     *
     *     className  featureNum:value  featureNum:value ...
     *
     * Only the features listed in '_selFeatures' are considered, and of those only the ones
     * whose value is not exactly zero are written.
     *
     * Outputs:
     *   _numExamplesWritten  Reset to 0 and then incremented once per example written.
     *   _successful          Set to true when the loop finished without '_cancelFlag' being set.
     *                        NOTE(review): it is never assigned false here, so after a cancel
     *                        it keeps whatever value the caller passed in - confirm intended.
     *   _errorMessage        Not written by the code below.
     *
     * '_fileName' is only used in the log message.
     */
232  _log.Level (20) << "FeatureFileIOSparse::SaveFile FileName[" << _fileName << "]." << endl;
233  FeatureVectorPtr example = NULL;
234  FileDescPtr fileDesc = _data.FileDesc ();
235
236  _numExamplesWritten = 0;
237
238  kkint32 idx;
239  kkint32 x;
240
    // Added back onto each feature index when it is written; LoadFile subtracts the same
    // quantity when reading.
241  kkint32 minFeatureNum = fileDesc->SparseMinFeatureNum ();
242
    // The cancel flag is checked before each example, so a cancel leaves a partially written
    // stream.
243  for (idx = 0; (idx < _data.QueueSize ()) && (!_cancelFlag); idx++)
244  {
245  example = _data.IdxToPtr (idx);
246
    // Each line starts with the class name.
247  _out << example->ClassName ();
248
249  for (x = 0; x < _selFeatures.NumOfFeatures (); x++)
250  {
251  kkint32 featureNum = _selFeatures[x];
252  float value = example->FeatureData (featureNum);
    // Zero valued features are left out of the line.
    // NOTE(review): 'value' already holds the feature value; the second FeatureData call in
    // the output statement below fetches it again.
253  if (value != (float)0.0)
254  _out << " " << (featureNum + minFeatureNum) << ":" << example->FeatureData (featureNum);
255  }
256  _out << endl;
257  _numExamplesWritten++;
258  }
259
260  if (!_cancelFlag)
261  _successful = true;
262
263  return;
264 } /* SaveFile */
MLClass * MLClassPtr
Definition: MLClass.h:46
void GetToken(std::istream &_in, const char *_delimiters, KKStr &_token, bool &_eof, bool &_eol)
Will retrieve the next token from the input stream.
virtual void SaveFile(FeatureVectorList &_data, const KKStr &_fileName, FeatureNumListConst &_selFeatures, ostream &_out, kkuint32 &_numExamplesWritten, VolConstBool &_cancelFlag, bool &_successful, KKStr &_errorMessage, RunLog &_log)
Provides a detailed description of the attributes of a dataset.
Definition: FileDesc.h:72
__int32 kkint32
Definition: KKBaseTypes.h:88
virtual FileDescPtr GetFileDesc(const KKStr &_fileName, istream &_in, MLClassListPtr _classList, kkint32 &_estSize, KKStr &_errorMessage, RunLog &_log)
kkuint32 NumOfFields() const
Definition: FileDesc.h:197
void AddAAttribute(const KKB::KKStr &_name, KKMLL::AttributeType _type, bool &alreadyExists)
Definition: FileDesc.cpp:169
float FeatureData(kkint32 featureNum) const
KKStr ExtractToken(const char *delStr="\n\t\r ")
Definition: KKStr.cpp:2969
const FileDescPtr FileDesc() const
kkint32 SparseMinFeatureNum() const
Definition: FileDesc.h:120
FeatureNumList const FeatureNumListConst
bool operator==(const char *rtStr) const
Definition: KKStr.cpp:1588
void SparseMinFeatureNum(kkint32 _sparseMinFeatureNum)
Definition: FileDesc.h:125
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
FeatureVectorList(FileDescPtr _fileDesc, bool _owner)
Will create a new empty list of FeatureVector's.
Container class for FeatureVector derived objects.
virtual FeatureVectorListPtr LoadFile(const KKStr &_fileName, const FileDescPtr _fileDesc, MLClassList &_classes, istream &_in, kkint32 _maxCount, VolConstBool &_cancelFlag, bool &_changesMade, KKStr &_errorMessage, RunLog &_log)
KKTHread * KKTHreadPtr
kkuint16 operator[](kkint32 idx) const
Returns back the selected feature.
KKStr operator+(const char *left, const KKStr &right)
Definition: KKStr.cpp:3976
kkint32 NumOfFeatures() const
Base class for all FeatureFileIO classes.
Definition: FeatureFileIO.h:48
KKStr SubStrPart(kkint32 firstChar, kkint32 lastChar) const
returns a SubString consisting of all characters starting at index 'firstChar' and ending at 'lastInd...
Definition: KKStr.cpp:2802
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
AttributeType
Definition: Attribute.h:36
FileDesc * FileDescPtr
KKStr StrFormatInt(kkint32 val, const char *mask)
Definition: KKStr.cpp:5004
std::ostream &__cdecl operator<<(std::ostream &os, const KKStr &str)
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
MLClassList * MLClassListPtr
Definition: MLClass.h:49
virtual MLClassPtr GetMLClassPtr(const KKStr &_name)
return pointer to instance with '_name'; if none exists, create one and add to list.
Definition: MLClass.cpp:861
const KKStr & ClassName() const
Name of class that this example is assigned to.
Supports the reading and writing of Sparse feature files similar to the ones libSVM use...
Maintains a list of MLClass instances.
Definition: MLClass.h:233
#define int32_max
Definition: KKBaseTypes.h:119
FeatureFileIO(const KKStr &_driverName, bool _canRead, bool _canWrite)
KKStr osGetRootName(const KKStr &fullFileName)
#define int32_min
Definition: KKBaseTypes.h:120
volatile const bool VolConstBool
Definition: KKBaseTypes.h:163