KSquare Utilities
FeatureFileIO.h
Go to the documentation of this file.
1 #ifndef _FEATUREFILEIO_
2 #define _FEATUREFILEIO_
3 
4 
5 /**
6  *@class KKMLL::FeatureFileIO
7  *@brief Base class for all FeatureFileIO classes.
8  *@details This is an abstract class. For each type of FeatureFile you will need to implement
9  * a separate class derived from this class that supports the specific file format.
10  * You only need to implement the pure virtual functions.
11  *
12  * If you create a new FeatureFileIO class you will need to modify 'RegisterAllDrivers'
13  * method in FeatureFileIO.cpp.
14  */
15 
16 #include "FeatureNumList.h"
17 #include "FeatureVector.h"
18 #include "GoalKeeper.h"
19 #include "MLClass.h"
20 #include "OSservices.h"
21 #include "RunLog.h"
22 #include "KKStr.h"
23 
24 namespace KKMLL
25 {
26  class FeatureFileIO;
28 
29 
30  #if !defined(_FeatureVector_Defined_)
31  class FeatureVector;
33  #endif
34 
35 
36  #if !defined(_FeatureVectorList_Defined_)
37  class FeatureVectorList;
39  #endif
40 
41 
42  #if !defined(_FactoryFVProducer_Defined_)
43  class FactoryFVProducer;
45  #endif
46 
47 
49  {
50  public:
52 
53  FeatureFileIO (const KKStr& _driverName,
54  bool _canRead,
55  bool _canWrite
56  );
57 
58  virtual ~FeatureFileIO ();
59 
60  bool CanRead () {return canRead;}  /**< Returns this driver's 'canRead' flag. */
61  bool CanWrite () {return canWrite;}  /**< Returns this driver's 'canWrite' flag. */
62 
63 
64  void AppendToFile (const KKStr& _fileName,
65  FeatureNumListConst& _selFeatures,
66  FeatureVectorList& _examples,
67  kkuint32& _numExamplesWritten,
68  VolConstBool& _cancelFlag,
69  bool& _successful,
70  RunLog& log
71  );  // NOTE(review): parameter types match 'SaveFeatureFile'; presumably appends '_examples' to an existing file - confirm against FeatureFileIO.cpp.
72 
73 
74  /**
75  *@brief Loads the contents of a feature data file and returns a FeatureVectorList container object.
76  *@param[in] _fileName Feature file that is being loaded.
77  *@param[in,out] _mlClasses All classes encountered during the loading of the feature file will be added to this list.
78  *@param[in] _maxCount Maximum number of examples to load, -1 = load all
79  *@param[in] _cancelFlag If this flag turns true the load will terminate and return to caller.
80  *@param[out] _successful False will be returned if load failed.
81  *@param[out] _changesMade If the routine, having loaded the feature data, determined that it needed to make
82  * changes, this flag will be set to 'true'.
83  *@param[in] _log Where to send diagnostic messages to.
84  *@return A FeatureVectorList container object; this object will own all the examples loaded.
85  */
86  virtual
87  FeatureVectorListPtr LoadFeatureFile (const KKStr& _fileName,
88  MLClassList& _mlClasses,
89  kkint32 _maxCount,
90  VolConstBool& _cancelFlag, // will be monitored, if set to True Load will terminate.
91  bool& _successful,
92  bool& _changesMade,
93  RunLog& _log
94  );
95 
96 
97  /** SaveFeatureFile
98  *@brief Save examples to 'fileName'
99  *@param[in] _fileName Name of file to save examples/images to.
100  *@param[in] _selFeatures Specify specific features to save, typically all features.
101  *@param[in] _examples Examples that are to be saved.
102  *@param[out] _numExamplesWritten Will reflect the number of examples written, caller will be able to monitor.
103  *@param[in] _cancelFlag If this flag turns true the writing of data will terminate and return to caller.
104  *@param[out] _successful False will be returned if the save failed.
105  *@param[in] _log log file to send messages to.
106  */
107  virtual
108  void SaveFeatureFile (const KKStr& _fileName,
109  FeatureNumListConst& _selFeatures,
110  FeatureVectorList& _examples,
111  kkuint32& _numExamplesWritten, /**< caller will be able to monitor this variable. */
112  VolConstBool& _cancelFlag,
113  bool& _successful,
114  RunLog& _log
115  );
116 
117 
118  /**
119  *@brief Saves the feature file in multiple parts with no one single part larger than 64k examples.
120  *@details Same as 'SaveFeatureFile', if more than 64k examples will break into multiple files.
121  * If there are more than 64k examples, will save all images into 'fileName', but also
122  * a second copy of them into files with same name plus seq number with max of 64k samples
123  * in each one.
124  *
125  *@param[in] _fileName Name of file to save examples/images to.
126  *@param[in] _selFeatures Specify specific features to save, typically all features.
127  *@param[in] _examples Examples that are to be saved.
128  *@param[in] _cancelFlag If this flag turns true the writing of data will terminate and return to caller.
129  *@param[out] _successful False will be returned if the save failed.
130  *@param[in] _log log file to send messages to.
131  */
132  void SaveFeatureFileMultipleParts (const KKStr& _fileName,
133  FeatureNumListConst& _selFeatures,
134  FeatureVectorList& _examples,
135  VolConstBool& _cancelFlag,
136  bool& _successful,
137  RunLog& _log
138  );
139 
140 
141 
142  /** FeatureDataReSink
143  *@brief Synchronizes the contents of a feature data file with a directory of images.
144  *@details Used with applications to verify that feature file is up-to-date.
145  * Was specifically meant to work with training libraries, to account for
146  * images being added and deleted from training library. If there are no
147  * changes, then function will run very quickly.
148  *@param[in] _fvProducerFactory Factory that specifies the FeatureVector's we want to produce.
149  *@param[in] _dirName, Directory where source images are located.
150  *@param[in] _fileName, Feature file that is being synchronized.
151  *@param[in] _unknownClass, Class to be used when class is unknown
152  *@param[in] _useDirectoryNameForClassName, if true then class name of each entry will be set to directory name.
153  *@param[in] _mlClasses, list of classes
154  *@param[in] _cancelFlag Will be monitored; if it goes to 'true' will exit as soon as possible.
155  *@param[out] _changesMade, If returns as true then there were changes made to the
156  * feature file 'fileName'. If set to false, then no changes were made.
157  *@param[out] _timeStamp of feature file.
158  *@param[in] _log where to send diagnostic messages to.
159  *@returns A FeatureVectorList derived instance ; This object will own all the examples loaded
160  *
161  * A change in feature file version number would also cause all entries in the feature
162  * file to be recomputed. The feature file version number gets incremented whenever we change
163  * the feature file computation routine.
164  */
165  virtual
167  const KKStr& _dirName,
168  const KKStr& _fileName,
169  MLClassPtr _unknownClass,
170  bool _useDirectoryNameForClassName,
171  MLClassList& _mlClasses,
172  VolConstBool& _cancelFlag,
173  bool& _changesMade,
174  KKB::DateTime& _timeStamp,
175  RunLog& _log
176  );
177 
178 
179 
180  /** LoadInSubDirectoryTree
181  *@brief Creates a feature vector list of all images located in the specified sub-directory tree.
182  *@details Meant to work with images, it starts at a specified sub-directory and
183  * processes all sub-directories. It makes use of FeatureDataReSink for each specific
184  * sub-directory. Will make use of FeatureData files that already exist in any of the
185  * sub-directories.
186  *@param[in] _fvProducerFactory Factory that specifies the FeatureVector's we want to produce.
187  *@param[in] _rootDir Starting directory.
188  *@param[in,out] _mlClasses, List of classes, any new classes in fileName will be added.
189  *@param[in] _useDirectoryNameForClassName, if true set class names to sub-directory name.
190  * This happens because the user may manually move images between directories using
191  * the sub-directory name as the class name.
192  *@param[in] _cancelFlag If turns to 'true' method is to exit asap.
193  *@param[in] _rewiteRootFeatureFile, If true rewrite the feature file in the specified 'rootDir'. This
194  * feature file will contain all entries from all sub-directories below it.
195  *@param[in] _log, where to send diagnostic messages to.
196  *@returns - A PostLarvaeFVList container object. This object will own all the examples loaded.
197  */
199  KKStr _rootDir,
200  MLClassList& _mlClasses,
201  bool _useDirectoryNameForClassName,
202  VolConstBool& _cancelFlag, /**< will be monitored, if set to True Load will terminate. */
203  bool _rewiteRootFeatureFile,
204  RunLog& _log
205  );
206 
207 
208 
209  //***************************************************************************
210  //* The following routines need to be implemented by derived classes. *
211  //***************************************************************************
212 
213  /**
214  *@brief Create a FileDesc object from the input stream '_in'.
215  *@details All derived classes must implement this method. It is called by 'LoadFeatureFile'
216  * before it starts reading in the feature data.
217  *@param[in] _fileName Name of file to read to get FileDesc data from. Ex in c45 this would be the names file.
218  *@param[in] _in Input Stream to read from.
219  *@param[out] _classes Must be pointing to a valid MLClassList object. As class names are encountered add them to this list.
220  *@param[out] _estSize If you can derive the number of examples in the feature file populate this parameter.
221  *@param[out] _errorMessage If an error in processing occurs; place a description of the error in this parameter.
222  *@param _log
223  */
224  virtual FileDescPtr GetFileDesc (const KKStr& _fileName,
225  std::istream& _in,
226  MLClassListPtr _classes,
227  kkint32& _estSize,
228  KKStr& _errorMessage,
229  RunLog& _log
230  ) = 0;
231 
232 
233 
234  /**
235  *@brief To be implemented by derived classes; loads the contents of a feature data file and returns a FeatureVectorList container object.
236  *@param[in] _fileName Feature file that is being loaded.
237  *@param[in] _fileDesc Description of feature data that is to be loaded.
238  *@param[in,out] _classes All classes encountered during the loading of the feature file will be added to this list.
239  *@param[in] _in input stream that feature data is to be loaded/read from.
240  *@param[in] _maxCount Maximum number of examples to load, -1 = load all
241  *@param[in] _cancelFlag If this flag turns true the load will terminate and return to caller.
242  *@param[out] _changesMade If the routine, having loaded the feature data, determined that it needed to make
243  * changes, this flag will be set to 'true'.
244  *@param[out] _errorMessage If an error occurs during the loading a description of this error will be placed here.
245  *@param[in] _log Where to send diagnostic messages to.
246  *@return A FeatureVectorList container object; this object will own all the examples loaded; if an error occurs NULL will be returned.
247  */
248  virtual FeatureVectorListPtr LoadFile (const KKStr& _fileName,
249  const FileDescPtr _fileDesc,
250  MLClassList& _classes,
251  std::istream& _in,
252  kkint32 _maxCount, /**< Maximum # images to load. */
253  VolConstBool& _cancelFlag,
254  bool& _changesMade,
255  KKStr& _errorMessage,
256  RunLog& _log
257  ) = 0;
258 
259 
260  /**
261  *@brief To be implemented by derived classes; save examples to output stream '_out'.
262  *@param[in] _data Examples that are to be saved to the output stream.
263  *@param[in] _fileName Name of file to save examples/images to.
264  *@param[in] _selFeatures Specify specific features to save, typically all features.
265  *@param[out] _out Output stream to save feature data to.
266  *@param[out] _numExamplesWritten Will reflect the number examples written, caller will be able to monitor.
267  *@param[in] _cancelFlag If this flag turns true the writing of data will terminate and return to caller.
268  *@param[out] _successful False will be returned if the save failed.
269  *@param[out] _errorMessage If the save fails (_successful == false) then a description of the error will be placed here.
270  *@param[in] _log log file to send messages to.
271  */
272  virtual void SaveFile (FeatureVectorList& _data,
273  const KKStr& _fileName,
274  FeatureNumListConst& _selFeatures,
275  std::ostream& _out,
276  kkuint32& _numExamplesWritten,
277  VolConstBool& _cancelFlag,
278  bool& _successful,
279  KKStr& _errorMessage,
280  RunLog& _log
281  ) = 0;
282 
283 
284  const KKStr& DriverName () {return driverName;}  /**< Returns a reference to this driver's 'driverName'. */
285 
286 
287 
288  static FeatureFileIOPtr FileFormatFromStr (const KKStr& _fileFormatStr);
289 
290  static FeatureFileIOPtr FileFormatFromStr (const KKStr& _fileFormatStr,
291  bool _canRead,
292  bool _canWrite
293  );
294 
296 
298 
300 
301  static VectorKKStr RegisteredDriverNames (bool canRead,
302  bool canWrite
303  );
304 
305  static void FinalCleanUp ();
306 
307  /**
308  *@brief For each feature file format register the appropriate driver through this static method.
309  *@details You will be giving ownership of the driver to this class; it will call the destructor
310  *when the application shuts down.
311  */
312  static void RegisterFeatureFileIODriver (FeatureFileIOPtr _driver);
313 
314 
315  protected:
316  /**
317  *@brief Will retrieve the next token from the input stream.
318  *@details Leading and trailing blank characters will be skipped. A token will be separated
319  * by any character in '_delimiters' or 'EndOfLine', or 'EndOfFile'. If an 'EndOfLine'
320  * or 'EndOfFile' occurs while reading in a token the respective flags '_eol' and
321  * '_eof' will be set to false but the following call to this function will set the
322  * respective flag to true and return an empty token.
323  *
324  *@param[in] _in Stream to read from,
325  *@param[in] _delimiters List of valid delimiter characters.
326  *@param[out] _token token extracted from '_in'. If either '_eof' or '_eol'
327  * are set to true; then token will be empty.
328  *@param[out] _eof Set true if at end of file;
329  *@param[out] _eol Set true if at end of line.
330  */
331  void GetToken (std::istream& _in,
332  const char* _delimiters,
333  KKStr& _token,
334  bool& _eof,
335  bool& _eol
336  );
337 
338 
339  void GetLine (std::istream& _in,
340  KKStr& _line,
341  bool& _eof
342  );  // NOTE(review): presumably reads the next line of '_in' into '_line' and sets '_eof' at end of file - confirm against the implementation.
343 
344 protected:
345  static void RegisterDriver (FeatureFileIOPtr driver);
346 
347 
348  private:
349  bool canRead;
350  bool canWrite;
351  KKStr driverName;
352  KKStr driverNameLower;
353 
354  static void RegisterAllDrivers ();
355  static GoalKeeperPtr featureFileIOGoalKeeper;
356 
357 
358  static std::vector<FeatureFileIOPtr>* registeredDrivers;
359 
360  static std::vector<FeatureFileIOPtr>* RegisteredDrivers ();
361 
362  static FeatureFileIOPtr LookUpDriver (const KKStr& _driverName);
363  }; /* FeatureFileIO */
364 
365 
366 
368 
369 #define _FeatureFileIO_Defined_
370 
371 
372 } /* namespace KKMLL */
373 
374 
375 
376 #endif
void GetToken(std::istream &_in, const char *_delimiters, KKStr &_token, bool &_eof, bool &_eol)
Will retrieve the next token from the input stream.
__int32 kkint32
Definition: KKBaseTypes.h:88
FeatureVector * FeatureVectorPtr
Definition: Model.h:44
FeatureNumList const FeatureNumListConst
virtual FileDescPtr GetFileDesc(const KKStr &_fileName, std::istream &_in, MLClassListPtr _classes, kkint32 &_estSize, KKStr &_errorMessage, RunLog &_log)=0
Create a FileDesc object from the input stream '_in'.
void GetLine(std::istream &_in, KKStr &_line, bool &_eof)
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
FeatureVectorListPtr LoadInSubDirectoryTree(FactoryFVProducerPtr _fvProducerFactory, KKStr _rootDir, MLClassList &_mlClasses, bool _useDirectoryNameForClassName, VolConstBool &_cancelFlag, bool _rewiteRootFeatureFile, RunLog &_log)
Creates a feature vector list of all images located in the specified sub-directory tree...
virtual FeatureVectorListPtr LoadFeatureFile(const KKStr &_fileName, MLClassList &_mlClasses, kkint32 _maxCount, VolConstBool &_cancelFlag, bool &_successful, bool &_changesMade, RunLog &_log)
Loads the contents of a feature data file and returns a ImageFeaturesList container object...
void SaveFeatureFileMultipleParts(const KKStr &_fileName, FeatureNumListConst &_selFeatures, FeatureVectorList &_examples, VolConstBool &_cancelFlag, bool &_successful, RunLog &_log)
Saves the feature file in multiple parts with no one single part larger that 64k examples.
static void FinalCleanUp()
Before you terminate your application and after all FeatureFileIO activity is done call this method t...
Container class for FeatureVector derived objects.
KKTHread * KKTHreadPtr
FeatureFileIO * FeatureFileIOPtr
Definition: FileDesc.h:45
Base class for all FeatureFileIO classes.
Definition: FeatureFileIO.h:48
virtual void SaveFeatureFile(const KKStr &_fileName, FeatureNumListConst &_selFeatures, FeatureVectorList &_examples, kkuint32 &_numExamplesWritten, VolConstBool &_cancelFlag, bool &_successful, RunLog &_log)
Save examples to 'fileName'.
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
static VectorKKStr RegisteredDriverNames(bool canRead, bool canWrite)
FileDesc * FileDescPtr
virtual FeatureVectorListPtr LoadFile(const KKStr &_fileName, const FileDescPtr _fileDesc, MLClassList &_classes, std::istream &_in, kkint32 _maxCount, VolConstBool &_cancelFlag, bool &_changesMade, KKStr &_errorMessage, RunLog &_log)=0
To be implemented by derived classes; loads the contents of a feature data file and returns a ImageFe...
static void RegisterFeatureFileIODriver(FeatureFileIOPtr _driver)
For each feature file format register the appropriate driver through this static method.
static FeatureFileIOPtr FileFormatFromStr(const KKStr &_fileFormatStr)
static KKStr FileFormatsWrittenOptionsStr()
virtual void SaveFile(FeatureVectorList &_data, const KKStr &_fileName, FeatureNumListConst &_selFeatures, std::ostream &_out, kkuint32 &_numExamplesWritten, VolConstBool &_cancelFlag, bool &_successful, KKStr &_errorMessage, RunLog &_log)=0
To be implemented by derived classes; save examples to output stream '_out'.
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
static KKStr FileFormatsReadOptionsStr()
FeatureFileIO * FeatureFileIOPtr
Definition: FeatureFileIO.h:51
Responsible for creating a FeatureVectorProducer instance.
Maintains a list of MLClass instances.
Definition: MLClass.h:233
static KKStr FileFormatsReadAndWriteOptionsStr()
FeatureVectorList * FeatureVectorListPtr
Definition: Model.h:46
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
virtual FeatureVectorListPtr FeatureDataReSink(FactoryFVProducerPtr _fvProducerFactory, const KKStr &_dirName, const KKStr &_fileName, MLClassPtr _unknownClass, bool _useDirectoryNameForClassName, MLClassList &_mlClasses, VolConstBool &_cancelFlag, bool &_changesMade, KKB::DateTime &_timeStamp, RunLog &_log)
Synchronizes the contents of a feature data file with a directory of images.
static void RegisterDriver(FeatureFileIOPtr driver)
void AppendToFile(const KKStr &_fileName, FeatureNumListConst &_selFeatures, FeatureVectorList &_examples, kkuint32 &_numExamplesWritten, VolConstBool &_cancelFlag, bool &_successful, RunLog &log)
const KKStr & DriverName()
FactoryFVProducer * FactoryFVProducerPtr
Definition: Model.h:75
FeatureFileIO(const KKStr &_driverName, bool _canRead, bool _canWrite)
static FeatureFileIOPtr FileFormatFromStr(const KKStr &_fileFormatStr, bool _canRead, bool _canWrite)
volatile const bool VolConstBool
Definition: KKBaseTypes.h:163