KSquare Utilities
FileDesc.h
Go to the documentation of this file.
1 #if !defined(_FILEDESC_)
2 #define _FILEDESC_
3 
4 /**
5  @file FileDesc.h
6  @author Kurt Kramer
7  @details
8  @code
9  *************************************************************************************
10  ** Describes the different fields of a dataset. There will be one instance of this *
11  ** class for each type of Dataset that you have in an application. From this *
12  ** object you can get information such as number of attributes, Attribute types, *
13  ** whether they are nominal or continuous. If nominal, what are the accepted *
14  ** values. *
15  ** *
16  ** Never delete an instance of a FileDesc object. *
17  ** *
18  ** Only one FileDesc object can exist for any Dataset. Example the Forest Cover *
19  ** dataset. You can split the data into many files and manage them separately but *
20  ** you will only have one instance of a FileDesc object that they will all refer *
21  ** to. See "GetExistingFileDesc" method below. You would initially create an *
22  ** instance of FileDesc and then use "GetExistingFileDesc" to make sure that it is *
23  ** unique. This typically happens in the FeatureFileIO derived classes. *
24  ** *
25  *************************************************************************************
26  @endcode
27  */
28 
29 #include "GoalKeeper.h"
30 #include "KKStr.h"
31 #include "RunLog.h"
32 #include "XmlStream.h"
33 
34 #include "Attribute.h"
35 #include "MLClass.h"
36 
37 
38 
39 namespace KKMLL
40 {
41  class FileDescList;
43 
44  #if !defined(_FeatureFileIO_Defined_)
47  #endif
48 
49 
50  /**
51  *@class FileDesc
52  *@brief Provides a detailed description of the attributes of a dataset.
53  *@author Kurt Kramer
54  *@details
55  * Describes the different fields of a dataset. There will be one instance of this
56  * class for each type of Dataset that you have in an application. From this
57  * object you can get information such as number of attributes, Attribute types,
58  * whether they are nominal or continuous. If nominal, what are the accepted values.
59  *
60  * Never delete an instance of a FileDesc object.
61  *
62  * Only one FileDesc object can exist for any Dataset. Example the Forest-Cover
63  * dataset. You can split the data into many files and manage them separately but
64  * you will only have one instance of a FileDesc object that they will all refer
65  * to. See "GetExistingFileDesc" method below. You would initially create an
66  * instance of FileDesc and then use "GetExistingFileDesc" to make sure that it is
67  * unique. This typically happens in the FeatureFileIO derived classes.
68  *@see GetExistingFileDesc
69  *@see FeatureVectorList
70  *@see FeatureFileIO
71  */
72  class FileDesc
73  {
74  public:
76 
77  typedef FileDesc* const FileDescConstPtr;
78 
79  /**
80  *@brief Clean up function, call just before exiting the application.
81  *@details
82  * Before you terminate your application you should call this method. It will
83  * clean up the FileDesc objects that were created during the runtime of your
84  * application.
85  */
86  static void FinalCleanUp ();
87 
88  static bool FinalCleanUpRanAlready () {return finalCleanUpRanAlready;}
89 
90  /**
91  @brief Creates a simple FileDesc that consists of continuous data only.
92  @details
93  * Creates a file description that will consist of continuous fields only.
94  * The vector '_fieldNames' will provide the list of field names.
95  @param[in] _log Logging file to use.
96  @param[in] _fieldNames Name of fields; one entry for each field.
97  */
98  static
100 
101  FileDesc ();
102 
103  protected:
104  ~FileDesc ();
105 
106  public:
107  friend class KKQueue<FileDesc>;
108 
110 
111 
112  public:
113  // Access Methods
114  const KKMLL::AttributeList& Attributes () const {return attributes;};
115  const AttributeTypeVector& AttributeVector () const {return attributeVector;};
116  const VectorInt32& CardinalityVector () const {return cardinalityVector;}
117  const MLClassList& Classes () const {return classes;}
118  const KKStr& ClassNameAttribute () const {return classNameAttribute;} /**< ClassNameAttribute added to support dstWeb data files. */
119  const KKStr& FileName () const {return fileName;}
120  kkint32 SparseMinFeatureNum () const {return sparseMinFeatureNum;}
121  kkint16 Version () const {return version;}
122 
123  void ClassNameAttribute (const KKStr& _classNameAttribute) {classNameAttribute = _classNameAttribute;}
124  void FileName (const KKStr& _fileName) {fileName = _fileName;}
125  void SparseMinFeatureNum (kkint32 _sparseMinFeatureNum) {sparseMinFeatureNum = _sparseMinFeatureNum;}
126  void Version (kkint16 _version) {version = _version;}
127 
128 
129  void AddAAttribute (const KKB::KKStr& _name,
130  KKMLL::AttributeType _type,
131  bool& alreadyExists
132  );
133 
134  void AddAAttribute (const KKMLL::Attribute& attribute);
135 
136  void AddClasses (const MLClassList& classesToAdd);
137 
138  bool AllFieldsAreNumeric () const;
139 
140  void AddANominalValue (kkint32 fieldNum,
141  const KKStr& nominalValue,
142  bool& alreadyExist,
143  RunLog& log
144  );
145 
146 
147  void AddANominalValue (const KKStr& nominalValue,
148  bool& alreadyExist,
149  RunLog& log
150  );
151 
152 
153  void AddANominalValue (const KKStr& attributeName,
154  const KKStr& nominalValue,
155  bool& alreadyExist,
156  RunLog& log
157  );
158 
159  void AddAttributes (const KKMLL::AttributeList& attributes);
160 
161 
162  kkint32 Cardinality (kkint32 fieldNum) const;
163 
164  const
165  KKMLL::AttributePtr* CreateAAttributeTable () const; /**< Caller will be responsible for deleting */
166 
168 
170 
171  void DisplayAttributeMappings ();
172 
173  const KKStr& FieldName (kkint32 fieldNum) const;
174 
175  const KKMLL::Attribute& GetAAttribute (kkint32 fieldNum) const;
176 
177  const
178  KKStr& GetNominalValue (kkint32 fieldNum,
179  kkint32 code
180  ) const;
181 
182  MLClassPtr GetMLClassPtr (const KKStr& className);
183 
184  kkint32 GetFieldNumFromAttributeName (const KKStr& attributeName) const;
185 
186  const
187  KKMLL::AttributePtr LookUpByName (const KKStr& attributeName) const;
188 
190  const KKStr& nominalValue
191  ) const;
192 
193  MLClassPtr LookUpMLClassByName (const KKStr& className);
194 
196 
197  kkuint32 NumOfFields () const {return (kkuint32)attributes.size ();}
198 
199  void ReadXML (XmlStream& s,
200  XmlTagConstPtr tag,
201  VolConstBool& cancelFlag,
202  RunLog& log
203  );
204 
205 
206  bool SameExceptForSymbolicData (const FileDesc& otherFd,
207  RunLog& log
208  ) const;
209 
210 
211  KKMLL::AttributeType Type (kkint32 fieldNum) const;
212 
213  KKStr TypeStr (kkint32 fieldNum) const;
214 
215 
216  /**
217  *@brief Returns a pointer to an existing instance of 'fileDesc' if it exists, otherwise will use one being passed in.
218  *@details
219  * > First looks to see if the same FileDesc is already in the existing list;
220  * in that case will return back the same pointer.
221  *
222  *@code
223  * > Second will look for an existing FileDesc that is the same as the 'fileDesc'
224  * being passed in.
225  * if one is found then
226  * fileDesc is deleted
227  * existing one will be returned.
228  * else
229  * fileDesc will be added to the existing list (exisitingDescriptions)
230  * and returned.
231  *@endcode
232  @param[in] fileDesc Pointer to a FileDesc object that you want to look and see if one that is identical already exists.
233  @return pointer to the 'FileDesc' instance that the caller should be using.
234  */
235  static FileDescPtr GetExistingFileDesc (FileDescPtr fileDesc);
236 
237  /**
238  * @brief Merges the Symbolic fields of two different 'FileDesc' instances producing a new instance of 'FileDesc'.
239  * @details This method will only work if both instances have the same number of fields, their names must be
240  * the same(NOT case sensitive), and each field in both instances must be the same type. If all these conditions
241  * are not 'true' will return NULL. The fields that are of 'Symbolic' type will have their values merged
242  * together.
243  *@see KKMLL::Attribute
244  */
245  static FileDescPtr MergeSymbolicFields (const FileDesc& left,
246  const FileDesc& right,
247  RunLog& log
248  );
249 
250 
251  void WriteXML (const KKStr& varName,
252  std::ostream& o
253  ) const;
254 
255 
256  /**
257  *@brief Returns true if file description on the right side is identical.
258  *@details Both FileDesc instances must have the same number of fields, the fields
259  * must have the same names(NOT case sensitive), and the fields must have
260  * matching types(ex numerical, ordinal, etc).
261  */
262  bool operator== (const FileDesc& rightSize) const;
263 
264 
265  /**
266  *@brief Returns true if file description on the right side is NOT identical.
267  *@details If both FileDesc instances have different number of fields, or any one
268  * of the fields has a different name(NOT case sensitive), or one of the fields
269  * is of a different type.
270  */
271  bool operator!= (const FileDesc& rightSize) const;
272 
273 
274  private:
275  static void CreateBlocker ();
276 
277  kkint32 NumOfAttributes () {return attributes.QueueSize ();}
278 
279 
280  void ValidateFieldNum (kkint32 fieldNum,
281  const char* funcName
282  ) const;
283 
284  KKMLL::AttributeList attributes;
285  AttributeTypeVector attributeVector;
286  VectorInt32 cardinalityVector;
287  MLClassList classes;
288  KKStr classNameAttribute; /**< Added to support DstWeb files; the name of the attribute that specifies the className */
289  KKMLL::AttributePtr curAttribute;
290  KKStr fileName;
291  kkint32 sparseMinFeatureNum; /**< Used specifically for sparse files. */
292  kkint16 version;
293 
294  static
295  KKB::GoalKeeperPtr blocker;
296 
297  static
298  FileDescListPtr exisitingDescriptions; /**< Will keep a list of all FileDesc s instantiated. */
299 
300  static
301  bool finalCleanUpRanAlready;
302 
303  }; /* FileDesc */
304 
305 
307  typedef FileDesc::FileDescConstPtr FileDescConstPtr;
308 
309  #define _FileDesc_Defined_
310 
311 
312 
313 
315  {
316  public:
317  XmlElementFileDesc (XmlTagPtr tag,
318  XmlStream& s,
319  VolConstBool& cancelFlag,
320  RunLog& log
321  );
322 
323  virtual ~XmlElementFileDesc ();
324 
325  FileDescPtr Value () const;
326 
328 
329  static
330  void WriteXML (const FileDesc& fileDesc,
331  const KKStr& varName,
332  std::ostream& o
333  );
334  private:
335  FileDescPtr value;
336  };
338 
339 
340 
341 
342 
343 } /* namespace KKMLL */
344 
345 
346 #endif
__int16 kkint16
16 bit signed integer.
Definition: KKBaseTypes.h:85
KKStr TypeStr(kkint32 fieldNum) const
Definition: FileDesc.cpp:378
const MLClassList & Classes() const
Definition: FileDesc.h:117
MLClass * MLClassPtr
Definition: MLClass.h:46
void FileName(const KKStr &_fileName)
Definition: FileDesc.h:124
const KKMLL::AttributePtr LookUpByName(const KKStr &attributeName) const
Definition: FileDesc.cpp:682
Provides a detailed description of the attributes of a dataset.
Definition: FileDesc.h:72
const VectorInt32 & CardinalityVector() const
Definition: FileDesc.h:116
static FileDescPtr GetExistingFileDesc(FileDescPtr fileDesc)
Returns a pointer to an existing instance of 'fileDesc' if it exists, otherwise will use one being pa...
Definition: FileDesc.cpp:555
__int32 kkint32
Definition: KKBaseTypes.h:88
kkuint32 NumOfFields() const
Definition: FileDesc.h:197
FileDescList * FileDescListPtr
Definition: FileDesc.h:41
GoalKeeper * GoalKeeperPtr
void AddAAttribute(const KKB::KKStr &_name, KKMLL::AttributeType _type, bool &alreadyExists)
Definition: FileDesc.cpp:169
static bool FinalCleanUpRanAlready()
Definition: FileDesc.h:88
MLClassPtr GetMLClassPtr(const KKStr &className)
Definition: FileDesc.cpp:298
KKMLL::AttributeType Type(kkint32 fieldNum) const
Definition: FileDesc.cpp:370
void AddANominalValue(const KKStr &attributeName, const KKStr &nominalValue, bool &alreadyExist, RunLog &log)
Definition: FileDesc.cpp:264
MLClassPtr LookUpUnKnownMLClass()
Definition: FileDesc.cpp:291
kkint32 SparseMinFeatureNum() const
Definition: FileDesc.h:120
const AttributeTypeVector & AttributeVector() const
Definition: FileDesc.h:115
XmlElementFileDesc(XmlTagPtr tag, XmlStream &s, VolConstBool &cancelFlag, RunLog &log)
Definition: FileDesc.cpp:925
void ReadXML(XmlStream &s, XmlTagConstPtr tag, VolConstBool &cancelFlag, RunLog &log)
Definition: FileDesc.cpp:800
void SparseMinFeatureNum(kkint32 _sparseMinFeatureNum)
Definition: FileDesc.h:125
bool SameExceptForSymbolicData(const FileDesc &otherFd, RunLog &log) const
Definition: FileDesc.cpp:472
void AddAAttribute(const KKMLL::Attribute &attribute)
Definition: FileDesc.cpp:140
void Version(kkint16 _version)
Definition: FileDesc.h:126
kkint32 MemoryConsumedEstimated() const
Definition: FileDesc.cpp:102
void AddClasses(const MLClassList &classesToAdd)
Definition: FileDesc.cpp:196
bool operator!=(const FileDesc &rightSize) const
Returns true if file description on the right size is NOT identical.
Definition: FileDesc.cpp:456
FileDesc * FileDescPtr
Definition: FileDesc.h:75
const KKStr & FieldName(kkint32 fieldNum) const
Definition: FileDesc.cpp:387
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
bool operator==(const FileDesc &rightSize) const
Returns true if file description on the right size is identical.
Definition: FileDesc.cpp:443
MLClassPtr LookUpMLClassByName(const KKStr &className)
Definition: FileDesc.cpp:284
VectorInt32 CreateCardinalityTable() const
Definition: FileDesc.cpp:430
describes a single Feature, Type and possible values.
Definition: Attribute.h:74
kkint32 LookUpNominalCode(kkint32 fieldNum, const KKStr &nominalValue) const
Definition: FileDesc.cpp:321
static void FinalCleanUp()
Clean up function, call just before exiting the application.
Definition: FileDesc.cpp:57
Attribute * AttributePtr
Definition: Attribute.h:156
FileDesc *const FileDescConstPtr
Definition: FileDesc.h:77
KKTHread * KKTHreadPtr
void AddAttributes(const KKMLL::AttributeList &attributes)
Definition: FileDesc.cpp:156
void AddANominalValue(kkint32 fieldNum, const KKStr &nominalValue, bool &alreadyExist, RunLog &log)
Definition: FileDesc.cpp:220
const KKStr & GetNominalValue(kkint32 fieldNum, kkint32 code) const
Definition: FileDesc.cpp:395
bool AllFieldsAreNumeric() const
Allows the user to quickly determine if there are no nominal fields.
Definition: FileDesc.cpp:706
FeatureFileIO * FeatureFileIOPtr
Definition: FileDesc.h:45
Base class for all FeatureFileIO classes.
Definition: FeatureFileIO.h:48
const KKStr & FileName() const
Definition: FileDesc.h:119
XmlTag const * XmlTagConstPtr
Definition: KKStr.h:45
Manages the reading and writing of objects in a simple XML format. For a class to be supported by Xml...
Definition: XmlStream.h:46
const KKStr & ClassNameAttribute() const
Definition: FileDesc.h:118
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
std::vector< kkint32 > VectorInt32
Vector of signed 32 bit integers.
Definition: KKBaseTypes.h:144
AttributeType
Definition: Attribute.h:36
void AddANominalValue(const KKStr &nominalValue, bool &alreadyExist, RunLog &log)
Definition: FileDesc.cpp:242
FileDesc * FileDescPtr
const KKMLL::AttributePtr * CreateAAttributeTable() const
Definition: FileDesc.cpp:408
void ClassNameAttribute(const KKStr &_classNameAttribute)
Definition: FileDesc.h:123
static FileDescPtr NewContinuousDataOnly(VectorKKStr &_fieldNames)
Creates a simple FileDesc that consists of continuous data only.
Definition: FileDesc.cpp:116
friend std::ostream & operator<<(std::ostream &os, const Matrix &matrix)
kkint32 GetFieldNumFromAttributeName(const KKStr &attributeName) const
Definition: FileDesc.cpp:690
AttributeTypeVector CreateAttributeTypeTable() const
Definition: FileDesc.cpp:419
static void WriteXML(const FileDesc &fileDesc, const KKStr &varName, std::ostream &o)
Definition: FileDesc.cpp:962
KKStr & operator=(const KKStr &src)
Definition: KKStr.cpp:1390
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
FileDescPtr TakeOwnership()
Definition: FileDesc.cpp:953
FileDescPtr Value() const
Definition: FileDesc.cpp:947
kkint32 Cardinality(kkint32 fieldNum) const
Definition: FileDesc.cpp:341
Maintains a list of MLClass instances.
Definition: MLClass.h:233
void DisplayAttributeMappings()
Definition: FileDesc.cpp:617
static FileDescPtr MergeSymbolicFields(const FileDesc &left, const FileDesc &right, RunLog &log)
Merges the Symbolic fields of two different 'FileDesc' instances producing a new instance of 'FileDes...
Definition: FileDesc.cpp:723
Container class for 'FileDesc' instances.
Definition: FileDesc.cpp:42
void WriteXML(const KKStr &varName, std::ostream &o) const
Definition: FileDesc.cpp:875
kkint16 Version() const
Definition: FileDesc.h:121
const KKMLL::AttributeList & Attributes() const
Definition: FileDesc.h:114
const KKMLL::Attribute & GetAAttribute(kkint32 fieldNum) const
Definition: FileDesc.cpp:210
XmlElementFileDesc * XmlElementFileDescPtr
Definition: FileDesc.h:337
volatile const bool VolConstBool
Definition: KKBaseTypes.h:163