KSquare Utilities
FeatureNumList_Old.h
Go to the documentation of this file.
1 #if !defined(_FEATURENUMLIST_)
2 #define _FEATURENUMLIST_
3 
4 /**
5  *@class KKMLL::FeatureNumList
6  *@brief Keeps track of selected features.
7  *@details Used by SVMModel and the newer 'Model', 'ModelParam' based classes.
8  * Each instance of this class will have an associated 'FileDesc' instance. It
9  * is meant to keep track of selected features from that instance('FileDesc').
10  * <br />
11 * This class is meant to work with the FeatureVector, Model, and ModelParam
12  * based classes. This will allow us to select specific features
13  * from the FeatureVector instances that are to be used.
14  * <br />
15  * At no time are feature numbers that are out of range of the associated
16  * FileDesc instance to be selected.
17  * <br />
18  * Two of the constructors allow you to specify a list of features in a string.
19  * The list can consist of single features and/or ranges of features. Ranges of
20  * features are specified by using the dash('-') character between two numbers.
21  * The comma(',') character will be used as a separator. "All" specifies all are
22  * to be selected except those that are flagged as 'IgnoreAttribute' in the
23  * associated FileDesc instance. The list should be in ascending order.
24  *
25  *@code
26  * Example Strings:
27  * "1,2,3,10,20" Selects [1,2,3,10, 20].
28 * "1,4-7,9-12,13" Selects [1,4,5,6,7,9,10,11,12,13]
29 * "All" Selects all features that '_fileDesc' includes except
30  * those that are flagged as 'IgnoreAttribute' in the
31  * associated FileDesc instance.
32  *@endcode
33  */
34 
35 #include "Attribute.h"
36 #include "KKBaseTypes.h"
37 #include "BitString.h"
38 #include "RunLog.h"
39 #include "KKStr.h"
40 
41 
42 namespace KKMLL
43 {
44  #ifndef _FILEDESC_
45  class FileDesc;
46  typedef FileDesc* FileDescPtr;
47  #endif
48 
49 
50  class FeatureNumList;
51  typedef FeatureNumList* FeatureNumListPtr;
52 
53 class FeatureNumList
54 {
55 public:
57 typedef enum {IncludeFeatureNums, ExcludeFeatureNums} FeatureSelectionType; /**< Whether a feature list string names the features to include or the features to exclude. */
58 
59 /** @brief Copy constructor. */
60 FeatureNumList (const FeatureNumList& featureNumList);
61 
62 /**
63 *@brief Constructs an instance that is associated with '_fileDesc'.
64 * NOTE(review): presumably starts with no features selected - confirm against the implementation.
65 */
66 FeatureNumList (FileDescPtr _fileDesc);
67 
68 
69 /**
70 *@brief Constructs a 'FeatureNumList' instance using the set bits in 'bitString' to indicate which features are selected.
71 *@details For each bit position in 'bitString' that is set to '1' the corresponding feature will be selected. So a bit string with
72 * bits 0, 1, 2, 4, and 5 set to one (written '0110111' if bit 0 is the right-most character) will cause the features selected to be (0, 1, 2, 4, 5).<br />
73 * This is a useful constructor when dealing with datasets that have a large number of features such as DNA based datasets.
74 *@param[in] _fileDesc Description of the feature data.
75 *@param[in] bitString A bit string that indicates which features are selected. Bits that are set to 1 indicate that
76 * the corresponding feature is selected.
77 */
78 FeatureNumList (FileDescPtr _fileDesc,
79 const BitString& bitString
80 );
81 
82 
83 /**
84 * @brief Constructs a 'FeatureNumList' instance from a string that contains a list of selected features.
85 * @details The list can consist of single features and/or ranges of features. Ranges of features
86 * are specified by using the dash('-') character between two numbers. The comma(',')
87 * character will be used as a separator. "All" specifies all are to be selected except
88 * those that are flagged as 'IgnoreAttribute' in the associated FileDesc instance.
89 * The list should be in ascending order.
90 * @code
91 * ex's:
92 * "1,2,3,10,20" Selects [1,2,3,10,20].
93 * "1,4-7,9-12,13" Selects [1,4,5,6,7,9,10,11,12,13]
94 * "All" Selects all features that '_fileDesc' includes except those that are
95 * flagged as 'IgnoreAttribute' in the associated FileDesc instance.
96 * @endcode
97 * @see ExtractFeatureNumsFromStr
98 * @param[in] _fileDesc Description of the feature data.
99 * @param[in] _featureListStr Comma separated string that contains list of selected features; a range of
100 * features can be specified using the dash('-') character. ex: The string "1,3,5-7,9" indicates
101 * that features 1,3,5,6,7,9 are selected.
102 * @param[out] _valid returns false if '_featureListStr' is not a valid format.
103 */
104 FeatureNumList (FileDescPtr _fileDesc,
105 const KKStr& _featureListStr,
106 bool& _valid
107 );
108 
109 
110 /**
111 * @brief Constructs a 'FeatureNumList' instance from a string where '_selectionType' indicates if the features
112 * should be included or excluded.
113 *
114 * @details The list can consist of single features and/or ranges of features. Ranges of features
115 * are specified by using the dash('-') character between two numbers. The comma(',')
116 * character will be used as a separator. The list should be in ascending order.
117 * The '_selectionType' parameter specifies whether we are going to select these
118 * features or exclude them from the list of all features(complement).
119 * @code
120 * ex's:
121 * "1,2,3,10,20" Selects [1,2,3,10,20].
122 * "1,4-7,9-12,13" Selects [1,4,5,6,7,9,10,11,12,13]
123 * "All" Selects all features that '_fileDesc' includes except those that are
124 * flagged as 'IgnoreAttribute' in the associated FileDesc instance.
125 * @endcode
126 * @param[in] _fileDesc Description of the feature data.
127 * @param[in] _selectionType Specifies whether the features listed in '_featureListStr' should be
128 * included (IncludeFeatureNums) or excluded(ExcludeFeatureNums).
129 * @param[in] _featureListStr Comma separated string that contains list of features to be included or excluded;
130 * a range of features can be specified using the dash('-') character. ex: The string "1,3,5-7,9"
131 * indicates that features 1,3,5,6,7,9 are selected.
132 * @param[out] _valid returns false if '_featureListStr' is not a valid format.
133 */
134 FeatureNumList (FileDescPtr _fileDesc,
135 FeatureSelectionType _selectionType,
136 const KKStr& _featureListStr,
137 bool& _valid
138 );
139 
140 
141 ~FeatureNumList ();
142 
143 
144 
145 // Access Methods.
146 const kkuint16* FeatureNums () const {return featureNums;} /**< Selected feature numbers; 'NumOfFeatures ()' entries kept in ascending order. */
147 FileDescPtr FileDesc () const {return fileDesc;} /**< The associated 'FileDesc' instance. */
148 kkint32 NumOfFeatures () const {return numOfFeatures;} /**< Number of selected features. */
149 kkint32 NumSelFeatures () const {return numOfFeatures;} /**< Returns the same value as 'NumOfFeatures ()'. */
150 
151 
152 
153 
154 
155 /** @brief Adds 'featureNum' to the list of selected features. If it is already selected nothing happens. */
156 void AddFeature (kkuint16 featureNum);
157 
158 /** @brief Returns true if all features are selected. */
159 bool AllFeaturesSelected () const;
160 
162 
163 /** @brief Create a FeatureNumList object where all features are selected, except ones that are flagged as IgnoreAttribute in 'fileDesc'. */
164 static FeatureNumList AllFeatures (FileDescPtr fileDesc);
165 
166 /** @brief Compare with another featureNumList returning -1, 0, and 1 indicating less_than, equal, or greater_than. */
167 kkint32 Compare (const FeatureNumList& _features) const;
168 
169 /** @brief Perform a complement of selected features. That is if a feature is selected turn it off and if it is not selected then turn it on. */
170 FeatureNumList Complement () const;
171 
172 /**
173 *@brief Allocates an array of kkuint16's that is a copy of FeatureNums. The caller will own the array and is responsible for deleting it.
174 *@returns A dynamically allocated array that will consist of a list of selected features.
175 */
177 
178 
179 /**
180 * @brief Will select the features specified in "featureListStr".
181 * @details The format is a comma delimited string, where each number represents a feature, ranges can be specified with
182 * a dash("-"). "All" will select all features that are not flagged as a 'IgnoreAttribute' in the associated FileDesc instance.
183 * @code
184 * ex's: String Selected Features
185 * "1,2,3,10,20" [1,2,3,10,20].
186 * "1,4-7,9-12,23" [1,4,5,6,7,9,10,11,12,23]
187 * "All" Selects all features that '_fileDesc' includes except those that are
188 * flagged as 'IgnoreAttribute' in the associated FileDesc instance.
189 * @endcode
190 */
191 void ExtractFeatureNumsFromStr (KKStr featureListStr,
192 bool& valid
193 );
194 
195 bool IsSubSet (const FeatureNumList& z); /**< @brief Returns true if 'z' is a subset of this instance. */
196 
197 bool InList (kkuint16 featureNum) const; /**< @brief Returns true if 'featureNum' is one of the selected features. */
198 
199 void Load (const KKStr& _fileName,
200 bool& _successful,
201 RunLog& _log
202 ); /**< NOTE(review): presumably loads the feature selection from the file '_fileName' and reports the outcome through '_successful' - confirm against the implementation. */
203 
205 
206 /**
207 * @brief Generates a new FeatureNumList object that will select at random 'numToKeep' features from this instance.
208 * @param[in] numToKeep Number of features to select randomly from existing instance.
209 * @return Dynamically allocated instance of a FeatureNumList with randomly selected features.
210 */
212 
213 void Save (const KKStr& _fileName,
214 bool& _successful,
215 RunLog& _log
216 ); /**< NOTE(review): presumably writes the feature selection to the file '_fileName' and reports the outcome through '_successful' - confirm against the implementation. */
217 
218 void Save (std::ostream& o);
219 
220 void SetAllFeatures (); /**< @brief Selects all features except those flagged as 'IgnoreAttribute' in the associated FileDesc. */
221 
222 bool Test (kkuint16 _featureNum) const; /**< @brief Indicates whether feature '_featureNum' is selected. */
223 
224 void ToBitString (BitString& bitStr) const;
225 
226 KKStr ToHexString () const;
227 
228 
229 /** @brief Returns comma delimited list of all features selected; will make use of range specification. */
230 KKStr ToString () const;
231 
232 
233 KKStr ToCommaDelStr () const {return ToString ();} /**< Alias that forwards to 'ToString ()'. */
234 
235 
236 /** @brief Turns off all features so that no feature is selected. */
237 void UnSet ();
238 
239 /** @brief Turns off specified feature 'featureNum'; if 'featureNum' is not turned on then nothing happens; same as using 'operator-='. */
240 void UnSet (kkuint16 featureNum);
241 
242 /**
243 *@brief Returns back the selected feature.
244 *@details A FeatureNumList instance consists of a list of selected features. It is logically like an
245 * array of selected features that is the same length as the number of selected features.
246 *@code
247 * Example code that scans the FeatureNumList object 'goodFeatures'
248 *
249 * void PrintSelectedFeatures (const FeatureNumList& goodFeatures)
250 * {
251 * cout << "Selected Features: ";
252 * for (kkint32 x = 0; x < goodFeatures.NumOfFeatures (); x++)
253 * {
254 * if (x > 0) cout << ",";
255 * cout << goodFeatures[x];
256 * }
257 * cout << endl;
258 * }
259 *@endcode
260 *@param[in] idx The position in this instance that you want to return.
261 *@return Selected feature at position 'idx'.
262 */
263 kkuint16 operator[] (kkint32 idx) const;
264 
265 FeatureNumList& operator= (const FeatureNumList& _features);
266 FeatureNumList& operator= (const FeatureNumListPtr _features);
267 FeatureNumList operator+ (const FeatureNumList& rightSide) const; /**< @brief Returns new FeatureNumList that is a union of this instance and 'rightSide'. */
268 FeatureNumList operator+ (kkuint16 rightSide) const; /**< @brief Returns new FeatureNumList that is a union of this instance and 'rightSide'. */
269 FeatureNumList& operator+= (const FeatureNumList& rightSide); /**< @brief Returns this FeatureNumList after making it the union of this instance and 'rightSide'. */
270 FeatureNumList& operator+= (kkuint16 featureNum); /**< @brief Returns this FeatureNumList after making it the union of this instance and 'featureNum'. */
271 FeatureNumList operator- (const FeatureNumList& rightSide) const; /**< @brief Removes features that are selected in 'rightSide' from this instance and returns the result. */
272 FeatureNumList operator- (kkuint16 rightSide) const; /**< @brief Returns this instance with the feature specified by 'rightSide' removed. */
273 FeatureNumList& operator-= (kkuint16 rightSide); /**< @brief Remove the feature specified by 'rightSide' from this instance. */
274 FeatureNumList operator* (const FeatureNumList& rightSide) const; /**< @brief Returns new instance that is the intersection of features. */
275 bool operator== (const FeatureNumList& _features) const; /**< @brief Indicates if the two FeatureNumList instances have the same features selected. */
276 bool operator> (const FeatureNumList& _features) const; /**< @brief Indicates if the left FeatureNumList instance is greater than the right one. */
277 bool operator< (const FeatureNumList& _features) const; /**< @brief Indicates if the left FeatureNumList instance is less than the right one. */
278 
279 private:
280 /**
281 * @brief Constructs an instance with no features selected and no associated 'FileDesc' instance.
282 * This is a private constructor and is for internal use by 'FeatureNumList' only.
283 */
284 FeatureNumList ();
285 
286 void AllocateArraySize (kkint32 size); /**< @brief Make sure that FeatureNums is allocated to at least this size. */
287 
288 kkuint16* featureNums; /**< @brief The feature numbers in this array are always kept in ascending order.
289 * @details There will be 'numOfFeatures' in this array. 'featureNumsAllocatedSize'
290 * indicates the size allocated, if more space is needed you need to call
291 * 'AllocateArraySize' to increase it.
292 */
293 kkint32 featureNumsAllocatedSize; /**< Size currently allocated for 'featureNums'. */
294 FileDescPtr fileDesc; /**< The 'FileDesc' instance this list selects features from. */
295 kkint32 numOfFeatures; /**< Number of entries in use in 'featureNums'. */
296 }; /* FeatureNumList */
297 
298 
299  typedef FeatureNumList* FeatureNumListPtr;
300 
301  #define _FeatureNumList_Defined_
302 
303 
304 std::ostream& operator<< ( std::ostream& os,
305 const FeatureNumList& features
306 ); /**< @brief Stream-insertion operator taking a FeatureNumList by reference; the output format is not visible in this header. */
307 
308 
309 std::ostream& operator<< ( std::ostream& os,
310 const FeatureNumListPtr& features
311 ); /**< @brief Stream-insertion operator taking a pointer to a FeatureNumList; the output format is not visible in this header. */
312 
313 extern
314 const char* FeatureDecriptions[]; /**< Array of C strings defined in another translation unit. NOTE(review): presumably one description per feature - confirm. */
315 } /* namespace KKMLL */
316 
317 #endif
Provides a detailed description of the attributes of a dataset.
Definition: FileDesc.h:72
__int32 kkint32
Definition: KKBaseTypes.h:88
FeatureNumListPtr RandomlySelectFeatures(kkint32 numToKeep) const
Generates a new FeatureNumList object that will select at random 'numToKeep' features from this insta...
FeatureNumList operator+(kkuint16 rightSide) const
Returns new FeatureNumList that is a union of this instance and &#39;rightSide&#39;.
FeatureNumList operator+(const FeatureNumList &rightSide) const
Returns new FeatureNumList that is a union of this instance and &#39;rightSide&#39;.
bool Test(kkuint16 _featureNum) const
Indicates whether feature &#39;_featureNum&#39; is selected.
bool AllFeaturesSelected() const
Returns true if all features are selected.
Keeps track of selected features.
FeatureNumList(FileDescPtr _fileDesc)
unsigned __int16 kkuint16
16 bit unsigned integer.
Definition: KKBaseTypes.h:86
FeatureNumList & operator=(const FeatureNumListPtr _features)
bool InList(kkuint16 featureNum) const
Returns true if 'featureNum' is one of the selected features.
void ExtractFeatureNumsFromStr(KKStr featureListStr, bool &valid)
Will select the features specified in "featureListStr".
KKStr ToHexString() const
FeatureNumList & operator+=(kkuint16 featureNum)
Returns this FeatureNumList that is a union of this instance and &#39;rightSide&#39;.
FeatureNumList(FileDescPtr _fileDesc, const KKStr &_featureListStr, bool &_valid)
Constructs a &#39;FeatureNumList&#39; instance from a string that contains a list of selected features...
void SetAllFeatures()
Selects all features except those flagged as &#39;IgnoreAttribute&#39; in the associated FileDesc.
const char * FeatureDecriptions[]
FileDescPtr FileDesc() const
FeatureNumList(const FeatureNumList &featureNumList)
Copy constructor.
kkuint16 * CreateFeatureNumArray() const
Allocates an array of kkuint16's that is a copy of FeatureNums. The caller will own the array and is re...
FeatureNumList operator-(const FeatureNumList &rightSide) const
FeatureNumList(FileDescPtr _fileDesc, FeatureSelectionType _selectionType, const KKStr &_featureListStr, bool &_valid)
Constructs a &#39;FeatureNumList&#39; instance from a string where &#39;_selectionType&#39; indicates if the features...
Allows you to manage very long bit strings.
Definition: BitString.h:31
FeatureNumList Complement() const
Perform a complement of selected features. That is if a feature is selected turn it off and if it is ...
kkuint16 operator[](kkint32 idx) const
Returns back the selected feature.
bool IsSubSet(const FeatureNumList &z)
Returns true if &#39;z&#39; is a subset of this instance.
void UnSet()
Turns off all features so that no feature is selected.
void ToBitString(BitString &bitStr) const
kkint32 NumOfFeatures() const
void AddFeature(kkuint16 featureNum)
Adds &#39;featureNum&#39; to the list of selected features. If it is already selected nothing happens...
void UnSet(kkuint16 featureNum)
Turns off specified feature &#39;featureNum&#39;; if &#39;featureNum&#39; is not turned on then nothing happens; same...
void Save(const KKStr &_fileName, bool &_successful, RunLog &_log)
bool operator>(const FeatureNumList &_features) const
Indicates if the Left FeatureNumList instances is greater than the right one.
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of &#39;std::string&#39; strings.
Definition: KKStr.cpp:1082
AttributeType
Definition: Attribute.h:36
kkint32 Compare(const FeatureNumList &_features) const
Compare with another featureNumList returning -1, 0, and 1 indicating less_than, equal, or greater_than.
FeatureNumList operator-(kkuint16 rightSide) const
FeatureNumList & operator=(const FeatureNumList &_features)
static FeatureNumList AllFeatures(FileDescPtr fileDesc)
Create a FeatureNumList object where all features are selected, except ones that are flagged as Ignor...
void Save(std::ostream &o)
void Load(const KKStr &_fileName, bool &_successful, RunLog &_log)
KKStr ToString() const
Returns comma delimited list of all features selected; will make use of range specification.
kkint32 NumSelFeatures() const
FeatureNumList * FeatureNumListPtr
FeatureNumList operator*(const FeatureNumList &rightSide) const
Returns new instance that is the intersection of features.
bool operator==(const FeatureNumList &_features) const
Indicates if the two FeatureNumList instances have the same features selected.
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
FeatureNumList(FileDescPtr _fileDesc, const BitString &bitString)
Constructs a &#39;FeatureNumList&#39; instance using the set bits in &#39;bitString&#39; to indicate which features a...
KKMLL::AttributeType FeatureAttributeType(kkint32 idx) const
FeatureNumList & operator+=(const FeatureNumList &rightSide)
Returns this FeatureNumList that is a union of this instance and &#39;rightSide&#39;.
kkint32 MemoryConsumedEstimated() const
bool operator<(const FeatureNumList &_features) const
Indicates if the Left FeatureNumList instances is less than the right one.
KKStr ToCommaDelStr() const
const kkuint16 * FeatureNums() const
FeatureNumList & operator-=(kkuint16 rightSide)