20 using namespace KKMLL;
26 featureNumsAllocatedSize (0),
33 <<
"FeatureNumList::FeatureNumList *** ERROR *** fileDesc = NULL" << endl
38 AllocateArraySize (fileDesc->NumOfFields ());
47 featureNumsAllocatedSize (0),
55 <<
"FeatureNumList *** ERROR *** No 'FileDesc' object provided." << endl
61 if (fileDesc->NumOfFields () != kkint32 (bitString.BitLen ()))
64 <<
"FeatureNumList *** ERROR ***" << endl
66 <<
" BitString.Len[" << bitString.BitLen () <<
"] is different than FileDesc.NumOfFields[" << fileDesc->NumOfFields () <<
"]" << endl
75 if (kkuint32 (featureNumsAllocatedSize) < listOfBits.size ())
76 AllocateArraySize ((kkint32)listOfBits.size ());
78 for (kkuint32 x = 0; x < listOfBits.size (); x++)
79 AddFeature (listOfBits[x]);
86 featureNumsAllocatedSize (0),
93 <<
"FeatureNumList::FeatureNumList (const FeatureNumList& _featureNumList)" << endl
94 <<
" fileDesc == NULL" << endl
100 if ((numOfFeatures < 0) || (numOfFeatures > (kkint32)fileDesc->NumOfFields ()))
103 <<
"FeatureNumList::FeatureNumList (const FeatureNumList& _featureNumList)" << endl
104 <<
" numOfFeatures[" << numOfFeatures <<
"] is out of range of fileDesc" << endl
110 featureNums =
new kkuint16[numOfFeatures + 1];
111 featureNumsAllocatedSize = numOfFeatures + 1;
112 for (
kkint32 x = 0; x < numOfFeatures; x++)
113 featureNums[x] = _featureNumList
[x];
119 const KKStr& _featureListStr,
124 featureNumsAllocatedSize (0),
133 <<
"FeatureNumList::FeatureNumList *** ERROR *** fileDesc = NULL" << endl
138 AllocateArraySize (fileDesc->NumOfFields ());
140 ExtractFeatureNumsFromStr (_featureListStr, _valid);
146 FeatureSelectionType _selectionType,
147 const KKStr& _featureListStr,
151 featureNumsAllocatedSize (0),
152 fileDesc (_fileDesc),
160 <<
"FeatureNumList::FeatureNumList *** ERROR *** fileDesc = NULL" << endl
165 AllocateArraySize (fileDesc->NumOfFields ());
167 switch (_selectionType)
169 case IncludeFeatureNums:
170 ExtractFeatureNumsFromStr (_featureListStr, _valid);
174 case ExcludeFeatureNums:
175 FeatureNumList excludedFeatures (fileDesc, _featureListStr, _valid);
177 numOfFeatures = fileDesc->NumOfFields () - excludedFeatures.NumOfFeatures ();
178 featureNums =
new kkuint16[numOfFeatures];
179 featureNumsAllocatedSize = numOfFeatures;
182 for (kkuint16 x = 0; x < fileDesc->NumOfFields (); x++)
184 if (!excludedFeatures.InList (x))
199 featureNumsAllocatedSize (0),
209 delete [] featureNums;
218 memoryConsumedEstimated +=
sizeof (
kkuint16) * featureNumsAllocatedSize;
220 return memoryConsumedEstimated;
227 if (featureNumsAllocatedSize >= size)
236 featureNumsAllocatedSize = size;
237 for (
kkint32 x = 0; x < size; x++)
247 for (x = 0; x < numOfFeatures; x++)
248 newFeatureNums[x] = featureNums[x];
250 for (x = numOfFeatures; x < size; x++)
251 newFeatureNums[x] = 0;
254 delete [] featureNums;
255 featureNums = newFeatureNums;
256 featureNumsAllocatedSize = size;
268 bitStr
.Set (featureNums[x]
);
276 for (
kkint32 x = 0; x < numOfFeatures; x++)
277 newList[x] = featureNums[x];
285 if (numOfFeatures >= (kkint32)fileDesc->NumOfFields ())
297 for (x = 0; x < featureNumsAllocatedSize; x++)
311 kkint32 indexToDel = numOfFeatures - 1;
313 while ((indexToDel >= 0) && (featureNums[indexToDel] > featureNum))
318 if (featureNums[indexToDel] == featureNum)
322 for (x = indexToDel; x < (numOfFeatures - 1); x++)
323 featureNums[x] = featureNums[x + 1];
327 featureNums[numOfFeatures] = 0;
338 featureNumsAllocatedSize = 10;
341 if (featureNum >= fileDesc->NumOfFields ())
343 KKStr errMsg =
"FeatureNumList::AddFeature ***ERROR*** exceeded MaxNumOfFeatures.";
345 <<
" FeastureNum[" << featureNum <<
"] MaxNumOfFeatures[" << fileDesc->NumOfFields () <<
"]";
347 cerr << std::endl << errMsg << endl << endl;
348 throw KKException (errMsg);
351 else if (numOfFeatures >= featureNumsAllocatedSize)
354 kkint32 newNumOfFeatures = numOfFeatures + 10;
355 kkuint16* newFeatureNums =
new kkuint16[newNumOfFeatures];
357 for (kkint32 x = 0; x < numOfFeatures; x++)
358 newFeatureNums[x] = featureNums[x];
360 delete [] featureNums;
361 featureNums = newFeatureNums;
362 featureNumsAllocatedSize = newNumOfFeatures;
365 if (numOfFeatures == 0)
367 featureNums[0] = featureNum;
374 if (featureNums[x] < featureNum)
377 featureNums[numOfFeatures] = featureNum;
383 while ((x >= 0) && (featureNums[x] > featureNum))
385 featureNums[x + 1] = featureNums[x];
386 featureNums[x] = featureNum;
410 for (
kkuint16 x = 0; x < fileDesc->NumOfFields (); ++x)
412 if (fileDesc->Type (x) != IgnoreAttribute)
413 AddFeature (kkuint16 (x));
424 bool isSubSet =
true;
443 while ((x < numOfFeatures) && (!found))
445 found = (_featureNum == featureNums[x]);
464 if (_idx >= numOfFeatures)
466 cerr << endl << endl << endl
467 <<
"FeatureNumList::operator[] Invalid Index[" << _idx <<
"] requested." << endl
474 return featureNums[_idx];
482 KKStr featureNumStr
(numOfFeatures * 6
);
484 if (numOfFeatures <= 0)
485 return featureNumStr;
489 while (nextIdx < numOfFeatures)
491 kkint32 startOfGroup = nextIdx;
494 while ((endOfGroup < (numOfFeatures - 1)) &&
495 (featureNums[endOfGroup] == (featureNums[endOfGroup + 1] - 1))
501 if ((endOfGroup - startOfGroup) < 3)
504 for (x = startOfGroup; x <= endOfGroup; x++)
507 featureNumStr <<
",";
508 featureNumStr << featureNums[x];
514 featureNumStr <<
",";
515 featureNumStr << featureNums[startOfGroup] <<
"-" << featureNums[endOfGroup];
518 nextIdx = endOfGroup + 1;
521 return featureNumStr;
549 if (_featureListStr
== "NONE")
553 if (_featureListStr
== "ALL")
569 featureNum = kkuint16 (atoi (field.Str ()));
572 if (featureNum >= fileDesc->NumOfFields ())
579 bool alreadyInList = InList (featureNum);
581 AddFeature (featureNum);
591 if ((startFeatureNum >= fileDesc->NumOfFields ()) ||
592 (endFeatureNum >= fileDesc->NumOfFields ()) ||
593 (startFeatureNum > endFeatureNum)
600 for (featureNum = startFeatureNum; featureNum <= endFeatureNum; featureNum++)
602 bool alreadyInList = InList (featureNum);
604 AddFeature (featureNum);
619 _log.Level (20) <<
"FeatureNumList::Load - File[" 626 _log.Level (-1) <<
"FeatureNumList::Load *** ERROR ***" << endl;
627 _log.Level (-1) <<
" Could Not Open File[" << _fileName <<
"]." << endl;
633 if (fgets (buff,
sizeof (buff), inputFile))
635 KKStr firstLine (buff);
636 kkint32 fileDescNumOfFields = atoi (firstLine.Str ());
637 if (fileDesc->NumOfFields () != fileDescNumOfFields)
639 _log.Level (-1) << endl
640 <<
"FeatureNumList::Load *** ERROR ***" << endl
641 <<
" Mismatch in field count" << endl
642 <<
" FileDesc->NumOfFields[" << fileDesc->NumOfFields () <<
"]" << endl
643 <<
" From File [" << fileDescNumOfFields <<
"]" << endl
652 _log.Level (-1) << endl
653 <<
"FeatureNumList::Load *** ERROR ***" << endl
654 <<
" Missing Data from File" << endl
661 if (fgets (buff,
sizeof (buff), inputFile))
663 _log.Level (50) <<
"Load - FeatureList = [" << buff <<
"]." << endl;
665 ExtractFeatureNumsFromStr (buff, valid);
671 _log.Level (-1) <<
"FeatureNumList::Load *** ERROR ***" << endl;
672 _log.Level (-1) <<
" No Data in File[" << _fileName <<
"]." << endl;
687 _log.Level (20) <<
"FeatureNumList::Save - File[" 693 ofstream outFile (_fileName.Str ());
695 outFile << fileDesc->NumOfFields () << endl;
696 outFile << ToString () << endl;
706 o <<
"<FeatureNumList>" <<
"\t" 707 <<
"NumOfFeatures" <<
"\t" << numOfFeatures
708 <<
"FeatureNums" <<
"\t" << ToString ()
709 <<
"</FeatureNumList>";
717 delete [] featureNums;
719 fileDesc = _features.fileDesc;
722 featureNums =
new kkuint16[numOfFeatures];
723 featureNumsAllocatedSize = numOfFeatures;
725 for (
kkint32 fn = 0; fn < numOfFeatures; fn++)
726 featureNums[fn] = _features
[fn];
747 if (featureNums[x] < _features.featureNums[x])
750 else if (featureNums[x] > _features.featureNums[x])
756 if (x < numOfFeatures)
759 else if (x < _features.numOfFeatures)
770 if (numOfFeatures != _features.numOfFeatures)
806 ostream& operator<< ( ostream& os,
815 ostream& operator<< ( ostream& os,
833 if (fileDesc != rightSide.FileDesc ())
835 KKStr errMsg =
"FeatureNumList::operator* ***ERROR*** Incompatible FileDesc's";
837 <<
" The associated FileDesc instances are not the same.";
838 cerr << endl << endl << errMsg << endl <<endl;
839 throw KKException (errMsg);
843 result.AllocateArraySize (numOfFeatures);
848 while ((l < numOfFeatures) && (r < rightSide.numOfFeatures))
850 if (featureNums[l] < rightSide.featureNums[r])
855 else if (featureNums[l] > rightSide.featureNums[r])
862 result.AddFeature (featureNums[l]);
885 for (l = 0; l < numOfFeatures; l++)
899 kkint32 newFeatureNumsAllocatedSize = Min (numOfFeatures + rightSide.numOfFeatures, (kkint32)fileDesc->NumOfFields ());
903 kkuint16* leftFeatureNums = featureNums;
904 kkint32 leftNumOfFeatures = numOfFeatures;
905 kkuint16* rightFeatureNums = rightSide.featureNums;
906 kkint32 rightNumOfFeatures = rightSide.numOfFeatures;
911 while ((l < leftNumOfFeatures) && (r < rightNumOfFeatures))
913 if (leftFeatureNums[l] < rightFeatureNums[r])
915 newFeatureNums[newNumOfFeatures] = leftFeatureNums[l];
920 else if (leftFeatureNums[l] > rightFeatureNums[r])
922 newFeatureNums[newNumOfFeatures] = rightFeatureNums[r];
929 newFeatureNums[newNumOfFeatures] = rightFeatureNums[r];
936 while (l < leftNumOfFeatures)
938 newFeatureNums[newNumOfFeatures] = leftFeatureNums[l];
943 while (r < rightNumOfFeatures)
945 newFeatureNums[newNumOfFeatures] = rightFeatureNums[r];
950 delete [] featureNums;
951 featureNums = newFeatureNums;
952 featureNumsAllocatedSize = newFeatureNumsAllocatedSize;
953 numOfFeatures = newNumOfFeatures;
975 if (kkint32 (rightSide) >= fileDesc->NumOfFields ())
977 KKStr errMsg =
"FeatureNumList::operator+ ***ERROR***";
978 errMsg <<
" Feature[" << rightSide <<
"] is too large.";
979 cerr << endl << endl << errMsg << endl <<endl;
980 throw KKException (errMsg);
994 if (rightSide >= fileDesc->NumOfFields ())
996 KKStr errMsg =
"FeatureNumList::operator- ***ERROR***";
997 errMsg <<
" Feature[" << rightSide <<
"] is too large.";
998 cerr << endl << endl << errMsg << endl <<endl;
999 throw KKException (errMsg);
1018 if (fileDesc != rightSide.FileDesc ())
1020 KKStr errMsg =
"FeatureNumList::operator- ***ERROR*** Incompatible FileDesc's";
1022 <<
" The associated FileDesc instances are not the same.";
1023 cerr << endl << endl << errMsg << endl <<endl;
1024 throw KKException (errMsg);
1049 if (numToKeep > numOfFeatures)
1053 <<
"FeatureNumList::RandomlySelectFeatures *** ERROR ***" << endl
1055 <<
"NumToKeep[" << numToKeep <<
"] Is greater than NumOfFeatures[" << numOfFeatures <<
"]" << endl
1058 numToKeep = numOfFeatures;
1067 for (x = 0; x < numOfFeatures; x++)
1068 selectedFeatures[x] = featureNums[x];
1072 for (x = 0; x < numToKeep; x++)
1075 z = selectedFeatures[x];
1076 selectedFeatures[x] = selectedFeatures[y];
1077 selectedFeatures[y] = z;
1081 for (x = 0; x < numToKeep; x++)
1084 delete [] selectedFeatures;
1086 return randomlySelectedFeatures;
1094 if ((idx < 0) || (idx >= numOfFeatures))
1096 cerr << endl << endl
1097 <<
"FeatureNumList::AttributeType *** ERROR ***" << endl
1099 <<
" Invalid Index[" << idx <<
"] Valid Range (0.." << (numOfFeatures - 1) <<
")" << endl
1102 return NULLAttribute;
1107 KKStr errMsg =
"FeatureNumList::AttributeType ***ERROR*** 'fileDesc == NULL' There is a major programing flaw.";
1108 cerr << endl << endl << errMsg << endl << endl;
1109 throw KKException (errMsg);
1112 return fileDesc->Type (featureNums[idx]);
1123 for (x = 0; x < kkuint16 (fileDesc->NumOfFields ()); x++)
KKStr(kkint32 size)
Creates a KKStr object that pre-allocates space for 'size' characters.
FeatureNumListPtr RandomlySelectFeatures(kkint32 numToKeep) const
Generates a new FeatureNumList object that will select at random 'numToKeep' features from this insta...
FeatureNumList operator+(kkuint16 rightSide) const
Returns a new FeatureNumList that is the union of this instance and 'rightSide'.
FeatureNumList operator+(const FeatureNumList &rightSide) const
Returns new FeatureNumList that is a union of this instance and 'rightSide'.
kkint32 LRand48()
An implementation of the Unix version of rand48 that returns a 32-bit integer.
bool Test(kkuint16 _featureNum) const
Indicates whether feature '_featureNum' is selected.
bool AllFeaturesSelected() const
Returns true if all features are selected.
Keeps track of selected features.
FeatureNumList(FileDescPtr _fileDesc)
KKStr HexStr() const
Returns a Hex-String representation.
void ReSet()
Set all bits to '0'.
KKStr ExtractToken(const char *delStr="\n\t\r ")
unsigned __int16 kkuint16
16 bit unsigned integer.
FeatureNumList & operator=(const FeatureNumListPtr _features)
bool InList(kkuint16 featureNum) const
Returns true if '_featureNum' is one of the selected features.
void ExtractFeatureNumsFromStr(KKStr featureListStr, bool &valid)
Will select the features specified in "featureListStr".
KKStr ToHexString() const
bool operator==(const char *rtStr) const
FeatureNumList & operator+=(kkuint16 featureNum)
Returns this FeatureNumList, updated to be the union of this instance and 'featureNum'.
FeatureNumList(FileDescPtr _fileDesc, const KKStr &_featureListStr, bool &_valid)
Constructs a 'FeatureNumList' instance from a string that contains a list of selected features...
void SetAllFeatures()
Selects all features except those flagged as 'IgnoreAttribute' in the associated FileDesc.
FeatureNumList(const FeatureNumList &featureNumList)
Copy constructor.
kkuint16 * CreateFeatureNumArray() const
Allocates an array of kkuint16's that is a copy of FeatureNums. The caller will own the array and is re...
FeatureNumList operator-(const FeatureNumList &rightSide) const
KKStr & operator=(KKStr &&src)
Allows you to manage very long bit strings.
FeatureNumList Complement() const
Perform a complement of selected features. That is if a feature is selected turn it off and if it is ...
kkuint16 operator[](kkint32 idx) const
Returns back the selected feature.
bool IsSubSet(const FeatureNumList &z)
Returns true if 'z' is a subset of this instance.
FeatureNumList * FeatureNumListPtr
void UnSet()
Turns off all features so that no feature is selected.
void ToBitString(BitString &bitStr) const
kkint32 NumOfFeatures() const
void AddFeature(kkuint16 featureNum)
Adds 'featureNum' to the list of selected features. If it is already selected nothing happens...
void TrimLeft(const char *whiteSpaceChars="\n\r\t ")
void UnSet(kkuint16 featureNum)
Turns off specified feature 'featureNum'; if 'featureNum' is not turned on then nothing happens; same...
void Save(const KKStr &_fileName, bool &_successful, RunLog &_log)
bool operator>(const FeatureNumList &_features) const
Indicates whether the left FeatureNumList instance is greater than the right one.
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
void Upper()
Converts all characters in string to their Upper case equivalents via 'toupper'.
void ListOfSetBits16(VectorUint16 &setBits) const
kkint32 LocateCharacter(char ch) const
Returns the index of the first occurrence of 'ch', or -1 if it is not found.
kkint32 Compare(const FeatureNumList &_features) const
Compare with another featureNumList returning -1, 0, and 1 indicating less_than, equal, or greater_than.
FeatureNumList operator-(kkuint16 rightSide) const
void Set(kkuint32 bitNum)
Set the bit indicated by 'bitNum' to '1'.
FeatureNumList & operator=(const FeatureNumList &_features)
static FeatureNumList AllFeatures(FileDescPtr fileDesc)
Create a FeatureNumList object where all features are selected, except ones that are flagged as Ignor...
void Save(std::ostream &o)
void Load(const KKStr &_fileName, bool &_successful, RunLog &_log)
std::ostream &__cdecl operator<<(std::ostream &os, const KKStr &str)
kkint32 ExtractTokenInt(const char *delStr)
KKStr ToString() const
Returns comma delimited list of all features selected; will make use of range specification.
kkint32 NumSelFeatures() const
const char * Str() const
Returns a pointer to an ASCII string.
FeatureNumList * FeatureNumListPtr
FILE * osFOPEN(const char *fileName, const char *mode)
FeatureNumList operator*(const FeatureNumList &rightSide) const
Returns a new instance that is the intersection of the features selected in this instance and 'rightSide'.
bool operator==(const FeatureNumList &_features) const
Indicates whether the two FeatureNumList instances have the same features selected.
Used for logging messages.
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
FeatureNumList(FileDescPtr _fileDesc, const BitString &bitString)
Constructs a 'FeatureNumList' instance using the set bits in 'bitString' to indicate which features a...
KKMLL::AttributeType FeatureAttributeType(kkint32 idx) const
std::vector< kkuint16 > VectorUint16
Vector of unsigned 16 bit integers.
FeatureNumList & operator+=(const FeatureNumList &rightSide)
Returns this FeatureNumList, updated to be the union of this instance and 'rightSide'.
kkint32 MemoryConsumedEstimated() const
bool operator<(const FeatureNumList &_features) const
Indicates whether the left FeatureNumList instance is less than the right one.
FeatureNumList & operator-=(kkuint16 rightSide)