25 using namespace KKMLL;
37 cardinalityDest (NULL),
39 codedNumOfFeatures (0),
40 destFeatureNums (NULL),
42 encodedFileDesc (NULL),
46 srcFeatureNums (NULL),
55 srcFeatureNums =
new kkuint16 [numOfFeatures];
56 cardinalityDest =
new kkint32 [numOfFeatures];
57 destFeatureNums =
new kkint32 [numOfFeatures];
64 for (x = 0; x < numOfFeatures; x++)
67 srcFeatureNums [x] = srcFeatureNum;
68 destFeatureNums [x] = codedNumOfFeatures;
69 cardinalityDest [x] = 1;
72 Attribute srcAttribute = (fileDesc->Attributes ())[srcFeatureNum];
74 switch (encodingMethod)
76 case ModelParam::EncodingMethodType::Binary:
77 if ((attributeVector[srcFeatureNum] == AttributeType::Nominal) ||
78 (attributeVector[srcFeatureNum] == AttributeType::Symbolic)
81 destWhatToDo [x] = FeWhatToDo::FeBinary;
82 cardinalityDest [x] = cardinalityVector[srcFeatureNums [x]];
83 codedNumOfFeatures += cardinalityDest[x];
84 for (kkint32 zed = 0; zed < cardinalityDest[x]; zed++)
86 KKStr fieldName = srcAttribute.Name () +
"_" + srcAttribute.GetNominalValue (zed);
87 destFieldNames.push_back (fieldName);
93 destWhatToDo [x] = FeWhatToDo::FeAsIs;
94 destFieldNames.push_back (srcAttribute.Name ());
100 codedNumOfFeatures++;
101 if ((attributeVector[srcFeatureNums[x]] == AttributeType::Nominal) ||
102 (attributeVector[srcFeatureNums[x]] == AttributeType::Symbolic)
104 destWhatToDo [x] = FeWhatToDo::FeScale;
106 destWhatToDo [x] = FeWhatToDo::FeAsIs;
108 destFieldNames.push_back (srcAttribute.Name ());
114 codedNumOfFeatures++;
116 destFieldNames.push_back (srcAttribute.Name ());
128 attributeVector (_encoder.attributeVector),
129 cardinalityDest (NULL),
130 cardinalityVector (_encoder.cardinalityVector),
131 codedNumOfFeatures (_encoder.codedNumOfFeatures),
132 destFeatureNums (NULL),
134 encodedFileDesc (_encoder.encodedFileDesc),
135 encodingMethod (_encoder.encodingMethod),
136 fileDesc (_encoder.fileDesc),
137 numOfFeatures (_encoder.numOfFeatures),
138 srcFeatureNums (NULL),
139 param (_encoder.param)
141 cardinalityDest =
new kkint32[numOfFeatures];
142 destFeatureNums =
new kkint32[numOfFeatures];
144 srcFeatureNums =
new kkuint16[numOfFeatures];
147 for (x = 0; x < numOfFeatures; x++)
149 srcFeatureNums [x] = _encoder.srcFeatureNums [x];
150 destFeatureNums [x] = _encoder.destFeatureNums[x];
151 cardinalityDest [x] = _encoder.cardinalityDest[x];
152 destWhatToDo [x] = _encoder.destWhatToDo [x];
153 srcFeatureNums [x] = _encoder.srcFeatureNums [x];
164 delete srcFeatureNums;
165 delete destFeatureNums;
166 delete cardinalityDest;
173 kkint32 memoryConsumedEstimated =
sizeof (FeatureEncoder2)
174 + attributeVector.size () *
sizeof (AttributeType)
175 + cardinalityVector.size () *
sizeof (kkint32);
177 if (cardinalityDest) memoryConsumedEstimated += 2 * numOfFeatures *
sizeof (
kkint32);
178 if (destFeatureNums) memoryConsumedEstimated += numOfFeatures *
sizeof (
kkint32);
179 if (destWhatToDo) memoryConsumedEstimated += numOfFeatures *
sizeof (
FeWhatToDo);
180 if (srcFeatureNums) memoryConsumedEstimated += numOfFeatures *
sizeof (
kkuint16);
182 return memoryConsumedEstimated;
199 log.Level (40) <<
"FeatureEncoder2::CreateEncodedFileDesc" << endl;
205 <<
"Orig" <<
"\t" <<
"Orig" <<
"\t" <<
"Field" <<
"\t" <<
"Encoded" <<
"\t" <<
"Encoded" << endl;
206 *o <<
"FieldNum" <<
"\t" <<
"FieldName" <<
"\t" <<
"Type" <<
"\t" <<
"FieldNum" <<
"\t" <<
"FieldName" << endl;
213 for (x = 0; x < numOfFeatures; x++)
215 kkuint16 srcFeatureNum = srcFeatureNums[x];
216 kkint32 y = destFeatureNums[x];
218 if (y >= codedNumOfFeatures)
222 <<
"FeatureEncoder2::CreateEncodedFileDesc ***ERROR***" << endl
223 <<
" overriding number of encoded features. This should never be able to happen." << endl
224 <<
" Something is wrong with object." << endl
234 switch (destWhatToDo[x])
241 *o << origFieldDesc <<
"\t" 243 << fileDesc->FieldName (x)
251 for (
kkint32 z = 0; z < cardinalityDest[x]; z++)
258 *o << origFieldDesc <<
"\t" 275 *o << origFieldDesc <<
"\t" 277 << fileDesc->FieldName (x)
296 FeatureVectorPtr encodedImage =
new FeatureVector (codedNumOfFeatures
);
304 for (x = 0; x < numOfFeatures; x++)
306 float featureVal = featureData [srcFeatureNums[x]];
307 kkint32 y = destFeatureNums[x];
309 switch (destWhatToDo[x])
319 for (
kkint32 z = 0; z < cardinalityDest[x]; z++)
321 float bVal = ((
kkint32)featureVal == z);
350 FeatureVectorList::const_iterator idx;
352 for (idx = srcData->begin (); idx != srcData->end (); idx++)
354 const FeatureVectorPtr srcExample = *idx;
359 return encodedExamples;
372 FeatureVectorList::const_iterator idx;
373 for (idx = srcData.begin (); idx != srcData.end (); idx++)
375 FeatureVectorPtr srcExample = *idx;
381 return encodedFeatureVectorList;
FeatureVar2List(bool _owner)
KKStr TypeStr(kkint32 fieldNum) const
void PushOnBack(FeatureVectorPtr image)
Overloading the PushOnBack function in KKQueue so we can monitor the Version and Sort Order...
FileDescPtr CreateEncodedFileDesc(ostream *o, RunLog &log) const
FeatureEncoder2(const ModelParam &_param, FileDescPtr _fileDesc)
Constructs a Feature Encoder object.
Provides a detailed description of the attributes of a dataset.
const VectorInt32 & CardinalityVector() const
static FileDescPtr GetExistingFileDesc(FileDescPtr fileDesc)
Returns a pointer to an existing instance of 'fileDesc' if it exists, otherwise will use one being pa...
kkint32 NumEncodedFeatures() const
bool AllFieldsAreNumeric() const
Returns true if all fields are numeric, no nominal fields.
kkuint32 NumOfFields() const
void AddAAttribute(const KKB::KKStr &_name, KKMLL::AttributeType _type, bool &alreadyExists)
const float * FeatureData() const
Returns as a pointer to the feature data itself.
FeatureVector(kkint32 _numOfFeatures)
kkint32 MemoryConsumedEstimated() const
unsigned __int16 kkuint16
16 bit unsigned integer.
FeatureVectorListPtr EncodedFeatureVectorList(const FeatureVectorList &srcData) const
const AttributeTypeVector & AttributeVector() const
FeatureVar2(kkint32 _featureNum, AttributeType _attributeType, kkint32 _idx, double _var)
~FeatureEncoder2()
Frees any memory allocated by, and owned by the FeatureEncoder2.
virtual FeatureVectorListPtr DuplicateListAndContents() const
Creates a duplicate of the list and also duplicates its contents.
void TrainWeight(float _trainWeight)
Assign a specific example a higher weight for training purposes.
virtual EncodingMethodType EncodingMethod() const
KKStr operator+(const char *right) const
const KKStr & FieldName(kkint32 fieldNum) const
void AddFeatureData(kkint32 _featureNum, float _featureData)
MLClassPtr PredictedClass() const
KKMLL::AttributeType attributeType
FeatureVectorList(FileDescPtr _fileDesc, bool _owner)
Will create a new empty list of FeatureVector's.
Describes a single Feature, Type and possible values.
Container class for FeatureVector derived objects.
kkuint16 operator[](kkint32 idx) const
Returns back the selected feature.
const KKStr & GetNominalValue(kkint32 fieldNum, kkint32 code) const
kkint32 NumOfFeatures() const
virtual FeatureNumListConstPtr SelectedFeatures() const
FeatureEncoder2(const FeatureEncoder2 &_encoder)
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
MLClassPtr MLClass() const
Class that this example is assigned to.
void MLClass(MLClassPtr _mlClass)
Assign a class to this example.
KKStr StrFormatInt(kkint32 val, const char *mask)
KKStr operator+(const KKStr &right) const
static FileDescPtr NewContinuousDataOnly(VectorKKStr &_fieldNames)
Creates a simple FileDesc that consists of continuous data only.
void PredictedClass(MLClassPtr _predictedClass)
FeatureNumListConst * FeatureNumListConstPtr
Used for logging messages.
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
float TrainWeight() const
Represents a Feature Vector of a single example, labeled or unlabeled.
Abstract Base class for Machine Learning parameters.
FeatureVectorPtr EncodeAExample(FeatureVectorPtr src) const
FeatureVectorListPtr EncodeAllExamples(const FeatureVectorListPtr srcData)