24 using namespace KKMLL;
31 normalizeFeature (NULL),
32 normalizeNominalFeatures (
false),
49 normalizeFeature (NULL),
50 normalizeNominalFeatures (_normalizeNominalFeatures),
56 _log.Level (20) <<
"FeatureNormalization - Creating instance from[" << _examples.FileName () <<
"]." << endl;
61 DeriveNormalizationParameters (_examples);
73 normalizeFeature (NULL),
74 normalizeNominalFeatures (
false),
79 _log.Level (20) <<
"FeatureNormalization - Creating instance from[" << _examples.FileName () <<
"]." << endl;
86 DeriveNormalizationParameters (_examples);
99 normalizeFeature (NULL),
100 normalizeNominalFeatures (
false),
106 _log.Level (20) <<
"FeatureNormalization - Creating instance from[" << _examples.FileName () <<
"]." << endl;
114 DeriveNormalizationParameters (_examples);
122 delete [] mean; mean = NULL;
123 delete [] sigma; sigma = NULL;
124 delete [] normalizeFeature; normalizeFeature = NULL;
130 kkint32 memoryConsumedEstimated =
sizeof (NormalizationParms)
131 + attriuteTypes.size () *
sizeof (AttributeType)
132 + fileName.MemoryConsumedEstimated ()
133 + numOfFeatures * (
sizeof (
bool) +
sizeof (
double) +
sizeof (
double));
135 return memoryConsumedEstimated;
145 mean =
new double[numOfFeatures];
146 sigma =
new double[numOfFeatures];
148 double* total =
new double [numOfFeatures];
149 double* sigmaTot =
new double [numOfFeatures];
153 for (i = 0; i < numOfFeatures; i++)
163 FeatureVectorPtr image;
165 FeatureVectorList::iterator imageIDX;
167 for (imageIDX = _examples.begin (); imageIDX != _examples.end (); imageIDX++)
182 for (i = 0; i < numOfFeatures; i++)
185 total[i] += featureValue;
192 for (i = 0; i < numOfFeatures; i++)
194 double meanDouble = total[i] /
double (numOfExamples);
195 mean[i] = meanDouble;
199 for (imageIDX = _examples.begin (); imageIDX != _examples.end (); imageIDX++)
213 for (i = 0; i < numOfFeatures; i++)
216 double delta = featureValue - mean[i];
217 sigmaTot[i] += delta * delta;
222 for (i = 0; i < numOfFeatures; i++)
224 sigma[i] = sqrt (sigmaTot[i] / numOfExamples);
230 ConstructNormalizeFeatureVector ();
238 _log.Level (20) <<
"NormalizationParms::WriteToFile FileName[" << _fileName <<
"]." << endl;
239 fileName
= _fileName;
241 ofstream outFile (fileName.Str ());
242 if (!outFile.is_open ())
244 _log.Level (-1) << endl <<
"NormalizationParms::WriteToFile ***EROR*** writing to file["<< _fileName <<
"]." << endl << endl;
245 _successfull =
false;
248 WriteXML (
"NormalizationParms", outFile);
259 bool cancelFlag =
false;
263 if (
typeid (*t) !=
typeid (XmlElementNormalizationParms))
264 n =
dynamic_cast<XmlElementNormalizationParmsPtr> (t)->Value ();
269 delete stream; stream = NULL;
287 XmlElementInt32::WriteXML (numOfFeatures,
"NumOfFeatures", o);
288 XmlElementFloat::WriteXML (numOfExamples,
"NumOfExamples", o);
292 if (mean) XmlElementArrayDouble::WriteXML (numOfFeatures, mean,
"Mean", o);
293 if (sigma) XmlElementArrayDouble::WriteXML (numOfFeatures, sigma,
"sigma", o);
309 while (t && (!cancelFlag))
333 XmlElementArrayDoublePtr m =
dynamic_cast<XmlElementArrayDoublePtr>(e);
334 if (m->Count () == numOfFeatures)
337 mean = m->TakeOwnership ();
341 log.Level (-1) << endl
342 <<
"XmlElementNormalizationParms ***ERROR*** mean->Count[" << m->Count () <<
"] does not agree with NumOfFeatures[" << numOfFeatures <<
"]." <<endl
349 XmlElementArrayDoublePtr s =
dynamic_cast<XmlElementArrayDoublePtr>(e);
350 if (s->Count () == numOfFeatures)
353 sigma = s->TakeOwnership ();
357 log.Level (-1) << endl
358 <<
"XmlElementNormalizationParms ***ERROR*** sigma->Count[" << s->Count () <<
"] does not agree with NumOfFeatures[" << numOfFeatures <<
"]." <<endl
373 ConstructNormalizeFeatureVector ();
382 if ((i < 0) || (i > numOfFeatures))
384 log.Level (-1) <<
"NormalizationParms::Mean ***ERROR*** Feature Number[" << i <<
"] out of bounds." << endl;
398 if ((i < 0) || (i > numOfFeatures))
400 log.Level (-1) <<
"NormalizationParms::Mean ***ERROR*** Feature Number[" << i <<
"] out of bounds." << endl;
401 return (
float)-99999.99;
412 delete normalizeFeature;
413 normalizeFeature =
new bool [numOfFeatures];
416 for (i = 0; i < numOfFeatures; i++)
418 if (normalizeNominalFeatures)
420 normalizeFeature[i] =
true;
424 if ((attriuteTypes[i] == AttributeType::Nominal) ||
425 (attriuteTypes[i] == AttributeType::Symbolic)
428 normalizeFeature[i] =
false;
432 normalizeFeature[i] =
true;
443 for (
kkint32 i = 0; i < numOfFeatures; i++)
445 if (normalizeFeature[i])
447 double normValue = 0.0;
449 normValue = ((
double)featureData[i] - mean[i]) / sigma[i];
450 featureData[i] = (
float)normValue;
463 log.Level (-1) <<
"NormalizationParms::NoralizeImage **** ERROR **** Mismatched Feature Count." << endl
464 <<
" NormalizationParms [" << numOfFeatures <<
"]" << endl
465 <<
" ImageFeatiresList [" << examples->NumOfFeatures () <<
"]." << endl
473 FeatureVectorList::iterator idx;
475 for (idx = examples->begin (); idx != examples->end (); ++idx)
476 NormalizeAExample (*idx);
487 for (
kkint32 i = 0; i < numOfFeatures; ++i)
489 if (normalizeFeature[i])
491 double normValue = 0.0;
493 normValue = ((
double)featureData[i] - mean[i]) / sigma[i];
494 featureData[i] = (
float)normValue;
XmlTag(const KKStr &_name, TagTypes _tagType)
kkint32 MemoryConsumedEstimated() const
void NormalizeExamples(FeatureVectorListPtr examples, RunLog &log)
bool EqualIgnoreCase(const char *s2) const
NormalizationParms * NormalizationParmsPtr
void ReadXML(XmlStream &s, XmlTagPtr tag, VolConstBool &cancelFlag, RunLog &log)
float FeatureData(kkint32 featureNum) const
bool MissingData() const
True indicates that one or more features were missing.
void NormalizeAExample(FeatureVectorPtr example)
virtual float ToFloat() const
const FileDescPtr FileDesc() const
virtual bool NormalizeNominalFeatures() const
double Mean(kkint32 i, RunLog &log)
static void WriteXML(const bool b, const KKStr &varName, std::ostream &o)
NormalizationParms(bool _normalizeNominalFeatures, FeatureVectorList &_examples, RunLog &_log)
bool NormalizeNominalFeatures()
FeatureVector(const FeatureVector &_example)
Container class for FeatureVector derived objects.
virtual kkint32 ToInt32() const
FeatureVectorPtr ToNormalized(FeatureVectorPtr example) const
virtual bool ToBool() const
NormalizationParms(const ModelParam &_param, FeatureVectorList &_examples, RunLog &_log)
XmlElement * XmlElementPtr
void AddAtribute(const KKStr &attributeName, const KKStr &attributeValue)
kkint32 NumOfFeatures() const
Manages the reading and writing of objects in a simple XML format. For a class to be supported by Xml...
void WriteXML(const KKStr &varName, std::ostream &o) const
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
FileDescPtr FileDesc() const
static NormalizationParmsPtr ReadFromFile(const KKStr &fileName, RunLog &log)
MLClassPtr MLClass() const
Class that this example is assigned to.
virtual const KKStr & VarName() const
XmlStream(const KKStr &_fileName, RunLog &_log)
virtual const KKStr & SectionName() const
AttributeTypeVector CreateAttributeTypeTable() const
Normalization Parameters; calculation and implementation.
void WriteXML(std::ostream &o)
static void WriteXML(const FileDesc &fileDesc, const KKStr &varName, std::ostream &o)
KKStr & operator=(const KKStr &src)
Used for logging messages.
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
double Sigma(kkint32 i, RunLog &log)
virtual TokenTypes TokenType()=0
FileDescPtr Value() const
void WriteToFile(const KKStr &_fileName, bool &_successfull, RunLog &_log) const
virtual XmlTokenPtr GetNextToken(VolConstBool &cancelFlag, RunLog &log)
Represents a Feature Vector of a single example, labeled or unlabeled.
Abstract Base class for Machine Learning parameters.
float * FeatureDataAlter()
Same as 'FeatureData()' except you can modify the data.
#define XmlFactoryMacro(NameOfClass)
NormalizationParms(TrainingConfiguration2Ptr _config, FeatureVectorList &_examples, RunLog &_log)
XmlElementFileDesc * XmlElementFileDescPtr
volatile const bool VolConstBool