KSquare Utilities
NormalizationParms.cpp
Go to the documentation of this file.
1 #include "FirstIncludes.h"
2 #include <stdio.h>
3 #include <math.h>
4 #include <string>
5 #include <iostream>
6 #include <fstream>
7 #include <vector>
8 #include "MemoryDebug.h"
9 using namespace std;
10 
11 #include "GlobalGoalKeeper.h"
12 #include "KKBaseTypes.h"
13 #include "OSservices.h"
14 #include "RunLog.h"
15 #include "XmlStream.h"
16 using namespace KKB;
17 
19 #include "FeatureNumList.h"
20 #include "FeatureVector.h"
21 #include "MLClass.h"
22 #include "ModelParam.h"
24 using namespace KKMLL;
25 
26 
28  fileDesc (NULL),
29  fileName (),
30  mean (NULL),
31  normalizeFeature (NULL),
32  normalizeNominalFeatures (false),
33  numOfFeatures (0),
34  numOfExamples (0),
35  sigma (NULL)
36 {
37 } /* NormalizationParms */
38 
39 
40 
41 NormalizationParms::NormalizationParms (bool _normalizeNominalFeatures,
42  FeatureVectorList& _examples,
43  RunLog& _log
44  ):
45 
46  fileDesc (NULL),
47  fileName (),
48  mean (NULL),
49  normalizeFeature (NULL),
50  normalizeNominalFeatures (_normalizeNominalFeatures),
51  numOfFeatures (0),
52  numOfExamples (0),
53  sigma (NULL)
54 
55 {
56  _log.Level (20) << "FeatureNormalization - Creating instance from[" << _examples.FileName () << "]." << endl;
57 
58  fileDesc = _examples.FileDesc ();
59  attriuteTypes = fileDesc->CreateAttributeTypeTable ();
60  numOfFeatures = _examples.NumOfFeatures ();
61  DeriveNormalizationParameters (_examples);
62 } /* NormalizationParms */
63 
64 
65 
67  FeatureVectorList& _examples,
68  RunLog& _log
69  ):
70  fileDesc (NULL),
71  fileName (),
72  mean (NULL),
73  normalizeFeature (NULL),
74  normalizeNominalFeatures (false),
75  numOfFeatures (0),
76  numOfExamples (0),
77  sigma (NULL)
78 {
79  _log.Level (20) << "FeatureNormalization - Creating instance from[" << _examples.FileName () << "]." << endl;
80 
81  fileDesc = _examples.FileDesc ();
82  numOfFeatures = _examples.NumOfFeatures ();
83  attriuteTypes = fileDesc->CreateAttributeTypeTable ();
84  normalizeNominalFeatures = _param.NormalizeNominalFeatures ();
85 
86  DeriveNormalizationParameters (_examples);
87 }
88 
89 
90 
91 NormalizationParms::NormalizationParms (TrainingConfiguration2Ptr _config,
92  FeatureVectorList& _examples,
93  RunLog& _log
94  ):
95 
96  fileDesc (NULL),
97  fileName (),
98  mean (NULL),
99  normalizeFeature (NULL),
100  normalizeNominalFeatures (false),
101  numOfFeatures (0),
102  numOfExamples (0),
103  sigma (NULL)
104 
105 {
106  _log.Level (20) << "FeatureNormalization - Creating instance from[" << _examples.FileName () << "]." << endl;
107 
108  fileDesc = _config->FileDesc ();
109  attriuteTypes = fileDesc->CreateAttributeTypeTable ();
110  normalizeNominalFeatures = _config->NormalizeNominalFeatures ();
111 
112  numOfFeatures = _examples.NumOfFeatures ();
113 
114  DeriveNormalizationParameters (_examples);
115 } /* NormalizationParms */
116 
117 
118 
119 
121 {
122  delete [] mean; mean = NULL;
123  delete [] sigma; sigma = NULL;
124  delete [] normalizeFeature; normalizeFeature = NULL;
125 }
126 
127 
129 {
130  kkint32 memoryConsumedEstimated = sizeof (NormalizationParms)
131  + attriuteTypes.size () * sizeof (AttributeType)
132  + fileName.MemoryConsumedEstimated ()
133  + numOfFeatures * (sizeof (bool) + sizeof (double) + sizeof (double)); // mean + sigma
134 
135  return memoryConsumedEstimated;
136 }
137 
138 
139 
140 void NormalizationParms::DeriveNormalizationParameters (FeatureVectorList& _examples)
141 {
142  numOfExamples = 0;
143  kkint32 numOfNoise = 0;
144 
145  mean = new double[numOfFeatures];
146  sigma = new double[numOfFeatures];
147 
148  double* total = new double [numOfFeatures];
149  double* sigmaTot = new double [numOfFeatures];
150 
151  kkint32 i;
152 
153  for (i = 0; i < numOfFeatures; i++)
154  {
155  mean[i] = 0.0;
156  sigma[i] = 0.0;
157  total[i] = 0.0;
158  sigmaTot[i] = 0.0;
159  }
160 
161  double featureValue;
162 
163  FeatureVectorPtr image;
164 
165  FeatureVectorList::iterator imageIDX;
166 
167  for (imageIDX = _examples.begin (); imageIDX != _examples.end (); imageIDX++)
168  {
169  image = *imageIDX;
170  if ((image->MLClass ()->UnDefined ()) ||
171  (image->MissingData ()) ||
172  (!image->FeatureDataValid ())
173  )
174  {
175  // We have a noise image and do not want this as partof our Normalization
176  // procedure.
177  numOfNoise++;
178  }
179  else
180  {
181  // Since this image is defined then we can use it in our normalization calculations.
182  for (i = 0; i < numOfFeatures; i++)
183  {
184  featureValue = double (image->FeatureData (i));
185  total[i] += featureValue;
186  }
187 
188  numOfExamples++;
189  }
190  }
191 
192  for (i = 0; i < numOfFeatures; i++)
193  {
194  double meanDouble = total[i] / double (numOfExamples);
195  mean[i] = meanDouble;
196  }
197 
198 
199  for (imageIDX = _examples.begin (); imageIDX != _examples.end (); imageIDX++)
200  {
201  image = *imageIDX;
202  if ((image->MLClass ()->UnDefined ()) ||
203  (image->MissingData ()) ||
204  (!image->FeatureDataValid ())
205  )
206  {
207  // We have a noise image and do not want this as part of our Normalization
208  // procedure.
209  }
210  else
211  {
212  // Since this image is defined then we can use it in our normalization calculations.
213  for (i = 0; i < numOfFeatures; i++)
214  {
215  featureValue = double (image->FeatureData (i));
216  double delta = featureValue - mean[i];
217  sigmaTot[i] += delta * delta;
218  }
219  }
220  }
221 
222  for (i = 0; i < numOfFeatures; i++)
223  {
224  sigma[i] = sqrt (sigmaTot[i] / numOfExamples);
225  }
226 
227  delete[] sigmaTot;
228  delete[] total;
229 
230  ConstructNormalizeFeatureVector ();
231 
232 } /* DeriveNormalizationParameters */
233 
234 
235 
236 void NormalizationParms::WriteToFile (const KKStr& _fileName, bool& _successfull, RunLog& _log) const
237 {
238  _log.Level (20) << "NormalizationParms::WriteToFile FileName[" << _fileName << "]." << endl;
239  fileName = _fileName;
240  _successfull = true;
241  ofstream outFile (fileName.Str ());
242  if (!outFile.is_open ())
243  {
244  _log.Level (-1) << endl << "NormalizationParms::WriteToFile ***EROR*** writing to file["<< _fileName << "]." << endl << endl;
245  _successfull = false;
246  return;
247  }
248  WriteXML ("NormalizationParms", outFile);
249  outFile.close ();
250  return;
251 } /* Save */
252 
253 
254 
255 NormalizationParmsPtr NormalizationParms::ReadFromFile (const KKStr& fileName, RunLog& log)
256 {
257  NormalizationParmsPtr n = NULL;
258  XmlStreamPtr stream = new XmlStream (fileName, log);
259  bool cancelFlag = false;
260  XmlTokenPtr t = stream->GetNextToken (cancelFlag, log);
261  while (t && (!n))
262  {
263  if (typeid (*t) != typeid (XmlElementNormalizationParms))
264  n = dynamic_cast<XmlElementNormalizationParmsPtr> (t)->Value ();
265  delete t;
266  t = stream->GetNextToken (cancelFlag, log);
267  }
268  delete t; t = NULL;
269  delete stream; stream = NULL;
270  return n;
271 }
272 
273 
274 
275 
276 
277 void NormalizationParms::WriteXML (const KKStr& varName,
278  ostream& o
279  ) const
280 {
281  XmlTag startTag ("NormalizationParms", XmlTag::TagTypes::tagStart);
282  if (!varName.Empty ())
283  startTag.AddAtribute ("VarName", varName);
284  startTag.WriteXML (o);
285  o << endl;
286 
287  XmlElementInt32::WriteXML (numOfFeatures, "NumOfFeatures", o);
288  XmlElementFloat::WriteXML (numOfExamples, "NumOfExamples", o);
289  XmlElementBool::WriteXML (normalizeNominalFeatures, "NormalizeNominalFeatures", o);
290 
291  if (fileDesc) XmlElementFileDesc::WriteXML (*fileDesc, "FileDesc", o);
292  if (mean) XmlElementArrayDouble::WriteXML (numOfFeatures, mean, "Mean", o);
293  if (sigma) XmlElementArrayDouble::WriteXML (numOfFeatures, sigma, "sigma", o);
294 
295  XmlTag endTag ("NormalizationParms", XmlTag::TagTypes::tagEnd);
296  endTag.WriteXML (o);
297  o << endl;
298 }
299 
300 
301 
303  XmlTagPtr tag,
304  VolConstBool& cancelFlag,
305  RunLog& log
306  )
307 {
308  XmlTokenPtr t = s.GetNextToken (cancelFlag, log);
309  while (t && (!cancelFlag))
310  {
312  {
313  XmlElementPtr e = dynamic_cast<XmlElementPtr> (t);
314  const KKStr& className = e->SectionName ();
315  const KKStr& varName = e->VarName ();
316  if (varName.EqualIgnoreCase ("NumOfFeatures"))
317  numOfFeatures = e->ToInt32 ();
318 
319  else if (varName.EqualIgnoreCase ("NumOfExamples"))
320  numOfExamples = e->ToFloat ();
321 
322  else if (varName.EqualIgnoreCase ("NormalizeNominalFeatures"))
323  normalizeNominalFeatures = e->ToBool ();
324 
325  else if (varName.EqualIgnoreCase ("FileDesc"))
326  {
327  XmlElementFileDescPtr fd = dynamic_cast<XmlElementFileDescPtr>(e);
328  fileDesc = fd->Value ();
329  }
330 
331  else if (varName.EqualIgnoreCase ("Mean"))
332  {
333  XmlElementArrayDoublePtr m = dynamic_cast<XmlElementArrayDoublePtr>(e);
334  if (m->Count () == numOfFeatures)
335  {
336  delete mean;
337  mean = m->TakeOwnership ();
338  }
339  else
340  {
341  log.Level (-1) << endl
342  << "XmlElementNormalizationParms ***ERROR*** mean->Count[" << m->Count () << "] does not agree with NumOfFeatures[" << numOfFeatures << "]." <<endl
343  << endl;
344  }
345  }
346 
347  else if (varName.EqualIgnoreCase ("Sigma"))
348  {
349  XmlElementArrayDoublePtr s = dynamic_cast<XmlElementArrayDoublePtr>(e);
350  if (s->Count () == numOfFeatures)
351  {
352  delete sigma;
353  sigma = s->TakeOwnership ();
354  }
355  else
356  {
357  log.Level (-1) << endl
358  << "XmlElementNormalizationParms ***ERROR*** sigma->Count[" << s->Count () << "] does not agree with NumOfFeatures[" << numOfFeatures << "]." <<endl
359  << endl;
360  }
361  }
362  }
363 
364  delete t;
365  t = s.GetNextToken (cancelFlag, log);
366  }
367  delete t;
368  t = NULL;
369 
370  if (fileDesc)
371  {
372  attriuteTypes = fileDesc->CreateAttributeTypeTable ();
373  ConstructNormalizeFeatureVector ();
374  }
375 } /* ReadXML */
376 
377 
379  RunLog& log
380  )
381 {
382  if ((i < 0) || (i > numOfFeatures))
383  {
384  log.Level (-1) << "NormalizationParms::Mean ***ERROR*** Feature Number[" << i << "] out of bounds." << endl;
385  return -99999.99;
386  }
387  else
388  {
389  return mean[i];
390  }
391 } /* Mean */
392 
393 
395  RunLog& log
396  )
397 {
398  if ((i < 0) || (i > numOfFeatures))
399  {
400  log.Level (-1) << "NormalizationParms::Mean ***ERROR*** Feature Number[" << i << "] out of bounds." << endl;
401  return (float)-99999.99;
402  }
403  else
404  {
405  return sigma[i];
406  }
407 } /* Sigma */
408 
409 
410 void NormalizationParms::ConstructNormalizeFeatureVector ()
411 {
412  delete normalizeFeature;
413  normalizeFeature = new bool [numOfFeatures];
414 
415  kkint32 i;
416  for (i = 0; i < numOfFeatures; i++)
417  {
418  if (normalizeNominalFeatures)
419  {
420  normalizeFeature[i] = true;
421  }
422  else
423  {
424  if ((attriuteTypes[i] == AttributeType::Nominal) ||
425  (attriuteTypes[i] == AttributeType::Symbolic)
426  )
427  {
428  normalizeFeature[i] = false;
429  }
430  else
431  {
432  normalizeFeature[i] = true;
433  }
434  }
435  }
436 } /* ConstructNormalizeFeatureVector */
437 
438 
439 void NormalizationParms::NormalizeAExample (FeatureVectorPtr example)
440 {
441  float* featureData = example->FeatureDataAlter ();
442 
443  for (kkint32 i = 0; i < numOfFeatures; i++)
444  {
445  if (normalizeFeature[i])
446  {
447  double normValue = 0.0;
448  if (sigma[i] != 0.0)
449  normValue = ((double)featureData[i] - mean[i]) / sigma[i];
450  featureData[i] = (float)normValue;
451  }
452  }
453 } /* NormalizeAExample */
454 
455 
456 
457 void NormalizationParms::NormalizeExamples (FeatureVectorListPtr examples,
458  RunLog& log
459  )
460 {
461  if (numOfFeatures != examples->NumOfFeatures ())
462  {
463  log.Level (-1) << "NormalizationParms::NoralizeImage **** ERROR **** Mismatched Feature Count." << endl
464  << " NormalizationParms [" << numOfFeatures << "]" << endl
465  << " ImageFeatiresList [" << examples->NumOfFeatures () << "]." << endl
466  << endl;
467 
469  exit (-1);
470  return;
471  }
472 
473  FeatureVectorList::iterator idx;
474 
475  for (idx = examples->begin (); idx != examples->end (); ++idx)
476  NormalizeAExample (*idx);
477 
478  return;
479 } /* NoralizeImage */
480 
481 
482 
483 FeatureVectorPtr NormalizationParms::ToNormalized (FeatureVectorPtr example) const
484 {
485  FeatureVectorPtr result = new FeatureVector (*example);
486  float* featureData = result->FeatureDataAlter ();
487  for (kkint32 i = 0; i < numOfFeatures; ++i)
488  {
489  if (normalizeFeature[i])
490  {
491  double normValue = 0.0;
492  if (sigma[i] != 0.0)
493  normValue = ((double)featureData[i] - mean[i]) / sigma[i];
494  featureData[i] = (float)normValue;
495  }
496  }
497 
498  return result;
499 } /* ToNormalized */
500 
501 
502 
503 XmlFactoryMacro(NormalizationParms)
XmlTag(const KKStr &_name, TagTypes _tagType)
Definition: XmlStream.cpp:586
kkint32 MemoryConsumedEstimated() const
void NormalizeExamples(FeatureVectorListPtr examples, RunLog &log)
bool EqualIgnoreCase(const char *s2) const
Definition: KKStr.cpp:1257
NormalizationParms * NormalizationParmsPtr
__int32 kkint32
Definition: KKBaseTypes.h:88
void ReadXML(XmlStream &s, XmlTagPtr tag, VolConstBool &cancelFlag, RunLog &log)
float FeatureData(kkint32 featureNum) const
bool MissingData() const
True indicates that one or more features were missing.
void osWaitForEnter()
void NormalizeAExample(FeatureVectorPtr example)
virtual float ToFloat() const
Definition: XmlStream.h:316
const FileDescPtr FileDesc() const
virtual bool NormalizeNominalFeatures() const
Definition: ModelParam.h:115
bool UnDefined() const
Definition: MLClass.h:183
double Mean(kkint32 i, RunLog &log)
static void WriteXML(const bool b, const KKStr &varName, std::ostream &o)
Definition: XmlStream.cpp:1035
NormalizationParms(bool _normalizeNominalFeatures, FeatureVectorList &_examples, RunLog &_log)
XmlToken * XmlTokenPtr
Definition: XmlStream.h:18
FeatureVector(const FeatureVector &_example)
Container class for FeatureVector derived objects.
virtual kkint32 ToInt32() const
Definition: XmlStream.h:317
FeatureVectorPtr ToNormalized(FeatureVectorPtr example) const
KKTHread * KKTHreadPtr
virtual bool ToBool() const
Definition: XmlStream.h:313
NormalizationParms(const ModelParam &_param, FeatureVectorList &_examples, RunLog &_log)
XmlElement * XmlElementPtr
Definition: XmlStream.h:21
void AddAtribute(const KKStr &attributeName, const KKStr &attributeValue)
Definition: XmlStream.cpp:602
bool Empty() const
Definition: KKStr.h:241
kkint32 NumOfFeatures() const
Manages the reading and writing of objects in a simple XML format. For a class to be supported by Xml...
Definition: XmlStream.h:46
void WriteXML(const KKStr &varName, std::ostream &o) const
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
static NormalizationParmsPtr ReadFromFile(const KKStr &fileName, RunLog &log)
MLClassPtr MLClass() const
Class that this example is assigned to.
virtual const KKStr & VarName() const
Definition: XmlStream.cpp:794
XmlStream(const KKStr &_fileName, RunLog &_log)
Definition: XmlStream.cpp:41
virtual const KKStr & SectionName() const
Definition: XmlStream.cpp:785
AttributeTypeVector CreateAttributeTypeTable() const
Definition: FileDesc.cpp:419
Normalization Parameters; calculation and implementation.
void WriteXML(std::ostream &o)
Definition: XmlStream.cpp:723
static void WriteXML(const FileDesc &fileDesc, const KKStr &varName, std::ostream &o)
Definition: FileDesc.cpp:962
KKStr & operator=(const KKStr &src)
Definition: KKStr.cpp:1390
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
double Sigma(kkint32 i, RunLog &log)
virtual TokenTypes TokenType()=0
FileDescPtr Value() const
Definition: FileDesc.cpp:947
void WriteToFile(const KKStr &_fileName, bool &_successfull, RunLog &_log) const
virtual XmlTokenPtr GetNextToken(VolConstBool &cancelFlag, RunLog &log)
Definition: XmlStream.cpp:116
Represents a Feature Vector of a single example, labeled or unlabeled.
Definition: FeatureVector.h:59
Abstract Base class for Machine Learning parameters.
Definition: ModelParam.h:35
float * FeatureDataAlter()
Same as 'FeatureData()' except you can modify the data.
#define XmlFactoryMacro(NameOfClass)
Definition: XmlStream.h:688
NormalizationParms(TrainingConfiguration2Ptr _config, FeatureVectorList &_examples, RunLog &_log)
XmlElementFileDesc * XmlElementFileDescPtr
Definition: FileDesc.h:337
volatile const bool VolConstBool
Definition: KKBaseTypes.h:163