KSquare Utilities
ModelParam.cpp
Go to the documentation of this file.
1 #include "FirstIncludes.h"
2 #include <stdio.h>
3 #include <fstream>
4 #include <string>
5 #include <iostream>
6 #include <vector>
7 #include "MemoryDebug.h"
8 using namespace std;
9 
10 #include "GlobalGoalKeeper.h"
11 #include "KKBaseTypes.h"
12 #include "OSservices.h"
13 #include "RunLog.h"
14 using namespace KKB;
15 
16 
17 #include "ModelParam.h"
18 #include "ModelParamKnn.h"
19 #include "ModelParamOldSVM.h"
20 #include "ModelParamSvmBase.h"
22 #include "ModelParamDual.h"
23 #include "KKMLLTypes.h"
24 #include "Model.h"
25 //#include "FileDesc.h"
26 #include "MLClass.h"
27 using namespace KKMLL;
28 
29 
30 
32 
33  encodingMethod (EncodingMethodType::NoEncoding),
34  examplesPerClass (int32_max),
35  fileName (),
36  normalizeNominalFeatures (false),
37  selectedFeatures (NULL),
38  validParam (true),
39  // SVM related parameters
40  cost (0.0),
41  gamma (0.0),
42  prob (0.0f)
43 {
44  /*
45  if (!fileDesc)
46  {
47  log.Level (-1) << endl
48  << "ModelParam::ModelParam *** ERROR ***" << endl
49  << " fileDesc == NULL" << endl
50  << endl;
51  osWaitForEnter ();
52  exit (-1);
53  }
54  */
55 }
56 
57 
58 
59 
60 
61 ModelParam::ModelParam (const ModelParam& _param):
62 
63  encodingMethod (_param.encodingMethod),
64  examplesPerClass (_param.examplesPerClass),
65  fileName (_param.fileName),
66  normalizeNominalFeatures (_param.normalizeNominalFeatures),
67  selectedFeatures (NULL),
68  validParam (_param.validParam),
69  // SVM related parameters
70  cost (_param.cost),
71  gamma (_param.gamma),
72  prob (_param.prob)
73 {
74  if (_param.selectedFeatures)
75  selectedFeatures = new FeatureNumList (*(_param.selectedFeatures));
76 }
77 
78 
79 
81 {
82  delete selectedFeatures;
83  selectedFeatures = NULL;
84 }
85 
86 
88 {
89  kkint32 memoryConsumedEstimated = sizeof (ModelParam)
90  + fileName.MemoryConsumedEstimated ();
91 
92  if (selectedFeatures)
93  memoryConsumedEstimated += selectedFeatures->MemoryConsumedEstimated ();
94  return memoryConsumedEstimated;
95 }
96 
97 
98 
100 {
101  if (selectedFeatures)
102  return (float)selectedFeatures->NumOfFeatures ();
103  else
104  return 0;
105 }
106 
107 
108 
110 {
111  delete selectedFeatures;
112  selectedFeatures = new FeatureNumList (_selectedFeatures);
113 }
114 
115 
116 
117 
118 
119 
120 
121 
123 {
124  if (_modelParamType == ModelParamTypes::Null)
125  return "NULL";
126 
127  else if (_modelParamType == ModelParamTypes::Dual)
128  return "ModelParamDual";
129 
130  else if (_modelParamType == ModelParamTypes::KNN)
131  return "ModelParamKnn";
132 
133  else if (_modelParamType == ModelParamTypes::OldSVM)
134  return "ModelParamOldSVM";
135 
136  else if (_modelParamType == ModelParamTypes::SvmBase)
137  return "ModelParamSvmBase";
138 
139  else if (_modelParamType == ModelParamTypes::UsfCasCor)
140  return "UsfCasCor";
141 
142  else
143  return "NULL";
144 }
145 
146 
147 
149 {
150  if (_modelParamTypeStr.EqualIgnoreCase ("ModelParamDual"))
151  return ModelParamTypes::Dual;
152 
153  else if (_modelParamTypeStr.EqualIgnoreCase ("ModelParamOldSVM"))
155 
156  else if (_modelParamTypeStr.EqualIgnoreCase ("ModelParamSvmBase"))
158 
159  else if (_modelParamTypeStr.EqualIgnoreCase ("ModelParamKnn"))
160  return ModelParamTypes::KNN;
161 
162  else if (_modelParamTypeStr.EqualIgnoreCase ("UsfCasCor"))
164 
165  else
166  return ModelParamTypes::Null;
167 }
168 
169 
170 
171 
172 float ModelParam::A_Param () const
173 {
174  return prob;
175 }
176 
177 double ModelParam::C_Param () const
178 {
179  return cost;
180 }
181 
182 double ModelParam::Cost () const
183 {
184  return cost;
185 }
186 
187 double ModelParam::Gamma () const
188 {
189  return gamma;
190 }
191 
192 float ModelParam::Prob () const
193 {
194  return prob;
195 }
196 
197 
198 
199 
200 
201 void ModelParam::A_Param (float _prob)
202 {
203  prob = _prob;
204 }
205 
206 
207 void ModelParam::C_Param (double _cost)
208 {
209  cost = _cost;
210 }
211 
212 
213 void ModelParam::Cost (double _cost)
214 {
215  cost = _cost;
216 }
217 
218 
219 void ModelParam::Gamma (double _gamma)
220 {
221  gamma = _gamma;
222 }
223 
224 
225 void ModelParam::Prob (float _prob)
226 {
227  prob = _prob;
228 }
229 
230 
231 
232 
233 
234 void ModelParam::ParseCmdLine (KKStr _cmdLineStr,
235  bool& _validFormat,
236  RunLog& _log
237  )
238 {
239  _validFormat = true;
240 
241  //DecodeParamStr (_cmdLineStr, param);
242 
243  KKStr field (_cmdLineStr.ExtractToken (" \t\n\r"));
244  KKStr value;
245 
246  double valueNum;
247 
248  while (!field.Empty () && _validFormat)
249  {
250  if (field.FirstChar () != '-')
251  {
252  _log.Level (-1) << "ModelParam::ParseCmdLine *** Invalid Parameter["
253  << field << "] ***"
254  << endl;
255  _validFormat = false;
256  break;
257  }
258 
259  // See if next field is a Switch field or a parameter.
260  _cmdLineStr.TrimLeft (" \t\n\r");
261  value == "";
262  if (_cmdLineStr.Len () > 0)
263  {
264  if (_cmdLineStr.FirstChar () != '-')
265  value = _cmdLineStr.ExtractToken (" \t\n\r");
266  }
267 
268  valueNum = atof (value.Str ());
269 
270  field.Upper ();
271  KKStr valueUpper (value);
272 
273  valueUpper.Upper ();
274 
275  if ((field == "-FS") || (field == "-FEATURESSELECTED") || (field == "-FEATURESSEL") || (field == "FEATURESEL"))
276  {
277  delete selectedFeatures;
278  bool valid = true;
279  selectedFeatures = new FeatureNumList (value, valid);
280  if (!selectedFeatures)
281  _validFormat= false;
282  }
283 
284  else if (field.EqualIgnoreCase ("-C") || field.EqualIgnoreCase ("-Cost"))
285  {
286  Cost (valueNum);
287  }
288 
289  else if ((field == "-ENCODE"))
290  {
291  encodingMethod = EncodingMethodFromStr (valueUpper);
292  }
293 
294  else if ((field.EqualIgnoreCase ("-EPC")) ||
295  (field.EqualIgnoreCase ("-ExamplesPerClass"))
296  )
297  {
298  examplesPerClass = value.ToInt ();
299  if (examplesPerClass < 1)
300  {
301  _log.Level (-1)
302  << endl << endl
303  << "ModelParam::ParseCmdLine ***ERROR*** Invalid '-ExamplsPerClass' parameter specified[" << value << "]" << endl
304  << endl;
305  _validFormat = false;
306  examplesPerClass = int32_max;
307  break;
308  }
309  }
310 
311  else if (field.EqualIgnoreCase ("-G") || field.EqualIgnoreCase ("-Gamma"))
312  {
313  Gamma (valueNum);
314  }
315 
316  else if ((field.EqualIgnoreCase ("-NormNominal")) ||
317  (field.EqualIgnoreCase ("-NormalizeNominal")) ||
318  (field.EqualIgnoreCase ("-NormalizeNominalFeatures")) ||
319  (field.EqualIgnoreCase ("-NN"))
320  )
321  {
322  if (value.Empty ())
323  normalizeNominalFeatures = true;
324  else
325  normalizeNominalFeatures = value.ExtractTokenBool ("\t");
326  }
327 
328  else
329  {
330  bool parameterUsed = false;
331  ParseCmdLineParameter (field, value, parameterUsed, _log);
332  if (!parameterUsed)
333  {
334  _log.Level (-1) << "ModelParam::ParseCmdLine - Invalid Parameter["
335  << field << "] Value[" << value << "]."
336  << endl;
337  _validFormat = false;
338  break;
339  }
340  }
341 
342  field = _cmdLineStr.ExtractToken (" \t\n\r");
343  }
344 
345  ParseCmdLinePost (_log);
346 
347  validParam = _validFormat;
348 } /* ParseCmdLine */
349 
350 
351 
352 /**
353  * @brief Called after 'ParseCmdLine' is completed. Classes derived from 'ModelParam' can implement this
354  * method to do any processing that they want after the entire command line has been processed.
355  * @details An example use of this is in 'ModelParamSvmBase' where the local 'ParseCmdLineParameter'
356  * routine processes parameters that 'Model' needs to be aware of.
357  */
359 {
360 }
361 
362 
363 
364 /**
365  * @brief Convert all parameters to a command line string.
366 */
368 {
369  KKStr cmdStr (300);
370 
371  if (selectedFeatures)
372  cmdStr << "-SF " + selectedFeatures->ToCommaDelStr ();
373 
374  if (examplesPerClass < int32_max)
375  cmdStr << " -EPC " << examplesPerClass;
376 
377  if (encodingMethod != EncodingMethodType::NoEncoding)
378  cmdStr << " -Encode " + EncodingMethodToStr (encodingMethod);
379 
380  return cmdStr;
381 } /* ToCmdLineStr */
382 
383 
384 
385 
386 
388  RunLog& log
389  ) const
390 {
391  kkint32 z;
392  kkint32 numFeaturesAfterEncoding = 0;
393 
394  if (!selectedFeatures)
395  selectedFeatures = new FeatureNumList (fileDesc);
396 
397  kkint32 numOfFeaturesSelected = selectedFeatures->NumOfFeatures ();
398 
399  switch (EncodingMethod ())
400  {
402  for (z = 0; z < numOfFeaturesSelected; z++)
403  {
404  kkint32 fieldNum = (*selectedFeatures)[z];
405  if ((fileDesc->Type (fieldNum) == AttributeType::Nominal) ||
406  (fileDesc->Type (fieldNum) == AttributeType::Symbolic)
407  )
408  numFeaturesAfterEncoding += fileDesc->Cardinality (fieldNum);
409  else
410  numFeaturesAfterEncoding ++;
411  }
412  break;
413 
416  default:
417  //numFeaturesAfterEncoding = fileDesc->NumOfFields ( );
418  numFeaturesAfterEncoding = selectedFeatures->NumOfFeatures ();
419  break;
420  }
421 
422  return numFeaturesAfterEncoding;
423 } // NumOfFeaturesAfterEncoding
424 
425 
426 
427 
429 {
430  if (encodingMethod == EncodingMethodType::Binary)
431  return "Binary";
432 
433  else if (encodingMethod == EncodingMethodType::Scaled)
434  return "Scale";
435 
436  else
437  return "None";
438 } /* EncodingMethodToStr */
439 
440 
441 
442 
443 
445 {
446  KKStr encodingMethodUpper = encodingMethodStr.ToUpper ();
447 
448  if ((encodingMethodUpper == "BINARY") || (encodingMethodUpper == "BIN"))
450 
451  if (encodingMethodUpper == "SCALE")
453 
454  if (encodingMethodUpper == "NONE")
456 
458 } /* EncodingMethodFromStr */
459 
460 
461 
462 
463 
464 void ModelParam::WriteXMLFields (ostream& o) const
465 
466 {
467  EncodingMethodToStr (encodingMethod).WriteXML ("EncodingMethod", o);
468 
469  XmlElementInt32::WriteXML (examplesPerClass, "ExamplesPerClass", o);
470 
471  fileName.WriteXML ("FileName", o);
472 
473  XmlElementBool::WriteXML (normalizeNominalFeatures, "NormalizeNominalFeatures", o);
474 
475  if (selectedFeatures)
476  selectedFeatures->WriteXML ("SelectedFeatures", o);
477 
478  XmlElementDouble::WriteXML (cost, "Cost", o);
479  XmlElementDouble::WriteXML (gamma, "Gamma", o);
480  XmlElementDouble::WriteXML (prob, "Prob", o);
481  XmlElementBool::WriteXML (validParam, "ValidParam", o);
482 } /* WriteXML */
483 
484 
485 
486 
488 {
489  const KKStr& varName = t->VarName ();
491  {
492  XmlElementPtr e = dynamic_cast<XmlElementPtr> (t);
493 
494  bool tokenFound = true;
495 
496  if (varName.EqualIgnoreCase ("EncodingMethod"))
497  {
498  encodingMethod = EncodingMethodFromStr (e->ToKKStr ());
499  }
500 
501  else if (varName.EqualIgnoreCase ("ExamplesPerClass"))
502  {
503  examplesPerClass = e->ToInt32 ();
504  }
505 
506  else if (varName.EqualIgnoreCase ("FileName"))
507  {
508  fileName = e->ToKKStr ();
509  }
510 
511  else if (varName.EqualIgnoreCase ("NormalizeNominalFeatures"))
512  {
513  normalizeNominalFeatures = e->ToBool ();;
514  }
515 
516  else if ((varName.EqualIgnoreCase ("SelectedFeatures")) && (typeid (*e) == typeid (XmlElementFeatureNumList)))
517  {
518  selectedFeatures = dynamic_cast<XmlElementFeatureNumListPtr> (e)->TakeOwnership ();
519  }
520 
521  else if (varName.EqualIgnoreCase ("Cost"))
522  {
523  cost = e->ToDouble ();
524  }
525 
526  else if (varName.EqualIgnoreCase ("Gamma"))
527  {
528  gamma = e->ToDouble ();
529  }
530 
531  else if (varName.EqualIgnoreCase ("Prob"))
532  {
533  prob = (float)(e->ToDouble ());
534  }
535 
536  else if (varName.EqualIgnoreCase ("ValidParam"))
537  {
538  validParam = e->ToBool ();
539  }
540  else
541  {
542  tokenFound = false;
543  }
544 
545  if (tokenFound)
546  {
547  delete t;
548  t = NULL;
549  }
550  }
551 
552  return t;
553 } /* ReadXMLModelParamToken */
KKStr(kkint32 size)
Creates a KKStr object that pre-allocates space for 'size' characters.
Definition: KKStr.cpp:655
virtual double Gamma() const
Definition: ModelParam.cpp:187
bool EqualIgnoreCase(const char *s2) const
Definition: KKStr.cpp:1257
virtual KKStr ToCmdLineStr() const
Creates a Command Line String that represents these parameters.
Definition: ModelParam.cpp:367
void WriteXMLFields(std::ostream &o) const
Definition: ModelParam.cpp:464
kkint32 MemoryConsumedEstimated() const
Definition: KKStr.cpp:766
__int32 kkint32
Definition: KKBaseTypes.h:88
virtual void Cost(double _cost)
Definition: ModelParam.cpp:213
virtual KKStr ToKKStr() const
Definition: XmlStream.h:314
Keeps track of selected features.
virtual double C_Param() const
Definition: ModelParam.cpp:177
FeatureNumList(FileDescPtr _fileDesc)
KKMLL::AttributeType Type(kkint32 fieldNum) const
Definition: FileDesc.cpp:370
KKStr ExtractToken(const char *delStr="\n\t\r ")
Definition: KKStr.cpp:2969
kkint32 ToInt() const
Definition: KKStr.cpp:3565
FeatureNumList const FeatureNumListConst
bool operator==(const char *rtStr) const
Definition: KKStr.cpp:1588
char FirstChar() const
Definition: KKStr.cpp:1970
KKStr ToUpper() const
Definition: KKStr.cpp:2517
virtual void ParseCmdLinePost(RunLog &log)
Called after 'ParseCmdLine' is completed. Classes derived from 'ModelParam' can implement this method...
Definition: ModelParam.cpp:358
virtual EncodingMethodType EncodingMethod() const
Definition: ModelParam.h:111
static void WriteXML(const bool b, const KKStr &varName, std::ostream &o)
Definition: XmlStream.cpp:1035
XmlToken * XmlTokenPtr
Definition: XmlStream.h:18
FeatureNumList(const FeatureNumList &featureNumList)
Copy constructor.
virtual void Prob(float _prob)
Definition: ModelParam.cpp:225
KKStr & operator=(KKStr &&src)
Definition: KKStr.cpp:1369
virtual kkint32 ToInt32() const
Definition: XmlStream.h:317
kkuint32 Len() const
Returns the number of characters in the string.
Definition: KKStr.h:366
KKTHread * KKTHreadPtr
static KKStr ModelParamTypeToStr(ModelParamTypes _modelParamType)
Definition: ModelParam.cpp:122
static ModelParamTypes ModelParamTypeFromStr(const KKStr &_modelParamTypeStr)
Definition: ModelParam.cpp:148
virtual bool ToBool() const
Definition: XmlStream.h:313
kkuint16 operator[](kkint32 idx) const
Returns back the selected feature.
KKStr operator+(const char *left, const KKStr &right)
Definition: KKStr.cpp:3976
bool ExtractTokenBool(const char *delStr)
Extract the next token from the string assuming that it is a logical True/False value.
Definition: KKStr.cpp:3165
virtual void Gamma(double _gamma)
Definition: ModelParam.cpp:219
KKStr(const KKStr &str)
Copy Constructor.
Definition: KKStr.cpp:561
XmlElement * XmlElementPtr
Definition: XmlStream.h:21
virtual kkint32 MemoryConsumedEstimated() const
Definition: ModelParam.cpp:87
kkint32 NumOfFeatures() const
static EncodingMethodType EncodingMethodFromStr(const KKStr &encodingMethodStr)
Definition: ModelParam.cpp:444
virtual float A_Param() const
Definition: ModelParam.cpp:172
void TrimLeft(const char *whiteSpaceChars="\n\r\t ")
Definition: KKStr.cpp:1745
XmlTokenPtr ReadXMLModelParamToken(XmlTokenPtr t)
Processes any tokens that belong to 'ModelParam' and returns NULL for them; tokens that do not belong are passed back ...
Definition: ModelParam.cpp:487
bool Empty() const
Definition: KKStr.h:241
virtual float AvgMumOfFeatures() const
Definition: ModelParam.cpp:99
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
void Upper()
Converts all characters in string to their Upper case equivalents via 'toupper'.
Definition: KKStr.cpp:2461
AttributeType
Definition: Attribute.h:36
FileDesc * FileDescPtr
virtual void SelectedFeatures(FeatureNumListConst &_selectedFeatures)
Definition: ModelParam.cpp:109
void WriteXML(const KKStr &varName, std::ostream &o) const
Definition: KKStr.cpp:4420
virtual float Prob() const
Definition: ModelParam.cpp:192
FeatureNumList(const KKStr &_featureListStr, bool &_valid)
Constructs a 'FeatureNumList' instance from a string that contains a list of selected features...
ModelParam(const ModelParam &_param)
Definition: ModelParam.cpp:61
virtual kkint32 NumOfFeaturesAfterEncoding(FileDescPtr fileDesc, RunLog &log) const
Definition: ModelParam.cpp:387
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
static KKStr EncodingMethodToStr(EncodingMethodType encodingMethod)
Definition: ModelParam.cpp:428
virtual TokenTypes TokenType()=0
kkint32 Cardinality(kkint32 fieldNum) const
Definition: FileDesc.cpp:341
virtual void A_Param(float _prob)
Definition: ModelParam.cpp:201
virtual void C_Param(double _cost)
Definition: ModelParam.cpp:207
virtual double Cost() const
Definition: ModelParam.cpp:182
#define int32_max
Definition: KKBaseTypes.h:119
Abstract Base class for Machine Learning parameters.
Definition: ModelParam.h:35
virtual ~ModelParam()
Definition: ModelParam.cpp:80
kkint32 MemoryConsumedEstimated() const
virtual const KKStr & VarName() const
Definition: XmlStream.h:269
void WriteXML(const KKStr &varName, std::ostream &o) const
KKStr ToCommaDelStr() const
virtual void ParseCmdLine(KKStr _cmdLineStr, bool &_validFormat, RunLog &_log)
Definition: ModelParam.cpp:234