KSquare Utilities
ConfusionMatrix2.cpp
Go to the documentation of this file.
1 #include "FirstIncludes.h"
2 #include <stdio.h>
3 #include <iomanip>
4 #include <string>
5 #include <iostream>
6 #include <fstream>
7 #include <map>
8 #include <vector>
9 #include "MemoryDebug.h"
10 #include "KKBaseTypes.h"
11 using namespace std;
12 
13 #include "OSservices.h"
14 #include "RunLog.h"
15 using namespace KKB;
16 
17 
18 #include "ConfusionMatrix2.h"
19 using namespace KKMLL;
20 
21 
22 
23 
// Builds a confusion matrix with caller-supplied bucket geometry and then loads its
// contents from a stream.
//
//  _classes           Class list; copied into the 'classes' member.
//  f                  Input stream handed to Read ().
//  _bucketSize        Stored in 'bucketSize'.
//  _numOfBuckets      Stored in 'numOfBuckets'; width of the per-class "by size" tables.
//  _numOfProbBuckets  Stored in 'numOfProbBuckets'; width of the per-class "by probability" tables.
//  _probBucketSize    Stored in 'probBucketSize'.
//  _log               Logger handed to Read ().
//
// All running totals start at zero.
24 ConfusionMatrix2::ConfusionMatrix2 (const MLClassList& _classes, // Will make its own copy of '_classes'
25  istream& f,
26  kkint32 _bucketSize,
27  kkint32 _numOfBuckets,
28  kkint32 _numOfProbBuckets,
29  kkint32 _probBucketSize,
30  RunLog& _log
31  ):
32  bucketSize (_bucketSize),
33  classCount (0),
34  classes (_classes),
37  correctCount (0.0),
41  numInvalidClassesPredicted (0.0),
42  numOfBuckets (_numOfBuckets),
43  numOfProbBuckets (_numOfProbBuckets),
45  probBucketSize (_probBucketSize),
46  totalCount (0.0),
47  totalPredProb (0.0),
50  totPredProbCM ()
51 {
 // InitializeMemory () sorts 'classes' by name, sets 'classCount' and allocates zero-filled tables;
 // it must run before Read () so the tables exist when the stream is parsed.
52  InitializeMemory ();
53  Read (f, _log);
54 }
55 
56 
57 
// Builds an empty confusion matrix for the given classes using fixed default geometry:
// bucketSize = 100, numOfBuckets = 40, numOfProbBuckets = 20, probBucketSize = 5.
// The class list is copied into the 'classes' member and all running totals start at zero.
58 ConfusionMatrix2::ConfusionMatrix2 (const MLClassList& _classes): // Will make its own copy of list
59  bucketSize (100),
60  classCount (0),
63  correctCount (0.0),
67  classes (_classes),
68  numInvalidClassesPredicted (0.0),
69  numOfBuckets (40),
70  numOfProbBuckets (20),
72  probBucketSize (5),
73  totalCount (0.0),
74  totalPredProb (0.0),
77  totPredProbCM ()
78 {
 // Sorts 'classes' by name, sets 'classCount' and allocates the zero-filled counter tables.
79  InitializeMemory ();
80 }
81 
82 
83 
84 
86  bucketSize (cm.bucketSize),
87  classCount (cm.classCount),
90  correctCount (cm.correctCount),
94  classes (cm.classes),
95  numInvalidClassesPredicted (cm.numInvalidClassesPredicted),
96  numOfBuckets (cm.numOfBuckets),
97  numOfProbBuckets (cm.numOfProbBuckets),
99  probBucketSize (cm.probBucketSize),
100  totalCount (cm.totalCount),
101  totalPredProb (cm.totalPredProb),
104  totPredProbCM ()
105 {
 // Copies every scalar setting and total from 'cm' (initializer list above), then deep-copies
 // the tables below so that this instance owns its own arrays.

 // Per-class totals (one double per class).
106  CopyVector (cm.countsByKnownClass, countsByKnownClass);
107  CopyVector (cm.totalPredProbsByKnownClass, totalPredProbsByKnownClass);
108  CopyVector (cm.totalSizesByKnownClass, totalSizesByKnownClass);
109
 // Known-class x predicted-class tables: each row holds 'classCount' entries.
110  CopyVectorDoublePtr (cm.predictedCountsCM, predictedCountsCM, classCount);
111  CopyVectorDoublePtr (cm.totPredProbCM, totPredProbCM, classCount);
112
 // Per-class histograms: rows are 'numOfBuckets' wide (by size) or 'numOfProbBuckets' wide (by probability).
113  CopyVectorDoublePtr (cm.countByKnownClassBySize, countByKnownClassBySize, numOfBuckets);
114  CopyVectorDoublePtr (cm.correctByKnownClassBySize, correctByKnownClassBySize, numOfBuckets);
115  CopyVectorDoublePtr (cm.countByKnownClassByProb, countByKnownClassByProb, numOfProbBuckets);
116  CopyVectorDoublePtr (cm.correctByKnownClassByProb, correctByKnownClassByProb, numOfProbBuckets);
117 }
118 
119 
120 
121 
123  {
 // NOTE(review): the header line of this body is not visible in this listing; presumably this is
 // the destructor - confirm. It releases the per-class rows of all six tables.
124  DeleteVectorDoublePtr (countByKnownClassBySize);
125  DeleteVectorDoublePtr (correctByKnownClassBySize);
126  DeleteVectorDoublePtr (countByKnownClassByProb);
127  DeleteVectorDoublePtr (correctByKnownClassByProb);
128  DeleteVectorDoublePtr (predictedCountsCM);
129  DeleteVectorDoublePtr (totPredProbCM);
130 }
131 
132 
133 
134 
135 void ConfusionMatrix2::InitializeMemory ()
136 {
137  classes.SortByName ();
138 
139  classCount = classes.QueueSize ();
140 
141 
142  InitializeVector (countsByKnownClass, classCount);
143  InitializeVector (totalSizesByKnownClass, classCount);
144  InitializeVector (totalPredProbsByKnownClass, classCount);
145 
146  InitializeVectorDoublePtr (predictedCountsCM, classCount, classCount);
147  InitializeVectorDoublePtr (totPredProbCM, classCount, classCount);
148 
149  InitializeVectorDoublePtr (countByKnownClassBySize, classCount, numOfBuckets);
150  InitializeVectorDoublePtr (correctByKnownClassBySize, classCount, numOfBuckets);
151 
152  InitializeVectorDoublePtr (countByKnownClassByProb, classCount, numOfProbBuckets);
153  InitializeVectorDoublePtr (correctByKnownClassByProb, classCount, numOfProbBuckets);
154 } /* InitializeMemory */
155 
156 
157 
158 void ConfusionMatrix2::InitializeVector (vector<double>& v,
159  kkint32 x
160  )
161 {
162  v.clear ();
163  for (kkint32 y = 0; y < x; ++y)
164  v.push_back (0.0);
165  }
166 
167 
168 
169 void ConfusionMatrix2::CopyVector (const vector<double>& src,
170  vector<double>& dest
171  )
172 {
173  dest.clear ();
174 
175  vector<double>::const_iterator idx;
176  for (idx = src.begin (); idx != src.end (); ++idx)
177  dest.push_back (*idx);
178 } /* CopyVector */
179 
180 
181 
182 void ConfusionMatrix2::InitializeVectorDoublePtr (vector<double*>& v,
183  kkint32 numClasses,
184  kkint32 numBuckets
185  )
186 {
187  for (kkuint32 x = 0; x < v.size (); ++x)
188  {
189  delete v[x];
190  v[x] = NULL;
191 }
192 
193  v.clear ();
194  while (v.size () < (kkuint32)numClasses)
195  {
196  double* d = new double[numBuckets];
197  v.push_back (d);
198  for (kkint32 y = 0; y < numBuckets; ++y)
199  d[y] = 0.0;
200  }
201 } /* InitializeVectorDoublePtr */
202 
203 
204 
205 void ConfusionMatrix2::IncreaseVectorDoublePtr (vector<double*>& v,
206  int numBucketsOld,
207  int numBucketsNew
208  )
209 {
210  if (numBucketsOld != numBucketsNew)
211  {
212  vector<double*>::iterator idx;
213  for (idx = v.begin (); idx != v.end (); ++idx)
214  {
215  double* oldArray = *idx;
216  double* newArray = new double[numBucketsNew];
217  for (kkint32 x = 0; x < numBucketsOld; ++x)
218  newArray[x]= oldArray[x];
219 
220  for (kkint32 x = numBucketsOld; x < numBucketsNew; ++x)
221  newArray[x] = 0.0;
222 
223  *idx = newArray;
224  delete oldArray;
225  oldArray = NULL;
226  }
227  }
228 
229  double* d = new double[numBucketsNew];
230  v.push_back (d);
231  for (kkint32 x = 0; x < numBucketsNew; ++x)
232  d[x] = 0.0;
233 
234 } /* IncreaseVectorDoublePtr */
235 
236 
237 
238 void ConfusionMatrix2::CopyVectorDoublePtr (const vector<double*>& src,
239  vector<double*>& dest,
240  kkint32 numBuckets
241  )
242 {
243  for (kkuint32 x = 0; x < dest.size (); ++x)
244  {
245  delete dest[x];
246  dest[x] = NULL;
247  }
248 
249  kkint32 classIdx = 0;
250  dest.clear ();
251  while (dest.size () < src.size ())
252  {
253  double* s = src[classIdx];
254  double* d = new double[numBuckets];
255  dest.push_back (d);
256  for (kkint32 y = 0; y < numBuckets; ++y)
257  d[y] = s[y];
258 
259  ++classIdx;
260  }
261 } /* CopyVectorDoublePtr */
262 
263 
264 
265 void ConfusionMatrix2::DeleteVectorDoublePtr (vector<double*>& v)
266  {
267  for (kkuint32 x = 0; x < v.size (); ++x)
268  {
269  delete v[x];
270  v[x] = NULL;
271  }
272 } /* DeleteVectorDoublePtr */
273 
274 
275 
276 
277 
278 kkint32 ConfusionMatrix2::AddClassToConfusionMatrix (MLClassPtr newClass,
279  RunLog& log
280  )
281  {
282  kkint32 existingClassIdx = classes.PtrToIdx (newClass);
283  if (existingClassIdx >= 0)
284  {
285  log.Level (-1) << endl
286  << "ConfusionMatrix2::AddClassToConfusionMatrix ***ERROR*** Class[" << newClass->Name () << "] already in class list." << endl
287  << endl;
288  return existingClassIdx;
289  }
290 
291  classes.PushOnBack (newClass);
292  classCount++;
293 
294  IncreaseVectorDoublePtr (correctByKnownClassByProb, numOfProbBuckets, numOfProbBuckets);
295  IncreaseVectorDoublePtr (countByKnownClassByProb, numOfProbBuckets, numOfProbBuckets);
296 
297  IncreaseVectorDoublePtr (correctByKnownClassBySize, numOfBuckets, numOfBuckets);
298  IncreaseVectorDoublePtr (countByKnownClassBySize, numOfBuckets, numOfBuckets);
299 
300  IncreaseVectorDoublePtr (predictedCountsCM, classCount - 1, classCount);
301  IncreaseVectorDoublePtr (totPredProbCM, classCount - 1, classCount);
302 
303  countsByKnownClass.push_back (0.0);
304  totalPredProbsByKnownClass.push_back (0.0);
305  totalSizesByKnownClass.push_back (0.0);
306 
307  return classes.PtrToIdx (newClass);
308 } /* AddClassToConfusionMatrix */
309 
310 
311 
312 
314  kkint32 predClassIdx
315  ) const
316 {
 // Returns the entry of 'predictedCountsCM' for the given known/predicted class pair.
 // Either index falling outside [0, classCount) yields 0.0 instead of an out-of-range access.
317  if ((knownClassIdx < 0) || (knownClassIdx >= classCount))
318  return 0.0;
319
320  if ((predClassIdx < 0) || (predClassIdx >= classCount))
321  return 0.0;
322
323  return predictedCountsCM [knownClassIdx][predClassIdx];
324 }
325 
326 
327 
329 {
 // Builds one total per predicted class: entry i is the sum of column i of 'predictedCountsCM'
 // over all known classes. The result has 'classCount' entries.
330  kkint32 knownClassIdx, predClassIdx;
331
332  VectorDouble pc;
333  for (predClassIdx = 0; predClassIdx < classCount; predClassIdx++)
334  {
335  double predCount = 0.0;
336  for (knownClassIdx = 0; knownClassIdx < classCount; knownClassIdx++)
337  predCount += predictedCountsCM[knownClassIdx][predClassIdx];
338  pc.push_back (predCount);
339  }
340
341  return pc;
342 } /* PredictedCounts */
343 
344 
345 
346 
347 double ConfusionMatrix2::CountsByKnownClass (kkint32 knownClassIdx) const
348 {
349  if ((knownClassIdx < 0) || (knownClassIdx >= classCount))
350  return 0.0;
351 
352  return countsByKnownClass [knownClassIdx];
353 }
354 
355 
356 
358 {
 // Hands back the per-known-class counts, one entry per class.
359  return countsByKnownClass;
360 } /* CountsByKnownClass */
361 
362 
363 
364 
366  MLClassPtr _predClass,
367  kkint32 _size,
368  double _probability,
369  RunLog& _log
370  )
371 {
 // Records one prediction: updates the overall totals, the known x predicted tables and the
 // by-size and by-probability histograms for the known class.
372  kkint32 knownClassNum = -1;
373  kkint32 predClassNum = -1;
374
 // Negative probabilities are treated as zero.
375  if (_probability < 0)
376  _probability = 0;
377
 // A NULL class on either side is counted in 'numInvalidClassesPredicted', logged and otherwise ignored.
378  if (!_knownClass)
379  {
380  numInvalidClassesPredicted += 1.0;
381  _log.Level (-1) << endl
382  << "ConfusionMatrix2::Increment **** _knownClass = NULL ****"
383  << endl
384  << endl;
385  return;
386  }
387
388  if (!_predClass)
389  {
390  numInvalidClassesPredicted += 1.0;
391  _log.Level (-1) << endl
392  << "ConfusionMatrix2::Increment **** _predClass = NULL ****"
393  << endl
394  << endl;
395  return;
396  }
397
 // Classes not yet in 'classes' are added on the fly, which grows every table by one class.
398  knownClassNum = classes.PtrToIdx (_knownClass);
399  if (knownClassNum < 0)
400  knownClassNum = AddClassToConfusionMatrix (_knownClass, _log);
401
402  predClassNum = classes.PtrToIdx (_predClass);
403  if (predClassNum < 0)
404  predClassNum = AddClassToConfusionMatrix (_predClass, _log);
405
 // Defensive range checks on the resolved indexes.
 // NOTE(review): the two messages below say "IncrementPredHits" although the NULL checks above
 // say "Increment" - the label looks stale.
406  if ((knownClassNum < 0) || (knownClassNum >= classCount))
407  {
408  numInvalidClassesPredicted += 1.0;
409  _log.Level (-1) << "ConfusionMatrix2::IncrementPredHits knownClassNum[" << knownClassNum << "] out of bounds." << endl;
410  return;
411  }
412
413  if ((predClassNum < 0) || (predClassNum >= classCount))
414  {
415  numInvalidClassesPredicted += 1.0;
416  _log.Level (-1) << "ConfusionMatrix2::IncrementPredHits predClassNum[" << predClassNum << "] out of bounds." << endl;
417  return;
418  }
419
420  if (knownClassNum == predClassNum)
421  correctCount += 1.0;
422
423  totalCount += 1.0;
424
425  totalSizesByKnownClass[knownClassNum] += _size;
426
427  totalPredProbsByKnownClass [knownClassNum] += _probability;
428  totalPredProb += _probability;
429
430  countsByKnownClass [knownClassNum]++;
431
432  (predictedCountsCM [knownClassNum] [predClassNum])++;
433  totPredProbCM [knownClassNum] [predClassNum] += _probability;
434
435
 // Size histogram: only positive sizes are counted. Bucket = (size - 1) / bucketSize, with
 // anything past the end folded into the last bucket.
 // NOTE(review): nothing here guards against bucketSize == 0 (division by zero) or
 // numOfBuckets == 0 (bucket would become -1) - confirm callers never supply those.
436  if (_size > 0)
437  {
438  kkint32 bucket = (_size - 1) / bucketSize;
439  if (bucket >= numOfBuckets)
440  bucket = numOfBuckets - 1;
441
442  countByKnownClassBySize[knownClassNum][bucket]++;
443  if (knownClassNum == predClassNum)
444  correctByKnownClassBySize [knownClassNum][bucket]++;
445  }
446  else
447  {
 // '_size' is not read again below, so this assignment has no effect.
448  _size = -1;
449  }
450
451
 // Probability histogram: probabilities in [0, 1] are converted to a whole percentage and
 // divided by 'probBucketSize'; anything else goes to bucket 0. Buckets past the end are
 // folded into the last one.
 // NOTE(review): same caveat as above for probBucketSize == 0 and numOfProbBuckets == 0.
452  {
453  kkint32 bucket = 0;
454
455  if ((_probability >= 0.0) && (_probability <= 1.0))
456  bucket = ((kkint32)(_probability * 100) / probBucketSize);
457  else
458  bucket = 0;
459
460  if (bucket >= numOfProbBuckets)
461  bucket = numOfProbBuckets - 1;
462
463  countByKnownClassByProb [knownClassNum][bucket]++;
464  if (knownClassNum == predClassNum)
465  correctByKnownClassByProb [knownClassNum][bucket]++;
466  }
467 } /* Increment */
468 
469 
470 
471 
472 
474 {
 // Returns a copy of 'src' in which each of the characters  # $ & _ % { }  is preceded by a
 // backslash; every other character is appended unchanged.
 // 'newLen' is 1.3 times the source length - presumably a capacity hint for 'result' leaving
 // room for the added backslashes; confirm against the KKStr (size) constructor.
475  kkuint32 newLen = (kkint32)(src.Len () * 1.3);
476
477  KKStr result (newLen);
478
479  for (kkuint32 x = 0; x < src.Len (); x++)
480  {
481  char ch = src[x];
482
483  switch (ch)
484  {
485  case '#': result << "\\#";
486  break;
487
488  case '$': result << "\\$";
489  break;
490
491  case '&': result << "\\&";
492  break;
493
494  case '_': result << "\\_";
495  break;
496
497  case '%': result << "\\%";
498  break;
499
500  case '{': result << "\\{";
501  break;
502
503  case '}': result << "\\}";
504  break;
505
506
507  default: result.Append (ch);
508  break;
509  }
510
511  }
512
513  return result;
514 } /* StripOutInvalidLatexCaracters */
515 
516 
517 
518 
519 
520 void ConfusionMatrix2::PrintSingleLine (ostream& _outFile,
521  KKStr _name,
522  double _lineTotal,
523  double _splits[]
524  )
525 {
526  kkint32 predClassNum;
527 
528  if (_name.Len () > 25)
529  _name = _name.SubStrPart (_name.Len () - 25);
530 
531  _outFile << setw (25) << _name
532  << setw (16) << _lineTotal;
533 
534  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
535  {
536  _outFile << setw (16) << _splits [predClassNum];
537  }
538 
539  _outFile << endl;
540 } /* PrintSingleLine */
541 
542 
543 
544 
545 
546 
547 void ConfusionMatrix2::PrintSingleLineTabDelimited (ostream& _outFile,
548  const KKStr& _name,
549  double _lineTotal,
550  double _splits[]
551  )
552 {
553  kkint32 predClassNum;
554 
555  KKStr name (_name);
556  name << "(" << _lineTotal << ")";
557 
558  _outFile << _name << "\t" << _lineTotal;
559 
560  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
561  {
562  _outFile << "\t" << _splits [predClassNum];
563  }
564 
565  _outFile << endl;
566 } /* PrintSingleLineTabDelimited */
567 
568 
569 
570 
571 void ConfusionMatrix2::PrintSingleLineHTML (ostream& o,
572  const KKStr& _name,
573  double _lineTotal,
574  kkint32 _knownClassNum,
575  double _splits[]
576  )
577 {
578  kkint32 predClassNum;
579 
580  o << " <tr><td style=\"text-align:left; font-family:Arial\">" << _name << "</td>" << "<td>" << _lineTotal << "</td>";
581 
582  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
583  {
584  if (predClassNum == _knownClassNum)
585  {
586  o << "<td style=\"font-weight:bold\">"
587  << _splits [predClassNum]
588  << "</td>";
589  }
590  else
591  {
592  o << "<td>";
593  if (_splits [predClassNum] != 0.0)
594  o << _splits [predClassNum];
595  o << "</td>";
596  }
597  }
598  o << "</tr>" << endl;
599 } /* PrintSingleLineHTML */
600 
601 
602 
603 
604 
605 
606 
607 void ConfusionMatrix2::PrintSingleLineLatexTable (ostream& _outFile,
608  kkint32 _knownClassNum,
609  const KKStr& _name,
610  double _lineTotal,
611  double _splits[]
612  )
613 {
614  kkint32 predClassNum;
615 
616  KKStr name (_name);
617  name << "(" << _lineTotal << ")";
618 
619  _outFile << StripOutInvalidLatexCaracters (_name) << " & " << _lineTotal;
620 
621  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
622  {
623  _outFile << " & ";
624  if (_knownClassNum == predClassNum)
625  _outFile << "\\textbf{";
626 
627  _outFile << _splits [predClassNum];
628 
629  if (_knownClassNum == predClassNum)
630  _outFile << "}";
631  }
632 
633  _outFile << "\\\\" << endl;
634 } /* PrintSingleLineLatexTable */
635 
636 
637 
638 
639 
640 
641 void ConfusionMatrix2::PrintSingleLineShort (ostream& _outFile,
642  const KKStr& _name,
643  double _lineTotal,
644  double _splits[])
645 {
646  kkint32 predClassNum;
647 
648  KKStr name (_name);
649  name << "(" << _lineTotal << ")";
650 
651  _outFile << setw (25) << name;
652 
653  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
654  {
655  _outFile << setw (6) << _splits [predClassNum];
656  }
657 
658  _outFile << endl;
659 }
660 
661 
662 
663 
664 
665 void ConfusionMatrix2::PrintPercentLine (ostream& _outFile,
666  KKStr _name,
667  double _lineTotal,
668  double _splits[]
669  )
670 {
671  char buff[40];
672  double perc;
673  kkint32 predClassNum;
674 
675  if (totalCount == 0.0)
676  perc = 0.0;
677  else
678  perc = (double)_lineTotal / totalCount;
679 
680 
681 # ifdef USE_SECURE_FUNCS
682  sprintf_s (buff, sizeof (buff), "%.1f%%", (100.0 * perc));
683 # else
684  sprintf (buff, "%.1f%%", (100.0 * perc));
685 # endif
686 
687  if (_name.Len () > 25)
688  _name = _name.SubStrPart (_name.Len () - 25);
689 
690  _outFile << setw (25) << _name
691  << setw (16) << buff;
692 
693  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
694  {
695  if (_lineTotal <= 0)
696  perc = 0.0;
697  else
698  perc = 100.0 * (double)_splits[predClassNum] / (double)_lineTotal;
699 
700 
701 # ifdef USE_SECURE_FUNCS
702  sprintf_s (buff, sizeof (buff), "%.3f%%", perc);
703 # else
704  sprintf (buff, "%.3f%%", perc);
705 #endif
706 
707  _outFile << setw (16) << buff;
708  }
709 
710  _outFile << endl;
711 } /* PrintPercentLine */
712 
713 
714 
715 
716 
717 
718 void ConfusionMatrix2::PrintPercentLineTabDelimited (ostream& _outFile,
719  const KKStr& _name,
720  double _lineTotal,
721  double _splits[]
722  )
723 {
724  double perc;
725  kkint32 predClassNum;
726 
727  if (totalCount <= 0.0)
728  perc = 0.0;
729  else
730  perc = (double)_lineTotal / totalCount;
731 
732  _outFile << _name << "\t"
733  << StrFormatDouble ((100.0 * perc), "zz0.00") << "%";
734 
735 
736  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
737  {
738  if (_lineTotal <= 0)
739  perc = 0.0;
740  else
741  perc = 100.0 * (double)_splits[predClassNum] / (double)_lineTotal;
742 
743  _outFile << "\t" << StrFormatDouble (perc, "ZZ0.000") << "%";
744  }
745 
746  _outFile << endl;
747 } /* PrintPercentLineTabDelimited */
748 
749 
750 
751 
752 void ConfusionMatrix2::PrintAvgPredProbLineHTML (ostream& o,
753  const KKStr& _name,
754  double _totalAvgPredProbThisLine,
755  double _totalCountThisLine,
756  kkint32 _knownClassNum,
757  double _avgPredProbs[],
758  double _numPredByClass[]
759  )
760 {
761  double avgPredProb;
762  kkint32 predClassNum;
763 
764  if (_totalCountThisLine <= 0.0)
765  avgPredProb = 0.0;
766  else
767  avgPredProb = _totalAvgPredProbThisLine / _totalCountThisLine;
768 
769  KKStr avgPredProbStr = StrFormatDouble ((100.0 * avgPredProb), "zz0.00") + "%";
770  o << " <tr>" << "<td style=\"text-align:left; font-family:Arial\">" << _name << "</td>" << "<td>" << avgPredProbStr << "</td>";
771 
772  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
773  {
774  if (_numPredByClass[predClassNum] <= 0.0)
775  avgPredProb = 0.0;
776  else
777  avgPredProb = 100.0 * _avgPredProbs[predClassNum] / _numPredByClass[predClassNum];
778 
779  if (predClassNum == _knownClassNum)
780  o << "<td style=\"font-weight:bold\">";
781  else
782  o << "<td>";
783 
784  o << StrFormatDouble (avgPredProb, "ZZ0.000") << "%" << "</td>";
785  }
786 
787  o << "</tr>" << endl;
788 } /* PrintAvgPredProbLineHTML */
789 
790 
791 
792 
793 
794 void ConfusionMatrix2::PrintPercentLineHTML (ostream& o,
795  const KKStr& _name,
796  double _lineTotal,
797  kkint32 _knownClassNum,
798  double _splits[]
799  )
800 {
801  double perc;
802  kkint32 predClassNum;
803 
804  if (totalCount <= 0.0)
805  perc = 0.0;
806  else
807  perc = (double)_lineTotal / totalCount;
808 
809  KKStr percentStr = StrFormatDouble ((100.0 * perc), "zz0.00") + "%";
810  o << " <tr>" << "<td style=\"text-align:left; font-family:Arial\">" << _name << "</td>" << "<td>" << percentStr << "</td>";
811 
812  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
813  {
814  if (_lineTotal <= 0)
815  perc = 0.0;
816  else
817  perc = 100.0 * (double)_splits[predClassNum] / (double)_lineTotal;
818 
819  if (predClassNum == _knownClassNum)
820  {
821  o << "<td style=\"font-weight:bold\">"
822  << StrFormatDouble (perc, "ZZ0.000") << "%"
823  << "</td>";
824  }
825  else
826  {
827  o << "<td>";
828  if (perc != 0.0)
829  o << StrFormatDouble (perc, "ZZ0.000") << "%";
830  o << "</td>";
831  }
832  }
833  o << "</tr>" << endl;
834 } /* PrintPercentLineHTML */
835 
836 
837 
838 
839 
840 
841 void ConfusionMatrix2::PrintPercentLineShort (ostream& _outFile,
842  const KKStr& _name,
843  double _lineTotal,
844  double _splits[]
845  )
846 {
847  double perc;
848  kkint32 predClassNum;
849 
850  if (totalCount == 0.0)
851  perc = 0.0;
852  else
853  perc = 100.0 * (double)_lineTotal / totalCount;
854 
855  KKStr name (_name);
856  name << "(" << StrFormatDouble (perc, "ZZ0.0") << ")";
857 
858  _outFile << setw (25) << name;
859 
860  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
861  {
862  if (_lineTotal <= 0)
863  perc = 0.0;
864  else
865  perc = 100.0 * (double)_splits[predClassNum] / (double)_lineTotal;
866 
867  _outFile << setw (6) << StrFormatDouble (perc, "zz0.0");
868  }
869 
870  _outFile << endl;
871 } /* PrintPercentLineShort */
872 
873 
874 
875 
876 
877 void ConfusionMatrix2::PrintPercentLineLatexTable (ostream& _outFile,
878  kkint32 _rowNum,
879  const KKStr& _name,
880  double _lineTotal,
881  double _splits[]
882  )
883 {
884  double perc;
885  kkint32 predClassNum;
886 
887  if (totalCount <= 0.0)
888  perc = 0.0;
889  else
890  perc = (double)_lineTotal / totalCount;
891 
892  _outFile << StripOutInvalidLatexCaracters (_name) << " & "
893  << StrFormatDouble ((100.0 * perc), "zz0.0") << "\\%";
894 
895  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
896  {
897  if (_lineTotal <= 0)
898  perc = 0.0;
899  else
900  perc = 100.0 * (double)_splits[predClassNum] / (double)_lineTotal;
901 
902  _outFile << " & ";
903  if (_rowNum == predClassNum)
904  _outFile << "\\textbf{";
905 
906  _outFile << StrFormatDouble (perc, "ZZ0.00") << "\\%";
907  if (_rowNum == predClassNum)
908  _outFile << "}";
909  }
910 
911  _outFile << "\\\\" << endl;
912 } /* PrintPercentLineLatexTable */
913 
914 
915 
916 
917 
918 
919 void ConfusionMatrix2::PrintConfusionMatrix (ostream& outFile)
920 {
921  kkint32 knownClassNum;
922  kkint32 predClassNum;
923  kkint32 x;
924 
925 
926  // Lets generate Titles first
927  outFile << endl;
928 
929  double perc = 0.0;
930  if (totalCount > 0.0)
931  perc = correctCount / totalCount;
932 
933  outFile << "Overall Accuracy is "
934  << setprecision (5)
935  << (100.0 * perc) << "%"
936  << endl;
937 
938  outFile << endl;
939 
940 
941 
942  KKStr titleLine1, titleLine2, titleLine3;
943  classes.ExtractThreeTitleLines (titleLine1, titleLine2, titleLine3, 16);
944 
945 
946  outFile << setw (25) << "" << setw(16) << "" << setw (0) << titleLine1 << endl;
947  outFile << setw (25) << "" << setw(16) << "" << setw (0) << titleLine2 << endl;
948  outFile << setw (25) << "ClassName" << setw(16) << "Count" << setw (0) << titleLine3 << endl;
949 
950  outFile << setw (25) << "===========" << setw(16) << "====";
951  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
952  {
953  outFile << setw (16) << "============";
954  }
955  outFile << endl;
956 
957  double* totals = new double[classCount];
958  for (x = 0; x < classCount; x++)
959  totals[x] = 0;
960 
961 
962  double totalNonNoise = 0;
963  double totalNonNoiseRight = 0;
964 
965  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
966  {
967  bool noiseClass = classes[knownClassNum].UnDefined ();
968 
969  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
970  {
971  totals[predClassNum] += predictedCountsCM[knownClassNum] [predClassNum];
972  }
973 
974  PrintSingleLine (outFile,
975  classes [knownClassNum].Name (),
976  countsByKnownClass [knownClassNum],
977  predictedCountsCM [knownClassNum]
978  );
979 
980  if (!noiseClass)
981  {
982  totalNonNoise = totalNonNoise + countsByKnownClass [knownClassNum];
983  totalNonNoiseRight += predictedCountsCM [knownClassNum] [knownClassNum];
984  }
985  }
986 
987  PrintSingleLine (outFile,
988  KKStr ("Totals"),
989  totalCount,
990  totals
991  );
992 
993  outFile << endl << endl;
994 
995  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
996  {
997  PrintPercentLine (outFile,
998  classes [knownClassNum].Name (),
999  countsByKnownClass [knownClassNum],
1000  predictedCountsCM [knownClassNum]
1001  );
1002  }
1003 
1004  outFile << endl
1005  << endl;
1006 
1007  perc = 0.0;
1008  if (totalNonNoise != 0)
1009  perc = (double)totalNonNoiseRight / (double)totalNonNoise;
1010 
1011  outFile << "Accuracy for Non Noise "
1012  << setprecision (5)
1013  << (perc * 100.0)
1014  << "%"
1015  << endl;
1016 
1017 
1018  outFile << endl << endl;
1019 
1020  delete[] totals;
1021 } /* PrintConfusionMatrix */
1022 
1023 
1024 
1026 {
 // Writes the confusion matrix to 'o' as an HTML table: an optional warning banner, the overall
 // accuracy, one row of raw counts per known class plus a "Totals" row, one row of percentages
 // per known class, and finally the accuracy over classes for which UnDefined () is false.
1027  kkint32 knownClassNum;
1028  kkint32 predClassNum;
1029  kkint32 x;
1030
1031
1032  double overallAccuracy = 0.0;
1033  if (totalCount != 0.0)
1034  overallAccuracy = 100.0 * correctCount / totalCount;
1035
 // Banner shown only when some predictions were rejected as invalid.
1036  if (numInvalidClassesPredicted > 0.0)
1037  {
1038  o << "<p style=\"font-weight:bold\">" << endl
1039  << "*********************************************************************************************<br />" << endl
1040  << "******************* WARNING WARNING WARNING WARNING *********************<br />" << endl
1041  << "******************* *********************<br />" << endl
1042  << "******************* There were invalid classes specified that were *********************<br />" << endl
1043  << "******************* not counted. numInvalidClassesPredicted[" << numInvalidClassesPredicted << "] *********************<br />" << endl
1044  << "*********************************************************************************************<br />" << endl
1045  << "</p>" << endl
1046  << "<br />" << endl;
1047  }
1048
1049
1050  o << "Overall Accuracy: "
1051  << StrFormatDouble (overallAccuracy, "ZZZ0.000") << "%"
1052  << endl;
1053
1054
 // Table header: a label column, a count column and one column per class.
1055  o << "<table align=\"center\" border=\"2\" cellpadding=\"3\" cellspacing=\"0\" frame=\"box\" summary=\"Confusion \" >" << endl
1056  << " <thead style=\"font-weight:bold; text-align:center; vertical-align:bottom\">" << endl
1057  << " <tr>" << endl
1058  << " <th>Class<br />Names</th><th>Count</th>" << classes.ExtractHTMLTableHeader () << endl
1059  << " </tr>" << endl
1060  << " </thead>" << endl
1061  << " <tbody style=\"font-weight:normal; text-align:right; font-family:Courier\">" << endl;
1062
 // Column sums for the "Totals" row; released at the end of the function.
1063  double* totals = new double[classCount];
1064  for (x = 0; x < classCount; x++)
1065  totals[x] = 0;
1066
1067  double totalNonNoise = 0;
1068  double totalNonNoiseRight = 0;
1069
 // One row of raw counts per known class; the diagonal cell is bolded by passing the row index.
1070  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1071  {
1072  bool noiseClass = classes[knownClassNum].UnDefined ();
1073
1074  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
1075  {
1076  totals[predClassNum] += predictedCountsCM [knownClassNum] [predClassNum];
1077  }
1078
1079  PrintSingleLineHTML (o,
1080  classes [knownClassNum].Name (),
1081  countsByKnownClass [knownClassNum],
1082  knownClassNum,
1083  predictedCountsCM [knownClassNum]
1084  );
1085  if (!noiseClass)
1086  {
1087  totalNonNoise = totalNonNoise + countsByKnownClass [knownClassNum];
1088  totalNonNoiseRight = totalNonNoiseRight + predictedCountsCM [knownClassNum] [knownClassNum];
1089  }
1090
1091  }
1092
 // -1 matches no column, so nothing in the "Totals" row is bolded.
1093  PrintSingleLineHTML (o,
1094  KKStr ("Totals"),
1095  totalCount,
1096  -1,
1097  totals
1098  );
1099
 // Spacer row spanning the label, count and all class columns.
1100  o << "<tr><td colspan=\"" << (classCount + 2) << "\">&nbsp</td></tr>" << endl;
1101
 // Same rows again, expressed as percentages.
1102  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1103  {
1104  PrintPercentLineHTML (o,
1105  classes [knownClassNum].Name (),
1106  countsByKnownClass [knownClassNum],
1107  knownClassNum,
1108  predictedCountsCM [knownClassNum]
1109  );
1110  }
1111
1112  o << "</tbody>" << endl
1113  << "</table>" << endl;
1114
1115  double nonNoiseAccuracy = 0.0;
1116  if (totalNonNoise != 0)
1117  nonNoiseAccuracy = 100.0 * (double)totalNonNoiseRight / (double)totalNonNoise;
1118
1119  o << "Non Noise Accuracy: " << StrFormatDouble (nonNoiseAccuracy, "ZZ0.000") << "%" << "<br />" << endl;
1120
1121  delete [] totals;
1122 } /* PrintConfusionMatrixHTML */
1123 
1124 
1125 
1126 
1127 
1128 
1130 
1131 {
 // Writes an HTML table of average predicted probabilities to 'o': an optional warning banner,
 // the overall average, one row per known class and a final "AllClasses" row.
 // NOTE(review): the header line of this body is not visible in this listing, and the closing
 // label at the bottom repeats "PrintConfusionMatrixHTML" - it looks like a copy/paste leftover;
 // confirm the real function name against the class declaration.
1132  kkint32 knownClassNum;
1133  kkint32 predClassNum;
1134
1135  double overallAvgPredProb = 0.0;
1136  if (totalCount != 0.0)
1137  overallAvgPredProb = 100.0 * totalPredProb / totalCount;
1138
 // Banner shown only when some predictions were rejected as invalid.
1139  if (numInvalidClassesPredicted > 0.0)
1140  {
1141  o << "<p style=\"font-weight:bold\">" << endl
1142  << "*********************************************************************************************<br />" << endl
1143  << "******************* WARNING WARNING WARNING WARNING *********************<br />" << endl
1144  << "******************* *********************<br />" << endl
1145  << "******************* There were invalid classes specified that were *********************<br />" << endl
1146  << "******************* not counted. numInvalidClassesPredicted[" << numInvalidClassesPredicted << "] *********************<br />" << endl
1147  << "*********************************************************************************************<br />" << endl
1148  << "</p>" << endl
1149  << "<br />" << endl;
1150  }
1151
1152
1153  o << "Overall AvgPredProb: "
1154  << StrFormatDouble (overallAvgPredProb, "ZZZ0.000") << "%"
1155  << endl;
1156
1157
1158  o << "<table align=\"center\" border=\"2\" cellpadding=\"3\" cellspacing=\"0\" frame=\"box\" summary=\"Confusion \" >" << endl
1159  << " <thead style=\"font-weight:bold; text-align:center; vertical-align:bottom\">" << endl
1160  << " <tr>" << endl
1161  << " <th>Class<br />Names</th><th>Count</th>" << classes.ExtractHTMLTableHeader () << endl
1162  << " </tr>" << endl
1163  << " </thead>" << endl
1164  << " <tbody style=\"font-weight:normal; text-align:right; font-family:Courier\">" << endl;
1165
 // Per-predicted-class sums of probability and of counts, accumulated over all known classes
 // for the "AllClasses" row; both arrays are released at the end of the function.
1166  double* totalPredProbByPredClass = new double[classCount];
1167  double* totalCountsByPredClass = new double[classCount];
1168  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
1169  {
1170  totalPredProbByPredClass [predClassNum] = 0.0;
1171  totalCountsByPredClass [predClassNum] = 0.0;
1172  }
1173
1174  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1175  {
1176  PrintAvgPredProbLineHTML (o,
1177  classes [knownClassNum].Name (),
1178  totalPredProbsByKnownClass [knownClassNum],
1179  countsByKnownClass [knownClassNum],
1180  knownClassNum,
1181  totPredProbCM [knownClassNum],
1182  predictedCountsCM [knownClassNum]
1183  );
1184  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
1185  {
1186  totalPredProbByPredClass [predClassNum] += totPredProbCM [knownClassNum][predClassNum];
1187  totalCountsByPredClass [predClassNum] += predictedCountsCM [knownClassNum][predClassNum];
1188  }
1189  }
1190
 // Spacer row spanning the label, count and all class columns.
1191  o << "<tr><td colspan=\"" << (classCount + 2) << "\">&nbsp</td></tr>" << endl;
1192
 // After the loop above 'knownClassNum' equals 'classCount', which matches no column, so no
 // cell of the summary row is bolded.
1193  PrintAvgPredProbLineHTML (o,
1194  "AllClasses",
1195  totalPredProb,
1196  totalCount,
1197  knownClassNum,
1198  totalPredProbByPredClass,
1199  totalCountsByPredClass
1200  );
1201
1202  o << "</tbody>" << endl
1203  << "</table>" << endl;
1204
1205  delete[] totalCountsByPredClass;
1206  delete[] totalPredProbByPredClass;
1207 } /* PrintConfusionMatrixHTML */
1208 
1209 
1210 
1211 
1212 
1213 
1214 
1215 
1217 
1218 {
1219  kkint32 bucket;
1220  kkint32 classNum;
1221 
1222  VectorDouble countByProb (numOfProbBuckets, 0.0);
1223  VectorDouble correctByProb (numOfProbBuckets, 0.0);
1224 
1225  double totalCount = 0.0;
1226  double totalCorrect = 0.0;
1227 
1228  double acc = 0.0;
1229 
1230  o << "<table align=\"center\" border=\"2\" cellpadding=\"3\" cellspacing=\"0\" frame=\"box\" summary=\"Confusion \" >" << endl
1231  << " <thead style=\"font-weight:bold; text-align:center; vertical-align:bottom\">" << endl
1232  << " <tr>" << endl
1233  << " <th>Class<br />Names</th><th>All<br />Classes</th>";
1234 
1235  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
1236  {
1237  o << "<th>" << ((bucket + 1) * probBucketSize) << "</th>";
1238  }
1239  o << " </tr>" << endl
1240  << " </thead>" << endl
1241  << " <tbody style=\"font-weight:normal; text-align:right; font-family:Courier\">" << endl;
1242 
1243  KKStr ln (1024);
1244  KKStr accStr;
1245 
1246  for (classNum = 0; classNum < classCount; classNum++)
1247  {
1248  double countThisClass = 0.0;
1249  double correctThisClass = 0.0;
1250 
1251  ln = "";
1252 
1253  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
1254  {
1255  double count = countByKnownClassByProb [classNum][bucket];
1256  double correct = correctByKnownClassByProb [classNum][bucket];
1257 
1258  countThisClass += count;
1259  correctThisClass += correct;
1260 
1261  countByProb [bucket] += countByKnownClassByProb [classNum][bucket];
1262  correctByProb [bucket] += correctByKnownClassByProb [classNum][bucket];
1263 
1264  acc =0.0;
1265  accStr = "";
1266  if (count != 0.0)
1267  {
1268  acc = 100.0 * correct / count;
1269  accStr = StrFormatDouble (acc, "ZZ0.000") + "%";
1270  }
1271 
1272  ln << "<td>" << accStr << "</td>";
1273  }
1274 
1275  totalCount += countThisClass;
1276  totalCorrect += correctThisClass;
1277 
1278  accStr = "";
1279  acc = 0.0;
1280  if (countThisClass != 0.0)
1281  {
1282  acc = 100.0 * correctThisClass / countThisClass;
1283  accStr = StrFormatDouble (acc, "ZZ0.000") + "%";
1284  }
1285 
1286  o << " <tr>"
1287  << "<td style=\"text-align:left; font-family:Arial\">" + classes[classNum].Name () + "</td>"
1288  << "<td>" << accStr << "</td>"
1289  << ln
1290  << "</tr>"
1291  << endl;
1292  }
1293 
1294  {
1295  acc = 0.0;
1296  if (totalCount != 0.0)
1297  acc = 100.0 * totalCorrect / totalCount;
1298 
1299  o << " <tr>"
1300  << "<td style=\"text-align:left; font-family:Arial\">" << "Total<br />All Classes" << "</td>"
1301  << "<td>" << StrFormatDouble (acc, "ZZ0.000") << "%" << "</td>";
1302 
1303  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
1304  {
1305  acc = 0.0;
1306  accStr = "";
1307  if (countByProb [bucket] != 0.0)
1308  {
1309  acc = 100.0 * correctByProb [bucket] / countByProb [bucket];
1310  accStr = StrFormatDouble (acc, "ZZ0.000") + "%";
1311  }
1312 
1313  o << "<td>" << accStr << "</td>";
1314  }
1315  o << "</tr>" << endl;
1316  }
1317 
1318  o << "</tbody>" << endl
1319  << "</table>" << endl;
1320 } /* PrintAccuracyByProbByClassHTML */
1321 
1322 
1323 
1324 
1325 
1326 
1327 
1329 {
1330  kkint32 knownClassNum;
1331  kkint32 predClassNum;
1332  kkint32 x;
1333 
1334  // Lets generate Titles first
1335  outFile << endl;
1336 
1337  double overallAccuracy = 0.0;
1338  if (totalCount != 0.0)
1339  overallAccuracy = 100.0 * correctCount / totalCount;
1340 
1341  if (numInvalidClassesPredicted > 0.0)
1342  {
1343  outFile << endl
1344  << "*********************************************************************************************" << endl
1345  << "******************* WARNING WARNING WARNING WARNING *********************" << endl
1346  << "******************* *********************" << endl
1347  << "******************* There were invalid classes specified that were *********************" << endl
1348  << "******************* not counted. numInvalidClassesPredicted[" << numInvalidClassesPredicted << "] *********************" << endl
1349  << "*********************************************************************************************" << endl
1350  << endl;
1351  }
1352 
1353 
1354  outFile << "Overall Accuracy:\t"
1355  << StrFormatDouble (overallAccuracy, "ZZZ0.000") << "%"
1356  << endl;
1357 
1358  outFile << endl;
1359 
1360 
1361 
1362  KKStr titleLine1, titleLine2, titleLine3;
1363  classes.ExtractThreeTitleLines (titleLine1, titleLine2, titleLine3);
1364 
1365  outFile << "" << "\t" << "" << "\t" << titleLine1 << endl;
1366  outFile << "" << "\t" << "" << "\t" << titleLine2 << endl;
1367  outFile << "Class_Names" << "\t" << "Count" << "\t" << titleLine3 << endl;
1368 
1369  double* totals = new double[classCount];
1370  for (x = 0; x < classCount; x++)
1371  totals[x] = 0;
1372 
1373  double totalNonNoise = 0;
1374  double totalNonNoiseRight = 0;
1375 
1376  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1377  {
1378  bool noiseClass = classes[knownClassNum].UnDefined ();
1379 
1380  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
1381  {
1382  totals[predClassNum] += predictedCountsCM[knownClassNum] [predClassNum];
1383  }
1384 
1385  PrintSingleLineTabDelimited (outFile,
1386  classes [knownClassNum].Name (),
1387  countsByKnownClass [knownClassNum],
1388  predictedCountsCM [knownClassNum]
1389  );
1390  if (!noiseClass)
1391  {
1392  totalNonNoise = totalNonNoise + countsByKnownClass [knownClassNum];
1393  totalNonNoiseRight = totalNonNoiseRight + predictedCountsCM [knownClassNum] [knownClassNum];
1394  }
1395  }
1396 
1397  PrintSingleLineTabDelimited (outFile,
1398  KKStr ("Totals"),
1399  totalCount,
1400  totals
1401  );
1402 
1403  outFile << endl << endl;
1404 
1405  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1406  {
1407  PrintPercentLineTabDelimited (outFile,
1408  classes [knownClassNum].Name (),
1409  countsByKnownClass [knownClassNum],
1410  predictedCountsCM [knownClassNum]
1411  );
1412  }
1413 
1414 
1415  double nonNoiseAccuracy = 0.0;
1416  if (totalNonNoise != 0)
1417  nonNoiseAccuracy = 100.0 * (double)totalNonNoiseRight / (double)totalNonNoise;
1418 
1419  outFile << endl
1420  << endl;
1421 
1422  outFile << "Non Noise Accuracy:" << "\t" << StrFormatDouble (nonNoiseAccuracy, "ZZ0.000") << "%" << endl;
1423 
1424 
1425  outFile << endl << endl;
1426 
1427  delete [] totals;
1428 } /* PrintConfusionMatrixTabDelimited */
1429 
1430 
1431 
1432 
1433 
1434 
1435 
1436 void ConfusionMatrix2::PrintLatexTableColumnHeaders (ostream& outFile)
1437 {
1438  kkint32 x = 0;
1439 
1440  outFile << "\\begin{tabular}{|";
1441  for (x = 0; x < (classCount + 2); x++)
1442  outFile << "c|";
1443  outFile << "}" << endl;
1444 
1445  outFile << "\\hline" << endl;
1446 
1447  outFile << "Class Names" << " & " << "Count";
1448 
1449  for (x = 0; x < classCount; x++)
1450  {
1451  outFile << " & " << StripOutInvalidLatexCaracters (classes[x].Name ());
1452  }
1453  outFile << "\\\\" << endl;
1454 
1455 
1456  outFile << "\\hline" << endl;
1457 } /* PrintConfusionMatrixLatexTableColumnHeaders */
1458 
1459 
1460 
1461 
1462 
1463 
1465 {
1466  kkint32 knownClassNum;
1467  kkint32 predClassNum;
1468  kkint32 x;
1469 
1470  double overallAccuracy = 0.0;
1471  if (totalCount != 0.0)
1472  overallAccuracy = 100.0 * correctCount / totalCount;
1473 
1474  outFile << "Overall Accuracy:\t"
1475  << StrFormatDouble (overallAccuracy, "ZZZ0.000") << "\\%\\\\"
1476  << endl;
1477 
1478  PrintLatexTableColumnHeaders (outFile);
1479 
1480 
1481  double* totals = new double[classCount];
1482  for (x = 0; x < classCount; x++)
1483  totals[x] = 0;
1484 
1485 
1486  double totalNonNoise = 0;
1487  double totalNonNoiseRight = 0;
1488 
1489  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1490  {
1491  bool noiseClass = classes[knownClassNum].UnDefined ();
1492 
1493  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
1494  {
1495  totals[predClassNum] += predictedCountsCM[knownClassNum] [predClassNum];
1496  }
1497 
1498  PrintSingleLineLatexTable (outFile,
1499  knownClassNum,
1500  classes [knownClassNum].Name (),
1501  countsByKnownClass [knownClassNum],
1502  predictedCountsCM [knownClassNum]
1503  );
1504 
1505  outFile << "\\hline" << endl;
1506 
1507  if (!noiseClass)
1508  {
1509  totalNonNoise = totalNonNoise + countsByKnownClass [knownClassNum];
1510  totalNonNoiseRight = totalNonNoiseRight + predictedCountsCM [knownClassNum] [knownClassNum];
1511  }
1512 
1513  }
1514 
1515  outFile << "\\hline" << endl;
1516 
1517  PrintSingleLineLatexTable (outFile,
1518  -1,
1519  KKStr ("Totals"),
1520  totalCount,
1521  totals
1522  );
1523 
1524  outFile << "\\hline" << endl;
1525 
1526  outFile << "\\end{tabular}" << endl;
1527 
1528 
1529  outFile << endl
1530  << "\\vspace{16pt}" << endl
1531  << endl;
1532 
1533 
1534  PrintLatexTableColumnHeaders (outFile);
1535 
1536  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1537  {
1538  PrintPercentLineLatexTable (outFile,
1539  knownClassNum,
1540  classes [knownClassNum].Name (),
1541  countsByKnownClass [knownClassNum],
1542  predictedCountsCM [knownClassNum]
1543  );
1544  outFile << "\\hline" << endl;
1545  }
1546 
1547  outFile << "\\end{tabular}" << endl;
1548 
1549  outFile << endl;
1550 
1551  delete[] totals;
1552 } /* PrintConfusionMatrixLatexTable */
1553 
1554 
1555 
1556 
1557 
1558 
1559 
1561 {
1562  kkint32 knownClassNum;
1563  kkint32 predClassNum;
1564  kkint32 x;
1565 
1566 
1567  // Lets generate Titles first
1568  outFile << endl;
1569 
1570  double perc = 0.0;
1571  if (totalCount != 0)
1572  perc = correctCount / totalCount;
1573 
1574  outFile << "Overall Accuracy is "
1575  << setprecision (5)
1576  << (100.0 * perc) << "%"
1577  << endl;
1578 
1579  outFile << endl;
1580 
1581 
1582  outFile << setw (25) << "Class Names";
1583 
1584  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1585  {
1586  KKStr colDesc ("Col");
1587  colDesc << (knownClassNum + 1);
1588 
1589  colDesc.LeftPad (6);
1590 
1591  outFile << colDesc;
1592  }
1593  outFile << endl;
1594 
1595  outFile << setw (25) << "===========";
1596  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1597  {
1598  outFile << setw (6) << "====";
1599  }
1600  outFile << endl;
1601 
1602  double* totals = new double[classCount];
1603  for (x = 0; x < classCount; x++)
1604  totals[x] = 0;
1605 
1606 
1607  double totalNonNoise = 0;
1608  double totalNonNoiseRight = 0;
1609 
1610  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1611  {
1612  bool noiseClass = classes[knownClassNum].UnDefined ();
1613 
1614  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
1615  {
1616  totals[predClassNum] += predictedCountsCM [knownClassNum] [predClassNum];
1617  }
1618 
1619  PrintSingleLineShort (outFile,
1620  classes [knownClassNum].Name (),
1621  countsByKnownClass [knownClassNum],
1622  predictedCountsCM [knownClassNum]
1623  );
1624  if (!noiseClass)
1625  {
1626  totalNonNoise += countsByKnownClass [knownClassNum];
1627  totalNonNoiseRight += predictedCountsCM [knownClassNum] [knownClassNum];
1628  }
1629 
1630  }
1631 
1632  PrintSingleLineShort (outFile,
1633  KKStr ("Totals"),
1634  totalCount,
1635  totals
1636  );
1637 
1638  outFile << endl << endl;
1639 
1640  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1641  {
1642  PrintPercentLineShort (outFile,
1643  classes [knownClassNum].Name (),
1644  countsByKnownClass [knownClassNum],
1645  predictedCountsCM [knownClassNum]
1646  );
1647  }
1648 
1649  outFile << endl
1650  << endl;
1651 
1652 
1653  perc = 0.0;
1654  if (totalNonNoise != 0)
1655  perc = (double)totalNonNoiseRight / (double)totalNonNoise;
1656 
1657  outFile << "Accuracy for Non Noise "
1658  << setprecision (5)
1659  << (perc * 100.0)
1660  << "%"
1661  << endl;
1662 
1663  outFile << endl << endl;
1664 
1665  delete[] totals;
1666 } /* PrintCrossValidationNarrow */
1667 
1668 
1669 
/**
 *@brief  Returns 'x' expressed as a percentage of the sum (x + y).
 *@param[in] x  The part being measured.
 *@param[in] y  The remainder; the denominator used is (x + y).
 *@return  100.0 * x / (x + y), or 0.0 when (x + y) is exactly zero.
 */
double  PercentOf (double x, double y)
{
  const double  denominator = x + y;
  return  (denominator == 0.0)  ?  0.0  :  (100.0 * x / denominator);
}
1678 
1679 
1680 
1681 
1683 {
1684  kkint32 numOfClasses = classes.QueueSize ();
1685  kkint32 x = 0;
1686  kkint32 y = 0;
1687 
1688  // Refer to http://www.medcalc.be/manual/mpage06-13a.php for definitions.
1689  // First we calc TruePositives, FasePositives, TrueNegatives, FalseNegatives
1690  double* falsePositives = new double[numOfClasses];
1691  double* falseNegatives = new double[numOfClasses];
1692  double* truePositives = new double[numOfClasses];
1693  double* trueNegatives = new double[numOfClasses];
1694 
1695  double totalTP = 0.0;
1696  double totalFP = 0.0;
1697  double totalTN = 0.0;
1698  double totalFN = 0.0;
1699 
1700 
1701  for (x = 0; x < numOfClasses; x++)
1702  {
1703  truePositives [x] = predictedCountsCM [x][x];
1704  totalTP += predictedCountsCM[x][x];
1705 
1706  trueNegatives [x] = 0.0;
1707  falsePositives[x] = 0.0;
1708  falseNegatives[x] = 0.0;
1709 
1710  for (y = 0; y < numOfClasses; y++)
1711  {
1712  if (y != x)
1713  {
1714  falsePositives[x] += predictedCountsCM [y][x]; // Was classified as x but was classed as x.
1715  totalFP += predictedCountsCM [y][x];
1716 
1717  falseNegatives[x] += predictedCountsCM [x][y]; // Should have been classed as x not y.
1718  totalFN += predictedCountsCM [x][y];
1719 
1720  trueNegatives [x] += (countsByKnownClass [y] - predictedCountsCM [y][x]);
1721  totalTN += (countsByKnownClass [y] - predictedCountsCM [y][x]);
1722  }
1723  }
1724  }
1725 
1726 
1727  KKStr titleLine1, titleLine2;
1728  classes.ExtractTwoTitleLines (titleLine1, titleLine2);
1729 
1730  r << "\t" << "" "\t" << titleLine1 << endl;
1731  r << "\t" << "Total" << "\t" << titleLine2 << endl;
1732 
1733  r << "TruePositives" << "\t" << totalTP;
1734  for (x = 0; x < numOfClasses; x++)
1735  {
1736  r << "\t" << truePositives[x];
1737  }
1738  r << endl;
1739 
1740  r << "FalsePositives" << "\t" << totalFP;
1741  for (x = 0; x < numOfClasses; x++)
1742  {
1743  r << "\t" << falsePositives[x];
1744  }
1745  r << endl;
1746 
1747  r << "TrueNegatives" << "\t" << totalTN;
1748  for (x = 0; x < numOfClasses; x++)
1749  {
1750  r << "\t" << trueNegatives[x];
1751  }
1752  r << endl;
1753 
1754  r << "FalseNegatives" << "\t" << totalFN;
1755  for (x = 0; x < numOfClasses; x++)
1756  {
1757  r << "\t" << falseNegatives[x];
1758  }
1759  r << endl;
1760 
1761  r << endl;
1762  r << "Sensitivity(TP/(TP+FN))" << "\t" << StrFormatDouble(PercentOf (totalTP, totalFN), "zzz,zz0.00") << "%";
1763  for (x = 0; x < numOfClasses; x++)
1764  {
1765  r << "\t" << StrFormatDouble(PercentOf (truePositives[x], falseNegatives[x]), "zzz,zz0.00") << "%";
1766  }
1767  r << endl;
1768 
1769  r << "Specificity(TN/(TN+FP))" << "\t" << StrFormatDouble(PercentOf (totalTN, totalFP), "zzz,zz0.00") << "%";
1770  for (x = 0; x < numOfClasses; x++)
1771  {
1772  r << "\t" << StrFormatDouble(PercentOf (trueNegatives[x], falsePositives[x]), "zzz,zz0.00") << "%";
1773  }
1774  r << endl;
1775 
1776  r << "PositivePredictiveValue(TP/(TP+FP))" << "\t" << StrFormatDouble(PercentOf (totalTP, totalFP), "zzz,zz0.00") << "%";
1777  for (x = 0; x < numOfClasses; x++)
1778  {
1779  r << "\t" << StrFormatDouble(PercentOf (truePositives[x], falsePositives[x]), "zzz,zz0.00") << "%";
1780  }
1781  r << endl;
1782 
1783  {
1784  double fMeasure = 0.0;
1785  double divisor = 2.0 * (double)totalTP + (double)totalFP + (double)totalFN;
1786  if (divisor != 0.0)
1787  fMeasure = 100.0 * (2.0 * (double)totalTP / divisor);
1788 
1789  r << "F-Measure(2*TP/(2*TP + FP + FN))" << "\t" << StrFormatDouble(fMeasure, "zzz,zz0.00") << "%";
1790  for (x = 0; x < numOfClasses; x++)
1791  {
1792  fMeasure = 0.0;
1793  divisor = 2.0 * (double)truePositives[x] + (double)falsePositives[x] + (double)falseNegatives[x];
1794  if (divisor != 0.0)
1795  fMeasure = 100.0 * (2.0 * (double)truePositives[x] / divisor);
1796 
1797  r << "\t" << StrFormatDouble(fMeasure, "zzz,zz0.00") << "%";
1798  }
1799  r << endl;
1800  }
1801 
1802  delete[] falseNegatives;
1803  delete[] falsePositives;
1804  delete[] trueNegatives;
1805  delete[] truePositives;
1806 } /* PrintTrueFalsePositivesTabDelimited */
1807 
1808 
1809 
1811  double& truePositives,
1812  double& trueNegatives,
1813  double& falsePositives,
1814  double& falseNegatives
1815  )
1816  const
1817 {
1818  truePositives = 0.0;
1819  trueNegatives = 0.0;
1820  falsePositives = 0.0;
1821  falseNegatives = 0.0;
1822 
1823  kkint32 x = classes.PtrToIdx (ic);
1824  if (x < 0)
1825  return;
1826 
1827  kkint32 numOfClasses = classes.QueueSize ();
1828 
1829  truePositives = predictedCountsCM [x][x];
1830 
1831  for (kkint32 y = 0; y < numOfClasses; y++)
1832  {
1833  if (y != x)
1834  {
1835  falsePositives += predictedCountsCM [y][x]; // Was classified as x but was classed as x.
1836  falseNegatives += predictedCountsCM [x][y]; // Should have been classed as x not y.
1837  trueNegatives += (countsByKnownClass [y] - predictedCountsCM [y][x]);
1838  }
1839  }
1840  return;
1841 } /* ComputeFundamentalStats */
1842 
1843 
1844 
1845 
1846 float ConfusionMatrix2::FMeasure (MLClassPtr positiveClass,
1847  RunLog& log
1848  ) const
1849 {
1850  kkint32 positiveIDX = classes.PtrToIdx (positiveClass);
1851  if (positiveIDX < 0)
1852  {
1853  KKStr invalidClassName = "";
1854  if (positiveClass)
1855  invalidClassName = positiveClass->Name ();
1856 
1857  log.Level (-1) << "ConfusionMatrix2::FMeasure ***ERROR*** Invalid Positive Class Specified[" << invalidClassName << "]" << endl;
1858  return 0.0f;
1859  }
1860 
1861  kkint32 numOfClasses = classes.QueueSize ();
1862 
1863  kkint32 y;
1864 
1865  double totalTP = 0.0;
1866  double totalFP = 0.0;
1867  double totalTN = 0.0;
1868  double totalFN = 0.0;
1869 
1870  totalTP = predictedCountsCM[positiveIDX][positiveIDX];
1871 
1872  for (y = 0; y < numOfClasses; y++)
1873  {
1874  if (y != positiveIDX)
1875  {
1876  totalFP += predictedCountsCM[y][positiveIDX];
1877  totalTN += predictedCountsCM[y][y];
1878  totalFN += predictedCountsCM[positiveIDX][y];
1879  }
1880  }
1881 
1882  double fMeasure = 0.0;
1883  double divisor = 2.0 * (double)totalTP + (double)totalFP + (double)totalFN;
1884  if (divisor != 0.0)
1885  fMeasure = 100.0 * (2.0 * (double)totalTP / divisor);
1886 
1887  return (float)fMeasure;
1888 } /* FMeasure */
1889 
1890 
1891 
1892 
1893 
1894 void ConfusionMatrix2::PrintErrorBySize (ostream& outFile)
1895 {
1896  kkint32 bucket;
1897  kkint32 classNum;
1898 
1899  outFile << endl;
1900 
1901  outFile << "Size" << "\t";
1902 
1903  // Lets first Print Titles.
1904  for (classNum = 0; classNum < classCount; classNum++)
1905  {
1906  outFile << "\t\t";
1907  outFile << classes[classNum].Name ();
1908  }
1909  outFile << endl;
1910 
1911 
1912 
1913  for (bucket = 0; bucket < numOfBuckets; bucket++)
1914  {
1915  outFile << ((bucket + 1) * bucketSize) << "\t";
1916 
1917  kkint32 classNum;
1918 
1919  for (classNum = 0; classNum < classCount; classNum++)
1920  {
1921  outFile << countByKnownClassBySize [classNum][bucket] << "\t"
1922  << correctByKnownClassBySize [classNum][bucket] << "\t";
1923  }
1924 
1925  outFile << endl;
1926  }
1927 
1928  outFile << endl;
1929 } /* PrintErrorBySize */
1930 
1931 
1932 
1933 
1934 void ConfusionMatrix2::PrintErrorByProb (ostream& outFile)
1935 {
1936  kkint32 bucket;
1937  kkint32 classNum;
1938 
1939  outFile << endl;
1940 
1941  outFile << "Prob" << "\t";
1942 
1943  // Lets first Print Titles.
1944  for (classNum = 0; classNum < classCount; classNum++)
1945  {
1946  outFile << "\t\t";
1947  outFile << classes[classNum].Name ();
1948  }
1949 
1950  outFile << endl;
1951 
1952 
1953  outFile << setiosflags (ios::fixed);
1954 
1955  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
1956  {
1957  outFile << ((bucket + 1) * probBucketSize) << "%\t";
1958 
1959  kkint32 classNum;
1960 
1961  for (classNum = 0; classNum < classCount; classNum++)
1962  {
1963  double perc;
1964 
1965  double count = countByKnownClassByProb [classNum][bucket];
1966  double correct = correctByKnownClassByProb [classNum][bucket];
1967 
1968  if (count > 0)
1969  perc = 100.0 * (double)correct / (double)count;
1970  else
1971  perc = 0.0;
1972 
1973  outFile << count << "\t" << correct << "\t" << setprecision (1) << perc << "% \t";
1974  }
1975 
1976  outFile << endl;
1977  }
1978 
1979  outFile << endl;
1980 } /* PrintErrorByProb */
1981 
1982 
1983 
1984 
1985 
1987 {
1988  kkint32 bucket;
1989  kkint32 classNum;
1990 
1991 
1992  outFile << endl
1993  << "Error by size" << endl
1994  << endl;
1995 
1996  outFile << "ClassName\tAvg Size\t";
1997  for (bucket = 0; bucket < numOfBuckets; bucket++)
1998  {
1999  outFile << ((bucket + 1) * bucketSize) << "\t";
2000  }
2001 
2002  outFile << endl
2003  << endl;
2004 
2005  for (classNum = 0; classNum < classCount; classNum++)
2006  {
2007  double avg;
2008  if (countsByKnownClass [classNum] != 0)
2009  avg = totalSizesByKnownClass[classNum] / countsByKnownClass [classNum];
2010  else
2011  avg = 0;
2012 
2013  outFile << classes[classNum].Name ()
2014  << "\t"
2015  << avg;
2016 
2017 
2018  for (bucket = 0; bucket < numOfBuckets; bucket++)
2019  {
2020  outFile << "\t" << countByKnownClassBySize[classNum][bucket];
2021  }
2022  outFile << endl;
2023 
2024 
2025  outFile << classes[classNum].Name () << " Correct" << "\t";
2026  for (bucket = 0; bucket < numOfBuckets; bucket++)
2027  {
2028  outFile << "\t" << correctByKnownClassBySize [classNum][bucket];
2029  }
2030 
2031  outFile << endl;
2032  outFile << endl;
2033  }
2034  outFile << endl;
2035 
2036 
2037  outFile << endl
2038  << endl;
2039 
2040  outFile << "ClassName\tAvg Size\t";
2041  for (bucket = 0; bucket < numOfBuckets; bucket++)
2042  {
2043  outFile << ((bucket + 1) * bucketSize) << "\t";
2044  }
2045 
2046  outFile << endl
2047  << endl;
2048 
2049  for (classNum = 0; classNum < classCount; classNum++)
2050  {
2051  double avg;
2052  if (countsByKnownClass [classNum] != 0)
2053  avg = totalSizesByKnownClass[classNum] / countsByKnownClass [classNum];
2054  else
2055  avg = 0;
2056 
2057  outFile << classes[classNum].Name ()
2058  << "\t"
2059  << avg;
2060 
2061 
2062  for (bucket = 0; bucket < numOfBuckets; bucket++)
2063  {
2064  float a = 0.0f;
2065  if (countByKnownClassBySize[classNum][bucket] != 0)
2066  a = (float)(correctByKnownClassBySize [classNum][bucket]) / (float)(countByKnownClassBySize[classNum][bucket]);
2067 
2068  outFile << "\t" << a;
2069  }
2070  outFile << endl;
2071 
2072  outFile << endl;
2073  outFile << endl;
2074  }
2075 
2076  outFile << endl;
2077 } /* PrintErrorBySizeByRows */
2078 
2079 
2080 
2081 
2082 
2084 {
2085  kkint32 bucket;
2086  kkint32 classNum;
2087 
2088  kkint32 x;
2089 
2090  double* totalCount = new double[numOfProbBuckets];
2091  double* totalCorrect = new double[numOfProbBuckets];
2092 
2093  for (x = 0; x < numOfProbBuckets; x++)
2094  {
2095  totalCount [x] = 0;
2096  totalCorrect [x] = 0;
2097  }
2098 
2099  outFile << "ClassName\tAvg Prob\t";
2100  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2101  {
2102  outFile << ((bucket + 1) * probBucketSize) << "%\t";
2103  }
2104 
2105  outFile << endl
2106  << endl;
2107 
2108  for (classNum = 0; classNum < classCount; classNum++)
2109  {
2110  double avg;
2111  if (countsByKnownClass [classNum] != 0)
2112  avg = 100.0 * totalPredProbsByKnownClass [classNum] / countsByKnownClass [classNum];
2113  else
2114  avg = 0;
2115 
2116  outFile << classes[classNum].Name ()
2117  << "\t"
2118  << avg << "%";
2119 
2120 
2121  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2122  {
2123  outFile << "\t" << countByKnownClassByProb [classNum][bucket];
2124  totalCount[bucket] = totalCount[bucket] + countByKnownClassByProb [classNum][bucket];
2125  }
2126  outFile << endl;
2127 
2128 
2129 
2130  outFile << classes[classNum].Name () << " Correct" << "\t";
2131  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2132  {
2133  outFile << "\t" << correctByKnownClassByProb [classNum][bucket];
2134  totalCorrect [bucket] = totalCorrect [bucket] + correctByKnownClassByProb [classNum][bucket];
2135  }
2136  outFile << endl;
2137 
2138 
2139  outFile << "Accuracy" << "\t";
2140  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2141  {
2142  double perc;
2143 
2144  if (countByKnownClassByProb [classNum][bucket] <= 0.0)
2145  perc = 0.0;
2146  else
2147  perc = 100.0 * correctByKnownClassByProb [classNum][bucket] / countByKnownClassByProb [classNum][bucket];
2148 
2149  outFile << "\t" << perc << "%";
2150  }
2151  outFile << endl;
2152 
2153  outFile << endl;
2154  }
2155 
2156  outFile << endl;
2157 
2158  outFile << "Total" << "\t";
2159  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2160  {
2161  outFile << "\t" << totalCount[bucket];
2162  }
2163 
2164  outFile << endl;
2165 
2166  outFile << "Correct" << "\t";
2167  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2168  {
2169  outFile << "\t" << totalCorrect[bucket];
2170  }
2171  outFile << endl;
2172 
2173  outFile << "Accuracy" << "\t";
2174  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2175  {
2176  double perc;
2177 
2178  if (totalCount[bucket] <= 0)
2179  perc = 0.0;
2180  else
2181  perc = 100.0 * totalCorrect[bucket] / totalCount[bucket];
2182 
2183  outFile << "\t" << perc << "%";
2184  }
2185 
2186  delete[] totalCount; totalCount = NULL;
2187  delete[] totalCorrect; totalCorrect = NULL;
2188 
2189  outFile << endl;
2190 
2191  return;
2192 } /* PrintErrorByProbByRows */
2193 
2194 
2195 
2196 
2197 
2199 {
2200  kkint32 bucket;
2201 
2202  outFile << "Total";
2203  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2204  {
2205  outFile << "\t" << ((bucket + 1) * probBucketSize) << "%";
2206  }
2207 
2208  outFile << endl;
2209 } /* PrintPronDistributionTitle */
2210 
2211 
2212 
2213 
2214 
2215 
2217 {
2218  kkint32 bucket;
2219  kkint32 classNum;
2220 
2221  double* count = new double[numOfProbBuckets];
2222 
2223  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2224  {
2225  count [bucket] = 0;
2226  }
2227 
2228 
2229  double total = 0;
2230 
2231  for (classNum = 0; classNum < classCount; classNum++)
2232  {
2233  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2234  {
2235  count[bucket] = count[bucket] + countByKnownClassByProb [classNum][bucket];
2236  }
2237  }
2238 
2239 
2240  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2241  {
2242  total = total + count[bucket];
2243  }
2244 
2245 
2246  outFile << total;
2247  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2248  {
2249  outFile << "\t" << count[bucket];
2250  }
2251 
2252  outFile << endl;
2253 
2254  delete[] count;
2255 
2256  return;
2257 } /* PrintProbDistributionTotalCount */
2258 
2259 
2260 
2261 
2262 
2264 {
2265  kkint32 bucket;
2266  kkint32 classNum;
2267 
2268  double* count = new double[numOfProbBuckets];
2269  double* correct = new double[numOfProbBuckets];
2270 
2271  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2272  {
2273  count [bucket] = 0;
2274  correct [bucket] = 0;
2275  }
2276 
2277 
2278  for (classNum = 0; classNum < classCount; classNum++)
2279  {
2280  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2281  {
2282  count [bucket] = count [bucket] + countByKnownClassByProb [classNum][bucket];
2283  correct[bucket] = correct[bucket] + correctByKnownClassByProb [classNum][bucket];
2284  }
2285  }
2286 
2287 
2288  double overallAccuracy = 0.0;
2289 
2290  {
2291  double totalCount = 0;
2292  double totalCorrect = 0;
2293 
2294  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2295  {
2296  totalCount = totalCount + count[bucket];
2297  totalCorrect = totalCorrect + correct[bucket];
2298  }
2299 
2300  if (totalCount == 0)
2301  {
2302  overallAccuracy = 0.0;
2303  }
2304  else
2305  {
2306  overallAccuracy = 100.0 * (double)totalCorrect / (double)totalCount;
2307  }
2308  }
2309 
2310  outFile << setprecision (2);
2311 
2312  outFile << overallAccuracy << "%";
2313 
2314  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2315  {
2316  double accuracy = 0.0;
2317 
2318  if (count[bucket] > 0)
2319  accuracy = 100.0 * (double)correct[bucket] / (double)count[bucket];
2320 
2321  outFile << "\t" << accuracy << "%";
2322  }
2323 
2324  outFile << endl;
2325 
2326  delete[] count; count = NULL;
2327  delete[] correct; correct = NULL;
2328 } /* PrintProbDistributionTotalError */
2329 
2330 
2331 
2332 
2333 
2334 
2336 {
2337  kkint32 classNum;
2338 
2339  outFile << endl;
2340  outFile << endl;
2341  outFile << endl;
2342 
2343  for (classNum = 0; classNum < classCount; classNum++)
2344  PrintErrorBySizeRowReduced (outFile, classNum);
2345 
2346 } /* PrintErrorBySizeReduced */
2347 
2348 
2349 
2350 
2351 
2352 
2353 void ConfusionMatrix2::PrintErrorBySizeRowReduced (ostream& outFile,
2354  kkint32 classNum
2355  )
2356 {
2357  kkint32* bucketHeadings = new kkint32 [numOfBuckets];
2358  double* bucketCount = new double[numOfBuckets];
2359  double* bucketCorrect = new double[numOfBuckets];
2360 
2361  kkint32 bucket = 0;
2362 
2363  kkint32 numNewBuckets = 0;
2364 
2365  while (bucket < numOfBuckets)
2366  {
2367  double countInCol = 0.0;
2368  double countCorrectInCol = 0.0;
2369 
2370  while ((bucket < numOfBuckets) && (countInCol < 20))
2371  {
2372  countInCol = countInCol + countByKnownClassBySize [classNum][bucket];
2373  countCorrectInCol = countCorrectInCol + correctByKnownClassBySize [classNum][bucket];
2374  bucket++;
2375  }
2376 
2377  bucketHeadings[numNewBuckets] = bucket * bucketSize;
2378  bucketCount[numNewBuckets] = countInCol;
2379  bucketCorrect[numNewBuckets] = countCorrectInCol;
2380  numNewBuckets++;
2381  }
2382 
2383 
2384  outFile << "Class_Name\tAvg_Size";
2385  for (bucket = 0; bucket < numNewBuckets; bucket++)
2386  {
2387  outFile << "\t" << bucketHeadings[bucket];
2388  }
2389  outFile << endl;
2390 
2391 
2392  double avg;
2393  if (countsByKnownClass [classNum] != 0)
2394  avg = totalSizesByKnownClass [classNum] / countsByKnownClass [classNum];
2395  else
2396  avg = 0;
2397 
2398 
2399  outFile << classes[classNum].Name () << "\t" << avg;
2400 
2401  for (bucket = 0; bucket < numNewBuckets; bucket++)
2402  {
2403  outFile << "\t" << bucketCount[bucket];
2404  }
2405  outFile << endl;
2406 
2407  outFile << "\t";
2408  for (bucket = 0; bucket < numNewBuckets; bucket++)
2409  {
2410  outFile << "\t" << bucketCorrect[bucket];
2411  }
2412  outFile << endl;
2413 
2414 
2415  outFile << "\t";
2416  for (bucket = 0; bucket < numNewBuckets; bucket++)
2417  {
2418  double accuracy = 0;
2419  if (bucketCount[bucket] <= 0)
2420  {
2421  accuracy = 0.0;
2422  }
2423  else
2424  {
2425  accuracy = 100.0 * (double)bucketCorrect[bucket] / (double)bucketCount[bucket];
2426  }
2427 
2428  outFile << "\t" << accuracy << "%";
2429  }
2430 
2431  outFile << endl;
2432  outFile << endl;
2433  outFile << endl;
2434 
2435  delete[] bucketHeadings;
2436  delete[] bucketCount;
2437  delete[] bucketCorrect;
2438 } /* PrintErrorBySizeRowReduced */
2439 
2440 
2441 
2442 
2444 {
2445 
2446  kkint32 x;
2447 
2448  double* accuracys = new double[classCount];
2449 
2450  for (x = 0; x < classCount; x++)
2451  {
2452  if (countsByKnownClass [x] == 0)
2453  accuracys[x] = 0;
2454  else
2455  accuracys[x] = (100.0 * (double) predictedCountsCM [x] [x]) / ((double) (countsByKnownClass [x]));
2456  }
2457 
2458 
2459  KKStr accuracyStr;
2460 
2461  for (x = 0; x < classCount; x++)
2462  {
2463  if (x > 0)
2464  accuracyStr << " ";
2465 
2466  KKStr className;
2467  MLClassPtr mlClass = classes.IdxToPtr (x);
2468  if (mlClass)
2469  className = mlClass->Name ();
2470  else
2471  className = "***UnDefined***";
2472 
2473  accuracyStr << className << " " << StrFormatDouble (accuracys[x], "##0.000") << "%";
2474  }
2475 
2476  delete[] accuracys; accuracys = NULL;
2477 
2478  return accuracyStr;
2479 } /* AccuracyStr */
2480 
2481 
2482 
2483 
2484 
2486 {
2487  if (totalCount == 0)
2488  return 0.0;
2489 
2490  return 100.0 * correctCount / totalCount;
2491 }
2492 
2493 
2494 
2495 
2497 {
2498  if (totalCount == 0)
2499  return 0.0;
2500 
2501  return totalPredProb / (double)totalCount;
2502 }
2503 
2504 
2505 
2506 
2507 
2509 {
2510  kkint32 classNum = 0;
2511 
2512  classNum = classes.PtrToIdx (mlClass);
2513  if (classNum < 0)
2514  return 0.0f;
2515 
2516  if (countsByKnownClass [classNum] == 0)
2517  return 0.0f;
2518 
2519  float accuracy = (float)(100.0 * (predictedCountsCM[classNum] [classNum]) / (countsByKnownClass [classNum]));
2520 
2521  return accuracy;
2522 } /* Accuracy */
2523 
2524 
2525 
2526 
2528 {
2529  VectorFloat accuracies;
2530  for (kkint32 classNum = 0; classNum < classCount; classNum++)
2531  {
2532  if (countsByKnownClass [classNum] == 0)
2533  {
2534  accuracies.push_back (0.0f);
2535  }
2536  else
2537  {
2538  float classAccuracy = (float)(100.0f * (predictedCountsCM[classNum] [classNum]) / (countsByKnownClass [classNum]));
2539  accuracies.push_back (classAccuracy);
2540  }
2541  }
2542 
2543  return accuracies;
2544 } /* AccuracyByClass */
2545 
2546 
2547 
2549 {
2550  kkint32 classCount = classes.QueueSize ();
2551  float totalAccuracy = 0.0f;
2552 
2553  for (kkint32 classNum = 0; classNum < classCount; classNum++)
2554  {
2555  if (countsByKnownClass [classNum] != 0)
2556  {
2557  float classAccuracy = (float)(100.0f * (predictedCountsCM[classNum] [classNum]) / (countsByKnownClass [classNum]));
2558  totalAccuracy += classAccuracy;
2559  }
2560  }
2561 
2562  float weightedAccuracy = (float)(totalAccuracy / classCount);
2563 
2564  return weightedAccuracy;
2565 } /* AccuracyClassWeightedEqually */
2566 
2567 
2568 
2569 
2570 
2572 {
2573  kkint32 classNum = 0;
2574  bool found = false;
2575  kkint32 numClasses = classes.QueueSize ();
2576 
2577  while ((classNum < numClasses) && (!found))
2578  {
2579  if (classes[classNum].UpperName () == mlClass->UpperName ())
2580  found = true;
2581  else
2582  classNum++;
2583  }
2584 
2585 
2586  if (found)
2587  {
2588  return countsByKnownClass [classNum];
2589  }
2590 
2591  return 0.0;
2592 } /* Count */
2593 
2594 
2595 
2596 
2597 void ConfusionMatrix2::FactorCounts (double factor)
2598 {
2599  kkint32 x;
2600 
2601  correctCount *= factor;
2602  totalCount *= factor;
2603  totalPredProb *= factor;
2604  numInvalidClassesPredicted *= factor;
2605 
2606  for (x = 0; x < classCount; x++)
2607  {
2608  countsByKnownClass [x] = countsByKnownClass [x] * factor;
2609  totalSizesByKnownClass [x] = totalSizesByKnownClass [x] * factor;
2610  totalPredProbsByKnownClass [x] = totalPredProbsByKnownClass [x] * factor;
2611 
2612  kkint32 y;
2613 
2614  for (y = 0; y < classCount; y++)
2615  {
2616  predictedCountsCM[x][y] = predictedCountsCM[x][y] * factor;
2617  totPredProbCM [x][y] = totPredProbCM [x][y] * factor;
2618  }
2619 
2620  for (y = 0; y < numOfBuckets; y++)
2621  {
2622  countByKnownClassBySize [x][y] = countByKnownClassBySize [x][y] * factor;
2623  correctByKnownClassBySize [x][y] = correctByKnownClassBySize [x][y] * factor;
2624  }
2625 
2626  for (y = 0; y < numOfProbBuckets; y++)
2627  {
2628  countByKnownClassByProb [x][y] = countByKnownClassByProb [x][y] * factor;
2629  correctByKnownClassByProb [x][y] = correctByKnownClassByProb [x][y] * factor;
2630  }
2631  }
2632 } /* FactorCounts */
2633 
2634 
2635 
2636 
2637 
2638 /******************************************************************************
2639  * PrintConfusionMatrixHTML
2640  ******************************************************************************/
2642  ostream& file
2643  )
2644 {
2645  kkint32 knownClassNum;
2646  kkint32 predClassNum;
2647  kkint32 x;
2648 
2649  // generate html preamble
2650  file << "<html>" << endl;
2651  file << "<head>" << endl;
2652  file << "<title>" << title << "</title>" << endl;
2653  file << "<body bgcolor=\"white\">" << endl;
2654 
2655  // generate the title
2656  file << "<h1>" << title << "</h1>" << endl;
2657 
2658  // generate the accuracy statement
2659  file << "<p><b>Overall Accuracy</b> is "
2660  << setprecision (5)
2661  << (100.0 * correctCount / totalCount) << "%"
2662  << "</p>" << endl;
2663 
2664  /***************************************************************************
2665  * generate the table with the counts
2666  ***************************************************************************/
2667  file << "<table cellpadding=\"2\" cellspacing=\"0\" border=\"2\">" << endl;
2668  file << "<tr>" << endl;
2669 
2670  // output the first row (which is class names)
2671  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>Class Names</b></th>" << endl;
2672  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>Totals</b></th>" << endl;
2673  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
2674  {
2675  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>" << classes[knownClassNum].Name() << "</b></th>" << endl;
2676  }
2677  file << "</tr>" << endl;
2678 
2679  double *totals = new double[classCount];
2680  for (x = 0; x < classCount; x++)
2681  {
2682  totals[x] = 0;
2683  }
2684 
2685  double totalNonNoise = 0;
2686  double totalNonNoiseRight = 0;
2687 
2688  // output the data rows
2689  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
2690  {
2691  bool noiseClass = classes[knownClassNum].UnDefined();
2692  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
2693  {
2694  totals[predClassNum] += predictedCountsCM[knownClassNum] [predClassNum];
2695  }
2696 
2697  file << "<tr>" << endl;
2698  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>" << classes [knownClassNum].Name() << "</b></th>" << endl;
2699  file << "<td align=\"center\" bgcolor=\"#EFEFEF\">" << countsByKnownClass [knownClassNum] << "</td>" << endl;
2700 
2701  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
2702  {
2703  if (predClassNum == knownClassNum)
2704  file << "<td align=\"center\" bgcolor=\"#EEEEEE\">";
2705  else
2706  file << "<td align=\"center\">";
2707  file << predictedCountsCM[knownClassNum][predClassNum];
2708  file << "</td>" << endl;
2709  }
2710  file << "</tr>" << endl;
2711  if (!noiseClass)
2712  {
2713  totalNonNoise = totalNonNoise + countsByKnownClass [knownClassNum];
2714  totalNonNoiseRight += predictedCountsCM [knownClassNum] [knownClassNum];
2715  }
2716  }
2717 
2718  // output the totals line for the first table
2719  file << "<tr>" << endl;
2720  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>Totals</b></th>" << endl;
2721  file << "<td align=\"center\" bgcolor=\"#EEEEEE\">" << totalCount << "</b></th>" << endl;
2722 
2723  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
2724  {
2725  file << "<td align=\"center\">";
2726  file << totals[predClassNum];
2727  file << "</td>" << endl;
2728  }
2729  file << "</tr>" << endl;
2730  file << "</table>" << endl;
2731 
2732  /***************************************************************************
2733  * generate the table with the percents
2734  ***************************************************************************/
2735  file << "<br/>" << endl;
2736  file << "<p><b>Accuracy for Non Noise</b> "
2737  << setprecision (5)
2738  << (((double)totalNonNoiseRight / (double)totalNonNoise) * 100.0)
2739  << "%</p>"
2740  << endl;
2741  file << "<table cellpadding=\"2\" cellspacing=\"0\" border=\"2\">" << endl;
2742  file << "<tr>" << endl;
2743 
2744  // output the first row (which is class names)
2745  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>Class Names</b></th>" << endl;
2746  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>Totals</b></th>" << endl;
2747  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
2748  {
2749  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>" << classes[knownClassNum].Name() << "</b></th>" << endl;
2750  }
2751  file << "</tr>" << endl;
2752 
2753  // output the data rows
2754  double perc=0.0;
2755  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
2756  {
2757  file << "<tr>" << endl;
2758  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>" << classes[knownClassNum].Name() << "</b></th>" << endl;
2759  file << "<td align=\"center\" bgcolor=\"#EFEFEF\">" << setprecision (4) << (countsByKnownClass [knownClassNum]/totalCount*100) << "</td>" << endl;
2760 
2761  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
2762  {
2763  if (predClassNum == knownClassNum)
2764  file << "<td align=\"center\" bgcolor=\"#EEEEEE\">";
2765  else
2766  file << "<td align=\"center\">";
2767  if (countsByKnownClass [knownClassNum] <= 0)
2768  perc = 0.0;
2769  else
2770  perc = predictedCountsCM [knownClassNum][predClassNum] / countsByKnownClass [knownClassNum] * 100.0;
2771 
2772  file << setprecision (4) << perc;
2773  file << "</td>" << endl;
2774  }
2775  file << "</tr>" << endl;
2776  }
2777  file << "</table>" << endl;
2778 
2779  file << "</body>" << endl;
2780  file << "</html>" << endl;
2781 
2782  delete[] totals;
2783 } /* PrintConfusionMatrixHTML */
2784 
2785 
2786 
2787 void ConfusionMatrix2::MakeSureWeHaveTheseClasses (const MLClassList& classList,
2788  RunLog& log
2789  )
2790 {
2791  MLClassList::const_iterator idx;
2792  for (idx = classList.begin (); idx != classList.end (); ++idx)
2793  {
2794  MLClassPtr ic = *idx;
2795  if (classes.PtrToIdx (ic) < 0)
2796  AddClassToConfusionMatrix (ic, log);
2797  }
2798 } /* MakeSureWeHaveTheseClasses */
2799 
2800 
2801 
2803  RunLog& log
2804  )
2805 {
2806  MakeSureWeHaveTheseClasses (cm.classes, log);
2807 
2808  kkint32 numOfClasses = classes.QueueSize ();
2809  kkint32 classIDX = 0;
2810 
2811 
2812  // Create indirection array to handle the situation where the mlClass list's of the two
2813  // confusion matrixes '*this' and 'cm' are not in the same order.
2814 
2815  vector<kkint32> ind (numOfClasses, 0);
2816  for (classIDX = 0; classIDX < numOfClasses; classIDX++)
2817  {
2818  MLClassPtr mlClass = classes.IdxToPtr (classIDX);
2819  kkint32 cmsIDX = cm.classes.PtrToIdx (mlClass);
2820  ind[classIDX] = cmsIDX;
2821  }
2822 
2823  for (classIDX = 0; classIDX < numOfClasses; classIDX++)
2824  {
2825  kkint32 cmsIDX = ind[classIDX];
2826  if (cmsIDX < 0)
2827  {
2828  // cmsIDX < 0 indicates that the confusion matrix being added in does not include the class indicated by 'classIDX'.
2829  }
2830  else
2831  {
2832  countsByKnownClass [classIDX] += cm.countsByKnownClass [cmsIDX];
2833  totalSizesByKnownClass [classIDX] += cm.totalSizesByKnownClass [cmsIDX];
2834  totalPredProbsByKnownClass [classIDX] += cm.totalPredProbsByKnownClass [cmsIDX];
2835 
2836  kkint32 predictedClassIDX = 0;
2837  for (predictedClassIDX = 0; predictedClassIDX < numOfClasses; predictedClassIDX++)
2838  {
2839  kkint32 cmsPredictedClassIDX = ind[predictedClassIDX];
2840  if (cmsPredictedClassIDX >= 0)
2841  {
2842  predictedCountsCM[classIDX][predictedClassIDX] += cm.predictedCountsCM[cmsIDX][cmsPredictedClassIDX];
2843  totPredProbCM [classIDX][predictedClassIDX] += cm.totPredProbCM [cmsIDX][cmsPredictedClassIDX];
2844  }
2845  }
2846 
2847  kkint32 bucketIDX = 0;
2848  for (bucketIDX = 0; bucketIDX < numOfBuckets; bucketIDX++)
2849  {
2850  countByKnownClassBySize [classIDX][bucketIDX] += cm.countByKnownClassBySize [cmsIDX][bucketIDX];
2851  correctByKnownClassBySize [classIDX][bucketIDX] += cm.correctByKnownClassBySize [cmsIDX][bucketIDX];
2852  }
2853 
2854  kkint32 probIDX = 0;
2855  for (probIDX = 0; probIDX < numOfProbBuckets; probIDX++)
2856  {
2857  countByKnownClassByProb [classIDX][probIDX] += cm.countByKnownClassByProb [cmsIDX][probIDX];
2858  correctByKnownClassByProb [classIDX][probIDX] += cm.correctByKnownClassByProb [cmsIDX][probIDX];
2859  }
2860  }
2861  }
2862 
2863  correctCount += cm.correctCount;
2864  totalCount += cm.totalCount;
2865  totalPredProb += cm.totalPredProb;
2866 } /* AddIn */
2867 
2868 
2869 
2870 template<typename T>
2872  kkint32 _count,
2873  char _delimiter
2874  )
2875 {
2876  kkint32 x = _count * 10;
2877  KKStr s (x);
2878 
2879  for (kkint32 x = 0; x < _count; x++)
2880  {
2881  if (x > 0)
2882  s.Append (_delimiter);
2883  s << _array[x];
2884  }
2885  return s;
2886 } /* ArrayToDelimitedDelimitedStr */
2887 
2888 
2889 
2890 template<typename T>
2892  char delimiter
2893  )
2894 {
2895  KKStr s (v.size () * 10);
2896 
2897  for (kkuint32 x = 0; x < v.size (); x++)
2898  {
2899  if (x > 0) s.Append (delimiter);
2900  s << v[x];
2901  }
2902  return s;
2903 } /* ArrayToDelimitedDelimitedStr */
2904 
2905 
2906 
2907 
2908 
2909 void DelimitedStrToArray (vector<kkint32>& v,
2910  kkint32 minSize,
2911  const KKStr& l,
2912  char delimiter
2913  )
2914 {
2915  v.clear ();
2916  VectorKKStr fields = l.Split (delimiter);
2917  kkint32 lastField = (kkint32)fields.size ();
2918  for (kkint32 idx = 0; idx < lastField; ++idx)
2919  v.push_back (fields[idx].ToInt32 ());
2920  while (v.size () < (kkuint32)minSize)
2921  v.push_back ((kkint32)0);
2922 } /* DelimitedStrToArray */
2923 
2924 
2925 
2926 void DelimitedStrToArray (vector<double>& v,
2927  kkint32 minSize,
2928  const KKStr& l,
2929  char delimiter
2930  )
2931 {
2932  v.clear ();
2933  VectorKKStr fields = l.Split (delimiter);
2934  kkint32 lastField = (kkint32)fields.size ();
2935  for (kkint32 idx = 0; idx < lastField; idx++)
2936  v.push_back (fields[idx].ToDouble ());
2937  while (v.size () < (kkuint32)minSize)
2938  v.push_back ((double)0);
2939 } /* DelimitedStrToArray */
2940 
2941 
2942 
2943 
2944 
2946  kkint32 _count,
2947  const KKStr& _l,
2948  char _delimiter
2949  )
2950 {
2951  VectorKKStr fields = _l.Split (_delimiter);
2952  kkint32 lastField = Min ((kkint32)fields.size (), _count);
2953  for (kkint32 idx = 0; idx < lastField; idx++)
2954  _array[idx] = fields[idx].ToInt ();
2955 } /* DelimitedStrToArray */
2956 
2957 
2958 
2959 
2960 void DelimitedStrToArray (double* _array,
2961  kkint32 _count,
2962  const KKStr& _l,
2963  char _delimiter
2964  )
2965 {
2966  VectorKKStr fields = _l.Split (_delimiter);
2967  kkint32 lastField = Min ((kkint32)fields.size (), _count);
2968  for (kkint32 idx = 0; idx < lastField; idx++)
2969  _array[idx] = fields[idx].ToDouble ();
2970 } /* DelimitedStrToArray */
2971 
2972 
2973 
2974 
2975 
2976 void ConfusionMatrix2::WriteXML (ostream& f) const
2977 {
2978  f << "<ConfusionMatrix2>" << endl;
2979 
2980  f << "Classes" << "\t" << classes.ToCommaDelimitedStr () << endl;
2981 
2982  f << "ClassCount" << "\t" << classCount << endl
2983  << "BucketSize" << "\t" << bucketSize << endl
2984  << "probBucketSize" << "\t" << probBucketSize << endl
2985  << "NumOfBuckets" << "\t" << numOfBuckets << endl
2986  << "NumOfProbBuckets" << "\t" << numOfProbBuckets << endl
2987  << endl
2988  << "TotalCount" << "\t" << totalCount << endl
2989  << "CorrectCount" << "\t" << correctCount << endl
2990  << "TotalPredProb" << "\t" << totalPredProb << endl
2991  << "NumInvalidClassesPredicted" << "\t" << numInvalidClassesPredicted << endl
2992  << endl;
2993 
2994  f << "CountsByKnownClass" << "\t" << ArrayToDelimitedDelimitedStr (countsByKnownClass, ',') << endl;
2995  f << "TotalSizesByKnownClass" << "\t" << ArrayToDelimitedDelimitedStr (totalSizesByKnownClass, ',') << endl;
2996  f << "TotalPredProbsByKnownClass" << "\t" << ArrayToDelimitedDelimitedStr (totalPredProbsByKnownClass, ',') << endl;
2997 
2998 
2999  kkint32 classIndex = 0;
3000  MLClassList::const_iterator idx;
3001  for (idx = classes.begin (); idx != classes.end (); idx++)
3002  {
3003  MLClassPtr mlClass = *idx;
3004  f << "ClassTotals" << "\t" << "ClassName" << "\t" << mlClass->Name ().QuotedStr () << "\t" << "ClassIndex" << "\t" << classIndex << endl;
3005 
3006  f << "CountByKnownClassBySize" << "\t" << ArrayToDelimitedDelimitedStr (countByKnownClassBySize [classIndex], numOfBuckets, ',') << endl;
3007  f << "CorrectByKnownClassBySize" << "\t" << ArrayToDelimitedDelimitedStr (correctByKnownClassBySize [classIndex], numOfBuckets, ',') << endl;
3008 
3009  f << "CountByKnownClassByProb" << "\t" << ArrayToDelimitedDelimitedStr (countByKnownClassByProb [classIndex], numOfProbBuckets, ',') << endl;
3010  f << "CorrectByKnownClassByProb" << "\t" << ArrayToDelimitedDelimitedStr (correctByKnownClassByProb [classIndex], numOfProbBuckets, ',') << endl;
3011 
3012  f << "PredictedCountsCM" << "\t" << ArrayToDelimitedDelimitedStr (predictedCountsCM [classIndex], classCount, ',') << endl;
3013 
3014  f << "TotPredProbCM" << "\t" << ArrayToDelimitedDelimitedStr (totPredProbCM [classIndex], classCount, ',') << endl;
3015 
3016  classIndex++;
3017  }
3018 
3019  f << "</ConfusionMatrix2>" << endl;
3020 } /* Write */
3021 
3022 
3023 
3024 
3025 
3026 
3027 ConfusionMatrix2Ptr ConfusionMatrix2::BuildFromIstreamXML (istream& f,
3028  RunLog& log
3029  )
3030 {
3031  if (f.eof ())
3032  {
3033  log.Level (-1) << endl << "ConfusionMatrix2::BuildFromIstreamXML ***ERROR*** File already at EOF." << endl << endl;
3034  return NULL;
3035  }
3036 
3037 
3038  char buff[10240];
3039  buff[0] = 0;
3040 
3041  kkint64 startPos = f.tellg ();
3042  MLClassListPtr classes = NULL;
3043 
3044  kkint32 bucketSize = -1;
3045  kkint32 classCount = -1;
3046  kkint32 numOfBuckets = -1;
3047  kkint32 numOfProbBuckets = -1;
3048  kkint32 probBucketSize = -1;
3049 
3050  f.getline (buff, sizeof (buff));
3051  while ((!f.eof ()) && ((!classes) || (bucketSize < 1) || (numOfBuckets < 1) || (numOfProbBuckets < 1) || (probBucketSize < 1) || (classCount < 1)))
3052  {
3053  KKStr l (buff);
3054  l.TrimLeft ();
3055 
3056  if (l.CompareIgnoreCase ("</ConfusionMatrix2>") == 0)
3057  break;
3058 
3059  KKStr lineName = l.ExtractToken2 ("\t");
3060  if (lineName.CompareIgnoreCase ("Classes") == 0)
3061  classes = MLClassList::BuildListFromDelimtedStr (l, '\t');
3062 
3063  else if (lineName.CompareIgnoreCase ("bucketSize") == 0)
3064  bucketSize = l.ExtractTokenInt ("\t\n\r");
3065 
3066  else if (lineName.CompareIgnoreCase ("classCount") == 0)
3067  classCount = l.ExtractTokenInt ("\t\n\r");
3068 
3069  else if (lineName.CompareIgnoreCase ("numOfBuckets") == 0)
3070  numOfBuckets = l.ExtractTokenInt ("\t\n\r");
3071 
3072  else if (lineName.CompareIgnoreCase ("numOfProbBuckets") == 0)
3073  numOfProbBuckets = l.ExtractTokenInt ("\t\n\r");
3074 
3075  else if (lineName.CompareIgnoreCase ("probBucketSize") == 0)
3076  probBucketSize = l.ExtractTokenInt ("\t\n\r");
3077 
3078  f.getline (buff, sizeof (buff));
3079  }
3080 
3081  if (classes == NULL)
3082  {
3083  log.Level (-1) << endl
3084  << "ConfusionMatrix2::BuildFromIstreamXML ***ERROR*** No Class List Was Provided." << endl
3085  << endl;
3086  // Failed to locate ClassList ('classes') we an not build a ConfusionMatrixc2 object.
3087  return NULL;
3088  }
3089 
3090  if ((bucketSize < 1) || (numOfBuckets < 1) || (numOfProbBuckets < 1) || (probBucketSize < 1))
3091  {
3092  delete classes; classes = NULL;
3093  log.Level (-1) << endl
3094  << "ConfusionMatrix2::BuildFromIstreamXML ***ERROR*** Not all needed header fields were defined." << endl
3095  << " bucketSize[" << bucketSize << "] ClassCount[" << classCount << "] numOfBuckets[" << numOfBuckets << "] numOfProbBuckets[" << numOfProbBuckets << "] probBucketSize[" << probBucketSize << "]" << endl
3096  << endl;
3097  // Failed to locate ClassList ('classes') we an not build a ConfusionMatrixc2 object.
3098  return NULL;
3099  }
3100 
3101 
3102  if (classCount != classes->QueueSize ())
3103  {
3104  log.Level (-1) << endl
3105  << "ConfusionMatrix2::BuildFromIstreamXML ***ERROR*** Disagreement between ClassCount[" << classCount << "] and Classes.QueueSize[" << classes->QueueSize () << "]" << endl
3106  << endl;
3107  delete classes; classes = NULL;
3108  return NULL;
3109  }
3110 
3111 
3112  f.seekg (startPos);
3113 
3114  ConfusionMatrix2Ptr cm = new ConfusionMatrix2 (*classes, f, bucketSize, numOfBuckets, numOfProbBuckets, probBucketSize, log);
3115  delete classes; classes = NULL;
3116  return cm;
3117 } /* BuildFromIstreamXML */
3118 
3119 
3120 
3121 
3122 void ConfusionMatrix2::Read (istream& f,
3123  RunLog& log
3124  )
3125 {
3126  if (f.eof ())
3127  {
3128  log.Level (-1) << "ConfusionMatrix2::Read ***ERROR*** File at EOF can not read any data." << endl;
3129  return;
3130  }
3131 
3132  char buff[10240];
3133  buff[0] = 0;
3134  KKStr l (512);
3135 
3136  MLClassListPtr classes = NULL;
3137 
3138  kkint32 bucketSize = -1;
3139  kkint32 numOfBuckets = -1;
3140  kkint32 numOfProbBuckets = -1;
3141  kkint32 probBucketSize = -1;
3142 
3143  kkint32 classIndex = 0;
3144  KKStr className = "";
3145 
3146  f.getline (buff, sizeof (buff));
3147 
3148  while (!f.eof ())
3149  {
3150  l = buff;
3151  l.TrimLeft ();
3152  l.TrimRight ();
3153 
3154  if (l.CompareIgnoreCase ("</ConfusionMatrix2>") == 0)
3155  break;
3156 
3157  KKStr lineName = l.ExtractToken ("\t\n\r");
3158 
3159  if (lineName.CompareIgnoreCase ("ClassCount") == 0)
3160  classCount = l.ExtractTokenInt ("\t\n\r");
3161 
3162  else if (lineName.CompareIgnoreCase ("BucketSize") == 0)
3163  bucketSize = l.ExtractTokenInt ("\t");
3164 
3165  else if (lineName.CompareIgnoreCase ("probBucketSize") == 0)
3166  bucketSize = l.ExtractTokenInt ("\t");
3167 
3168  else if (lineName.CompareIgnoreCase ("NumOfBuckets") == 0)
3169  numOfBuckets = l.ExtractTokenInt ("\t");
3170 
3171  else if (lineName.CompareIgnoreCase ("NumOfProbBuckets") == 0)
3172  numOfProbBuckets = l.ExtractTokenInt ("\t");
3173 
3174  else if (lineName.CompareIgnoreCase ("TotalCount") == 0)
3175  totalCount = l.ExtractTokenDouble ("\t");
3176 
3177  else if (lineName.CompareIgnoreCase ("CorrectCount") == 0)
3178  correctCount = l.ExtractTokenDouble ("\t");
3179 
3180  else if (lineName.CompareIgnoreCase ("TotalPredProb") == 0)
3181  totalPredProb = l.ExtractTokenDouble ("\t");
3182 
3183  else if (lineName.CompareIgnoreCase ("NumInvalidClassesPredicted") == 0)
3184  numInvalidClassesPredicted = l.ExtractTokenDouble ("\t");
3185 
3186  else if (lineName.CompareIgnoreCase ("CountsByKnownClass") == 0)
3187  DelimitedStrToArray (countsByKnownClass, classCount, l, ',');
3188 
3189  else if (lineName.CompareIgnoreCase ("TotalSizesByKnownClass") == 0)
3190  DelimitedStrToArray (totalSizesByKnownClass, classCount, l, ',');
3191 
3192  else if (lineName.CompareIgnoreCase ("TotalPredProbsByKnownClass") == 0)
3193  DelimitedStrToArray (totalPredProbsByKnownClass, classCount, l, ',');
3194 
3195  else if (lineName.CompareIgnoreCase ("ClassTotals") == 0)
3196  {
3197  KKStr classNameLabel = l.ExtractToken ("\t");
3198  className = l.ExtractToken ("\t");
3199 
3200  KKStr classIndexLabel = l.ExtractToken ("\t");
3201  classIndex = l.ExtractTokenInt ("\t");
3202 
3203  if ((classIndex < 0) || (classIndex >= classCount))
3204  {
3205  log.Level (-1) << endl
3206  << "ConfusionMatrix2::Read ***ERROR*** ClassIndex[" << classIndex << "] out of range." << endl
3207  << endl;
3208  classIndex = 0;
3209  break;
3210  }
3211  }
3212 
3213  else if (lineName.CompareIgnoreCase ("CountByKnownClassBySize") == 0)
3214  DelimitedStrToArray (countByKnownClassBySize [classIndex], numOfBuckets, l, ',');
3215 
3216  else if (lineName.CompareIgnoreCase ("CorrectByKnownClassBySize") == 0)
3217  DelimitedStrToArray (correctByKnownClassBySize [classIndex], numOfBuckets, l, ',');
3218 
3219  else if (lineName.CompareIgnoreCase ("CountByKnownClassByProb") == 0)
3220  DelimitedStrToArray (countByKnownClassByProb [classIndex], numOfProbBuckets, l, ',');
3221 
3222  else if (lineName.CompareIgnoreCase ("CorrectByKnownClassByProb") == 0)
3223  DelimitedStrToArray (correctByKnownClassByProb [classIndex], numOfProbBuckets, l, ',');
3224 
3225  else if (lineName.CompareIgnoreCase ("PredictedCountsCM") == 0)
3226  DelimitedStrToArray (predictedCountsCM [classIndex], classCount, l, ',');
3227 
3228  else if (lineName.CompareIgnoreCase ("TotPredProbCM") == 0)
3229  DelimitedStrToArray (totPredProbCM [classIndex], classCount, l, ',');
3230 
3231  if (!f.eof ())
3232  f.getline (buff, sizeof (buff));
3233  }
3234 } /* Read */
3235 
3236 
3237 
3238 
3239 
3240 
3242 {
3243  // "Estimating the Taxonomic composition of a sample when individuals are classified with error"
3244  // by Andrew Solow, Cabell Davis, Qiao Hu
3245  // Woods Hole Oceanographic Institution, Woods Hole Massachusetts
3246  // Marine Ecology Progress Series
3247  // published 2006-july-06
3248  // vol 216:309-311
3249 
3250  // This data is meant to work with "ClassificationStatus.cs" to provide the data necessary to
3251  // adjust for bias.
3252 
3253  f << "<SimpleConfusionMatrix>" << endl;
3254  f << "Classes" << "\t" << classes.ToCommaDelimitedStr () << endl;
3255  kkint32 row = 0;
3256  kkint32 col = 0;
3257  MLClassList::const_iterator idx;
3258  MLClassList::const_iterator idx2;
3259  for (idx = classes.begin (); idx != classes.end (); idx++)
3260  {
3261  MLClassPtr mlClass = *idx;
3262  f << "DataRow" << "\t" << mlClass->Name () << "\t";
3263  col = 0;
3264  for (col = 0; col < classCount; col++)
3265  {
3266  double p = 0.0;
3267  if (countsByKnownClass[row] != 0.0)
3268  p = predictedCountsCM[row][col] / countsByKnownClass[row];
3269 
3270  if (col > 0)
3271  f << ",";
3272 
3273  f << StrFormatDouble (predictedCountsCM[row][col], "ZZZZZ0.00") << ":" << StrFormatDouble (p, "ZZ0.0000000");
3274  }
3275  f << std::endl;
3276 
3277  row++;
3278  }
3279 
3280  f << "</SimpleConfusionMatrix>" << std::endl;
3281 } /* WriteProbabilityMatrix */
3282 
3283 
3284 
3285 
3288 {
3289 
3290 }
3291 
3292 
3293 
3295 {
3296 }
3297 
3298 
3299 
3300 
3302 {
3303  if (QueueSize () == 0)
3304  {
3305  return NULL;
3306  }
3307 
3308  const_iterator cmIDX = begin ();
3309  const ConfusionMatrix2Ptr firstCM = *cmIDX;
3310 
3311  ConfusionMatrix2Ptr meanCM = new ConfusionMatrix2 (firstCM->MLClasses ());
3312 
3313  for (cmIDX = begin (); cmIDX != end (); cmIDX++)
3314  {
3315  const ConfusionMatrix2Ptr cm = *cmIDX;
3316  meanCM->AddIn (*cm, log);
3317  }
3318 
3319  double factor = 1.0 / (double)QueueSize ();
3320 
3321  meanCM->FactorCounts (factor);
3322 
3323  return meanCM;
3324 } /* DeriveAverageConfusionMatrix */
KKStr(kkint32 size)
Creates a KKStr object that pre-allocates space for 'size' characters.
Definition: KKStr.cpp:655
ConfusionMatrix2(const ConfusionMatrix2 &cm)
void PrintConfusionMatrixHTML(const char *title, ostream &file)
MLClass * MLClassPtr
Definition: MLClass.h:46
void PrintAccuracyByProbByClassHTML(ostream &o)
double PercentOf(double x, double y)
void PrintErrorByProb(ostream &outFile)
__int32 kkint32
Definition: KKBaseTypes.h:88
float FMeasure(MLClassPtr positiveClass, RunLog &log) const
void LeftPad(kkint32 width, uchar ch= ' ')
Pads the string with enough 'ch' characters on the left side until the string is as long as 'width' characters.
Definition: KKStr.cpp:2303
VectorFloat AccuracyByClass() const
void PrintConfusionMatrixLatexTable(ostream &outFile)
void PrintConfusionMatrixNarrow(ostream &outFile)
KKStr & operator=(const char *src)
Definition: KKStr.cpp:1442
void DelimitedStrToArray(kkint32 *_array, kkint32 _count, const KKStr &_l, char _delimiter)
ConfusionMatrix2(const MLClassList &_classes, istream &f, kkint32 _bucketSize, kkint32 _numOfBuckets, kkint32 _numOfProbBuckets, kkint32 _probBucketSize, RunLog &_log)
void PrintConfusionMatrixAvgPredProbHTML(ostream &o)
ConfusionMatrix2 * ConfusionMatrix2Ptr
void PrintTrueFalsePositivesTabDelimited(ostream &outFile)
double Count(MLClassPtr mlClass)
KKStr operator+(const char *right) const
Definition: KKStr.cpp:3986
void PrintErrorBySize(ostream &outFile)
KKStr ArrayToDelimitedDelimitedStr(const vector< T > &v, char delimiter)
unsigned __int32 kkuint32
Definition: KKBaseTypes.h:89
void PrintConfusionMatrixTabDelimited(ostream &outFile)
__int64 kkint64
Definition: KKBaseTypes.h:90
void PrintConfusionMatrix(ostream &_outFile)
char operator[](kkuint32 i) const
Definition: KKStr.cpp:3430
KKStr & operator=(KKStr &&src)
Definition: KKStr.cpp:1369
double CountsByKnownClass(kkint32 knownClassIdx) const
kkuint32 Len() const
Returns the number of characters in the string.
Definition: KKStr.h:366
KKTHread * KKTHreadPtr
void WriteSimpleConfusionMatrix(ostream &f) const
void Append(char ch)
Definition: KKStr.cpp:1863
void FactorCounts(double factor)
static ConfusionMatrix2Ptr BuildFromIstreamXML(istream &f, RunLog &log)
KKStr(const KKStr &str)
Copy Constructor.
Definition: KKStr.cpp:561
void AddIn(const ConfusionMatrix2 &cm, RunLog &log)
ConfusionMatrix2Ptr DeriveAverageConfusionMatrix(RunLog &log) const
std::vector< float > VectorFloat
Definition: KKBaseTypes.h:149
const VectorDouble & CountsByKnownClass() const
ConfusionMatrix2(const MLClassList &_classes)
void PrintErrorByProbByRows(ostream &outFile)
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
KKStr ArrayToDelimitedDelimitedStr(T *_array, kkint32 _count, char _delimiter)
void PrintProbDistributionTotalCount(ostream &outFile)
VectorDouble PredictedCounts() const
void PrintProbDistributionTotalError(ostream &outFile)
void PrintErrorBySizeReduced(ostream &outFile)
const KKStr & Name() const
Definition: MLClass.h:154
KKStr(const char *str)
Definition: KKStr.cpp:537
KKStr StripOutInvalidLatexCaracters(const KKStr &src)
std::ostream &__cdecl operator<<(std::ostream &os, const KKStr &str)
ConfussionMatrix2List(bool _owner=true)
void DelimitedStrToArray(vector< double > &v, kkint32 minSize, const KKStr &l, char delimiter)
void ComputeFundamentalStats(MLClassPtr ic, double &truePositives, double &trueNegatives, double &falsePositives, double &falseNegatives) const
KKStr StrFormatDouble(double val, const char *mask)
Definition: KKStr.cpp:4819
void Increment(MLClassPtr _knownClass, MLClassPtr _predClass, kkint32 _size, double _probability, RunLog &_log)
double Accuracy(MLClassPtr mlClass)
void PrintConfusionMatrixHTML(ostream &outFile)
KKStr & operator=(const KKStr &src)
Definition: KKStr.cpp:1390
void PrintProbDistributionTitle(ostream &outFile)
const MLClassList & MLClasses() const
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
MLClassList * MLClassListPtr
Definition: MLClass.h:49
void WriteXML(ostream &f) const
void DelimitedStrToArray(double *_array, kkint32 _count, const KKStr &_l, char _delimiter)
VectorKKStr Split(char del) const
Splits the string up into tokens using 'del' as the separator, returning them in a vector...
Definition: KKStr.cpp:3500
Maintains a list of MLClass instances.
Definition: MLClass.h:233
void PrintErrorBySizeByRows(ostream &outFile)
double PredictedCountsCM(kkint32 knownClassIdx, kkint32 predClassIdx) const
KKStr SubStrPart(kkint32 firstChar) const
Returns a substring consisting of all characters starting at index 'firstChar' until the end of the string.
Definition: KKStr.cpp:2780
A confusion matrix object that is used to record the results from a CrossValidation (see also CrossValidation).
std::vector< double > VectorDouble
Vector of doubles.
Definition: KKBaseTypes.h:148