KSquare Utilities
KKMLL::ConfusionMatrix2 Class Reference

A confusion matrix object that is used to record the results from a CrossValidation. See also: CrossValidation. More...

#include <ConfusionMatrix2.h>

Public Types

typedef ConfusionMatrix2 * ConfusionMatrix2Ptr
 

Public Member Functions

 ConfusionMatrix2 (const MLClassList &_classes)
 
 ConfusionMatrix2 (const ConfusionMatrix2 &cm)
 
 ConfusionMatrix2 (const MLClassList &_classes, istream &f, kkint32 _bucketSize, kkint32 _numOfBuckets, kkint32 _numOfProbBuckets, kkint32 _probBucketSize, RunLog &_log)
 
virtual ~ConfusionMatrix2 ()
 
double Accuracy ()
 
double Accuracy (MLClassPtr mlClass)
 
VectorFloat AccuracyByClass () const
 
float AccuracyClassWeightedEqually ()
 
float AccuracyNorm ()
 
KKStr AccuracyStr ()
 
void AddIn (const ConfusionMatrix2 &cm, RunLog &log)
 
double AvgPredProb () const
 
kkint32 ClassCount () const
 
void ComputeFundamentalStats (MLClassPtr ic, double &truePositives, double &trueNegatives, double &falsePositives, double &falseNegatives) const
 
double Count (MLClassPtr mlClass)
 
double CountsByKnownClass (kkint32 knownClassIdx) const
 
const VectorDouble & CountsByKnownClass () const
 
void FactorCounts (double factor)
 
float FMeasure (MLClassPtr positiveClass, RunLog &log) const
 
void Increment (MLClassPtr _knownClass, MLClassPtr _predClass, kkint32 _size, double _probability, RunLog &_log)
 
const MLClassList & MLClasses () const
 
VectorDouble PredictedCounts () const
 
double PredictedCountsCM (kkint32 knownClassIdx, kkint32 predClassIdx) const
 
void PrintAccuracyByProbByClassHTML (ostream &o)
 
void PrintConfusionMatrix (ostream &_outFile)
 
void PrintConfusionMatrixAvgPredProbHTML (ostream &o)
 
void PrintConfusionMatrixHTML (const char *title, ostream &file)
 
void PrintConfusionMatrixHTML (ostream &outFile)
 
void PrintConfusionMatrixLatexTable (ostream &outFile)
 
void PrintConfusionMatrixNarrow (ostream &outFile)
 
void PrintConfusionMatrixTabDelimited (ostream &outFile)
 
void PrintErrorByProb (ostream &outFile)
 
void PrintErrorByProbByRows (ostream &outFile)
 
void PrintErrorBySize (ostream &outFile)
 
void PrintErrorBySizeByRows (ostream &outFile)
 
void PrintErrorBySizeReduced (ostream &outFile)
 
void PrintProbDistributionTitle (ostream &outFile)
 
void PrintProbDistributionTotalCount (ostream &outFile)
 
void PrintProbDistributionTotalError (ostream &outFile)
 
void PrintTrueFalsePositivesTabDelimited (ostream &outFile)
 
double TotalCount ()
 
void WriteSimpleConfusionMatrix (ostream &f) const
 
void WriteXML (ostream &f) const
 

Static Public Member Functions

static ConfusionMatrix2Ptr BuildFromIstreamXML (istream &f, RunLog &log)
 

Detailed Description

A confusion matrix object that is used to record the results from a CrossValidation. See also: CrossValidation.

Definition at line 33 of file ConfusionMatrix2.h.

Member Typedef Documentation

Constructor & Destructor Documentation

ConfusionMatrix2::ConfusionMatrix2 ( const MLClassList &  _classes)

Definition at line 58 of file ConfusionMatrix2.cpp.

References ConfusionMatrix2().

Referenced by ConfusionMatrix2(), KKMLL::ConfussionMatrix2List::DeriveAverageConfusionMatrix(), KKMLL::CrossValidationMxN::RunTrainAndTest(), and KKMLL::CrossValidationMxN::RunValidations().

58  : // Will make its own copy of list
59  bucketSize (100),
60  classCount (0),
61  correctByKnownClassByProb (),
62  correctByKnownClassBySize (),
63  correctCount (0.0),
64  countByKnownClassByProb (),
65  countByKnownClassBySize (),
66  countsByKnownClass (),
67  classes (_classes),
68  numInvalidClassesPredicted (0.0),
69  numOfBuckets (40),
70  numOfProbBuckets (20),
71  predictedCountsCM (),
72  probBucketSize (5),
73  totalCount (0.0),
74  totalPredProb (0.0),
75  totalPredProbsByKnownClass (),
76  totalSizesByKnownClass (),
77  totPredProbCM ()
78 {
79  InitializeMemory ();
80 }
ConfusionMatrix2::ConfusionMatrix2 ( const ConfusionMatrix2 &  cm)

Definition at line 85 of file ConfusionMatrix2.cpp.

References ConfusionMatrix2().

Referenced by ConfusionMatrix2().

85  :
86  bucketSize (cm.bucketSize),
87  classCount (cm.classCount),
88  correctByKnownClassByProb (),
89  correctByKnownClassBySize (),
90  correctCount (cm.correctCount),
91  countByKnownClassByProb (),
92  countByKnownClassBySize (),
93  countsByKnownClass (),
94  classes (cm.classes),
95  numInvalidClassesPredicted (cm.numInvalidClassesPredicted),
96  numOfBuckets (cm.numOfBuckets),
97  numOfProbBuckets (cm.numOfProbBuckets),
98  predictedCountsCM (),
99  probBucketSize (cm.probBucketSize),
100  totalCount (cm.totalCount),
101  totalPredProb (cm.totalPredProb),
102  totalPredProbsByKnownClass (),
103  totalSizesByKnownClass (),
104  totPredProbCM ()
105 {
106  CopyVector (cm.countsByKnownClass, countsByKnownClass);
107  CopyVector (cm.totalPredProbsByKnownClass, totalPredProbsByKnownClass);
108  CopyVector (cm.totalSizesByKnownClass, totalSizesByKnownClass);
109 
110  CopyVectorDoublePtr (cm.predictedCountsCM, predictedCountsCM, classCount);
111  CopyVectorDoublePtr (cm.totPredProbCM, totPredProbCM, classCount);
112 
113  CopyVectorDoublePtr (cm.countByKnownClassBySize, countByKnownClassBySize, numOfBuckets);
114  CopyVectorDoublePtr (cm.correctByKnownClassBySize, correctByKnownClassBySize, numOfBuckets);
115  CopyVectorDoublePtr (cm.countByKnownClassByProb, countByKnownClassByProb, numOfProbBuckets);
116  CopyVectorDoublePtr (cm.correctByKnownClassByProb, correctByKnownClassByProb, numOfProbBuckets);
117 }
ConfusionMatrix2::ConfusionMatrix2 ( const MLClassList &  _classes,
istream &  f,
kkint32  _bucketSize,
kkint32  _numOfBuckets,
kkint32  _numOfProbBuckets,
kkint32  _probBucketSize,
RunLog &  _log 
)

Will construct an instance of 'ConfusionMatrix2' from the contents of the provided 'istream' object.

Parameters
[in] _classes: Will make local copy of this instance; this way we know the ordering which represents the numbering can not change behind our back.
[in] f: Input stream that the confusion matrix contents are read from.
[in] _bucketSize: Will keep statistics by size of particles.
[in] _numOfBuckets: Number of Size buckets that will be maintained.
[in] _numOfProbBuckets: Maximum number of probability buckets to keep track of.
[in] _probBucketSize: Size of each probability bucket.
[in] _log: Logger where messages are written to.

Definition at line 24 of file ConfusionMatrix2.cpp.

References ConfusionMatrix2().

Referenced by BuildFromIstreamXML(), and ConfusionMatrix2().

31  :
32  bucketSize (_bucketSize),
33  classCount (0),
34  classes (_classes),
35  correctByKnownClassByProb (NULL),
36  correctByKnownClassBySize (NULL),
37  correctCount (0.0),
38  countByKnownClassByProb (),
39  countByKnownClassBySize (),
40  countsByKnownClass (),
41  numInvalidClassesPredicted (0.0),
42  numOfBuckets (_numOfBuckets),
43  numOfProbBuckets (_numOfProbBuckets),
44  predictedCountsCM (),
45  probBucketSize (_probBucketSize),
46  totalCount (0.0),
47  totalPredProb (0.0),
48  totalPredProbsByKnownClass (),
49  totalSizesByKnownClass (),
50  totPredProbCM ()
51 {
52  InitializeMemory ();
53  Read (f, _log);
54 }
ConfusionMatrix2::~ConfusionMatrix2 ( )
virtual

Definition at line 122 of file ConfusionMatrix2.cpp.

123  {
124  DeleteVectorDoublePtr (countByKnownClassBySize);
125  DeleteVectorDoublePtr (correctByKnownClassBySize);
126  DeleteVectorDoublePtr (countByKnownClassByProb);
127  DeleteVectorDoublePtr (correctByKnownClassByProb);
128  DeleteVectorDoublePtr (predictedCountsCM);
129  DeleteVectorDoublePtr (totPredProbCM);
130 }

Member Function Documentation

double ConfusionMatrix2::Accuracy ( )

Definition at line 2485 of file ConfusionMatrix2.cpp.

Referenced by KKMLL::CrossValidationVoting::Accuracy(), and KKMLL::CrossValidation::Accuracy().

2486 {
2487  if (totalCount == 0)
2488  return 0.0;
2489 
2490  return 100.0 * correctCount / totalCount;
2491 }
double ConfusionMatrix2::Accuracy ( MLClassPtr  mlClass)

Definition at line 2508 of file ConfusionMatrix2.cpp.

2509 {
2510  kkint32 classNum = 0;
2511 
2512  classNum = classes.PtrToIdx (mlClass);
2513  if (classNum < 0)
2514  return 0.0f;
2515 
2516  if (countsByKnownClass [classNum] == 0)
2517  return 0.0f;
2518 
2519  float accuracy = (float)(100.0 * (predictedCountsCM[classNum] [classNum]) / (countsByKnownClass [classNum]));
2520 
2521  return accuracy;
2522 } /* Accuracy */
__int32 kkint32
Definition: KKBaseTypes.h:88
kkint32 PtrToIdx(EntryConstPtr _entry) const
Definition: KKQueue.h:761
VectorFloat ConfusionMatrix2::AccuracyByClass ( ) const

Definition at line 2527 of file ConfusionMatrix2.cpp.

2528 {
2529  VectorFloat accuracies;
2530  for (kkint32 classNum = 0; classNum < classCount; classNum++)
2531  {
2532  if (countsByKnownClass [classNum] == 0)
2533  {
2534  accuracies.push_back (0.0f);
2535  }
2536  else
2537  {
2538  float classAccuracy = (float)(100.0f * (predictedCountsCM[classNum] [classNum]) / (countsByKnownClass [classNum]));
2539  accuracies.push_back (classAccuracy);
2540  }
2541  }
2542 
2543  return accuracies;
2544 } /* AccuracyByClass */
__int32 kkint32
Definition: KKBaseTypes.h:88
std::vector< float > VectorFloat
Definition: KKBaseTypes.h:149
float ConfusionMatrix2::AccuracyClassWeightedEqually ( )

Definition at line 2548 of file ConfusionMatrix2.cpp.

Referenced by AccuracyNorm().

2549 {
2550  kkint32 classCount = classes.QueueSize ();
2551  float totalAccuracy = 0.0f;
2552 
2553  for (kkint32 classNum = 0; classNum < classCount; classNum++)
2554  {
2555  if (countsByKnownClass [classNum] != 0)
2556  {
2557  float classAccuracy = (float)(100.0f * (predictedCountsCM[classNum] [classNum]) / (countsByKnownClass [classNum]));
2558  totalAccuracy += classAccuracy;
2559  }
2560  }
2561 
2562  float weightedAccuracy = (float)(totalAccuracy / classCount);
2563 
2564  return weightedAccuracy;
2565 } /* AccuracyClassWeightedEqually */
__int32 kkint32
Definition: KKBaseTypes.h:88
kkint32 QueueSize() const
Definition: KKQueue.h:313
float KKMLL::ConfusionMatrix2::AccuracyNorm ( )
inline
KKStr ConfusionMatrix2::AccuracyStr ( )

Definition at line 2443 of file ConfusionMatrix2.cpp.

References KKB::KKStr::Concat(), KKMLL::MLClass::Name(), KKB::KKStr::operator=(), and KKB::StrFormatDouble().

2444 {
2445 
2446  kkint32 x;
2447 
2448  double* accuracys = new double[classCount];
2449 
2450  for (x = 0; x < classCount; x++)
2451  {
2452  if (countsByKnownClass [x] == 0)
2453  accuracys[x] = 0;
2454  else
2455  accuracys[x] = (100.0 * (double) predictedCountsCM [x] [x]) / ((double) (countsByKnownClass [x]));
2456  }
2457 
2458 
2459  KKStr accuracyStr;
2460 
2461  for (x = 0; x < classCount; x++)
2462  {
2463  if (x > 0)
2464  accuracyStr << " ";
2465 
2466  KKStr className;
2467  MLClassPtr mlClass = classes.IdxToPtr (x);
2468  if (mlClass)
2469  className = mlClass->Name ();
2470  else
2471  className = "***UnDefined***";
2472 
2473  accuracyStr << className << " " << StrFormatDouble (accuracys[x], "##0.000") << "%";
2474  }
2475 
2476  delete[] accuracys; accuracys = NULL;
2477 
2478  return accuracyStr;
2479 } /* AccuracyStr */
__int32 kkint32
Definition: KKBaseTypes.h:88
Represents a "Class" in the Machine Learning Sense.
Definition: MLClass.h:52
EntryPtr IdxToPtr(kkuint32 idx) const
Definition: KKQueue.h:732
KKStr StrFormatDouble(double val, const char *mask)
Definition: KKStr.cpp:4819
void ConfusionMatrix2::AddIn ( const ConfusionMatrix2 &  cm,
RunLog &  log 
)

Definition at line 2802 of file ConfusionMatrix2.cpp.

Referenced by KKMLL::ConfussionMatrix2List::DeriveAverageConfusionMatrix(), KKMLL::CrossValidationMxN::RunTrainAndTest(), and KKMLL::CrossValidationMxN::RunValidations().

2805 {
2806  MakeSureWeHaveTheseClasses (cm.classes, log);
2807 
2808  kkint32 numOfClasses = classes.QueueSize ();
2809  kkint32 classIDX = 0;
2810 
2811 
2812  // Create indirection array to handle the situation where the mlClass lists of the two
2813  // confusion matrices '*this' and 'cm' are not in the same order.
2814 
2815  vector<kkint32> ind (numOfClasses, 0);
2816  for (classIDX = 0; classIDX < numOfClasses; classIDX++)
2817  {
2818  MLClassPtr mlClass = classes.IdxToPtr (classIDX);
2819  kkint32 cmsIDX = cm.classes.PtrToIdx (mlClass);
2820  ind[classIDX] = cmsIDX;
2821  }
2822 
2823  for (classIDX = 0; classIDX < numOfClasses; classIDX++)
2824  {
2825  kkint32 cmsIDX = ind[classIDX];
2826  if (cmsIDX < 0)
2827  {
2828  // cmsIDX < 0 indicates that the confusion matrix being added in does not include the class indicated by 'classIDX'.
2829  }
2830  else
2831  {
2832  countsByKnownClass [classIDX] += cm.countsByKnownClass [cmsIDX];
2833  totalSizesByKnownClass [classIDX] += cm.totalSizesByKnownClass [cmsIDX];
2834  totalPredProbsByKnownClass [classIDX] += cm.totalPredProbsByKnownClass [cmsIDX];
2835 
2836  kkint32 predictedClassIDX = 0;
2837  for (predictedClassIDX = 0; predictedClassIDX < numOfClasses; predictedClassIDX++)
2838  {
2839  kkint32 cmsPredictedClassIDX = ind[predictedClassIDX];
2840  if (cmsPredictedClassIDX >= 0)
2841  {
2842  predictedCountsCM[classIDX][predictedClassIDX] += cm.predictedCountsCM[cmsIDX][cmsPredictedClassIDX];
2843  totPredProbCM [classIDX][predictedClassIDX] += cm.totPredProbCM [cmsIDX][cmsPredictedClassIDX];
2844  }
2845  }
2846 
2847  kkint32 bucketIDX = 0;
2848  for (bucketIDX = 0; bucketIDX < numOfBuckets; bucketIDX++)
2849  {
2850  countByKnownClassBySize [classIDX][bucketIDX] += cm.countByKnownClassBySize [cmsIDX][bucketIDX];
2851  correctByKnownClassBySize [classIDX][bucketIDX] += cm.correctByKnownClassBySize [cmsIDX][bucketIDX];
2852  }
2853 
2854  kkint32 probIDX = 0;
2855  for (probIDX = 0; probIDX < numOfProbBuckets; probIDX++)
2856  {
2857  countByKnownClassByProb [classIDX][probIDX] += cm.countByKnownClassByProb [cmsIDX][probIDX];
2858  correctByKnownClassByProb [classIDX][probIDX] += cm.correctByKnownClassByProb [cmsIDX][probIDX];
2859  }
2860  }
2861  }
2862 
2863  correctCount += cm.correctCount;
2864  totalCount += cm.totalCount;
2865  totalPredProb += cm.totalPredProb;
2866 } /* AddIn */
__int32 kkint32
Definition: KKBaseTypes.h:88
Represents a "Class" in the Machine Learning Sense.
Definition: MLClass.h:52
EntryPtr IdxToPtr(kkuint32 idx) const
Definition: KKQueue.h:732
kkint32 QueueSize() const
Definition: KKQueue.h:313
kkint32 PtrToIdx(EntryConstPtr _entry) const
Definition: KKQueue.h:761
double ConfusionMatrix2::AvgPredProb ( ) const

Definition at line 2496 of file ConfusionMatrix2.cpp.

2497 {
2498  if (totalCount == 0)
2499  return 0.0;
2500 
2501  return totalPredProb / (double)totalCount;
2502 }
ConfusionMatrix2Ptr ConfusionMatrix2::BuildFromIstreamXML ( istream &  f,
RunLog &  log 
)
static

Definition at line 3027 of file ConfusionMatrix2.cpp.

References ConfusionMatrix2().

3030 {
3031  if (f.eof ())
3032  {
3033  log.Level (-1) << endl << "ConfusionMatrix2::BuildFromIstreamXML ***ERROR*** File already at EOF." << endl << endl;
3034  return NULL;
3035  }
3036 
3037 
3038  char buff[10240];
3039  buff[0] = 0;
3040 
3041  kkint64 startPos = f.tellg ();
3042  MLClassListPtr classes = NULL;
3043 
3044  kkint32 bucketSize = -1;
3045  kkint32 classCount = -1;
3046  kkint32 numOfBuckets = -1;
3047  kkint32 numOfProbBuckets = -1;
3048  kkint32 probBucketSize = -1;
3049 
3050  f.getline (buff, sizeof (buff));
3051  while ((!f.eof ()) && ((!classes) || (bucketSize < 1) || (numOfBuckets < 1) || (numOfProbBuckets < 1) || (probBucketSize < 1) || (classCount < 1)))
3052  {
3053  KKStr l (buff);
3054  l.TrimLeft ();
3055 
3056  if (l.CompareIgnoreCase ("</ConfusionMatrix2>") == 0)
3057  break;
3058 
3059  KKStr lineName = l.ExtractToken2 ("\t");
3060  if (lineName.CompareIgnoreCase ("Classes") == 0)
3061  classes = MLClassList::BuildListFromDelimtedStr (l, '\t');
3062 
3063  else if (lineName.CompareIgnoreCase ("bucketSize") == 0)
3064  bucketSize = l.ExtractTokenInt ("\t\n\r");
3065 
3066  else if (lineName.CompareIgnoreCase ("classCount") == 0)
3067  classCount = l.ExtractTokenInt ("\t\n\r");
3068 
3069  else if (lineName.CompareIgnoreCase ("numOfBuckets") == 0)
3070  numOfBuckets = l.ExtractTokenInt ("\t\n\r");
3071 
3072  else if (lineName.CompareIgnoreCase ("numOfProbBuckets") == 0)
3073  numOfProbBuckets = l.ExtractTokenInt ("\t\n\r");
3074 
3075  else if (lineName.CompareIgnoreCase ("probBucketSize") == 0)
3076  probBucketSize = l.ExtractTokenInt ("\t\n\r");
3077 
3078  f.getline (buff, sizeof (buff));
3079  }
3080 
3081  if (classes == NULL)
3082  {
3083  log.Level (-1) << endl
3084  << "ConfusionMatrix2::BuildFromIstreamXML ***ERROR*** No Class List Was Provided." << endl
3085  << endl;
3086  // Failed to locate ClassList ('classes'); we can not build a ConfusionMatrix2 object.
3087  return NULL;
3088  }
3089 
3090  if ((bucketSize < 1) || (numOfBuckets < 1) || (numOfProbBuckets < 1) || (probBucketSize < 1))
3091  {
3092  delete classes; classes = NULL;
3093  log.Level (-1) << endl
3094  << "ConfusionMatrix2::BuildFromIstreamXML ***ERROR*** Not all needed header fields were defined." << endl
3095  << " bucketSize[" << bucketSize << "] ClassCount[" << classCount << "] numOfBuckets[" << numOfBuckets << "] numOfProbBuckets[" << numOfProbBuckets << "] probBucketSize[" << probBucketSize << "]" << endl
3096  << endl;
3097  // One or more required header fields are missing; we can not build a ConfusionMatrix2 object.
3098  return NULL;
3099  }
3100 
3101 
3102  if (classCount != classes->QueueSize ())
3103  {
3104  log.Level (-1) << endl
3105  << "ConfusionMatrix2::BuildFromIstreamXML ***ERROR*** Disagreement between ClassCount[" << classCount << "] and Classes.QueueSize[" << classes->QueueSize () << "]" << endl
3106  << endl;
3107  delete classes; classes = NULL;
3108  return NULL;
3109  }
3110 
3111 
3112  f.seekg (startPos);
3113 
3114  ConfusionMatrix2Ptr cm = new ConfusionMatrix2 (*classes, f, bucketSize, numOfBuckets, numOfProbBuckets, probBucketSize, log);
3115  delete classes; classes = NULL;
3116  return cm;
3117 } /* BuildFromIstreamXML */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
kkint32 CompareIgnoreCase(const KKStr &s2) const
Compares with another KKStr, ignoring case.
Definition: KKStr.cpp:919
__int32 kkint32
Definition: KKBaseTypes.h:88
KKStr ExtractToken2(const char *delStr="\n\t\r ")
Extract first Token from the string.
Definition: KKStr.cpp:3026
RunLog & Level(kkint32 _level)
Definition: RunLog.cpp:220
__int64 kkint64
Definition: KKBaseTypes.h:90
static MLClassListPtr BuildListFromDelimtedStr(const KKStr &s, char delimiter)
Definition: MLClass.cpp:1268
ConfusionMatrix2(const MLClassList &_classes)
kkint32 QueueSize() const
Definition: KKQueue.h:313
Maintains a list of MLClass instances.
Definition: MLClass.h:233
A confusion matrix object that is used to record the results from a CrossValidation. See also: CrossValidation.
kkint32 KKMLL::ConfusionMatrix2::ClassCount ( ) const
inline

Definition at line 83 of file ConfusionMatrix2.h.

83 {return classCount;}
void ConfusionMatrix2::ComputeFundamentalStats ( MLClassPtr  ic,
double &  truePositives,
double &  trueNegatives,
double &  falsePositives,
double &  falseNegatives 
) const

Definition at line 1810 of file ConfusionMatrix2.cpp.

1817 {
1818  truePositives = 0.0;
1819  trueNegatives = 0.0;
1820  falsePositives = 0.0;
1821  falseNegatives = 0.0;
1822 
1823  kkint32 x = classes.PtrToIdx (ic);
1824  if (x < 0)
1825  return;
1826 
1827  kkint32 numOfClasses = classes.QueueSize ();
1828 
1829  truePositives = predictedCountsCM [x][x];
1830 
1831  for (kkint32 y = 0; y < numOfClasses; y++)
1832  {
1833  if (y != x)
1834  {
1835  falsePositives += predictedCountsCM [y][x]; // Known class is y but was classified as x.
1836  falseNegatives += predictedCountsCM [x][y]; // Should have been classed as x not y.
1837  trueNegatives += (countsByKnownClass [y] - predictedCountsCM [y][x]);
1838  }
1839  }
1840  return;
1841 } /* ComputeFundamentalStats */
__int32 kkint32
Definition: KKBaseTypes.h:88
kkint32 QueueSize() const
Definition: KKQueue.h:313
kkint32 PtrToIdx(EntryConstPtr _entry) const
Definition: KKQueue.h:761
double ConfusionMatrix2::Count ( MLClassPtr  mlClass)

Definition at line 2571 of file ConfusionMatrix2.cpp.

2572 {
2573  kkint32 classNum = 0;
2574  bool found = false;
2575  kkint32 numClasses = classes.QueueSize ();
2576 
2577  while ((classNum < numClasses) && (!found))
2578  {
2579  if (classes[classNum].UpperName () == mlClass->UpperName ())
2580  found = true;
2581  else
2582  classNum++;
2583  }
2584 
2585 
2586  if (found)
2587  {
2588  return countsByKnownClass [classNum];
2589  }
2590 
2591  return 0.0;
2592 } /* Count */
__int32 kkint32
Definition: KKBaseTypes.h:88
const KKStr & UpperName() const
Definition: MLClass.h:155
kkint32 QueueSize() const
Definition: KKQueue.h:313
double ConfusionMatrix2::CountsByKnownClass ( kkint32  knownClassIdx) const

Definition at line 347 of file ConfusionMatrix2.cpp.

348 {
349  if ((knownClassIdx < 0) || (knownClassIdx >= classCount))
350  return 0.0;
351 
352  return countsByKnownClass [knownClassIdx];
353 }
const VectorDouble & ConfusionMatrix2::CountsByKnownClass ( ) const

Definition at line 357 of file ConfusionMatrix2.cpp.

358 {
359  return countsByKnownClass;
360 } /* CountsByKnownClass */
void ConfusionMatrix2::FactorCounts ( double  factor)

Will multiply all counts by 'factor'. You would use this in conjunction with 'AddIn'.

Definition at line 2597 of file ConfusionMatrix2.cpp.

Referenced by KKMLL::ConfussionMatrix2List::DeriveAverageConfusionMatrix(), KKMLL::CrossValidationMxN::RunTrainAndTest(), and KKMLL::CrossValidationMxN::RunValidations().

2598 {
2599  kkint32 x;
2600 
2601  correctCount *= factor;
2602  totalCount *= factor;
2603  totalPredProb *= factor;
2604  numInvalidClassesPredicted *= factor;
2605 
2606  for (x = 0; x < classCount; x++)
2607  {
2608  countsByKnownClass [x] = countsByKnownClass [x] * factor;
2609  totalSizesByKnownClass [x] = totalSizesByKnownClass [x] * factor;
2610  totalPredProbsByKnownClass [x] = totalPredProbsByKnownClass [x] * factor;
2611 
2612  kkint32 y;
2613 
2614  for (y = 0; y < classCount; y++)
2615  {
2616  predictedCountsCM[x][y] = predictedCountsCM[x][y] * factor;
2617  totPredProbCM [x][y] = totPredProbCM [x][y] * factor;
2618  }
2619 
2620  for (y = 0; y < numOfBuckets; y++)
2621  {
2622  countByKnownClassBySize [x][y] = countByKnownClassBySize [x][y] * factor;
2623  correctByKnownClassBySize [x][y] = correctByKnownClassBySize [x][y] * factor;
2624  }
2625 
2626  for (y = 0; y < numOfProbBuckets; y++)
2627  {
2628  countByKnownClassByProb [x][y] = countByKnownClassByProb [x][y] * factor;
2629  correctByKnownClassByProb [x][y] = correctByKnownClassByProb [x][y] * factor;
2630  }
2631  }
2632 } /* FactorCounts */
__int32 kkint32
Definition: KKBaseTypes.h:88
float ConfusionMatrix2::FMeasure ( MLClassPtr  positiveClass,
RunLog &  log 
) const

Definition at line 1846 of file ConfusionMatrix2.cpp.

References KKB::KKStr::Concat(), KKMLL::MLClass::Name(), and KKB::KKStr::operator=().

1849 {
1850  kkint32 positiveIDX = classes.PtrToIdx (positiveClass);
1851  if (positiveIDX < 0)
1852  {
1853  KKStr invalidClassName = "";
1854  if (positiveClass)
1855  invalidClassName = positiveClass->Name ();
1856 
1857  log.Level (-1) << "ConfusionMatrix2::FMeasure ***ERROR*** Invalid Positive Class Specified[" << invalidClassName << "]" << endl;
1858  return 0.0f;
1859  }
1860 
1861  kkint32 numOfClasses = classes.QueueSize ();
1862 
1863  kkint32 y;
1864 
1865  double totalTP = 0.0;
1866  double totalFP = 0.0;
1867  double totalTN = 0.0;
1868  double totalFN = 0.0;
1869 
1870  totalTP = predictedCountsCM[positiveIDX][positiveIDX];
1871 
1872  for (y = 0; y < numOfClasses; y++)
1873  {
1874  if (y != positiveIDX)
1875  {
1876  totalFP += predictedCountsCM[y][positiveIDX];
1877  totalTN += predictedCountsCM[y][y];
1878  totalFN += predictedCountsCM[positiveIDX][y];
1879  }
1880  }
1881 
1882  double fMeasure = 0.0;
1883  double divisor = 2.0 * (double)totalTP + (double)totalFP + (double)totalFN;
1884  if (divisor != 0.0)
1885  fMeasure = 100.0 * (2.0 * (double)totalTP / divisor);
1886 
1887  return (float)fMeasure;
1888 } /* FMeasure */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
RunLog & Level(kkint32 _level)
Definition: RunLog.cpp:220
kkint32 QueueSize() const
Definition: KKQueue.h:313
kkint32 PtrToIdx(EntryConstPtr _entry) const
Definition: KKQueue.h:761
void ConfusionMatrix2::Increment ( MLClassPtr  _knownClass,
MLClassPtr  _predClass,
kkint32  _size,
double  _probability,
RunLog &  _log 
)

Definition at line 365 of file ConfusionMatrix2.cpp.

371 {
372  kkint32 knownClassNum = -1;
373  kkint32 predClassNum = -1;
374 
375  if (_probability < 0)
376  _probability = 0;
377 
378  if (!_knownClass)
379  {
380  numInvalidClassesPredicted += 1.0;
381  _log.Level (-1) << endl
382  << "ConfusionMatrix2::Increment **** _knownClass = NULL ****"
383  << endl
384  << endl;
385  return;
386  }
387 
388  if (!_predClass)
389  {
390  numInvalidClassesPredicted += 1.0;
391  _log.Level (-1) << endl
392  << "ConfusionMatrix2::Increment **** _predClass = NULL ****"
393  << endl
394  << endl;
395  return;
396  }
397 
398  knownClassNum = classes.PtrToIdx (_knownClass);
399  if (knownClassNum < 0)
400  knownClassNum = AddClassToConfusionMatrix (_knownClass, _log);
401 
402  predClassNum = classes.PtrToIdx (_predClass);
403  if (predClassNum < 0)
404  predClassNum = AddClassToConfusionMatrix (_predClass, _log);
405 
406  if ((knownClassNum < 0) || (knownClassNum >= classCount))
407  {
408  numInvalidClassesPredicted += 1.0;
409  _log.Level (-1) << "ConfusionMatrix2::IncrementPredHits knownClassNum[" << knownClassNum << "] out of bounds." << endl;
410  return;
411  }
412 
413  if ((predClassNum < 0) || (predClassNum >= classCount))
414  {
415  numInvalidClassesPredicted += 1.0;
416  _log.Level (-1) << "ConfusionMatrix2::IncrementPredHits predClassNum[" << predClassNum << "] out of bounds." << endl;
417  return;
418  }
419 
420  if (knownClassNum == predClassNum)
421  correctCount += 1.0;
422 
423  totalCount += 1.0;
424 
425  totalSizesByKnownClass[knownClassNum] += _size;
426 
427  totalPredProbsByKnownClass [knownClassNum] += _probability;
428  totalPredProb += _probability;
429 
430  countsByKnownClass [knownClassNum]++;
431 
432  (predictedCountsCM [knownClassNum] [predClassNum])++;
433  totPredProbCM [knownClassNum] [predClassNum] += _probability;
434 
435 
436  if (_size > 0)
437  {
438  kkint32 bucket = (_size - 1) / bucketSize;
439  if (bucket >= numOfBuckets)
440  bucket = numOfBuckets - 1;
441 
442  countByKnownClassBySize[knownClassNum][bucket]++;
443  if (knownClassNum == predClassNum)
444  correctByKnownClassBySize [knownClassNum][bucket]++;
445  }
446  else
447  {
448  _size = -1;
449  }
450 
451 
452  {
453  kkint32 bucket = 0;
454 
455  if ((_probability >= 0.0) && (_probability <= 1.0))
456  bucket = ((kkint32)(_probability * 100) / probBucketSize);
457  else
458  bucket = 0;
459 
460  if (bucket >= numOfProbBuckets)
461  bucket = numOfProbBuckets - 1;
462 
463  countByKnownClassByProb [knownClassNum][bucket]++;
464  if (knownClassNum == predClassNum)
465  correctByKnownClassByProb [knownClassNum][bucket]++;
466  }
467 } /* Increment */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
RunLog & Level(kkint32 _level)
Definition: RunLog.cpp:220
kkint32 PtrToIdx(EntryConstPtr _entry) const
Definition: KKQueue.h:761
const MLClassList& KKMLL::ConfusionMatrix2::MLClasses ( ) const
inline
VectorDouble ConfusionMatrix2::PredictedCounts ( ) const

Definition at line 328 of file ConfusionMatrix2.cpp.

329 {
330  kkint32 knownClassIdx, predClassIdx;
331 
332  VectorDouble pc;
333  for (predClassIdx = 0; predClassIdx < classCount; predClassIdx++)
334  {
335  double predCount = 0.0;
336  for (knownClassIdx = 0; knownClassIdx < classCount; knownClassIdx++)
337  predCount += predictedCountsCM[knownClassIdx][predClassIdx];
338  pc.push_back (predCount);
339  }
340 
341  return pc;
342 } /* PredictedCounts */
__int32 kkint32
Definition: KKBaseTypes.h:88
std::vector< double > VectorDouble
Vector of doubles.
Definition: KKBaseTypes.h:148
double ConfusionMatrix2::PredictedCountsCM ( kkint32  knownClassIdx,
kkint32  predClassIdx 
) const

Definition at line 313 of file ConfusionMatrix2.cpp.

316 {
317  if ((knownClassIdx < 0) || (knownClassIdx >= classCount))
318  return 0.0;
319 
320  if ((predClassIdx < 0) || (predClassIdx >= classCount))
321  return 0.0;
322 
323  return predictedCountsCM [knownClassIdx][predClassIdx];
324 }
void ConfusionMatrix2::PrintAccuracyByProbByClassHTML ( ostream &  o)

Definition at line 1216 of file ConfusionMatrix2.cpp.

References KKB::KKStr::Concat(), KKB::KKStr::KKStr(), KKB::KKStr::operator+(), KKB::KKStr::operator=(), and KKB::StrFormatDouble().

1218 {
1219  kkint32 bucket;
1220  kkint32 classNum;
1221 
1222  VectorDouble countByProb (numOfProbBuckets, 0.0);
1223  VectorDouble correctByProb (numOfProbBuckets, 0.0);
1224 
1225  double totalCount = 0.0;
1226  double totalCorrect = 0.0;
1227 
1228  double acc = 0.0;
1229 
1230  o << "<table align=\"center\" border=\"2\" cellpadding=\"3\" cellspacing=\"0\" frame=\"box\" summary=\"Confusion \" >" << endl
1231  << " <thead style=\"font-weight:bold; text-align:center; vertical-align:bottom\">" << endl
1232  << " <tr>" << endl
1233  << " <th>Class<br />Names</th><th>All<br />Classes</th>";
1234 
1235  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
1236  {
1237  o << "<th>" << ((bucket + 1) * probBucketSize) << "</th>";
1238  }
1239  o << " </tr>" << endl
1240  << " </thead>" << endl
1241  << " <tbody style=\"font-weight:normal; text-align:right; font-family:Courier\">" << endl;
1242 
1243  KKStr ln (1024);
1244  KKStr accStr;
1245 
1246  for (classNum = 0; classNum < classCount; classNum++)
1247  {
1248  double countThisClass = 0.0;
1249  double correctThisClass = 0.0;
1250 
1251  ln = "";
1252 
1253  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
1254  {
1255  double count = countByKnownClassByProb [classNum][bucket];
1256  double correct = correctByKnownClassByProb [classNum][bucket];
1257 
1258  countThisClass += count;
1259  correctThisClass += correct;
1260 
1261  countByProb [bucket] += countByKnownClassByProb [classNum][bucket];
1262  correctByProb [bucket] += correctByKnownClassByProb [classNum][bucket];
1263 
1264  acc =0.0;
1265  accStr = "";
1266  if (count != 0.0)
1267  {
1268  acc = 100.0 * correct / count;
1269  accStr = StrFormatDouble (acc, "ZZ0.000") + "%";
1270  }
1271 
1272  ln << "<td>" << accStr << "</td>";
1273  }
1274 
1275  totalCount += countThisClass;
1276  totalCorrect += correctThisClass;
1277 
1278  accStr = "";
1279  acc = 0.0;
1280  if (countThisClass != 0.0)
1281  {
1282  acc = 100.0 * correctThisClass / countThisClass;
1283  accStr = StrFormatDouble (acc, "ZZ0.000") + "%";
1284  }
1285 
1286  o << " <tr>"
1287  << "<td style=\"text-align:left; font-family:Arial\">" + classes[classNum].Name () + "</td>"
1288  << "<td>" << accStr << "</td>"
1289  << ln
1290  << "</tr>"
1291  << endl;
1292  }
1293 
1294  {
1295  acc = 0.0;
1296  if (totalCount != 0.0)
1297  acc = 100.0 * totalCorrect / totalCount;
1298 
1299  o << " <tr>"
1300  << "<td style=\"text-align:left; font-family:Arial\">" << "Total<br />All Classes" << "</td>"
1301  << "<td>" << StrFormatDouble (acc, "ZZ0.000") << "%" << "</td>";
1302 
1303  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
1304  {
1305  acc = 0.0;
1306  accStr = "";
1307  if (countByProb [bucket] != 0.0)
1308  {
1309  acc = 100.0 * correctByProb [bucket] / countByProb [bucket];
1310  accStr = StrFormatDouble (acc, "ZZ0.000") + "%";
1311  }
1312 
1313  o << "<td>" << accStr << "</td>";
1314  }
1315  o << "</tr>" << endl;
1316  }
1317 
1318  o << "</tbody>" << endl
1319  << "</table>" << endl;
1320 } /* PrintAccuracyByProbByClassHTML */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
KKStr StrFormatDouble(double val, const char *mask)
Definition: KKStr.cpp:4819
std::vector< double > VectorDouble
Vector of doubles.
Definition: KKBaseTypes.h:148
void ConfusionMatrix2::PrintConfusionMatrix ( ostream &  _outFile)

Definition at line 919 of file ConfusionMatrix2.cpp.

References KKB::KKStr::Concat(), and KKB::KKStr::KKStr().

920 {
921  kkint32 knownClassNum;
922  kkint32 predClassNum;
923  kkint32 x;
924 
925 
926  // Lets generate Titles first
927  outFile << endl;
928 
929  double perc = 0.0;
930  if (totalCount > 0.0)
931  perc = correctCount / totalCount;
932 
933  outFile << "Overall Accuracy is "
934  << setprecision (5)
935  << (100.0 * perc) << "%"
936  << endl;
937 
938  outFile << endl;
939 
940 
941 
942  KKStr titleLine1, titleLine2, titleLine3;
943  classes.ExtractThreeTitleLines (titleLine1, titleLine2, titleLine3, 16);
944 
945 
946  outFile << setw (25) << "" << setw(16) << "" << setw (0) << titleLine1 << endl;
947  outFile << setw (25) << "" << setw(16) << "" << setw (0) << titleLine2 << endl;
948  outFile << setw (25) << "ClassName" << setw(16) << "Count" << setw (0) << titleLine3 << endl;
949 
950  outFile << setw (25) << "===========" << setw(16) << "====";
951  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
952  {
953  outFile << setw (16) << "============";
954  }
955  outFile << endl;
956 
957  double* totals = new double[classCount];
958  for (x = 0; x < classCount; x++)
959  totals[x] = 0;
960 
961 
962  double totalNonNoise = 0;
963  double totalNonNoiseRight = 0;
964 
965  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
966  {
967  bool noiseClass = classes[knownClassNum].UnDefined ();
968 
969  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
970  {
971  totals[predClassNum] += predictedCountsCM[knownClassNum] [predClassNum];
972  }
973 
974  PrintSingleLine (outFile,
975  classes [knownClassNum].Name (),
976  countsByKnownClass [knownClassNum],
977  predictedCountsCM [knownClassNum]
978  );
979 
980  if (!noiseClass)
981  {
982  totalNonNoise = totalNonNoise + countsByKnownClass [knownClassNum];
983  totalNonNoiseRight += predictedCountsCM [knownClassNum] [knownClassNum];
984  }
985  }
986 
987  PrintSingleLine (outFile,
988  KKStr ("Totals"),
989  totalCount,
990  totals
991  );
992 
993  outFile << endl << endl;
994 
995  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
996  {
997  PrintPercentLine (outFile,
998  classes [knownClassNum].Name (),
999  countsByKnownClass [knownClassNum],
1000  predictedCountsCM [knownClassNum]
1001  );
1002  }
1003 
1004  outFile << endl
1005  << endl;
1006 
1007  perc = 0.0;
1008  if (totalNonNoise != 0)
1009  perc = (double)totalNonNoiseRight / (double)totalNonNoise;
1010 
1011  outFile << "Accuracy for Non Noise "
1012  << setprecision (5)
1013  << (perc * 100.0)
1014  << "%"
1015  << endl;
1016 
1017 
1018  outFile << endl << endl;
1019 
1020  delete[] totals;
1021 } /* PrintConfusionMatrix */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
void ExtractThreeTitleLines(KKStr &titleLine1, KKStr &titleLine2, KKStr &titleLine3) const
Using the class names create three title lines where we split names by "_" characters between the thr...
Definition: MLClass.cpp:1068
void ConfusionMatrix2::PrintConfusionMatrixAvgPredProbHTML ( ostream &  o)

Definition at line 1129 of file ConfusionMatrix2.cpp.

1131 {
1132  kkint32 knownClassNum;
1133  kkint32 predClassNum;
1134 
1135  double overallAvgPredProb = 0.0;
1136  if (totalCount != 0.0)
1137  overallAvgPredProb = 100.0 * totalPredProb / totalCount;
1138 
1139  if (numInvalidClassesPredicted > 0.0)
1140  {
1141  o << "<p style=\"font-weight:bold\">" << endl
1142  << "*********************************************************************************************<br />" << endl
1143  << "******************* WARNING WARNING WARNING WARNING *********************<br />" << endl
1144  << "******************* *********************<br />" << endl
1145  << "******************* There were invalid classes specified that were *********************<br />" << endl
1146  << "******************* not counted. numInvalidClassesPredicted[" << numInvalidClassesPredicted << "] *********************<br />" << endl
1147  << "*********************************************************************************************<br />" << endl
1148  << "</p>" << endl
1149  << "<br />" << endl;
1150  }
1151 
1152 
1153  o << "Overall AvgPredProb: "
1154  << StrFormatDouble (overallAvgPredProb, "ZZZ0.000") << "%"
1155  << endl;
1156 
1157 
1158  o << "<table align=\"center\" border=\"2\" cellpadding=\"3\" cellspacing=\"0\" frame=\"box\" summary=\"Confusion \" >" << endl
1159  << " <thead style=\"font-weight:bold; text-align:center; vertical-align:bottom\">" << endl
1160  << " <tr>" << endl
1161  << " <th>Class<br />Names</th><th>Count</th>" << classes.ExtractHTMLTableHeader () << endl
1162  << " </tr>" << endl
1163  << " </thead>" << endl
1164  << " <tbody style=\"font-weight:normal; text-align:right; font-family:Courier\">" << endl;
1165 
1166  double* totalPredProbByPredClass = new double[classCount];
1167  double* totalCountsByPredClass = new double[classCount];
1168  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
1169  {
1170  totalPredProbByPredClass [predClassNum] = 0.0;
1171  totalCountsByPredClass [predClassNum] = 0.0;
1172  }
1173 
1174  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1175  {
1176  PrintAvgPredProbLineHTML (o,
1177  classes [knownClassNum].Name (),
1178  totalPredProbsByKnownClass [knownClassNum],
1179  countsByKnownClass [knownClassNum],
1180  knownClassNum,
1181  totPredProbCM [knownClassNum],
1182  predictedCountsCM [knownClassNum]
1183  );
1184  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
1185  {
1186  totalPredProbByPredClass [predClassNum] += totPredProbCM [knownClassNum][predClassNum];
1187  totalCountsByPredClass [predClassNum] += predictedCountsCM [knownClassNum][predClassNum];
1188  }
1189  }
1190 
1191  o << "<tr><td colspan=\"" << (classCount + 2) << "\">&nbsp</td></tr>" << endl;
1192 
1193  PrintAvgPredProbLineHTML (o,
1194  "AllClasses",
1195  totalPredProb,
1196  totalCount,
1197  knownClassNum,
1198  totalPredProbByPredClass,
1199  totalCountsByPredClass
1200  );
1201 
1202  o << "</tbody>" << endl
1203  << "</table>" << endl;
1204 
1205  delete[] totalCountsByPredClass;
1206  delete[] totalPredProbByPredClass;
1207 } /* PrintConfusionMatrixHTML */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
KKStr ExtractHTMLTableHeader() const
Will generate a HTML formatted string that can be used in a HTML table.
Definition: MLClass.cpp:1195
KKStr StrFormatDouble(double val, const char *mask)
Definition: KKStr.cpp:4819
void ConfusionMatrix2::PrintConfusionMatrixHTML ( const char *  title,
ostream &  file 
)

Definition at line 2641 of file ConfusionMatrix2.cpp.

2644 {
2645  kkint32 knownClassNum;
2646  kkint32 predClassNum;
2647  kkint32 x;
2648 
2649  // generate html preamble
2650  file << "<html>" << endl;
2651  file << "<head>" << endl;
2652  file << "<title>" << title << "</title>" << endl;
2653  file << "<body bgcolor=\"white\">" << endl;
2654 
2655  // generate the title
2656  file << "<h1>" << title << "</h1>" << endl;
2657 
2658  // generate the accuracy statement
2659  file << "<p><b>Overall Accuracy</b> is "
2660  << setprecision (5)
2661  << (100.0 * correctCount / totalCount) << "%"
2662  << "</p>" << endl;
2663 
2664  /***************************************************************************
2665  * generate the table with the counts
2666  ***************************************************************************/
2667  file << "<table cellpadding=\"2\" cellspacing=\"0\" border=\"2\">" << endl;
2668  file << "<tr>" << endl;
2669 
2670  // output the first row (which is class names)
2671  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>Class Names</b></th>" << endl;
2672  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>Totals</b></th>" << endl;
2673  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
2674  {
2675  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>" << classes[knownClassNum].Name() << "</b></th>" << endl;
2676  }
2677  file << "</tr>" << endl;
2678 
2679  double *totals = new double[classCount];
2680  for (x = 0; x < classCount; x++)
2681  {
2682  totals[x] = 0;
2683  }
2684 
2685  double totalNonNoise = 0;
2686  double totalNonNoiseRight = 0;
2687 
2688  // output the data rows
2689  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
2690  {
2691  bool noiseClass = classes[knownClassNum].UnDefined();
2692  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
2693  {
2694  totals[predClassNum] += predictedCountsCM[knownClassNum] [predClassNum];
2695  }
2696 
2697  file << "<tr>" << endl;
2698  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>" << classes [knownClassNum].Name() << "</b></th>" << endl;
2699  file << "<td align=\"center\" bgcolor=\"#EFEFEF\">" << countsByKnownClass [knownClassNum] << "</td>" << endl;
2700 
2701  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
2702  {
2703  if (predClassNum == knownClassNum)
2704  file << "<td align=\"center\" bgcolor=\"#EEEEEE\">";
2705  else
2706  file << "<td align=\"center\">";
2707  file << predictedCountsCM[knownClassNum][predClassNum];
2708  file << "</td>" << endl;
2709  }
2710  file << "</tr>" << endl;
2711  if (!noiseClass)
2712  {
2713  totalNonNoise = totalNonNoise + countsByKnownClass [knownClassNum];
2714  totalNonNoiseRight += predictedCountsCM [knownClassNum] [knownClassNum];
2715  }
2716  }
2717 
2718  // output the totals line for the first table
2719  file << "<tr>" << endl;
2720  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>Totals</b></th>" << endl;
2721  file << "<td align=\"center\" bgcolor=\"#EEEEEE\">" << totalCount << "</b></th>" << endl;
2722 
2723  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
2724  {
2725  file << "<td align=\"center\">";
2726  file << totals[predClassNum];
2727  file << "</td>" << endl;
2728  }
2729  file << "</tr>" << endl;
2730  file << "</table>" << endl;
2731 
2732  /***************************************************************************
2733  * generate the table with the percents
2734  ***************************************************************************/
2735  file << "<br/>" << endl;
2736  file << "<p><b>Accuracy for Non Noise</b> "
2737  << setprecision (5)
2738  << (((double)totalNonNoiseRight / (double)totalNonNoise) * 100.0)
2739  << "%</p>"
2740  << endl;
2741  file << "<table cellpadding=\"2\" cellspacing=\"0\" border=\"2\">" << endl;
2742  file << "<tr>" << endl;
2743 
2744  // output the first row (which is class names)
2745  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>Class Names</b></th>" << endl;
2746  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>Totals</b></th>" << endl;
2747  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
2748  {
2749  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>" << classes[knownClassNum].Name() << "</b></th>" << endl;
2750  }
2751  file << "</tr>" << endl;
2752 
2753  // output the data rows
2754  double perc=0.0;
2755  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
2756  {
2757  file << "<tr>" << endl;
2758  file << "<th align=\"center\" bgcolor=\"#CCCCCC\"><b>" << classes[knownClassNum].Name() << "</b></th>" << endl;
2759  file << "<td align=\"center\" bgcolor=\"#EFEFEF\">" << setprecision (4) << (countsByKnownClass [knownClassNum]/totalCount*100) << "</td>" << endl;
2760 
2761  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
2762  {
2763  if (predClassNum == knownClassNum)
2764  file << "<td align=\"center\" bgcolor=\"#EEEEEE\">";
2765  else
2766  file << "<td align=\"center\">";
2767  if (countsByKnownClass [knownClassNum] <= 0)
2768  perc = 0.0;
2769  else
2770  perc = predictedCountsCM [knownClassNum][predClassNum] / countsByKnownClass [knownClassNum] * 100.0;
2771 
2772  file << setprecision (4) << perc;
2773  file << "</td>" << endl;
2774  }
2775  file << "</tr>" << endl;
2776  }
2777  file << "</table>" << endl;
2778 
2779  file << "</body>" << endl;
2780  file << "</html>" << endl;
2781 
2782  delete[] totals;
2783 } /* PrintConfusionMatrixHTML */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
void ConfusionMatrix2::PrintConfusionMatrixHTML ( ostream &  outFile)

Definition at line 1025 of file ConfusionMatrix2.cpp.

References KKB::KKStr::Concat(), and KKB::KKStr::KKStr().

1026 {
1027  kkint32 knownClassNum;
1028  kkint32 predClassNum;
1029  kkint32 x;
1030 
1031 
1032  double overallAccuracy = 0.0;
1033  if (totalCount != 0.0)
1034  overallAccuracy = 100.0 * correctCount / totalCount;
1035 
1036  if (numInvalidClassesPredicted > 0.0)
1037  {
1038  o << "<p style=\"font-weight:bold\">" << endl
1039  << "*********************************************************************************************<br />" << endl
1040  << "******************* WARNING WARNING WARNING WARNING *********************<br />" << endl
1041  << "******************* *********************<br />" << endl
1042  << "******************* There were invalid classes specified that were *********************<br />" << endl
1043  << "******************* not counted. numInvalidClassesPredicted[" << numInvalidClassesPredicted << "] *********************<br />" << endl
1044  << "*********************************************************************************************<br />" << endl
1045  << "</p>" << endl
1046  << "<br />" << endl;
1047  }
1048 
1049 
1050  o << "Overall Accuracy: "
1051  << StrFormatDouble (overallAccuracy, "ZZZ0.000") << "%"
1052  << endl;
1053 
1054 
1055  o << "<table align=\"center\" border=\"2\" cellpadding=\"3\" cellspacing=\"0\" frame=\"box\" summary=\"Confusion \" >" << endl
1056  << " <thead style=\"font-weight:bold; text-align:center; vertical-align:bottom\">" << endl
1057  << " <tr>" << endl
1058  << " <th>Class<br />Names</th><th>Count</th>" << classes.ExtractHTMLTableHeader () << endl
1059  << " </tr>" << endl
1060  << " </thead>" << endl
1061  << " <tbody style=\"font-weight:normal; text-align:right; font-family:Courier\">" << endl;
1062 
1063  double* totals = new double[classCount];
1064  for (x = 0; x < classCount; x++)
1065  totals[x] = 0;
1066 
1067  double totalNonNoise = 0;
1068  double totalNonNoiseRight = 0;
1069 
1070  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1071  {
1072  bool noiseClass = classes[knownClassNum].UnDefined ();
1073 
1074  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
1075  {
1076  totals[predClassNum] += predictedCountsCM [knownClassNum] [predClassNum];
1077  }
1078 
1079  PrintSingleLineHTML (o,
1080  classes [knownClassNum].Name (),
1081  countsByKnownClass [knownClassNum],
1082  knownClassNum,
1083  predictedCountsCM [knownClassNum]
1084  );
1085  if (!noiseClass)
1086  {
1087  totalNonNoise = totalNonNoise + countsByKnownClass [knownClassNum];
1088  totalNonNoiseRight = totalNonNoiseRight + predictedCountsCM [knownClassNum] [knownClassNum];
1089  }
1090 
1091  }
1092 
1093  PrintSingleLineHTML (o,
1094  KKStr ("Totals"),
1095  totalCount,
1096  -1,
1097  totals
1098  );
1099 
1100  o << "<tr><td colspan=\"" << (classCount + 2) << "\">&nbsp</td></tr>" << endl;
1101 
1102  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1103  {
1104  PrintPercentLineHTML (o,
1105  classes [knownClassNum].Name (),
1106  countsByKnownClass [knownClassNum],
1107  knownClassNum,
1108  predictedCountsCM [knownClassNum]
1109  );
1110  }
1111 
1112  o << "</tbody>" << endl
1113  << "</table>" << endl;
1114 
1115  double nonNoiseAccuracy = 0.0;
1116  if (totalNonNoise != 0)
1117  nonNoiseAccuracy = 100.0 * (double)totalNonNoiseRight / (double)totalNonNoise;
1118 
1119  o << "Non Noise Accuracy: " << StrFormatDouble (nonNoiseAccuracy, "ZZ0.000") << "%" << "<br />" << endl;
1120 
1121  delete [] totals;
1122 } /* PrintConfusionMatrixHTML */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
KKStr ExtractHTMLTableHeader() const
Will generate a HTML formatted string that can be used in a HTML table.
Definition: MLClass.cpp:1195
KKStr StrFormatDouble(double val, const char *mask)
Definition: KKStr.cpp:4819
void ConfusionMatrix2::PrintConfusionMatrixLatexTable ( ostream &  outFile)

Definition at line 1464 of file ConfusionMatrix2.cpp.

References KKB::KKStr::Concat(), and KKB::KKStr::KKStr().

1465 {
1466  kkint32 knownClassNum;
1467  kkint32 predClassNum;
1468  kkint32 x;
1469 
1470  double overallAccuracy = 0.0;
1471  if (totalCount != 0.0)
1472  overallAccuracy = 100.0 * correctCount / totalCount;
1473 
1474  outFile << "Overall Accuracy:\t"
1475  << StrFormatDouble (overallAccuracy, "ZZZ0.000") << "\\%\\\\"
1476  << endl;
1477 
1478  PrintLatexTableColumnHeaders (outFile);
1479 
1480 
1481  double* totals = new double[classCount];
1482  for (x = 0; x < classCount; x++)
1483  totals[x] = 0;
1484 
1485 
1486  double totalNonNoise = 0;
1487  double totalNonNoiseRight = 0;
1488 
1489  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1490  {
1491  bool noiseClass = classes[knownClassNum].UnDefined ();
1492 
1493  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
1494  {
1495  totals[predClassNum] += predictedCountsCM[knownClassNum] [predClassNum];
1496  }
1497 
1498  PrintSingleLineLatexTable (outFile,
1499  knownClassNum,
1500  classes [knownClassNum].Name (),
1501  countsByKnownClass [knownClassNum],
1502  predictedCountsCM [knownClassNum]
1503  );
1504 
1505  outFile << "\\hline" << endl;
1506 
1507  if (!noiseClass)
1508  {
1509  totalNonNoise = totalNonNoise + countsByKnownClass [knownClassNum];
1510  totalNonNoiseRight = totalNonNoiseRight + predictedCountsCM [knownClassNum] [knownClassNum];
1511  }
1512 
1513  }
1514 
1515  outFile << "\\hline" << endl;
1516 
1517  PrintSingleLineLatexTable (outFile,
1518  -1,
1519  KKStr ("Totals"),
1520  totalCount,
1521  totals
1522  );
1523 
1524  outFile << "\\hline" << endl;
1525 
1526  outFile << "\\end{tabular}" << endl;
1527 
1528 
1529  outFile << endl
1530  << "\\vspace{16pt}" << endl
1531  << endl;
1532 
1533 
1534  PrintLatexTableColumnHeaders (outFile);
1535 
1536  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1537  {
1538  PrintPercentLineLatexTable (outFile,
1539  knownClassNum,
1540  classes [knownClassNum].Name (),
1541  countsByKnownClass [knownClassNum],
1542  predictedCountsCM [knownClassNum]
1543  );
1544  outFile << "\\hline" << endl;
1545  }
1546 
1547  outFile << "\\end{tabular}" << endl;
1548 
1549  outFile << endl;
1550 
1551  delete[] totals;
1552 } /* PrintConfusionMatrixLatexTable */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
KKStr StrFormatDouble(double val, const char *mask)
Definition: KKStr.cpp:4819
void ConfusionMatrix2::PrintConfusionMatrixNarrow ( ostream &  outFile)

Definition at line 1560 of file ConfusionMatrix2.cpp.

References KKB::KKStr::Concat(), KKB::KKStr::KKStr(), KKB::KKStr::LeftPad(), and KKB::operator<<().

1561 {
1562  kkint32 knownClassNum;
1563  kkint32 predClassNum;
1564  kkint32 x;
1565 
1566 
1567  // Lets generate Titles first
1568  outFile << endl;
1569 
1570  double perc = 0.0;
1571  if (totalCount != 0)
1572  perc = correctCount / totalCount;
1573 
1574  outFile << "Overall Accuracy is "
1575  << setprecision (5)
1576  << (100.0 * perc) << "%"
1577  << endl;
1578 
1579  outFile << endl;
1580 
1581 
1582  outFile << setw (25) << "Class Names";
1583 
1584  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1585  {
1586  KKStr colDesc ("Col");
1587  colDesc << (knownClassNum + 1);
1588 
1589  colDesc.LeftPad (6);
1590 
1591  outFile << colDesc;
1592  }
1593  outFile << endl;
1594 
1595  outFile << setw (25) << "===========";
1596  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1597  {
1598  outFile << setw (6) << "====";
1599  }
1600  outFile << endl;
1601 
1602  double* totals = new double[classCount];
1603  for (x = 0; x < classCount; x++)
1604  totals[x] = 0;
1605 
1606 
1607  double totalNonNoise = 0;
1608  double totalNonNoiseRight = 0;
1609 
1610  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1611  {
1612  bool noiseClass = classes[knownClassNum].UnDefined ();
1613 
1614  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
1615  {
1616  totals[predClassNum] += predictedCountsCM [knownClassNum] [predClassNum];
1617  }
1618 
1619  PrintSingleLineShort (outFile,
1620  classes [knownClassNum].Name (),
1621  countsByKnownClass [knownClassNum],
1622  predictedCountsCM [knownClassNum]
1623  );
1624  if (!noiseClass)
1625  {
1626  totalNonNoise += countsByKnownClass [knownClassNum];
1627  totalNonNoiseRight += predictedCountsCM [knownClassNum] [knownClassNum];
1628  }
1629 
1630  }
1631 
1632  PrintSingleLineShort (outFile,
1633  KKStr ("Totals"),
1634  totalCount,
1635  totals
1636  );
1637 
1638  outFile << endl << endl;
1639 
1640  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1641  {
1642  PrintPercentLineShort (outFile,
1643  classes [knownClassNum].Name (),
1644  countsByKnownClass [knownClassNum],
1645  predictedCountsCM [knownClassNum]
1646  );
1647  }
1648 
1649  outFile << endl
1650  << endl;
1651 
1652 
1653  perc = 0.0;
1654  if (totalNonNoise != 0)
1655  perc = (double)totalNonNoiseRight / (double)totalNonNoise;
1656 
1657  outFile << "Accuracy for Non Noise "
1658  << setprecision (5)
1659  << (perc * 100.0)
1660  << "%"
1661  << endl;
1662 
1663  outFile << endl << endl;
1664 
1665  delete[] totals;
1666 } /* PrintCrossValidationNarrow */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
void ConfusionMatrix2::PrintConfusionMatrixTabDelimited ( ostream &  outFile)

Definition at line 1328 of file ConfusionMatrix2.cpp.

References KKB::KKStr::Concat(), and KKB::KKStr::KKStr().

1329 {
1330  kkint32 knownClassNum;
1331  kkint32 predClassNum;
1332  kkint32 x;
1333 
1334  // Lets generate Titles first
1335  outFile << endl;
1336 
1337  double overallAccuracy = 0.0;
1338  if (totalCount != 0.0)
1339  overallAccuracy = 100.0 * correctCount / totalCount;
1340 
1341  if (numInvalidClassesPredicted > 0.0)
1342  {
1343  outFile << endl
1344  << "*********************************************************************************************" << endl
1345  << "******************* WARNING WARNING WARNING WARNING *********************" << endl
1346  << "******************* *********************" << endl
1347  << "******************* There were invalid classes specified that were *********************" << endl
1348  << "******************* not counted. numInvalidClassesPredicted[" << numInvalidClassesPredicted << "] *********************" << endl
1349  << "*********************************************************************************************" << endl
1350  << endl;
1351  }
1352 
1353 
1354  outFile << "Overall Accuracy:\t"
1355  << StrFormatDouble (overallAccuracy, "ZZZ0.000") << "%"
1356  << endl;
1357 
1358  outFile << endl;
1359 
1360 
1361 
1362  KKStr titleLine1, titleLine2, titleLine3;
1363  classes.ExtractThreeTitleLines (titleLine1, titleLine2, titleLine3);
1364 
1365  outFile << "" << "\t" << "" << "\t" << titleLine1 << endl;
1366  outFile << "" << "\t" << "" << "\t" << titleLine2 << endl;
1367  outFile << "Class_Names" << "\t" << "Count" << "\t" << titleLine3 << endl;
1368 
1369  double* totals = new double[classCount];
1370  for (x = 0; x < classCount; x++)
1371  totals[x] = 0;
1372 
1373  double totalNonNoise = 0;
1374  double totalNonNoiseRight = 0;
1375 
1376  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1377  {
1378  bool noiseClass = classes[knownClassNum].UnDefined ();
1379 
1380  for (predClassNum = 0; predClassNum < classCount; predClassNum++)
1381  {
1382  totals[predClassNum] += predictedCountsCM[knownClassNum] [predClassNum];
1383  }
1384 
1385  PrintSingleLineTabDelimited (outFile,
1386  classes [knownClassNum].Name (),
1387  countsByKnownClass [knownClassNum],
1388  predictedCountsCM [knownClassNum]
1389  );
1390  if (!noiseClass)
1391  {
1392  totalNonNoise = totalNonNoise + countsByKnownClass [knownClassNum];
1393  totalNonNoiseRight = totalNonNoiseRight + predictedCountsCM [knownClassNum] [knownClassNum];
1394  }
1395  }
1396 
1397  PrintSingleLineTabDelimited (outFile,
1398  KKStr ("Totals"),
1399  totalCount,
1400  totals
1401  );
1402 
1403  outFile << endl << endl;
1404 
1405  for (knownClassNum = 0; knownClassNum < classCount; knownClassNum++)
1406  {
1407  PrintPercentLineTabDelimited (outFile,
1408  classes [knownClassNum].Name (),
1409  countsByKnownClass [knownClassNum],
1410  predictedCountsCM [knownClassNum]
1411  );
1412  }
1413 
1414 
1415  double nonNoiseAccuracy = 0.0;
1416  if (totalNonNoise != 0)
1417  nonNoiseAccuracy = 100.0 * (double)totalNonNoiseRight / (double)totalNonNoise;
1418 
1419  outFile << endl
1420  << endl;
1421 
1422  outFile << "Non Noise Accuracy:" << "\t" << StrFormatDouble (nonNoiseAccuracy, "ZZ0.000") << "%" << endl;
1423 
1424 
1425  outFile << endl << endl;
1426 
1427  delete [] totals;
1428 } /* PrintConfusionMatrixTabDelimited */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
void ExtractThreeTitleLines(KKStr &titleLine1, KKStr &titleLine2, KKStr &titleLine3) const
Using the class names create three title lines where we split names by "_" characters between the thr...
Definition: MLClass.cpp:1068
KKStr StrFormatDouble(double val, const char *mask)
Definition: KKStr.cpp:4819
void ConfusionMatrix2::PrintErrorByProb ( ostream &  outFile)

Definition at line 1934 of file ConfusionMatrix2.cpp.

1935 {
1936  kkint32 bucket;
1937  kkint32 classNum;
1938 
1939  outFile << endl;
1940 
1941  outFile << "Prob" << "\t";
1942 
1943  // Lets first Print Titles.
1944  for (classNum = 0; classNum < classCount; classNum++)
1945  {
1946  outFile << "\t\t";
1947  outFile << classes[classNum].Name ();
1948  }
1949 
1950  outFile << endl;
1951 
1952 
1953  outFile << setiosflags (ios::fixed);
1954 
1955  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
1956  {
1957  outFile << ((bucket + 1) * probBucketSize) << "%\t";
1958 
1959  kkint32 classNum;
1960 
1961  for (classNum = 0; classNum < classCount; classNum++)
1962  {
1963  double perc;
1964 
1965  double count = countByKnownClassByProb [classNum][bucket];
1966  double correct = correctByKnownClassByProb [classNum][bucket];
1967 
1968  if (count > 0)
1969  perc = 100.0 * (double)correct / (double)count;
1970  else
1971  perc = 0.0;
1972 
1973  outFile << count << "\t" << correct << "\t" << setprecision (1) << perc << "% \t";
1974  }
1975 
1976  outFile << endl;
1977  }
1978 
1979  outFile << endl;
1980 } /* PrintErrorByProb */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
void ConfusionMatrix2::PrintErrorByProbByRows ( ostream &  outFile)

Definition at line 2083 of file ConfusionMatrix2.cpp.

2084 {
2085  kkint32 bucket;
2086  kkint32 classNum;
2087 
2088  kkint32 x;
2089 
2090  double* totalCount = new double[numOfProbBuckets];
2091  double* totalCorrect = new double[numOfProbBuckets];
2092 
2093  for (x = 0; x < numOfProbBuckets; x++)
2094  {
2095  totalCount [x] = 0;
2096  totalCorrect [x] = 0;
2097  }
2098 
2099  outFile << "ClassName\tAvg Prob\t";
2100  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2101  {
2102  outFile << ((bucket + 1) * probBucketSize) << "%\t";
2103  }
2104 
2105  outFile << endl
2106  << endl;
2107 
2108  for (classNum = 0; classNum < classCount; classNum++)
2109  {
2110  double avg;
2111  if (countsByKnownClass [classNum] != 0)
2112  avg = 100.0 * totalPredProbsByKnownClass [classNum] / countsByKnownClass [classNum];
2113  else
2114  avg = 0;
2115 
2116  outFile << classes[classNum].Name ()
2117  << "\t"
2118  << avg << "%";
2119 
2120 
2121  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2122  {
2123  outFile << "\t" << countByKnownClassByProb [classNum][bucket];
2124  totalCount[bucket] = totalCount[bucket] + countByKnownClassByProb [classNum][bucket];
2125  }
2126  outFile << endl;
2127 
2128 
2129 
2130  outFile << classes[classNum].Name () << " Correct" << "\t";
2131  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2132  {
2133  outFile << "\t" << correctByKnownClassByProb [classNum][bucket];
2134  totalCorrect [bucket] = totalCorrect [bucket] + correctByKnownClassByProb [classNum][bucket];
2135  }
2136  outFile << endl;
2137 
2138 
2139  outFile << "Accuracy" << "\t";
2140  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2141  {
2142  double perc;
2143 
2144  if (countByKnownClassByProb [classNum][bucket] <= 0.0)
2145  perc = 0.0;
2146  else
2147  perc = 100.0 * correctByKnownClassByProb [classNum][bucket] / countByKnownClassByProb [classNum][bucket];
2148 
2149  outFile << "\t" << perc << "%";
2150  }
2151  outFile << endl;
2152 
2153  outFile << endl;
2154  }
2155 
2156  outFile << endl;
2157 
2158  outFile << "Total" << "\t";
2159  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2160  {
2161  outFile << "\t" << totalCount[bucket];
2162  }
2163 
2164  outFile << endl;
2165 
2166  outFile << "Correct" << "\t";
2167  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2168  {
2169  outFile << "\t" << totalCorrect[bucket];
2170  }
2171  outFile << endl;
2172 
2173  outFile << "Accuracy" << "\t";
2174  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2175  {
2176  double perc;
2177 
2178  if (totalCount[bucket] <= 0)
2179  perc = 0.0;
2180  else
2181  perc = 100.0 * totalCorrect[bucket] / totalCount[bucket];
2182 
2183  outFile << "\t" << perc << "%";
2184  }
2185 
2186  delete[] totalCount; totalCount = NULL;
2187  delete[] totalCorrect; totalCorrect = NULL;
2188 
2189  outFile << endl;
2190 
2191  return;
2192 } /* PrintErrorByProbByRows */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
void ConfusionMatrix2::PrintErrorBySize ( ostream &  outFile)

Definition at line 1894 of file ConfusionMatrix2.cpp.

1895 {
1896  kkint32 bucket;
1897  kkint32 classNum;
1898 
1899  outFile << endl;
1900 
1901  outFile << "Size" << "\t";
1902 
1903  // Lets first Print Titles.
1904  for (classNum = 0; classNum < classCount; classNum++)
1905  {
1906  outFile << "\t\t";
1907  outFile << classes[classNum].Name ();
1908  }
1909  outFile << endl;
1910 
1911 
1912 
1913  for (bucket = 0; bucket < numOfBuckets; bucket++)
1914  {
1915  outFile << ((bucket + 1) * bucketSize) << "\t";
1916 
1917  kkint32 classNum;
1918 
1919  for (classNum = 0; classNum < classCount; classNum++)
1920  {
1921  outFile << countByKnownClassBySize [classNum][bucket] << "\t"
1922  << correctByKnownClassBySize [classNum][bucket] << "\t";
1923  }
1924 
1925  outFile << endl;
1926  }
1927 
1928  outFile << endl;
1929 } /* PrintErrorBySize */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
void ConfusionMatrix2::PrintErrorBySizeByRows ( ostream &  outFile)

Definition at line 1986 of file ConfusionMatrix2.cpp.

1987 {
1988  kkint32 bucket;
1989  kkint32 classNum;
1990 
1991 
1992  outFile << endl
1993  << "Error by size" << endl
1994  << endl;
1995 
1996  outFile << "ClassName\tAvg Size\t";
1997  for (bucket = 0; bucket < numOfBuckets; bucket++)
1998  {
1999  outFile << ((bucket + 1) * bucketSize) << "\t";
2000  }
2001 
2002  outFile << endl
2003  << endl;
2004 
2005  for (classNum = 0; classNum < classCount; classNum++)
2006  {
2007  double avg;
2008  if (countsByKnownClass [classNum] != 0)
2009  avg = totalSizesByKnownClass[classNum] / countsByKnownClass [classNum];
2010  else
2011  avg = 0;
2012 
2013  outFile << classes[classNum].Name ()
2014  << "\t"
2015  << avg;
2016 
2017 
2018  for (bucket = 0; bucket < numOfBuckets; bucket++)
2019  {
2020  outFile << "\t" << countByKnownClassBySize[classNum][bucket];
2021  }
2022  outFile << endl;
2023 
2024 
2025  outFile << classes[classNum].Name () << " Correct" << "\t";
2026  for (bucket = 0; bucket < numOfBuckets; bucket++)
2027  {
2028  outFile << "\t" << correctByKnownClassBySize [classNum][bucket];
2029  }
2030 
2031  outFile << endl;
2032  outFile << endl;
2033  }
2034  outFile << endl;
2035 
2036 
2037  outFile << endl
2038  << endl;
2039 
2040  outFile << "ClassName\tAvg Size\t";
2041  for (bucket = 0; bucket < numOfBuckets; bucket++)
2042  {
2043  outFile << ((bucket + 1) * bucketSize) << "\t";
2044  }
2045 
2046  outFile << endl
2047  << endl;
2048 
2049  for (classNum = 0; classNum < classCount; classNum++)
2050  {
2051  double avg;
2052  if (countsByKnownClass [classNum] != 0)
2053  avg = totalSizesByKnownClass[classNum] / countsByKnownClass [classNum];
2054  else
2055  avg = 0;
2056 
2057  outFile << classes[classNum].Name ()
2058  << "\t"
2059  << avg;
2060 
2061 
2062  for (bucket = 0; bucket < numOfBuckets; bucket++)
2063  {
2064  float a = 0.0f;
2065  if (countByKnownClassBySize[classNum][bucket] != 0)
2066  a = (float)(correctByKnownClassBySize [classNum][bucket]) / (float)(countByKnownClassBySize[classNum][bucket]);
2067 
2068  outFile << "\t" << a;
2069  }
2070  outFile << endl;
2071 
2072  outFile << endl;
2073  outFile << endl;
2074  }
2075 
2076  outFile << endl;
2077 } /* PrintErrorBySizeByRows */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
void ConfusionMatrix2::PrintErrorBySizeReduced ( ostream &  outFile)

Definition at line 2335 of file ConfusionMatrix2.cpp.

2336 {
2337  kkint32 classNum;
2338 
2339  outFile << endl;
2340  outFile << endl;
2341  outFile << endl;
2342 
2343  for (classNum = 0; classNum < classCount; classNum++)
2344  PrintErrorBySizeRowReduced (outFile, classNum);
2345 
2346 } /* PrintErrorBySizeReduced */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
void ConfusionMatrix2::PrintProbDistributionTitle ( ostream &  outFile)

Definition at line 2198 of file ConfusionMatrix2.cpp.

2199 {
2200  kkint32 bucket;
2201 
2202  outFile << "Total";
2203  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2204  {
2205  outFile << "\t" << ((bucket + 1) * probBucketSize) << "%";
2206  }
2207 
2208  outFile << endl;
2209 } /* PrintProbDistributionTitle */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
void ConfusionMatrix2::PrintProbDistributionTotalCount ( ostream &  outFile)

Definition at line 2216 of file ConfusionMatrix2.cpp.

2217 {
2218  kkint32 bucket;
2219  kkint32 classNum;
2220 
2221  double* count = new double[numOfProbBuckets];
2222 
2223  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2224  {
2225  count [bucket] = 0;
2226  }
2227 
2228 
2229  double total = 0;
2230 
2231  for (classNum = 0; classNum < classCount; classNum++)
2232  {
2233  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2234  {
2235  count[bucket] = count[bucket] + countByKnownClassByProb [classNum][bucket];
2236  }
2237  }
2238 
2239 
2240  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2241  {
2242  total = total + count[bucket];
2243  }
2244 
2245 
2246  outFile << total;
2247  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2248  {
2249  outFile << "\t" << count[bucket];
2250  }
2251 
2252  outFile << endl;
2253 
2254  delete[] count;
2255 
2256  return;
2257 } /* PrintProbDistributionTotalCount */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
void ConfusionMatrix2::PrintProbDistributionTotalError ( ostream &  outFile)

Definition at line 2263 of file ConfusionMatrix2.cpp.

2264 {
2265  kkint32 bucket;
2266  kkint32 classNum;
2267 
2268  double* count = new double[numOfProbBuckets];
2269  double* correct = new double[numOfProbBuckets];
2270 
2271  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2272  {
2273  count [bucket] = 0;
2274  correct [bucket] = 0;
2275  }
2276 
2277 
2278  for (classNum = 0; classNum < classCount; classNum++)
2279  {
2280  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2281  {
2282  count [bucket] = count [bucket] + countByKnownClassByProb [classNum][bucket];
2283  correct[bucket] = correct[bucket] + correctByKnownClassByProb [classNum][bucket];
2284  }
2285  }
2286 
2287 
2288  double overallAccuracy = 0.0;
2289 
2290  {
2291  double totalCount = 0;
2292  double totalCorrect = 0;
2293 
2294  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2295  {
2296  totalCount = totalCount + count[bucket];
2297  totalCorrect = totalCorrect + correct[bucket];
2298  }
2299 
2300  if (totalCount == 0)
2301  {
2302  overallAccuracy = 0.0;
2303  }
2304  else
2305  {
2306  overallAccuracy = 100.0 * (double)totalCorrect / (double)totalCount;
2307  }
2308  }
2309 
2310  outFile << setprecision (2);
2311 
2312  outFile << overallAccuracy << "%";
2313 
2314  for (bucket = 0; bucket < numOfProbBuckets; bucket++)
2315  {
2316  double accuracy = 0.0;
2317 
2318  if (count[bucket] > 0)
2319  accuracy = 100.0 * (double)correct[bucket] / (double)count[bucket];
2320 
2321  outFile << "\t" << accuracy << "%";
2322  }
2323 
2324  outFile << endl;
2325 
2326  delete[] count; count = NULL;
2327  delete[] correct; correct = NULL;
2328 } /* PrintProbDistributionTotalError */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
void ConfusionMatrix2::PrintTrueFalsePositivesTabDelimited ( ostream &  outFile)

Definition at line 1682 of file ConfusionMatrix2.cpp.

References KKB::KKStr::Concat().

1683 {
1684  kkint32 numOfClasses = classes.QueueSize ();
1685  kkint32 x = 0;
1686  kkint32 y = 0;
1687 
1688  // Refer to http://www.medcalc.be/manual/mpage06-13a.php for definitions.
1689  // First we calc TruePositives, FalsePositives, TrueNegatives, FalseNegatives
1690  double* falsePositives = new double[numOfClasses];
1691  double* falseNegatives = new double[numOfClasses];
1692  double* truePositives = new double[numOfClasses];
1693  double* trueNegatives = new double[numOfClasses];
1694 
1695  double totalTP = 0.0;
1696  double totalFP = 0.0;
1697  double totalTN = 0.0;
1698  double totalFN = 0.0;
1699 
1700 
1701  for (x = 0; x < numOfClasses; x++)
1702  {
1703  truePositives [x] = predictedCountsCM [x][x];
1704  totalTP += predictedCountsCM[x][x];
1705 
1706  trueNegatives [x] = 0.0;
1707  falsePositives[x] = 0.0;
1708  falseNegatives[x] = 0.0;
1709 
1710  for (y = 0; y < numOfClasses; y++)
1711  {
1712  if (y != x)
1713  {
1714  falsePositives[x] += predictedCountsCM [y][x]; // Was classified as x but its known class is y.
1715  totalFP += predictedCountsCM [y][x];
1716 
1717  falseNegatives[x] += predictedCountsCM [x][y]; // Should have been classed as x not y.
1718  totalFN += predictedCountsCM [x][y];
1719 
1720  trueNegatives [x] += (countsByKnownClass [y] - predictedCountsCM [y][x]);
1721  totalTN += (countsByKnownClass [y] - predictedCountsCM [y][x]);
1722  }
1723  }
1724  }
1725 
1726 
1727  KKStr titleLine1, titleLine2;
1728  classes.ExtractTwoTitleLines (titleLine1, titleLine2);
1729 
1730  r << "\t" << "" "\t" << titleLine1 << endl;
1731  r << "\t" << "Total" << "\t" << titleLine2 << endl;
1732 
1733  r << "TruePositives" << "\t" << totalTP;
1734  for (x = 0; x < numOfClasses; x++)
1735  {
1736  r << "\t" << truePositives[x];
1737  }
1738  r << endl;
1739 
1740  r << "FalsePositives" << "\t" << totalFP;
1741  for (x = 0; x < numOfClasses; x++)
1742  {
1743  r << "\t" << falsePositives[x];
1744  }
1745  r << endl;
1746 
1747  r << "TrueNegatives" << "\t" << totalTN;
1748  for (x = 0; x < numOfClasses; x++)
1749  {
1750  r << "\t" << trueNegatives[x];
1751  }
1752  r << endl;
1753 
1754  r << "FalseNegatives" << "\t" << totalFN;
1755  for (x = 0; x < numOfClasses; x++)
1756  {
1757  r << "\t" << falseNegatives[x];
1758  }
1759  r << endl;
1760 
1761  r << endl;
1762  r << "Sensitivity(TP/(TP+FN))" << "\t" << StrFormatDouble(PercentOf (totalTP, totalFN), "zzz,zz0.00") << "%";
1763  for (x = 0; x < numOfClasses; x++)
1764  {
1765  r << "\t" << StrFormatDouble(PercentOf (truePositives[x], falseNegatives[x]), "zzz,zz0.00") << "%";
1766  }
1767  r << endl;
1768 
1769  r << "Specificity(TN/(TN+FP))" << "\t" << StrFormatDouble(PercentOf (totalTN, totalFP), "zzz,zz0.00") << "%";
1770  for (x = 0; x < numOfClasses; x++)
1771  {
1772  r << "\t" << StrFormatDouble(PercentOf (trueNegatives[x], falsePositives[x]), "zzz,zz0.00") << "%";
1773  }
1774  r << endl;
1775 
1776  r << "PositivePredictiveValue(TP/(TP+FP))" << "\t" << StrFormatDouble(PercentOf (totalTP, totalFP), "zzz,zz0.00") << "%";
1777  for (x = 0; x < numOfClasses; x++)
1778  {
1779  r << "\t" << StrFormatDouble(PercentOf (truePositives[x], falsePositives[x]), "zzz,zz0.00") << "%";
1780  }
1781  r << endl;
1782 
1783  {
1784  double fMeasure = 0.0;
1785  double divisor = 2.0 * (double)totalTP + (double)totalFP + (double)totalFN;
1786  if (divisor != 0.0)
1787  fMeasure = 100.0 * (2.0 * (double)totalTP / divisor);
1788 
1789  r << "F-Measure(2*TP/(2*TP + FP + FN))" << "\t" << StrFormatDouble(fMeasure, "zzz,zz0.00") << "%";
1790  for (x = 0; x < numOfClasses; x++)
1791  {
1792  fMeasure = 0.0;
1793  divisor = 2.0 * (double)truePositives[x] + (double)falsePositives[x] + (double)falseNegatives[x];
1794  if (divisor != 0.0)
1795  fMeasure = 100.0 * (2.0 * (double)truePositives[x] / divisor);
1796 
1797  r << "\t" << StrFormatDouble(fMeasure, "zzz,zz0.00") << "%";
1798  }
1799  r << endl;
1800  }
1801 
1802  delete[] falseNegatives;
1803  delete[] falsePositives;
1804  delete[] trueNegatives;
1805  delete[] truePositives;
1806 } /* PrintTrueFalsePositivesTabDelimited */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
double PercentOf(double x, double y)
__int32 kkint32
Definition: KKBaseTypes.h:88
void ExtractTwoTitleLines(KKStr &titleLine1, KKStr &titleLine2) const
Using the class names create two title lines where we split names by "_" characters between the two l...
Definition: MLClass.cpp:1035
KKStr StrFormatDouble(double val, const char *mask)
Definition: KKStr.cpp:4819
kkint32 QueueSize() const
Definition: KKQueue.h:313
double KKMLL::ConfusionMatrix2::TotalCount ( )
inline

Definition at line 154 of file ConfusionMatrix2.h.

154 {return totalCount;}
void ConfusionMatrix2::WriteSimpleConfusionMatrix ( ostream &  f) const

Meant to work with 'ClassificationStatus.cs' of PicesCommander. This will write a simple confusion matrix table; one row for each class. 'ClassificationStatus.cs' will then use this data to adjust for bias in the learner.

Definition at line 3241 of file ConfusionMatrix2.cpp.

3242 {
3243  // "Estimating the Taxonomic composition of a sample when individuals are classified with error"
3244  // by Andrew Solow, Cabell Davis, Qiao Hu
3245  // Woods Hole Oceanographic Institution, Woods Hole Massachusetts
3246  // Marine Ecology Progress Series
3247  // published 2006-july-06
3248  // vol 216:309-311
3249 
3250  // This data is meant to work with "ClassificationStatus.cs" to provide the data necessary to
3251  // adjust for bias.
3252 
3253  f << "<SimpleConfusionMatrix>" << endl;
3254  f << "Classes" << "\t" << classes.ToCommaDelimitedStr () << endl;
3255  kkint32 row = 0;
3256  kkint32 col = 0;
3259  for (idx = classes.begin (); idx != classes.end (); idx++)
3260  {
3261  MLClassPtr mlClass = *idx;
3262  f << "DataRow" << "\t" << mlClass->Name () << "\t";
3263  col = 0;
3264  for (col = 0; col < classCount; col++)
3265  {
3266  double p = 0.0;
3267  if (countsByKnownClass[row] != 0.0)
3268  p = predictedCountsCM[row][col] / countsByKnownClass[row];
3269 
3270  if (col > 0)
3271  f << ",";
3272 
3273  f << StrFormatDouble (predictedCountsCM[row][col], "ZZZZZ0.00") << ":" << StrFormatDouble (p, "ZZ0.0000000");
3274  }
3275  f << std::endl;
3276 
3277  row++;
3278  }
3279 
3280  f << "</SimpleConfusionMatrix>" << std::endl;
3281 } /* WriteSimpleConfusionMatrix */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
KKStr ToCommaDelimitedStr() const
Definition: MLClass.cpp:970
std::vector< MLClass * >::const_iterator const_iterator
Definition: KKQueue.h:89
Represents a "Class" in the Machine Learning Sense.
Definition: MLClass.h:52
KKStr StrFormatDouble(double val, const char *mask)
Definition: KKStr.cpp:4819
void ConfusionMatrix2::WriteXML ( ostream &  f) const

Definition at line 2976 of file ConfusionMatrix2.cpp.

2977 {
2978  f << "<ConfusionMatrix2>" << endl;
2979 
2980  f << "Classes" << "\t" << classes.ToCommaDelimitedStr () << endl;
2981 
2982  f << "ClassCount" << "\t" << classCount << endl
2983  << "BucketSize" << "\t" << bucketSize << endl
2984  << "probBucketSize" << "\t" << probBucketSize << endl
2985  << "NumOfBuckets" << "\t" << numOfBuckets << endl
2986  << "NumOfProbBuckets" << "\t" << numOfProbBuckets << endl
2987  << endl
2988  << "TotalCount" << "\t" << totalCount << endl
2989  << "CorrectCount" << "\t" << correctCount << endl
2990  << "TotalPredProb" << "\t" << totalPredProb << endl
2991  << "NumInvalidClassesPredicted" << "\t" << numInvalidClassesPredicted << endl
2992  << endl;
2993 
2994  f << "CountsByKnownClass" << "\t" << ArrayToDelimitedDelimitedStr (countsByKnownClass, ',') << endl;
2995  f << "TotalSizesByKnownClass" << "\t" << ArrayToDelimitedDelimitedStr (totalSizesByKnownClass, ',') << endl;
2996  f << "TotalPredProbsByKnownClass" << "\t" << ArrayToDelimitedDelimitedStr (totalPredProbsByKnownClass, ',') << endl;
2997 
2998 
2999  kkint32 classIndex = 0;
3001  for (idx = classes.begin (); idx != classes.end (); idx++)
3002  {
3003  MLClassPtr mlClass = *idx;
3004  f << "ClassTotals" << "\t" << "ClassName" << "\t" << mlClass->Name ().QuotedStr () << "\t" << "ClassIndex" << "\t" << classIndex << endl;
3005 
3006  f << "CountByKnownClassBySize" << "\t" << ArrayToDelimitedDelimitedStr (countByKnownClassBySize [classIndex], numOfBuckets, ',') << endl;
3007  f << "CorrectByKnownClassBySize" << "\t" << ArrayToDelimitedDelimitedStr (correctByKnownClassBySize [classIndex], numOfBuckets, ',') << endl;
3008 
3009  f << "CountByKnownClassByProb" << "\t" << ArrayToDelimitedDelimitedStr (countByKnownClassByProb [classIndex], numOfProbBuckets, ',') << endl;
3010  f << "CorrectByKnownClassByProb" << "\t" << ArrayToDelimitedDelimitedStr (correctByKnownClassByProb [classIndex], numOfProbBuckets, ',') << endl;
3011 
3012  f << "PredictedCountsCM" << "\t" << ArrayToDelimitedDelimitedStr (predictedCountsCM [classIndex], classCount, ',') << endl;
3013 
3014  f << "TotPredProbCM" << "\t" << ArrayToDelimitedDelimitedStr (totPredProbCM [classIndex], classCount, ',') << endl;
3015 
3016  classIndex++;
3017  }
3018 
3019  f << "</ConfusionMatrix2>" << endl;
3020 } /* WriteXML */
HTMLReport &__cdecl endl(HTMLReport &htmlReport)
Definition: HTMLReport.cpp:240
__int32 kkint32
Definition: KKBaseTypes.h:88
KKStr ToCommaDelimitedStr() const
Definition: MLClass.cpp:970
std::vector< MLClass * >::const_iterator const_iterator
Definition: KKQueue.h:89
Represents a "Class" in the Machine Learning Sense.
Definition: MLClass.h:52
KKStr ArrayToDelimitedDelimitedStr(T *_array, kkint32 _count, char _delimiter)

The documentation for this class was generated from the following files: