KSquare Utilities
ConfusionMatrix2.h
Go to the documentation of this file.
1 #ifndef _CONFUSIONMATRIX2_
2 #define _CONFUSIONMATRIX2_
3 
4 //***************************************************************************
5 //* Written by: Kurt Kramer *
6 //* For: Research Work, Plankton recognition System *
7 //* *
8 //*-------------------------------------------------------------------------*
9 //* History *
10 //* *
11 //* Prog Date Description *
12 //* ------- ----------- ------------------------------------------------- *
13 //* Kurt Oct-19-2002 Increment Will now take MLClasses instead of *
14 //* numbers. We will also make a unique copy of *
15 //* mlClassList. This way we will not have to worry *
16 //* about the numbering in the classList behind our *
17 //* back. *
18 //***************************************************************************
19 //*
20 
21 #include "RunLog.h"
22 #include "KKStr.h"
23 
24 #include "MLClass.h"
25 
26 namespace KKMLL
27 {
28 
29  /// <summary>
30  /// A confusion matrix object that is used to record the results from a CrossValidation.
31  /// <seealso cref="CrossValidation"/>
32  /// </summary>
34  {
35  public:
37 
38  ConfusionMatrix2 (const MLClassList& _classes);
39 
41 
42 
43  /**
44  * Will construct an instance of 'ConfusionMatrix2' from the contents of the provided 'istream' object.
45  *@param[in] _classes Will make local copy of this instance; this way we know the ordering which represents the numbering can not change behind our back.
46  *@param[in] f Input stream that the contents of the confusion matrix are read from.
47  *@param[in] _bucketSize Will keep statistics by size of particles.
48  *@param[in] _numOfBuckets Number of Size buckets that will be maintained.
49  *@param[in] _numOfProbBuckets Maximum number of probability buckets to keep track of.
50  *@param[in] _probBucketSize Size of each probability bucket.
51  *@param[in] _log Logger where messages are written to.
52  */
53  ConfusionMatrix2 (const MLClassList& _classes, // Will make its own copy of '_classes'
54  istream& f,
55  kkint32 _bucketSize,
56  kkint32 _numOfBuckets,
57  kkint32 _numOfProbBuckets,
58  kkint32 _probBucketSize,
59  RunLog& _log
60  );
61 
62  virtual
63  ~ConfusionMatrix2 ();
64 
65  double Accuracy ();
66 
68 
70 
72 
73  double Accuracy (MLClassPtr mlClass);
74 
75  KKStr AccuracyStr ();
76 
77  void AddIn (const ConfusionMatrix2& cm,
78  RunLog& log
79  );
80 
81  double AvgPredProb () const;
82 
83  kkint32 ClassCount () const {return classCount;} /**< Returns the current value of the 'classCount' member. */
84 
85  double Count (MLClassPtr mlClass);
86 
87  double CountsByKnownClass (kkint32 knownClassIdx) const;
88 
89  const VectorDouble& CountsByKnownClass () const;
90 
91  void FactorCounts (double factor); /**< Will multiply all counts by 'factor'. You would use this in conjunction with 'AddIn'. */
92 
94  double& truePositives,
95  double& trueNegatives,
96  double& falsePositives,
97  double& falseNegatives
98  )
99  const;
100 
101  float FMeasure (MLClassPtr positiveClass,
102  RunLog& log
103  ) const;
104 
105  const
106  MLClassList& MLClasses () const {return classes;} /**< Returns a const reference to 'classes', this instance's own copy of the MLClassList. */
107 
108  void Increment (MLClassPtr _knownClass,
109  MLClassPtr _predClass,
110  kkint32 _size,
111  double _probability,
112  RunLog& _log
113  );
114 
115  VectorDouble PredictedCounts () const;
116 
117  double PredictedCountsCM (kkint32 knownClassIdx, kkint32 predClassIdx) const;
118 
119  void PrintAccuracyByProbByClassHTML (ostream& o);
120 
121  void PrintConfusionMatrix (ostream& _outFile);
122 
123  void PrintConfusionMatrixAvgPredProbHTML (ostream& o);
124 
125  void PrintConfusionMatrixHTML (const char *title, ostream& file);
126 
127  void PrintConfusionMatrixLatexTable (ostream& outFile);
128 
129  void PrintConfusionMatrixNarrow (ostream& outFile);
130 
131  void PrintConfusionMatrixHTML (ostream& outFile);
132 
133  void PrintConfusionMatrixTabDelimited (ostream& outFile);
134 
135  void PrintTrueFalsePositivesTabDelimited (ostream& outFile);
136 
137  void PrintErrorBySize (ostream& outFile);
138 
139  void PrintErrorByProb (ostream& outFile);
140 
141  void PrintErrorBySizeByRows (ostream& outFile);
142 
143  void PrintErrorByProbByRows (ostream& outFile);
144 
145  void PrintErrorBySizeReduced (ostream& outFile);
146 
147  //***********************************************************
148  //* One Line Summaries *
149  //***********************************************************
150  void PrintProbDistributionTitle (ostream& outFile);
151  void PrintProbDistributionTotalCount (ostream& outFile);
152  void PrintProbDistributionTotalError (ostream& outFile);
153 
154  double TotalCount () const {return totalCount;} /**< Returns the 'totalCount' member; declared const, like 'ClassCount', so it can be called on const instances. */
155 
156  static
158  RunLog& log
159  );
160 
161  void WriteXML (ostream& f) const;
162 
163 
164  /**
165  * Meant to work with 'ClassificationStatus.cs' of PicesCommander. This will write a simple
166  * confusion matrix table; one row for each class. 'ClassificationStatus.cs' will then use this
167  * data to adjust for bias in the learner.
168  */
169  void WriteSimpleConfusionMatrix (ostream& f) const;
170 
171  private:
172  kkint32 AddClassToConfusionMatrix (MLClassPtr newClass,
173  RunLog& log
174  );
175 
176  void InitializeMemory ();
177 
178  void InitializeVector (vector<double>& v,
179  kkint32 x
180  );
181 
182  void InitializeVectorDoublePtr (vector<double*>& v,
183  kkint32 numClasses,
184  kkint32 numBuckets
185  );
186 
187  void CopyVector (const vector<double>& src,
188  vector<double>& dest
189  );
190 
191  void CopyVectorDoublePtr (const vector<double*>& src,
192  vector<double*>& dest,
193  kkint32 numBuckets
194  );
195 
196  void DeleteVectorDoublePtr (vector<double*>& v);
197 
198  void IncreaseVectorDoublePtr (vector<double*>& v,
199  int numBucketsOld,
200  int numBucketsNew
201  );
202 
203  void MakeSureWeHaveTheseClasses (const MLClassList& classList,
204  RunLog& log
205  );
206 
207 
208  void PrintLatexTableColumnHeaders (ostream& outFile);
209 
210  void PrintSingleLine (ostream& _outFile,
211  KKStr _name,
212  double _lineTotal,
213  double _splits[]
214  );
215 
216  void PrintSingleLineTabDelimited (ostream& _outFile,
217  const KKStr& _name,
218  double _lineTotal,
219  double _splits[]
220  );
221 
222  void PrintSingleLineHTML (ostream& _outFile,
223  const KKStr& _name,
224  double _lineTotal,
225  kkint32 _knownClassNum,
226  double _splits[]
227  );
228 
229  void PrintSingleLineLatexTable (ostream& _outFile,
230  kkint32 _knownClassNum,
231  const KKStr& _name,
232  double _lineTotal,
233  double _splits[]
234  );
235 
236 
237  void PrintSingleLineShort (ostream& _outFile,
238  const KKStr& _name,
239  double _lineTotal,
240  double _splits[]
241  );
242 
243 
244  void PrintPercentLine (ostream& _outFile,
245  KKStr _name,
246  double _totalCount,
247  double _splits[]
248  );
249 
250 
251  void PrintPercentLineLatexTable (ostream& _outFile,
252  kkint32 _rowNum,
253  const KKStr& _name,
254  double _lineTotal,
255  double _splits[]
256  );
257 
258 
259  void PrintPercentLineTabDelimited (ostream& _outFile,
260  const KKStr& _name,
261  double _lineTotal,
262  double _splits[]
263  );
264 
265 
266  void PrintAvgPredProbLineHTML (ostream& o,
267  const KKStr& _name,
268  double _totalAvgPredProbThisLine,
269  double _totalCountThisLine,
270  kkint32 _knownClassNum,
271  double _avgPredProbs[],
272  double _numPredByClass[]
273  );
274 
275 
276  void PrintPercentLineHTML (ostream& _outFile,
277  const KKStr& _name,
278  double _lineTotal,
279  kkint32 _knownClassNum,
280  double _splits[]
281  );
282 
283 
284  void PrintPercentLineShort (ostream& _outFile,
285  const KKStr& _name,
286  double _lineTotal,
287  double _splits[]
288  );
289 
290 
291  void PrintErrorBySizeRowReduced (ostream& outFile,
292  kkint32 classNum
293  );
294 
295  void Read (istream& f,
296  RunLog& log
297  );
298 
299  kkint32 bucketSize;
300  kkint32 classCount;
301  vector<double*> correctByKnownClassByProb;
302  vector<double*> correctByKnownClassBySize;
303  double correctCount;
304  vector<double*> countByKnownClassByProb;
305  vector<double*> countByKnownClassBySize;
306  vector<double> countsByKnownClass;
307 
308  MLClassList classes; /**< We will make our own unique copy of the MLClassList.
309  * This way we know the ordering which represents the numbering
310  * can not change behind our back.
311  */
312 
313  kkint32 numOfBuckets;
314  kkint32 numOfProbBuckets;
315  vector<double*> predictedCountsCM;
316  vector<double*> totPredProbCM;
317  kkint32 probBucketSize;
318  double totalCount;
319  double totalPredProb;
320  vector<double> totalPredProbsByKnownClass; /**< Total Predicted Probabilities by Known Class. */
321  vector<double> totalSizesByKnownClass;
322 
323  double numInvalidClassesPredicted;
324  };
325 
326  typedef ConfusionMatrix2::ConfusionMatrix2Ptr ConfusionMatrix2Ptr;
327 
328 #define _ConfussionMatrix2_Defined_
329 
330 
332  {
333  public:
334  ConfussionMatrix2List (bool _owner = true);
335 
337 
338  ConfusionMatrix2Ptr DeriveAverageConfusionMatrix (RunLog& log) const;
339 
340 
341  }; /* ConfussionMatrix2List */
342 
344 
345 #define _ConfussionMatrix2List_Defined_
346 
347 } /* namespace KKMLL */
348 
349 #endif
ConfusionMatrix2(const ConfusionMatrix2 &cm)
void PrintConfusionMatrixHTML(const char *title, ostream &file)
MLClass * MLClassPtr
Definition: MLClass.h:46
void PrintAccuracyByProbByClassHTML(ostream &o)
void PrintErrorByProb(ostream &outFile)
__int32 kkint32
Definition: KKBaseTypes.h:88
float FMeasure(MLClassPtr positiveClass, RunLog &log) const
VectorFloat AccuracyByClass() const
void PrintConfusionMatrixLatexTable(ostream &outFile)
void PrintConfusionMatrixNarrow(ostream &outFile)
ConfusionMatrix2(const MLClassList &_classes, istream &f, kkint32 _bucketSize, kkint32 _numOfBuckets, kkint32 _numOfProbBuckets, kkint32 _probBucketSize, RunLog &_log)
void PrintConfusionMatrixAvgPredProbHTML(ostream &o)
kkint32 ClassCount() const
ConfusionMatrix2 * ConfusionMatrix2Ptr
void PrintTrueFalsePositivesTabDelimited(ostream &outFile)
double Count(MLClassPtr mlClass)
void PrintErrorBySize(ostream &outFile)
void PrintConfusionMatrixTabDelimited(ostream &outFile)
void PrintConfusionMatrix(ostream &_outFile)
double CountsByKnownClass(kkint32 knownClassIdx) const
void WriteSimpleConfusionMatrix(ostream &f) const
void FactorCounts(double factor)
static ConfusionMatrix2Ptr BuildFromIstreamXML(istream &f, RunLog &log)
void AddIn(const ConfusionMatrix2 &cm, RunLog &log)
ConfusionMatrix2Ptr DeriveAverageConfusionMatrix(RunLog &log) const
std::vector< float > VectorFloat
Definition: KKBaseTypes.h:149
const VectorDouble & CountsByKnownClass() const
ConfusionMatrix2(const MLClassList &_classes)
void PrintErrorByProbByRows(ostream &outFile)
static KKStr Concat(const std::vector< std::string > &values)
Concatenates the list of 'std::string' strings.
Definition: KKStr.cpp:1082
void PrintProbDistributionTotalCount(ostream &outFile)
VectorDouble PredictedCounts() const
void PrintProbDistributionTotalError(ostream &outFile)
void PrintErrorBySizeReduced(ostream &outFile)
ConfussionMatrix2List(bool _owner=true)
void ComputeFundamentalStats(MLClassPtr ic, double &truePositives, double &trueNegatives, double &falsePositives, double &falseNegatives) const
void Increment(MLClassPtr _knownClass, MLClassPtr _predClass, kkint32 _size, double _probability, RunLog &_log)
double Accuracy(MLClassPtr mlClass)
void PrintConfusionMatrixHTML(ostream &outFile)
void PrintProbDistributionTitle(ostream &outFile)
const MLClassList & MLClasses() const
Used for logging messages.
Definition: RunLog.h:49
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
void WriteXML(ostream &f) const
Maintains a list of MLClass instances.
Definition: MLClass.h:233
void PrintErrorBySizeByRows(ostream &outFile)
double PredictedCountsCM(kkint32 knownClassIdx, kkint32 predClassIdx) const
ConfussionMatrix2List * ConfussionMatrix2ListPtr
A confusion matrix object that is used to record the results from a CrossValidation. <seealso cref="CrossValidation"/>
std::vector< double > VectorDouble
Vector of doubles.
Definition: KKBaseTypes.h:148