28 using namespace KKMLL;
40 configFileName
(_configFileName
),
41 configFileNameFromMatrixBiasFile
(),
44 dateTimeFileWritten
(),
53 ifstream sr (biasFileName.Str ());
58 KKStr errorMsg =
"Error opening ConfigFileName[" + configFileName +
"]";
59 errMsgs.push_back (errorMsg);
69 catch (
const exception& e)
72 errMsgs.push_back (e.what ());
77 errMsgs.push_back (
"ClassificationBiasMatrix Exception occurred reading XML File");
85 biasFileName
(cbm.biasFileName
),
87 configFileName
(cbm.configFileName
),
88 configFileNameFromMatrixBiasFile
(cbm.configFileNameFromMatrixBiasFile
),
89 configDateTime
(cbm.configDateTime
),
91 dateTimeFileWritten
(cbm.dateTimeFileWritten
),
93 numClasses (cbm.numClasses),
100 counts =
new Matrix
(*cbm.counts
);
105 if (cbm.probabilities)
106 probabilities =
new Matrix
(*cbm.probabilities
);
118 configFileNameFromMatrixBiasFile
(),
121 dateTimeFileWritten
(),
124 probabilities (NULL),
129 numClasses = classes->QueueSize ();
131 BuildFromConfusionMatrix (cm);
142 configFileNameFromMatrixBiasFile
(),
145 dateTimeFileWritten
(),
148 probabilities (NULL),
153 numClasses = classes->QueueSize ();
163 configFileNameFromMatrixBiasFile
(),
166 dateTimeFileWritten
(),
169 probabilities (NULL),
185 delete probabilities;
186 probabilities = NULL;
211 numClasses = classes->QueueSize ();
212 probabilities =
new Matrix
(numClasses
, numClasses
);
213 counts =
new Matrix
(numClasses
, numClasses
);
214 for (
kkint32 r = 0; r < numClasses; r++)
216 for (
kkint32 c = 0; c < numClasses; c++)
218 (*probabilities)
[r][c] = 0.0;
239 numClasses = classes->QueueSize ();
241 probabilities =
new Matrix
(numClasses
, numClasses
);
242 counts =
new Matrix
(numClasses
, numClasses
);
244 Row& r0 = (*probabilities)
[0
];
245 Row& r1 = (*probabilities)
[1
];
246 Row& r2 = (*probabilities)
[2
];
247 Row& r3 = (*probabilities)
[3
];
248 Row& r4 = (*probabilities)
[4
];
249 Row& r5 = (*probabilities)
[5
];
250 Row& r6 = (*probabilities)
[6
];
252 r0
[0
] = 0.710; r0
[1
] = 0.059; r0
[2
] = 0.010; r0
[3
] = 0.010; r0
[4
] = 0.007; r0
[5
] = 0.031; r0
[6
] = 0.175;
253 r1
[0
] = 0.073; r1
[1
] = 0.873; r1
[2
] = 0.001; r1
[3
] = 0.007; r1
[4
] = 0.008; r1
[5
] = 0.013; r1
[6
] = 0.024;
254 r2
[0
] = 0.078; r2
[1
] = 0.012; r2
[2
] = 0.556; r2
[3
] = 0.035; r2
[4
] = 0.066; r2
[5
] = 0.179; r2
[6
] = 0.074;
255 r3
[0
] = 0.030; r3
[1
] = 0.028; r3
[2
] = 0.054; r3
[3
] = 0.560; r3
[4
] = 0.019; r3
[5
] = 0.177; r3
[6
] = 0.132;
256 r4
[0
] = 0.205; r4
[1
] = 0.054; r4
[2
] = 0.107; r4
[3
] = 0.046; r4
[4
] = 0.366; r4
[5
] = 0.157; r4
[6
] = 0.065;
257 r5
[0
] = 0.158; r5
[1
] = 0.025; r5
[2
] = 0.076; r5
[3
] = 0.064; r5
[4
] = 0.175; r5
[5
] = 0.449; r5
[6
] = 0.054;
258 r6
[0
] = 0.289; r6
[1
] = 0.096; r6
[2
] = 0.033; r6
[3
] = 0.065; r6
[4
] = 0.018; r6
[5
] = 0.072; r6
[6
] = 0.427;
267 classCounts[0] = 2891;
268 classCounts[1] = 1965;
269 classCounts[2] = 495;
270 classCounts[3] = 1399;
271 classCounts[4] = 676;
272 classCounts[5] = 1191;
273 classCounts[6] = 1752;
278 PerformAdjustmnts (classCounts, adjCounts, stdErrors);
289 for (x = 0; x < 7; x++)
290 sw <<
"\t" << StrFormatInt (x,
"ZZ0");
293 sw <<
"ClassifiedCounts";
294 for (x = 0; x < 7; x++)
295 sw <<
"\t" << StrFormatDouble (classCounts[x],
"ZZ,ZZ0.0");
299 for (x = 0; x < 7; x++)
300 sw <<
"\t" << StrFormatDouble (adjCounts[x],
"ZZZ,ZZ0.0");
304 for (x = 0; x < 7; x++)
305 sw <<
"\t" << StrFormatDouble (stdErrors[x],
"ZZ,ZZ0.0");
323 sr.getline (buff,
sizeof (buff));
328 if (l.CompareIgnoreCase (
"</ClassificationBiasMatrix>") == 0)
331 KKStr lineName = l.ExtractToken2 (
"\t");
332 if (!lineName.Empty ())
334 KKStr fieldValue = l.ExtractToken2 (
"\t");
336 if (lineName.CompareIgnoreCase (
"Classes") == 0)
338 delete fileClasses; fileClasses = NULL;
339 fileClasses = MLClassList::BuildListFromDelimtedStr (fieldValue,
',');
341 classes =
new MLClassList (*fileClasses);
344 else if (lineName.CompareIgnoreCase (
"ConfigDateTime") == 0)
346 configDateTime = fieldValue;
349 else if (lineName.CompareIgnoreCase (
"ConfigFileName") == 0)
351 configFileNameFromMatrixBiasFile = fieldValue;
354 else if (lineName.CompareIgnoreCase (
"ConfigFileDateTime") == 0)
356 configDateTime = fieldValue;
359 else if (lineName.CompareIgnoreCase (
"DateTime") == 0)
361 dateTimeFileWritten = fieldValue;
364 else if (lineName.CompareIgnoreCase (
"DateTimeFileWritten") == 0)
366 dateTimeFileWritten = fieldValue;
369 else if (lineName.CompareIgnoreCase (
"FileName") == 0)
373 else if (lineName.CompareIgnoreCase (
"NumClasses") == 0)
375 numClasses = fieldValue.ToInt ();
378 else if (lineName.CompareIgnoreCase (
"<SimpleConfusionMatrix>") == 0)
380 ReadSimpleConfusionMatrix (sr, fileClasses);
385 sr.getline (buff,
sizeof (buff));
400 if ((classes == NULL) || (fileClasses == NULL))
402 KKStr errMsg =
"ReadSimpleConfusionMatrix ***ERROR*** The 'Classes' line was never provided.";
403 runLog.Level (-1) << errMsg << endl;
414 sr.getline (buff,
sizeof (buff));
419 if (l.CompareIgnoreCase (
"</SimpleConfusionMatrix>") == 0)
422 KKStr lineName = l.ExtractToken2 (
"\t");
424 if (lineName.CompareIgnoreCase (
"DataRow") == 0)
426 if (fileClasses == NULL)
428 KKStr errMsg =
"ReadSimpleConfusionMatrix ***ERROR*** 'Classes' was not provided before 'DataRow'.";
429 runLog.Level (-1) << errMsg << endl;
431 throw KKException (errMsg);
434 KKStr className = l.ExtractToken2 (
"\t");
435 KKStr data = l.ExtractToken2 (
"\t");
437 MLClassPtr pc = MLClass::CreateNewMLClass (className);
438 kkint32 classesIdx = classes->PtrToIdx (pc);
439 kkint32 fileClassesIdx = fileClasses->PtrToIdx (pc);
443 KKStr errMsg =
"ReadSimpleConfusionMatrix ***ERROR*** DataRow specifies class[" + className +
"] which is not defined by caller";
444 runLog.Level (-1) << errMsg << endl;
446 throw KKException (errMsg);
449 if (fileClassesIdx < 0)
451 KKStr errMsg =
"ReadSimpleConfusionMatrix ***ERROR*** DataRow specifies class[" + className +
"] was not defined in 'Classes' line.";
452 runLog.Level (-1) << errMsg << endl;
454 throw KKException (errMsg);
457 kkint32 classesRowIdx = classesIdx;
459 VectorKKStr dataFields = data.Split (
',');
460 if (dataFields.size () != (kkuint32)numClasses)
462 KKStr errMsg =
"ReadSimpleConfusionMatrix ***ERROR*** DataRow Class[" + className +
"] number[" + StrFormatInt ((kkint32)dataFields.size (),
"ZZZ0") +
"] of values provided does not match number of Classes.";
463 runLog.Level (-1) << errMsg << endl;
465 throw KKException (errMsg);
468 for (kkint32 c = 0; c < numClasses; c++)
470 pc = fileClasses->IdxToPtr (c);
471 classesColIdx = classes->PtrToIdx (pc);
473 VectorKKStr parts = dataFields[c].Split (
':');
474 if (parts.size () > 1)
476 (*counts) [classesRowIdx][classesColIdx] = parts[0].ToDouble ();
477 (*probabilities)[classesRowIdx][classesColIdx] = parts[1].ToDouble ();
489 o <<
"<ClassificationBiasMatrix>" << std::endl;
491 o <<
"BiasFileName" <<
"\t" << biasFileName << std::endl;
493 if ((!classes) || (!counts) || (!probabilities))
495 runLog.Level (-1) <<
"ClassificationBiasMatrix::WriteXML ***ERROR*** Not all data is defined." << endl;
499 if ((numClasses != classes->QueueSize ()) ||
500 (numClasses != counts->NumOfRows ()) ||
501 (numClasses != probabilities->NumOfRows ())
504 runLog.Level (-1) <<
"ClassificationBiasMatrix::WriteXML ***ERROR*** Disagreement in variable dimensions." << endl;
508 o <<
"NumClasses" <<
"\t" << numClasses << std::endl;
509 o <<
"Classes" <<
"\t" << classes->ToCommaDelimitedStr () << std::endl;
510 o <<
"ConfigFileName" <<
"\t" << configFileName << std::endl;
511 o <<
"ConfigDateTime" <<
"\t" << configDateTime << std::endl;
512 o <<
"DateTimeFileWritten" <<
"\t" << dateTimeFileWritten << std::endl;
514 o <<
"<SimpleConfusionMatrix>" << std::endl;
515 for (
kkint32 rowIdx = 0; rowIdx < numClasses; rowIdx++)
517 o <<
"DataRow" <<
"\t" 518 << (*classes)[rowIdx].Name () <<
"\t";
520 for (
kkint32 colIdx = 0; colIdx < numClasses; colIdx++)
524 o << ((*counts)[rowIdx][colIdx]) <<
":" << StrFormatDouble ((*probabilities)[rowIdx][colIdx],
"ZZ0.0000");
529 o <<
"</SimpleConfusionMatrix>" << std::endl;
531 o <<
"</ClassificationBiasMatrix>" << std::endl;
545 for (classesRowIdx = 0; classesRowIdx < numClasses; classesRowIdx++)
549 for (classesColIdx = 0; classesColIdx < numClasses; classesColIdx++)
553 if (knownCount != 0.0)
554 prob = predCount / knownCount;
577 if (classifiedCounts.size () != (kkuint32)numClasses)
579 KKStr errMsg =
"ClassificationBiasMatrix::PerformAdjustmnts ***ERROR*** Disagreement in length of classifiedCounts[" +
580 StrFormatInt ((kkint32)classifiedCounts.size (),
"ZZZ0") +
581 "] and Prev Defined ClassList[" + StrFormatInt (numClasses,
"ZZZ0") +
"].";
582 runLog.Level (-1) << errMsg << endl;
584 throw KKException (errMsg);
593 for (x = 0; x < numClasses; x++)
595 if ((*probabilities)
[x][x] == 0.0)
601 double totalAmtStolen = 0.0;
602 double percentToSteal = 0.01;
603 for (i = 0; i < numClasses; i++)
605 if ((*probabilities)
[x][i] != 0.0)
607 double amtToSteal = (*probabilities)
[x][i] * percentToSteal;
608 (*probabilities)
[x][i] = (*probabilities)
[x][i] - amtToSteal;
609 totalAmtStolen += amtToSteal;
613 (*probabilities)
[x][x] = totalAmtStolen;
618 Matrix m
(numClasses
, 1
);
619 for (x = 0; x < numClasses; x++)
620 m[x][0] = classifiedCounts[x];
626 Matrix varM
(numClasses
, numClasses
);
627 for (j = 0; j < numClasses; j++)
630 for (i = 0; i < numClasses; i++)
632 double p = (*probabilities)
[i][j];
633 varM_j += n
[i][0
] * p * (1.0 - p);
638 for (j = 0; j < numClasses; j++)
640 for (k = 0; k < numClasses; k++)
644 double covM_jk = 0.0;
645 for (i = 0; i < numClasses; i++)
657 for (x = 0; x < numClasses; x++)
659 adjCounts.push_back (n[x][0]);
660 stdErrors.push_back (sqrt (varN[x][x]));
672 KKStr errMsg =
"ClassificationBiasMatrix::PrintBiasMatrix ***ERROR*** 'Classes' not defined; this indicates that this object was not properly initialized.";
673 runLog.Level (-1) << errMsg << endl;
678 sw <<
"BiasMatrix File Name [" << biasFileName <<
"]" << endl
679 <<
"Date Bias Matrix was Created [" << dateTimeFileWritten <<
"]" << endl
680 <<
"Configuration File Name [" << configFileName <<
"]" << endl
681 <<
"ConfigFile Used for Bias Matrix [" << configFileNameFromMatrixBiasFile <<
"]" << endl
689 sw <<
"" <<
"\t" <<
"" <<
"\t" << tl1 << endl;
690 sw <<
"Class" <<
"\t" <<
"" <<
"\t" << tl2 << endl;
691 sw <<
"Name" <<
"\t" <<
"Count" <<
"\t" << tl3 << endl;
702 for (row = 0; row < numClasses; row++)
705 for (col = 0; col < numClasses; col++)
708 colTotals[col] += (*counts)[row][col];
710 rowTotals[row] = rowTotal;
712 sw << (*classes)[row].Name () <<
"\t" << StrFormatDouble (rowTotal,
"zzz,zz0.00");
713 for (col = 0; col < numClasses; col++)
714 sw <<
"\t" << StrFormatDouble ((*counts)[row][col],
"###,##0.00");
718 sw <<
"Total" <<
"\t" << StrFormatDouble (total,
"###,##0.00");
719 for (col = 0; col < numClasses; col++)
720 sw <<
"\t" << StrFormatDouble (colTotals[col],
"ZZZ,ZZ0.00");
725 for (row = 0; row < numClasses; row++)
727 sw << (*classes)[row].Name ();
728 double rowPercent = 100.0 * rowTotals[row] / total;
729 sw <<
"\t" << StrFormatDouble (rowPercent,
"ZZ0.0000") +
"%";
731 for (col = 0; col < numClasses; col++)
732 sw <<
"\t" << StrFormatDouble (100.0 * (*probabilities)[row][col],
"ZZZ,ZZ0.00") <<
"%";
745 if (classifiedCounts.size () != (kkuint32)numClasses)
747 KKStr errMsg =
"ClassificationBiasMatrix::PrintAdjustedResults ***ERROR*** Number of entries in 'classifiedCounts' not equal the number of classes";
748 cerr <<
"ClassificationBiasMatrix::PrintAdjustedResults ***ERROR*** " << errMsg << endl;
750 throw KKException (errMsg);
762 sw <<
"" <<
"\t" <<
"\t" << tl1 << endl;
763 sw <<
"" <<
"\t" <<
"\t" << tl2 << endl;
764 sw <<
"Description" <<
"\t" <<
"\t" << tl3 << endl;
768 sw <<
"Classified Results" <<
"\t";
769 for (col = 0; col < numClasses; col++)
770 sw <<
"\t" << StrFormatDouble (classifiedCounts[col],
"Z,ZZZ,ZZ0.0");
773 sw <<
"Adjusted Results" <<
"\t";
774 for (col = 0; col < numClasses; col++)
775 sw <<
"\t" << StrFormatDouble (adjustedReults[col],
"Z,ZZZ,ZZ0.0");
778 sw <<
"Standard Errors" <<
"\t";
779 for (col = 0; col < numClasses; col++)
780 sw <<
"\t" << StrFormatDouble (stdErrors[col],
"Z,ZZZ,ZZ0.0");
785 KKStr errMsg =
"ClassificationBiasMatrix::PrintAdjustedResults ***ERROR*** KKException";
786 runLog.Level (-1) << errMsg << endl << e.ToString () << endl;
789 catch (std::exception& e2)
791 KKStr errMsg =
"ClassificationBiasMatrix::PrintAdjustedResults ***ERROR*** std::exception";
792 runLog.Level (-1) << errMsg << endl << e2.what() << endl;
797 KKStr errMsg =
"ClassificationBiasMatrix::PrintAdjustedResults ***ERROR*** Exception(...)";
798 runLog.Level (-1) << endl << errMsg << endl;
Row & operator[](kkint32 rowIDX) const
KKStr(kkint32 size)
Creates a KKStr object that pre-allocates space for 'size' characters.
ClassificationBiasMatrix(const ClassificationBiasMatrix &cbm)
void PerformAdjustmnts(const VectorDouble &classifiedCounts, VectorDouble &adjCounts, VectorDouble &stdErrors)
KKStr osRemoveExtension(const KKStr &_fullFileName)
ClassificationBiasMatrix(const KKStr &_configFileName, MLClassList &_classes, RunLog &_runLog)
Construct a ClassificationBiasMatrix instance from the parameter file for '_configFileName'.
void PrintAdjustedResults(ostream &sw, const VectorDouble &classifiedCounts)
KKException(const KKStr &_exceptionStr, const KKException &_innerException)
ClassificationBiasMatrix(const ConfusionMatrix2 &cm, RunLog &_runLog)
Construct a ClassificationBiasMatrix instance from a ConfusionMatrix object.
ClassificationBiasMatrix(MLClassList &classes, RunLog &_runLog)
Matrix(kkint32 _numOfRows, kkint32 _numOfCols)
Represents a "Class" in the Machine Learning Sense.
Assists in adjusting a classifier's output for the bias of that classifier.
Matrix(const Matrix &_matrix)
KKStr operator+(const char *right) const
KKException(const KKStr &_exceptionStr, const std::exception &_innerException)
double & operator[](kkint32 idx)
Matrix operator*(const Matrix &right)
KKStr & operator=(KKStr &&src)
void TestPaperResults(ostream &sw)
double CountsByKnownClass(kkint32 knownClassIdx) const
static ClassificationBiasMatrixPtr BuildFromIstreamXML(istream &f, MLClassList &classes, RunLog &log)
Will construct a ClassificationBiasMatrix instance from an input stream.
KKStr(const KKStr &str)
Copy Constructor.
virtual void PushOnBack(MLClassPtr mlClass)
void ExtractThreeTitleLines(KKStr &titleLine1, KKStr &titleLine2, KKStr &titleLine3) const
Using the class names create three title lines where we split names by "_" characters between the thr...
DateTime(const DateTime &dateTime)
void PrintBiasMatrix(ostream &sw)
~ClassificationBiasMatrix()
static MLClassPtr CreateNewMLClass(const KKStr &_name, kkint32 _classId=-1)
Static method used to create a new instance of a MLClass object.
MLClassList(const MLClassList &_mlClasses)
Copy constructor; will copy list but not own the contents.
const MLClassList & MLClasses() const
void EncodeProblem(const struct svm_paramater &param, struct svm_problem &prob_in, struct svm_problem &prob_out)
MLClassList * MLClassListPtr
KKException(const KKStr &_exceptionStr)
Maintains a list of MLClass instances.
double PredictedCountsCM(kkint32 knownClassIdx, kkint32 predClassIdx) const
ClassificationBiasMatrix * ClassificationBiasMatrixPtr
A confusion matrix object that is used to record the results from a CrossValidation. <seealso cref="CrossValidation"/>
std::vector< double > VectorDouble
Vector of doubles.
ClassificationBiasMatrix(RunLog &_runLog)
Will construct an instance as defined in the Paper.
void WriteXML(ostream &o)