Skip to content

Commit

Permalink
Add MCC to r.kappa (#2680)
Browse files Browse the repository at this point in the history
* Report Matthews (Mattheus) Correlation Coefficient as one of measures
* Use the Neumaier's Kahan-Babuska algorithm to minimize errors during summation
  • Loading branch information
marisn authored Feb 14, 2023
1 parent f08fdcd commit 7aab4cc
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 15 deletions.
58 changes: 50 additions & 8 deletions raster/r.kappa/calc_metrics.c
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#include <stdlib.h>
#include <math.h>
#include <grass/gis.h>
#include <grass/glocale.h>
#include "kappa.h"
#include "local_proto.h"

static int longcomp(const void *aa, const void *bb);
static int collapse(long *l, int n);
static void update_sum(double *sum, double *c, double value);

void calc_metrics(void)
{
Expand All @@ -15,9 +17,12 @@ void calc_metrics(void)
int ncat1, ncat2;
int cndx;
double *pi, *pj, *pii;
double p0 = 0.0, pC = 0.0;
double inter1 = 0.0, inter2 = 0.0;
double p0 = 0.0, pC = 0.0, p0c = 0.0, pCc = 0.0;
double inter1 = 0.0, inter2 = 0.0, inter1c = 0.0, inter2c = 0.0;
int a_i = 0, b_i = 0;
double spktk = 0.0, spktkc = 0.0;
double spk2 = 0.0, spk2c = 0.0, stk2 = 0.0, stk2c = 0.0;
double unrooted;

metrics = (METRICS *)G_malloc(sizeof(METRICS));
if (nstats == 0) {
Expand All @@ -28,6 +33,7 @@ void calc_metrics(void)
metrics->overall_accuracy = 0.0;
metrics->kappa = na_value;
metrics->kappa_variance = na_value;
metrics->mcc = na_value;
return;
}

Expand Down Expand Up @@ -142,10 +148,12 @@ void calc_metrics(void)
else
metrics->producers_accuracy[i] = 100 * (pii[i] / pj[i]);
/* theta 1 */
p0 += pii[i];
update_sum(&p0, &p0c, pii[i]);
/* theta 2 */
pC += pi[i] * pj[i];
update_sum(&pC, &pCc, pi[i] * pj[i]);
}
p0 += p0c;
pC += pCc;
if (pC != 1)
metrics->kappa = (p0 - pC) / (1 - pC);
else
Expand All @@ -158,7 +166,8 @@ void calc_metrics(void)
else
metrics->conditional_kappa[i] =
(pii[i] - pi[i] * pj[i]) / (pi[i] - pi[i] * pj[i]);
inter1 += pii[i] * pow(((1 - pC) - (1 - p0) * (pi[i] + pj[i])), 2.);
update_sum(&inter1, &inter1c,
pii[i] * pow(((1 - pC) - (1 - p0) * (pi[i] + pj[i])), 2.));
}

/* kappa variance */
Expand All @@ -170,17 +179,37 @@ void calc_metrics(void)
if (Gstats[l].cats[1] == rlst[i])
b_i = i;
}
inter2 += Gstats[l].count * pow((pi[a_i] + pj[b_i]), 2.) /
metrics->observations;
update_sum(&inter2, &inter2c,
Gstats[l].count * pow((pi[a_i] + pj[b_i]), 2.) /
metrics->observations);
}
}
inter1 += inter1c;
inter2 += inter2c;
metrics->kappa_variance = (inter1 + pow((1 - p0), 2.) * inter2 -
pow((p0 * pC - 2 * pC + p0), 2.)) /
pow((1 - pC), 4.) / metrics->observations;

G_free(pi);
G_free(pj);
G_free(pii);

/* MCC */
for (i = 0; i < ncat; i++) {
update_sum(&spktk, &spktkc, metrics->row_sum[i] * metrics->col_sum[i]);
update_sum(&spk2, &spk2c, metrics->row_sum[i] * metrics->row_sum[i]);
update_sum(&stk2, &stk2c, metrics->col_sum[i] * metrics->col_sum[i]);
}
spktk += spktkc;
spk2 += spk2c;
stk2 += stk2c;
unrooted = (metrics->observations * metrics->observations - spk2) *
(metrics->observations * metrics->observations - stk2);
if (unrooted <= 0.0) {
metrics->mcc = na_value;
return;
}
metrics->mcc =
(metrics->correct * metrics->observations - spktk) / sqrt(unrooted);
};

/* remove repeated values */
Expand Down Expand Up @@ -213,3 +242,16 @@ static int longcomp(const void *aa, const void *bb)

return (*a > *b);
}

/* Implements improved Kahan–Babuska algorithm by Neumaier, A. 1974 */
void update_sum(double *sum, double *c, double value)
{
double tmp = *sum + value;

if (fabs(*sum) >= fabs(value))
*c += (*sum - tmp) + value;
else
*c += (value - tmp) + *sum;

*sum = tmp;
}
1 change: 1 addition & 0 deletions raster/r.kappa/kappa.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ struct _metrics_ {
double kappa;
double kappa_variance;
double *conditional_kappa;
double mcc;
};

extern struct Cell_head window;
Expand Down
6 changes: 5 additions & 1 deletion raster/r.kappa/print_json.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,11 @@ void print_json()
else
fprintf(fd, "%.5f", metrics->conditional_kappa[i]);
}
fprintf(fd, "]\n");
fprintf(fd, "],\n");
if (metrics->mcc == na_value)
fprintf(fd, " \"mcc\": null\n");
else
fprintf(fd, " \"mcc\": %.5f\n", metrics->mcc);

fprintf(fd, "}\n");
if (output != NULL)
Expand Down
9 changes: 6 additions & 3 deletions raster/r.kappa/print_kappa.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,19 @@ void print_kappa(void)
fprintf(fd, "%f\n", metrics->conditional_kappa[i]);
}
fprintf(fd, "\n");
fprintf(fd, "Kappa\t\tKappa Variance\n");
fprintf(fd, "Kappa\t\tKappa Variance\tMCC\n");
if (metrics->kappa == na_value)
fprintf(fd, "NA");
else
fprintf(fd, "%f", metrics->kappa);
if (metrics->kappa_variance == na_value)
fprintf(fd, "\tNA");
else
fprintf(fd, "\t%f", metrics->kappa_variance);
if (metrics->mcc == na_value)
fprintf(fd, "\tNA\n");
else
fprintf(fd, "\t%f\n", metrics->kappa_variance);

fprintf(fd, "\t%f\n", metrics->mcc);
fprintf(fd, "\nObs Correct\tTotal Obs\t%% Observed Correct\n");
fprintf(fd, "%ld\t\t%ld\t\t%f\n", metrics->correct, metrics->observations,
metrics->overall_accuracy);
Expand Down
12 changes: 9 additions & 3 deletions raster/r.kappa/r.kappa.html
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ <h2>OUTPUT VARIABLES</h2>
<dd>Variance of kappa index. Correctness needs to be validated.</dd>
<dt>Conditional kappa</dt>
<dd>Conditional user's kappa for specified class.</dd>
<dt>MCC</dt>
<dd>Matthews (Mattheus) Correlation Coefficient is implemented
according to Grandini, M., Bagli, E., Visani, G. 2020.
"Metrics for multi-class classification: An overview."</dd>
</dl>

<h2>NOTES</h2>
Expand Down Expand Up @@ -106,9 +110,11 @@ <h2>NOTES</h2>
</ul>

<p>
Some of reported values (Choen's kappa, overall accuracy) can be
Some of reported values (overall accuracy, Choen's kappa, MCC) can be
misleading if cell count among classes is not balanced. See e.g.
Powers, D.M.W., 2012. The Problem with Kappa.
Powers, D.M.W., 2012. "The Problem with Kappa"; Zhu, Q., 2020.
"On the performance of Matthews correlation coefficient (MCC) for
imbalanced dataset".

<H2>EXAMPLE</H2>

Expand Down Expand Up @@ -143,4 +149,4 @@ <h2>SEE ALSO</h2>
<h2>AUTHORS</h2>

Tao Wen, University of Illinois at Urbana-Champaign, Illinois<br>
Maris Nartiss, University of Latvia (JSON output)
Maris Nartiss, University of Latvia (JSON output, MCC)
8 changes: 8 additions & 0 deletions raster/r.kappa/testsuite/test_r_kappa.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ def match(self, pat, ref):
# 4 0 14 4 0 - 0.000 0.000 0.222 0.000 0.000
# 5 1 17 0 0 1.000 1.000 0.056 0.056 0.056 1.000
# 6 2 13 2 1 0.667 0.500 0.111 0.222 0.167 0.400
# Correct MCC value was calculated manually and validated with
# mcc function of R package mltools.

def test_standard_output(self):
out = read_command(
Expand Down Expand Up @@ -181,6 +183,7 @@ def test_standard_output(self):
# Kappa value
vals = rows[28].split()
self.assertTrue(self.match(vals[0], 0.52091))
self.assertTrue(self.match(vals[2], 0.55930))

# Overall characteristics
vals = rows[31].split()
Expand Down Expand Up @@ -283,6 +286,7 @@ def test_standard_output(self):
# Kappa value
vals = rows[28].split()
self.assertTrue(self.match(vals[0], 0.0))
self.assertTrue(self.match(vals[2], "NA"))

# Overall characteristics
vals = rows[31].split()
Expand Down Expand Up @@ -341,6 +345,7 @@ def setUpClass(cls):
"producers_accuracy": [57.1429, 0.0, 100.0, None, 100.0, 66.66666],
"users_accuracy": [100.0, None, 80.0, 0.0, 100.0, 50.0],
"conditional_kappa": [1.0, None, 0.742857, 0.0, 1.0, 0.400],
"mcc": 0.55930,
}
)

Expand Down Expand Up @@ -380,6 +385,7 @@ def setUpClass(cls):
"producers_accuracy": [0.0, 0.0, 0.0, 0.0, 0.0, None],
"users_accuracy": [None, None, None, None, None, 0.0],
"conditional_kappa": [None, None, None, None, None, 0.0],
"mcc": None,
}
)

Expand Down Expand Up @@ -410,6 +416,7 @@ def setUpClass(cls):
"producers_accuracy": [],
"users_accuracy": [],
"conditional_kappa": [],
"mcc": None,
}
)

Expand Down Expand Up @@ -440,6 +447,7 @@ def setUpClass(cls):
"producers_accuracy": [],
"users_accuracy": [],
"conditional_kappa": [],
"mcc": None,
}
)

Expand Down

0 comments on commit 7aab4cc

Please sign in to comment.