From ffb0ce6dfb4e6cb909180e74fd32f6593d213581 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C4=81ris=20Narti=C5=A1s?= Date: Thu, 15 Dec 2022 13:34:01 +0200 Subject: [PATCH] Add JSON output option to r.kappa (#2666) The old output mode is kept as a default to not break any of existing scripts. --- raster/r.kappa/calc_kappa.c | 124 ---------- raster/r.kappa/calc_metrics.c | 215 ++++++++++++++++++ raster/r.kappa/kappa.h | 20 ++ raster/r.kappa/local_proto.h | 27 ++- raster/r.kappa/main.c | 54 +++-- raster/r.kappa/mask.c | 1 - raster/r.kappa/print2csv_mat.c | 85 +++++++ raster/r.kappa/{prt_hdr.c => print_header.c} | 11 +- raster/r.kappa/print_json.c | 125 ++++++++++ raster/r.kappa/print_kappa.c | 55 +++++ raster/r.kappa/{prt_label.c => print_label.c} | 9 +- raster/r.kappa/{prt_mat.c => print_mat.c} | 125 ++-------- raster/r.kappa/prt2csv_mat.c | 178 --------------- raster/r.kappa/r.kappa.html | 60 ++++- raster/r.kappa/stats.c | 7 +- raster/r.kappa/sum.c | 49 ---- raster/r.kappa/testsuite/test_r_kappa.py | 207 ++++++++++++++++- 17 files changed, 830 insertions(+), 522 deletions(-) delete mode 100644 raster/r.kappa/calc_kappa.c create mode 100644 raster/r.kappa/calc_metrics.c create mode 100644 raster/r.kappa/print2csv_mat.c rename raster/r.kappa/{prt_hdr.c => print_header.c} (85%) create mode 100644 raster/r.kappa/print_json.c create mode 100644 raster/r.kappa/print_kappa.c rename raster/r.kappa/{prt_label.c => print_label.c} (81%) rename raster/r.kappa/{prt_mat.c => print_mat.c} (51%) delete mode 100644 raster/r.kappa/prt2csv_mat.c delete mode 100644 raster/r.kappa/sum.c diff --git a/raster/r.kappa/calc_kappa.c b/raster/r.kappa/calc_kappa.c deleted file mode 100644 index 0d252110da7..00000000000 --- a/raster/r.kappa/calc_kappa.c +++ /dev/null @@ -1,124 +0,0 @@ -#include -#include -#include -#include "kappa.h" -#include "local_proto.h" - - -void calc_kappa(void) -{ - int i, j; - int a_i, b_i; - int s, l; - size_t ns; - double *pi, *pj, *pii, p0, pC; - double kp, vkp, *kpp; 
- double obs, inter1, inter2; - long total; - FILE *fd; - - /* initialize */ - s = 0; - l = -1; - ns = nstats; - obs = 0; - inter1 = inter2 = 0; - p0 = pC = 0; - - if (output == NULL) - fd = stdout; - else if ((fd = fopen(output, "a")) == NULL) { - G_fatal_error(_("Cannot open file <%s> to write kappa and relevant parameters"), - output); - return; - } - - total = count_sum(&s, l); - - /* calculate the parameters of the kappa-calculation */ - pi = (double *)G_calloc(ncat, sizeof(double)); - pj = (double *)G_calloc(ncat, sizeof(double)); - pii = (double *)G_calloc(ncat, sizeof(double)); - kpp = (double *)G_calloc(ncat, sizeof(double)); - - for (i = 0; i < ncat; i++) { - for (j = 0; j < ns; j++) { - if (Gstats[j].cats[0] == rlst[i]) - pi[i] += Gstats[j].count; - - if (Gstats[j].cats[1] == rlst[i]) - pj[i] += Gstats[j].count; - - if ((Gstats[j].cats[0] == Gstats[j].cats[1]) && - (Gstats[j].cats[0] == rlst[i])) - pii[i] += Gstats[j].count; - } - obs += pii[i]; - } - - for (i = 0; i < ncat; i++) { - pi[i] = pi[i] / total; - pj[i] = pj[i] / total; - pii[i] = pii[i] / total; - p0 += pii[i]; - pC += pi[i] * pj[i]; - } - - for (i = 0; i < ncat; i++) { - if (pi[i] == 0) - kpp[i] = -999; - else - kpp[i] = (pii[i] - pi[i] * pj[i]) / (pi[i] - pi[i] * pj[i]); - } - - /* print out the comission and omission accuracy, and conditional kappa */ - fprintf(fd, "\nCats\t%% Comission\t%% Omission\tEstimated Kappa\n"); - for (i = 0; i < ncat; i++) { - fprintf(fd, "%ld\t", rlst[i]); - if (pi[i] == 0) - fprintf(fd, "NA\t\t"); - else - fprintf(fd, "%f\t", 100 * (1 - pii[i] / pi[i])); - if (pj[i] == 0) - fprintf(fd, "NA\t\t"); - else - fprintf(fd, "%f\t", 100 * (1 - pii[i] / pj[i])); - if (kpp[i] == -999) - fprintf(fd, "NA\n"); - else - fprintf(fd, "%f\n", kpp[i]); - } - fprintf(fd, "\n"); - - for (i = 0; i < ncat; i++) { - inter1 += pii[i] * pow(((1 - pC) - (1 - p0) * (pi[i] + pj[i])), 2.); - } - - for (j = 0; j < ns; j++) { - if (Gstats[j].cats[0] != Gstats[j].cats[1]) { - for (i = 0; i < 
ncat; i++) { - if (Gstats[j].cats[0] == rlst[i]) - a_i = i; - if (Gstats[j].cats[1] == rlst[i]) - b_i = i; - } - inter2 += Gstats[j].count * pow((pi[a_i] + pj[b_i]), 2.) / total; - } - } - kp = (p0 - pC) / (1 - pC); - vkp = (inter1 + pow((1 - p0), 2.) * inter2 - - pow((p0 * pC - 2 * pC + p0), 2.)) / pow((1 - pC), 4.) / total; - fprintf(fd, "Kappa\t\tKappa Variance\n"); - fprintf(fd, "%f\t%f\n", kp, vkp); - - fprintf(fd, "\nObs Correct\tTotal Obs\t%% Observed Correct\n"); - fprintf(fd, "%ld\t\t%ld\t\t%f\n", (long)obs, total, (100. * obs / total)); - if (output != NULL) - fclose(fd); - G_free(pi); - G_free(pj); - G_free(pii); - G_free(kpp); - /* print labels for categories of maps */ - prt_label(); -} diff --git a/raster/r.kappa/calc_metrics.c b/raster/r.kappa/calc_metrics.c new file mode 100644 index 00000000000..7b772a06629 --- /dev/null +++ b/raster/r.kappa/calc_metrics.c @@ -0,0 +1,215 @@ +#include +#include +#include +#include "kappa.h" +#include "local_proto.h" + +static int longcomp(const void *aa, const void *bb); +static int collapse(long *l, int n); + +void calc_metrics(void) +{ + int i, j, k; + size_t l; + long *clst; + int ncat1, ncat2; + int cndx; + double *pi, *pj, *pii; + double p0 = 0.0, pC = 0.0; + double inter1 = 0.0, inter2 = 0.0; + int a_i = 0, b_i = 0; + + metrics = (METRICS *)G_malloc(sizeof(METRICS)); + if (nstats == 0) { + G_warning(_("Both maps have nothing in common. 
Check the computational " + "region.")); + metrics->observations = 0; + metrics->correct = 0; + metrics->overall_accuracy = 0.0; + metrics->kappa = na_value; + metrics->kappa_variance = na_value; + return; + } + + /* get the cat lists */ + rlst = (long *)G_calloc(nstats * 2, sizeof(long)); + clst = (long *)G_calloc(nstats, sizeof(long)); + for (l = 0; l < nstats; l++) { + rlst[l] = Gstats[l].cats[0]; + clst[l] = Gstats[l].cats[1]; + } + + /* sort the cat lists */ + qsort(rlst, nstats, sizeof(long), longcomp); + qsort(clst, nstats, sizeof(long), longcomp); + + /* remove repeated cats */ + ncat1 = collapse(rlst, nstats); + ncat2 = collapse(clst, nstats); + + /* copy clst to the end of rlst, remove repeated cats, and free unused + * memory */ + for (i = 0; i < ncat2; i++) + rlst[ncat1 + i] = clst[i]; + qsort(rlst, ncat1 + ncat2, sizeof(long), longcomp); + ncat = collapse(rlst, ncat1 + ncat2); + rlst = (long *)G_realloc(rlst, ncat * sizeof(long)); + G_free(clst); + + /* fill matrix with observed counts */ + metrics->matrix = (long *)G_malloc((size_t)ncat * ncat * sizeof(long)); + for (i = 0; i < ncat * ncat; i++) + metrics->matrix[i] = 0; + for (l = 0; l < nstats; l++) { + for (j = 0; j < ncat; j++) + if (rlst[j] == Gstats[l].cats[0]) + break; + for (k = 0; k < ncat; k++) + if (rlst[k] == Gstats[l].cats[1]) + break; + /* matrix: reference in columns, classification in rows */ + metrics->matrix[j * ncat + k] = Gstats[l].count; + } + + /* Calculate marginals */ + metrics->observations = 0; + metrics->correct = 0; + metrics->col_sum = (long *)G_malloc(ncat * sizeof(long)); + metrics->row_sum = (long *)G_malloc(ncat * sizeof(long)); + for (cndx = 0; cndx < ncat; cndx++) { + long t_col = 0; + long t_row = 0; + long x = cndx; + + for (k = 0; k < ncat; k++) { + t_col += metrics->matrix[x]; + x += ncat; + t_row += metrics->matrix[cndx * ncat + k]; + } + metrics->observations += t_row; + metrics->col_sum[cndx] = t_col; + metrics->row_sum[cndx] = t_row; + } + if 
(metrics->observations == 0) { + metrics->overall_accuracy = 0.0; + metrics->kappa = na_value; + metrics->kappa_variance = na_value; + return; + } + + /* Calculate kappa values */ + /* Row sum */ + pi = (double *)G_calloc(ncat, sizeof(double)); + /* Col sum */ + pj = (double *)G_calloc(ncat, sizeof(double)); + /* Correct */ + pii = (double *)G_calloc(ncat, sizeof(double)); + metrics->conditional_kappa = (double *)G_calloc(ncat, sizeof(double)); + metrics->users_accuracy = (double *)G_calloc(ncat, sizeof(double)); + metrics->producers_accuracy = (double *)G_calloc(ncat, sizeof(double)); + + for (i = 0; i < ncat; i++) { + for (l = 0; l < nstats; l++) { + if (Gstats[l].cats[0] == rlst[i]) { + pi[i] += Gstats[l].count; + } + + if (Gstats[l].cats[1] == rlst[i]) { + pj[i] += Gstats[l].count; + } + + if ((Gstats[l].cats[0] == Gstats[l].cats[1]) && + (Gstats[l].cats[0] == rlst[i])) { + pii[i] += Gstats[l].count; + } + } + metrics->correct += pii[i]; + } + + metrics->overall_accuracy = 100. * metrics->correct / metrics->observations; + + /* turn observations into probabilities */ + for (i = 0; i < ncat; i++) { + pi[i] = pi[i] / metrics->observations; + pj[i] = pj[i] / metrics->observations; + pii[i] = pii[i] / metrics->observations; + if (pi[i] == 0) + metrics->users_accuracy[i] = na_value; + else + metrics->users_accuracy[i] = 100 * (pii[i] / pi[i]); + if (pj[i] == 0) + metrics->producers_accuracy[i] = na_value; + else + metrics->producers_accuracy[i] = 100 * (pii[i] / pj[i]); + /* theta 1 */ + p0 += pii[i]; + /* theta 2 */ + pC += pi[i] * pj[i]; + } + if (pC != 1) + metrics->kappa = (p0 - pC) / (1 - pC); + else + metrics->kappa = na_value; + + /* conditional user's kappa */ + for (i = 0; i < ncat; i++) { + if (pi[i] == 0 || (pi[i] == 1 && pj[i] == 1)) + metrics->conditional_kappa[i] = na_value; + else + metrics->conditional_kappa[i] = + (pii[i] - pi[i] * pj[i]) / (pi[i] - pi[i] * pj[i]); + inter1 += pii[i] * pow(((1 - pC) - (1 - p0) * (pi[i] + pj[i])), 2.); + } + + /* 
kappa variance */ + for (l = 0; l < nstats; l++) { + if (Gstats[l].cats[0] != Gstats[l].cats[1]) { + for (i = 0; i < ncat; i++) { + if (Gstats[l].cats[0] == rlst[i]) + a_i = i; + if (Gstats[l].cats[1] == rlst[i]) + b_i = i; + } + inter2 += Gstats[l].count * pow((pi[a_i] + pj[b_i]), 2.) / + metrics->observations; + } + } + metrics->kappa_variance = (inter1 + pow((1 - p0), 2.) * inter2 - + pow((p0 * pC - 2 * pC + p0), 2.)) / + pow((1 - pC), 4.) / metrics->observations; + + G_free(pi); + G_free(pj); + G_free(pii); +}; + +/* remove repeated values */ +static int collapse(long *l, int n) +{ + long *c; + int m; + + c = l; + m = 1; + while (n-- > 0) { + if (*c != *l) { + c++; + *c = *l; + m++; + } + l++; + } + + return m; +} + +static int longcomp(const void *aa, const void *bb) +{ + const long *a = aa; + const long *b = bb; + + if (*a < *b) + return -1; + + return (*a > *b); +} diff --git a/raster/r.kappa/kappa.h b/raster/r.kappa/kappa.h index 1519c9b01f7..7e20092f52b 100644 --- a/raster/r.kappa/kappa.h +++ b/raster/r.kappa/kappa.h @@ -15,6 +15,21 @@ struct _layer_ struct Categories labels; }; +struct _metrics_ +{ + long observations; + long correct; + long *matrix; + long *row_sum; + long *col_sum; + double overall_accuracy; + double *producers_accuracy; + double *users_accuracy; + double kappa; + double kappa_variance; + double *conditional_kappa; +}; + extern struct Cell_head window; extern const char *maps[2]; @@ -32,3 +47,8 @@ extern int nlayers; #define GSTATS struct _gstats_ extern GSTATS *Gstats; extern size_t nstats; + +#define METRICS struct _metrics_ +extern METRICS *metrics; + +static const double na_value = -999.0; diff --git a/raster/r.kappa/local_proto.h b/raster/r.kappa/local_proto.h index 5f87285716f..8e08f5343ff 100644 --- a/raster/r.kappa/local_proto.h +++ b/raster/r.kappa/local_proto.h @@ -1,23 +1,26 @@ -/* calc_kappa.c */ -void calc_kappa(void); +/* print_kappa.c */ +void print_kappa(void); /* mask.c */ char *maskinfo(void); -/* prt_hdr.c */ -void 
prn_header(void); +/* print_hdr.c */ +void print_header(void); -/* prt_label.c */ -void prt_label(void); +/* print_label.c */ +void print_label(void); -/* prt_mat.c */ -void prn_error_mat(int out_cols, int hdr); +/* print_mat.c */ +void print_error_mat(int out_cols, int hdr); -/* prt2csv_mat.c */ -void prn2csv_error_mat(int out_cols, int hdr); +/* print2csv_mat.c */ +void print2csv_error_mat(int hdr); + +/* print_json.c */ +void print_json(void); /* stats.c */ int stats(void); -/* sum.c */ -long count_sum(int *ns, int n1); +/* calc_metrics.c */ +void calc_metrics(void); diff --git a/raster/r.kappa/main.c b/raster/r.kappa/main.c index da6e276e0b3..a7501262ae6 100644 --- a/raster/r.kappa/main.c +++ b/raster/r.kappa/main.c @@ -4,16 +4,17 @@ * MODULE: r.kappa * AUTHOR(S): Tao Wen, UIUC (original contributor) * Markus Neteler , - * Roberto Flor , - * Bernhard Reiter , - * Brad Douglas , - * Glynn Clements , - * Jachym Cepicky , + * Roberto Flor , + * Bernhard Reiter , + * Brad Douglas , + * Glynn Clements , + * Jachym Cepicky , * Jan-Oliver Wagner + * Maris Nartiss * PURPOSE: tabulates the error matrix of classification result by - * crossing classified map layer with respect to reference map + * crossing classified map layer with respect to reference map * layer - * COPYRIGHT: (C) 1999-2006 by the GRASS Development Team + * COPYRIGHT: (C) 1999-2022 by the GRASS Development Team * * This program is free software under the GNU General Public * License (>=v2). 
Read the file COPYING that comes with GRASS @@ -45,6 +46,8 @@ int nlayers; GSTATS *Gstats; size_t nstats; +METRICS *metrics; + /* function prototypes */ static void layer(const char *s); @@ -54,7 +57,7 @@ int main(int argc, char **argv) struct GModule *module; struct { - struct Option *map, *ref, *output, *titles; + struct Option *map, *ref, *output, *titles, *format; } parms; struct @@ -97,6 +100,17 @@ int main(int argc, char **argv) parms.titles->answer = "ACCURACY ASSESSMENT"; parms.titles->guisection = _("Output settings"); + parms.format = G_define_option(); + parms.format->key = "format"; + parms.format->type = TYPE_STRING; + parms.format->required = YES; + parms.format->label = _("Output format"); + parms.format->options = "plain,json"; + parms.format->descriptions = "plain;Plain text output;" + "json;JSON (JavaScript Object Notation);"; + parms.format->answer = "plain"; + parms.format->guisection = _("Output settings"); + flags.w = G_define_flag(); flags.w->key = 'w'; flags.w->label = _("Wide report"); @@ -116,6 +130,11 @@ int main(int argc, char **argv) if (G_parser(argc, argv)) exit(EXIT_FAILURE); + if (strcmp(parms.format->answer, "json") == 0 && + (flags.m->answer || flags.h->answer || flags.w->answer)) + G_warning(_("When JSON output format is requested, all formatting " + "flags are ignored")); + G_get_window(&window); maps[0] = parms.ref->answer; @@ -129,26 +148,29 @@ int main(int argc, char **argv) /* run r.stats to obtain statistics of map layers */ stats(); + /* calculate metrics from stats */ + calc_metrics(); - if (flags.m->answer) { - /* prepare the data for calculation */ - prn2csv_error_mat(2048, flags.h->answer); + if (strcmp(parms.format->answer, "json") == 0) { + print_json(); + } + else if (flags.m->answer) { + print2csv_error_mat(flags.h->answer); } else { /* print header of the output */ if (!flags.h->answer) - prn_header(); + print_header(); /* prepare the data for calculation */ - prn_error_mat(flags.w->answer ? 
132 : 80, flags.h->answer); + print_error_mat(flags.w->answer ? 132 : 80, flags.h->answer); /* generate the error matrix, kappa and variance */ - calc_kappa(); + print_kappa(); } return EXIT_SUCCESS; } - static void layer(const char *s) { char name[GNAME_MAX]; @@ -160,7 +182,7 @@ static void layer(const char *s) G_fatal_error(_("Raster map <%s> not found"), s); n = nlayers++; - layers = (LAYER *) G_realloc(layers, 2 * sizeof(LAYER)); + layers = (LAYER *)G_realloc(layers, 2 * sizeof(LAYER)); layers[n].name = G_store(name); layers[n].mapset = mapset; Rast_read_cats(name, mapset, &layers[n].labels); diff --git a/raster/r.kappa/mask.c b/raster/r.kappa/mask.c index 3f3e28ea9c2..729f7c5b13a 100644 --- a/raster/r.kappa/mask.c +++ b/raster/r.kappa/mask.c @@ -9,7 +9,6 @@ static char *append(char *results, char *text); static void do_text(char *text, long first, long last); static int reclass_text(char *text, struct Reclass *reclass, int next); - char *maskinfo(void) { struct Reclass reclass; diff --git a/raster/r.kappa/print2csv_mat.c b/raster/r.kappa/print2csv_mat.c new file mode 100644 index 00000000000..8365d6c1cf0 --- /dev/null +++ b/raster/r.kappa/print2csv_mat.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include "kappa.h" +#include "local_proto.h" + +void print2csv_error_mat(int hdr) +{ + int j; + + int cndx, rndx; + int first_col, last_col; + int thisone; + FILE *fd; + + long *cats; + char *cl; + + if (output != NULL) { + if (hdr) + fd = fopen(output, "w"); + else + fd = fopen(output, "a"); + } + else + fd = stdout; + + if (fd == NULL) + G_fatal_error( + _("Cannot open file <%s> to write cats and counts (error matrix)"), + output); + else { + /* format and print out the error matrix in panels */ + first_col = 0; + last_col = ncat; + /* name line */ + /*fprintf(fd, "\t\t\t MAP1\n"); */ + /* cat line */ + fprintf(fd, "cat#\t"); + /* print labels MAP1 */ + for (j = 0; j < ncat; j++) { + cats = rlst; + cl = Rast_get_c_cat((CELL *)&(cats[j]), &(layers[0].labels)); + 
if (cl) + G_strip(cl); + if (cl == NULL || *cl == 0) + fprintf(fd, "%ld\t", cats[j]); + else + fprintf(fd, "%s\t", cl); + } + /*for (cndx = first_col; cndx < last_col; cndx++) */ + /* fprintf(fd, "%ld\t", rlst[cndx]); */ + fprintf(fd, "RowSum"); + fprintf(fd, "\n"); + /* body of the matrix */ + for (rndx = 0; rndx < ncat; rndx++) { + cats = rlst; + cl = Rast_get_c_cat((CELL *)&(cats[rndx]), &(layers[1].labels)); + if (cl) + G_strip(cl); + if (cl == NULL || *cl == 0) + fprintf(fd, "%ld\t", cats[rndx]); + else + fprintf(fd, "%s\t", cl); + /* entries */ + for (cndx = first_col; cndx < last_col; cndx++) { + thisone = (ncat * rndx) + cndx; + fprintf(fd, "%ld\t", metrics->matrix[thisone]); + } + /* row marginal summation */ + fprintf(fd, "%ld", metrics->row_sum[rndx]); + fprintf(fd, "\n"); + } + /* column marginal summation */ + fprintf(fd, "ColSum\t"); + for (cndx = first_col; cndx < last_col; cndx++) { + fprintf(fd, "%ld\t", metrics->col_sum[cndx]); + } + /* grand total */ + fprintf(fd, "%ld", metrics->observations); + fprintf(fd, "\n\n"); + if (output != NULL) + fclose(fd); + } +} diff --git a/raster/r.kappa/prt_hdr.c b/raster/r.kappa/print_header.c similarity index 85% rename from raster/r.kappa/prt_hdr.c rename to raster/r.kappa/print_header.c index baa7378c5f3..eed803ae660 100644 --- a/raster/r.kappa/prt_hdr.c +++ b/raster/r.kappa/print_header.c @@ -4,8 +4,7 @@ #include "kappa.h" #include "local_proto.h" - -void prn_header(void) +void print_header(void) { int i, len; char buf[1024], *titles, *label; @@ -14,10 +13,8 @@ void prn_header(void) if (output == NULL) fd = stdout; - else if ((fd = fopen(output, "w")) == NULL) { + else if ((fd = fopen(output, "w")) == NULL) G_fatal_error(_("Cannot open file <%s> to write header"), output); - return; - } /* print header */ fprintf(fd, "\t\t\t%s\n", title); @@ -37,8 +34,8 @@ void prn_header(void) G_strip(titles); if (titles == NULL || *titles == 0) titles = "(untitled)"; - sprintf(buf, "%*s%-*s%d = %s (%s in %s)", i * 6, "", 
len, label, - i + 1, titles, layers[i].name, layers[i].mapset); + sprintf(buf, "%*s%-*s%d = %s (%s in %s)", i * 6, "", len, label, i + 1, + titles, layers[i].name, layers[i].mapset); fprintf(fd, "%s\n", buf); } diff --git a/raster/r.kappa/print_json.c b/raster/r.kappa/print_json.c new file mode 100644 index 00000000000..45e7c4b1e86 --- /dev/null +++ b/raster/r.kappa/print_json.c @@ -0,0 +1,125 @@ +#include +#include +#include +#include "kappa.h" +#include "local_proto.h" + +void print_json() +{ + bool first; + FILE *fd; + + if (output != NULL) + fd = fopen(output, "w"); + else + fd = stdout; + + if (fd == NULL) + G_fatal_error(_("Cannot open file <%s> to write JSON output"), output); + + fprintf(fd, "{\n"); + fprintf(fd, " \"reference\": \"%s\",\n", maps[0]); + fprintf(fd, " \"classification\": \"%s\",\n", maps[1]); + fprintf(fd, " \"observations\": %ld,\n", metrics->observations); + fprintf(fd, " \"correct\": %ld,\n", metrics->correct); + fprintf(fd, " \"overall_accuracy\": %.5f,\n", metrics->overall_accuracy); + if (metrics->kappa == na_value) + fprintf(fd, " \"kappa\": null,\n"); + else + fprintf(fd, " \"kappa\": %.5f,\n", metrics->kappa); + if (metrics->kappa_variance == na_value) + fprintf(fd, " \"kappa_variance\": null,\n"); + else + fprintf(fd, " \"kappa_variance\": %.5f,\n", metrics->kappa_variance); + fprintf(fd, " \"cats\": ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, ", "); + fprintf(fd, "%ld", rlst[i]); + } + fprintf(fd, "],\n"); + fprintf(fd, " \"matrix\": [\n ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, "],\n ["); + bool cfirst = 1; + + for (int j = 0; j < ncat; j++) { + if (cfirst) + cfirst = 0; + else + fprintf(fd, ", "); + fprintf(fd, "%ld", metrics->matrix[ncat * i + j]); + } + } + fprintf(fd, "]\n ],\n"); + fprintf(fd, " \"row_sum\": ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, ", "); 
+ fprintf(fd, "%ld", metrics->row_sum[i]); + } + fprintf(fd, "],\n"); + fprintf(fd, " \"col_sum\": ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, ", "); + fprintf(fd, "%ld", metrics->col_sum[i]); + } + fprintf(fd, "],\n"); + fprintf(fd, " \"producers_accuracy\": ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, ", "); + if (metrics->producers_accuracy[i] == na_value) + fprintf(fd, "null"); + else + fprintf(fd, "%.5f", metrics->producers_accuracy[i]); + } + fprintf(fd, "],\n"); + fprintf(fd, " \"users_accuracy\": ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, ", "); + if (metrics->users_accuracy[i] == na_value) + fprintf(fd, "null"); + else + fprintf(fd, "%.5f", metrics->users_accuracy[i]); + } + fprintf(fd, "],\n"); + fprintf(fd, " \"conditional_kappa\": ["); + first = 1; + for (int i = 0; i < ncat; i++) { + if (first) + first = 0; + else + fprintf(fd, ", "); + if (metrics->conditional_kappa[i] == na_value) + fprintf(fd, "null"); + else + fprintf(fd, "%.5f", metrics->conditional_kappa[i]); + } + fprintf(fd, "]\n"); + + fprintf(fd, "}\n"); + if (output != NULL) + fclose(fd); +} diff --git a/raster/r.kappa/print_kappa.c b/raster/r.kappa/print_kappa.c new file mode 100644 index 00000000000..d47f0b327cd --- /dev/null +++ b/raster/r.kappa/print_kappa.c @@ -0,0 +1,55 @@ +#include +#include +#include +#include "kappa.h" +#include "local_proto.h" + +void print_kappa(void) +{ + int i; + FILE *fd; + + if (output == NULL) + fd = stdout; + else if ((fd = fopen(output, "a")) == NULL) + G_fatal_error( + _("Cannot open file <%s> to write kappa and relevant parameters"), + output); + + /* print out the comission and omission accuracy, and conditional kappa */ + fprintf(fd, "\nCats\t%% Comission\t%% Omission\tEstimated Kappa\n"); + for (i = 0; i < ncat; i++) { + fprintf(fd, "%ld\t", rlst[i]); + if (metrics->users_accuracy[i] == 
na_value) + fprintf(fd, "NA\t\t"); + else + fprintf(fd, "%f\t", 100 - metrics->users_accuracy[i]); + if (metrics->producers_accuracy[i] == na_value) + fprintf(fd, "NA\t\t"); + else + fprintf(fd, "%f\t", 100 - metrics->producers_accuracy[i]); + if (metrics->conditional_kappa[i] == na_value) + fprintf(fd, "NA\n"); + else + fprintf(fd, "%f\n", metrics->conditional_kappa[i]); + } + fprintf(fd, "\n"); + fprintf(fd, "Kappa\t\tKappa Variance\n"); + if (metrics->kappa == na_value) + fprintf(fd, "NA"); + else + fprintf(fd, "%f", metrics->kappa); + if (metrics->kappa_variance == na_value) + fprintf(fd, "\tNA\n"); + else + fprintf(fd, "\t%f\n", metrics->kappa_variance); + + fprintf(fd, "\nObs Correct\tTotal Obs\t%% Observed Correct\n"); + fprintf(fd, "%ld\t\t%ld\t\t%f\n", metrics->correct, metrics->observations, + metrics->overall_accuracy); + if (output != NULL) + fclose(fd); + + /* print labels for categories of maps */ + print_label(); +} diff --git a/raster/r.kappa/prt_label.c b/raster/r.kappa/print_label.c similarity index 81% rename from raster/r.kappa/prt_label.c rename to raster/r.kappa/print_label.c index b55f6b4b961..d4f37303197 100644 --- a/raster/r.kappa/prt_label.c +++ b/raster/r.kappa/print_label.c @@ -3,8 +3,7 @@ #include "kappa.h" #include "local_proto.h" - -void prt_label(void) +void print_label(void) { int i, j; long *cats; @@ -13,10 +12,8 @@ void prt_label(void) if (output == NULL) fd = stdout; - else if ((fd = fopen(output, "a")) == NULL) { + else if ((fd = fopen(output, "a")) == NULL) G_fatal_error(_("Can't open file <%s> to write label"), output); - return; - } /* print labels */ for (i = 0; i < nlayers; i++) { @@ -24,7 +21,7 @@ void prt_label(void) fprintf(fd, "MAP%-d Category Description\n", i + 1); for (j = 0; j < ncat; j++) { cats = rlst; - cl = Rast_get_c_cat((CELL *) & (cats[j]), &(layers[i].labels)); + cl = Rast_get_c_cat((CELL *)&(cats[j]), &(layers[i].labels)); if (cl) G_strip(cl); if (cl == NULL || *cl == 0) diff --git 
a/raster/r.kappa/prt_mat.c b/raster/r.kappa/print_mat.c similarity index 51% rename from raster/r.kappa/prt_mat.c rename to raster/r.kappa/print_mat.c index 1314e9a15c4..9c98e45cb2c 100644 --- a/raster/r.kappa/prt_mat.c +++ b/raster/r.kappa/print_mat.c @@ -4,24 +4,14 @@ #include "kappa.h" #include "local_proto.h" - -static int longcomp(const void *aa, const void *bb); -static int collapse(long *l, int n); - - -void prn_error_mat(int out_cols, int hdr) +void print_error_mat(int out_cols, int hdr) { - int i, j, k; - int ncat1, ncat2; - long x; - long *clst; - int num_panels, at_panel; int cndx, rndx; int first_col = 0, last_col = 0; int addflag = 0; int thisone; - long t_row, t_col; + long t_row; long t_rowcount, grand_count; const char *mapone; FILE *fd; @@ -35,59 +25,17 @@ void prn_error_mat(int out_cols, int hdr) else fd = stdout; - if (fd == NULL) { - G_fatal_error(_("Cannot open file <%s> to write cats and counts (error matrix)"), - output); - return; - } + if (fd == NULL) + G_fatal_error( + _("Cannot open file <%s> to write cats and counts (error matrix)"), + output); else { - /* get the cat lists */ - rlst = (long *)G_calloc(nstats * 2, sizeof(long)); - clst = (long *)G_calloc(nstats, sizeof(long)); - for (i = 0; i < nstats; i++) { - rlst[i] = Gstats[i].cats[0]; - clst[i] = Gstats[i].cats[1]; - } - - /* sort the cat lists */ - qsort(rlst, nstats, sizeof(long), longcomp); - qsort(clst, nstats, sizeof(long), longcomp); - - /* remove repeated cats */ - ncat1 = collapse(rlst, nstats); - ncat2 = collapse(clst, nstats); - - /* copy clst to the end of rlst, remove repeated cats, and free unused memory */ - for (i = 0; i < ncat2; i++) - rlst[ncat1 + i] = clst[i]; - qsort(rlst, ncat1 + ncat2, sizeof(long), longcomp); - ncat = collapse(rlst, ncat1 + ncat2); - rlst = (long *)G_realloc(rlst, ncat * sizeof(long)); - G_free(clst); - - /* allocate matrix and fill in with cats' value */ - matr = (long *)G_malloc(ncat * ncat * sizeof(long)); - for (i = 0; i < ncat * ncat; i++) 
- matr[i] = 0; - for (i = 0; i < nstats; i++) { - for (j = 0; j < ncat; j++) - if (rlst[j] == Gstats[i].cats[0]) - break; - for (k = 0; k < ncat; k++) - if (rlst[k] == Gstats[i].cats[1]) - break; - /* matrix: reference in columns, classification in rows */ - matr[j * ncat + k] = Gstats[i].count; - } - /* format and print out the error matrix in panels */ out_cols = (out_cols == 132) ? 9 : 5; num_panels = ncat / out_cols; if (ncat % out_cols) num_panels++; - t_rowcount = 0; - fprintf(fd, - "\nError Matrix (MAP1: reference, MAP2: classification)\n"); + fprintf(fd, "\nError Matrix (MAP1: reference, MAP2: classification)\n"); for (at_panel = 0; at_panel < num_panels; at_panel++) { first_col = at_panel * out_cols; @@ -95,7 +43,8 @@ void prn_error_mat(int out_cols, int hdr) if (last_col >= ncat) { last_col = ncat; } - /* determine whether room available for row total at the end of last panel */ + /* determine whether room available for row total at the end of last + * panel */ addflag = 0; if (at_panel == (num_panels - 1) && (last_col - first_col) < (out_cols - 1)) { @@ -122,32 +71,22 @@ void prn_error_mat(int out_cols, int hdr) /* entries */ for (cndx = first_col; cndx < last_col; cndx++) { thisone = (ncat * rndx) + cndx; - fprintf(fd, "%ld\t", matr[thisone]); + fprintf(fd, "%ld\t", metrics->matrix[thisone]); } /* row marginal summation */ if (addflag) { - t_row = 0; - for (k = 0; k < ncat; k++) - t_row += matr[rndx * ncat + k]; - t_rowcount += t_row; - fprintf(fd, "%ld", t_row); + fprintf(fd, "%ld", metrics->row_sum[rndx]); } fprintf(fd, "\n"); } /* column marginal summation */ fprintf(fd, "Col Sum\t\t"); for (cndx = first_col; cndx < last_col; cndx++) { - t_col = 0; - x = cndx; - for (k = 0; k < ncat; k++) { - t_col += matr[x]; - x += ncat; - } - fprintf(fd, "%ld\t", t_col); + fprintf(fd, "%ld\t", metrics->col_sum[cndx]); } /* grand total */ if (addflag) - fprintf(fd, "%ld", t_rowcount); + fprintf(fd, "%ld", metrics->observations); fprintf(fd, "\n\n"); } @@ -165,8 
+104,8 @@ void prn_error_mat(int out_cols, int hdr) fprintf(fd, " %5ld", rlst[rndx]); for (cndx = first_col; cndx < last_col; cndx++) { thisone = (ncat * rndx) + cndx; - fprintf(fd, " %9ld ", matr[thisone]); - t_row += matr[thisone]; + fprintf(fd, " %9ld ", metrics->matrix[thisone]); + t_row += metrics->matrix[thisone]; } t_rowcount += t_row; grand_count += t_rowcount; @@ -174,41 +113,7 @@ void prn_error_mat(int out_cols, int hdr) } fprintf(fd, "%9ld\n", grand_count); } - G_free(matr); if (output != NULL) fclose(fd); } } - - -/* remove repeated values */ -static int collapse(long *l, int n) -{ - long *c; - int m; - - c = l; - m = 1; - while (n-- > 0) { - if (*c != *l) { - c++; - *c = *l; - m++; - } - l++; - } - - return m; -} - - -static int longcomp(const void *aa, const void *bb) -{ - const long *a = aa; - const long *b = bb; - - if (*a < *b) - return -1; - - return (*a > *b); -} diff --git a/raster/r.kappa/prt2csv_mat.c b/raster/r.kappa/prt2csv_mat.c deleted file mode 100644 index 4f208b5838b..00000000000 --- a/raster/r.kappa/prt2csv_mat.c +++ /dev/null @@ -1,178 +0,0 @@ -#include -#include -#include -#include "kappa.h" -#include "local_proto.h" - - -static int longcomp(const void *aa, const void *bb); -static int collapse(long *l, int n); - - -void prn2csv_error_mat(int out_cols, int hdr) -{ - int i, j, k; - int ncat1, ncat2; - long x; - long *clst; - - int cndx, rndx; - int first_col = 0, last_col = 0; - int thisone; - long t_row, t_col; - long t_rowcount; - FILE *fd; - - long *cats; - char *cl; - - if (output != NULL) { - if (hdr) - fd = fopen(output, "w"); - else - fd = fopen(output, "a"); - } - else - fd = stdout; - - if (fd == NULL) { - G_fatal_error(_("Cannot open file <%s> to write cats and counts (error matrix)"), - output); - return; - } - else { - /* get the cat lists */ - rlst = (long *)G_calloc(nstats * 2, sizeof(long)); - clst = (long *)G_calloc(nstats, sizeof(long)); - for (i = 0; i < nstats; i++) { - rlst[i] = Gstats[i].cats[0]; - clst[i] = 
Gstats[i].cats[1]; - } - - /* sort the cat lists */ - qsort(rlst, nstats, sizeof(long), longcomp); - qsort(clst, nstats, sizeof(long), longcomp); - - /* remove repeated cats */ - ncat1 = collapse(rlst, nstats); - ncat2 = collapse(clst, nstats); - - /* copy clst to the end of rlst, remove repeated cats, and free unused memory */ - for (i = 0; i < ncat2; i++) - rlst[ncat1 + i] = clst[i]; - qsort(rlst, ncat1 + ncat2, sizeof(long), longcomp); - ncat = collapse(rlst, ncat1 + ncat2); - rlst = (long *)G_realloc(rlst, ncat * sizeof(long)); - G_free(clst); - - /* allocate matrix and fill in with cats' value */ - matr = (long *)G_malloc(ncat * ncat * sizeof(long)); - for (i = 0; i < ncat * ncat; i++) - matr[i] = 0; - for (i = 0; i < nstats; i++) { - for (j = 0; j < ncat; j++) - if (rlst[j] == Gstats[i].cats[0]) - break; - for (k = 0; k < ncat; k++) - if (rlst[k] == Gstats[i].cats[1]) - break; - /* matrix: reference in columns, classification in rows */ - matr[j * ncat + k] = Gstats[i].count; - } - - /* format and print out the error matrix in panels */ - out_cols = 2048; - t_rowcount = 0; - first_col = 0; - last_col = ncat; - /* name line */ - /*fprintf(fd, "\t\t\t MAP1\n"); */ - /* cat line */ - fprintf(fd, "cat#\t"); - /* print labels MAP1 */ - for (j = 0; j < ncat; j++) { - cats = rlst; - cl = Rast_get_c_cat((CELL *) & (cats[j]), &(layers[0].labels)); - if (cl) - G_strip(cl); - if (cl == NULL || *cl == 0) - fprintf(fd, "%ld\t", cats[j]); - else - fprintf(fd, "%s\t", cl); - } - /*for (cndx = first_col; cndx < last_col; cndx++) */ - /* fprintf(fd, "%ld\t", rlst[cndx]); */ - fprintf(fd, "RowSum"); - fprintf(fd, "\n"); - /* body of the matrix */ - for (rndx = 0; rndx < ncat; rndx++) { - cats = rlst; - cl = Rast_get_c_cat((CELL *) & (cats[rndx]), &(layers[1].labels)); - if (cl) - G_strip(cl); - if (cl == NULL || *cl == 0) - fprintf(fd, "%ld\t", cats[rndx]); - else - fprintf(fd, "%s\t", cl); - /* entries */ - for (cndx = first_col; cndx < last_col; cndx++) { - thisone = (ncat * 
rndx) + cndx; - fprintf(fd, "%ld\t", matr[thisone]); - } - /* row marginal summation */ - t_row = 0; - for (k = 0; k < ncat; k++) - t_row += matr[rndx * ncat + k]; - t_rowcount += t_row; - fprintf(fd, "%ld", t_row); - fprintf(fd, "\n"); - } - /* column marginal summation */ - fprintf(fd, "ColSum\t"); - for (cndx = first_col; cndx < last_col; cndx++) { - t_col = 0; - x = cndx; - for (k = 0; k < ncat; k++) { - t_col += matr[x]; - x += ncat; - } - fprintf(fd, "%ld\t", t_col); - } - /* grand total */ - fprintf(fd, "%ld", t_rowcount); - fprintf(fd, "\n\n"); - G_free(matr); - if (output != NULL) - fclose(fd); - } -} - - -/* remove repeated values */ -static int collapse(long *l, int n) -{ - long *c; - int m; - - c = l; - m = 1; - while (n-- > 0) { - if (*c != *l) { - c++; - *c = *l; - m++; - } - l++; - } - - return m; -} - - -static int longcomp(const void *aa, const void *bb) -{ - const long *a = aa; - const long *b = bb; - - return (*a - *b); -} diff --git a/raster/r.kappa/r.kappa.html b/raster/r.kappa/r.kappa.html index 85b765493ee..32569b12933 100644 --- a/raster/r.kappa/r.kappa.html +++ b/raster/r.kappa/r.kappa.html @@ -16,9 +16,10 @@

DESCRIPTION

pixels are tabulated.

-The report will be write to an output file which is in +The report will be written to an output file which is in plain text format and named by user at prompt of running -the program. +the program. To obtain a machine-readable version, specify the +json output format.

The body of the report is arranged in panels. The @@ -33,8 +34,46 @@

DESCRIPTION

panel. There is a total at the bottom of each column representing the sum of all the rows in that column. +

OUTPUT VARIABLES

+

+All output variables (except kappa variance) have been +validated to produce correct values in accordance +with the formulas given by Rossiter, D.G., 2004. "Technical Note: +Statistical methods for accuracy assessment of classified +thematic maps". +

+
Observations
+
Overall count of observed cells (sum of both correct + and incorrect ones).
+
Correct
+
Overall count of correct cells (cells with equal value + in reference and classification maps).
+
Overall accuracy
+
Number of correct cells divided by overall cell count + (expressed in percent).
+
User's accuracy
+
Share of correctly classified cells out of all cells + classified as belonging to the specified class (expressed in percent). + Complement of the commission error.
+
Commission
+
Commission error = 100 - user's accuracy.
+
Producer's accuracy
+
Share of correctly classified cells out of all cells + known to belong to the specified class (expressed in percent). + Complement of the omission error.
+
Omission
+
Omission error = 100 - producer's accuracy.
+
Kappa
+
Cohen's kappa index value.
+
Kappa variance
+
Variance of kappa index. Correctness needs to be validated.
+
Conditional kappa
+
Conditional user's kappa for specified class.
+
+

NOTES

+

It is recommended to reclassify categories of classified result map layer into a more manageable number before running r.kappa on the classified raster map @@ -42,8 +81,11 @@

NOTES

information for each and every category.

-NA's in output file mean non-applicable in case -MASK exists. +NA's in the output mean that it was not possible to calculate the value +(e.g. the calculation would involve division by zero). +In JSON output, NA's are represented by the value null. +If there is no overlap between the two maps, a warning is printed and +output values are set to 0 or null respectively.

The Estimated kappa value in r.kappa is the value @@ -63,6 +105,11 @@

NOTES

  • Pj[i] is the probability of classification j having classified the point as i.
  • +

    +Some of the reported values (Cohen's kappa, overall accuracy) can be +misleading if the cell count among classes is not balanced. See e.g. +Powers, D.M.W., 2012. The Problem with Kappa. +

    EXAMPLE

    Example for North Carolina sample dataset: @@ -93,6 +140,7 @@

    SEE ALSO

    r.stats -

    AUTHOR

    +

    AUTHORS

    -Tao Wen, University of Illinois at Urbana-Champaign, Illinois +Tao Wen, University of Illinois at Urbana-Champaign, Illinois
    +Maris Nartiss, University of Latvia (JSON output) diff --git a/raster/r.kappa/stats.c b/raster/r.kappa/stats.c index 55d8173d779..eb9b977d6b2 100644 --- a/raster/r.kappa/stats.c +++ b/raster/r.kappa/stats.c @@ -7,14 +7,12 @@ #include #include "local_proto.h" - static void die(void) { unlink(stats_file); G_fatal_error(_("Problem reading r.stats output")); } - int stats(void) { char buf[1024]; @@ -45,8 +43,7 @@ int stats(void) argv[argc++] = "separator=:"; - sprintf(buf, "input=%s,%s", - G_fully_qualified_name(mname, mmapset), + sprintf(buf, "input=%s,%s", G_fully_qualified_name(mname, mmapset), G_fully_qualified_name(rname, rmapset)); argv[argc++] = buf; @@ -72,7 +69,7 @@ int stats(void) tokens = G_tokenize(buf, ":"); i = 0; ns = nstats++; - Gstats = (GSTATS *) G_realloc(Gstats, nstats * sizeof(GSTATS)); + Gstats = (GSTATS *)G_realloc(Gstats, nstats * sizeof(GSTATS)); Gstats[ns].cats = (long *)G_calloc(nlayers, sizeof(long)); for (nl = 0; nl < nlayers; nl++) { if (sscanf(tokens[i++], "%ld", &Gstats[ns].cats[nl]) != 1) diff --git a/raster/r.kappa/sum.c b/raster/r.kappa/sum.c deleted file mode 100644 index 9ac48fa16aa..00000000000 --- a/raster/r.kappa/sum.c +++ /dev/null @@ -1,49 +0,0 @@ -#include "kappa.h" -#include "local_proto.h" - - -/* function prototypes */ -static int same_cats(int a, int b, int nl); - - -/* within group totals: - *ns is the first stat - (updated upon return to point to next stat) - nl is the layer number (or level) */ - -long count_sum(int *ns, int nl) -{ - long count; - int k, n; - - k = n = *ns; - count = 0; - - if (nl >= 0) { - while (n < nstats && same_cats(k, n, nl)) - count += Gstats[n++].count; - } - else { - while (n < nstats) - count += Gstats[n++].count; - } - - *ns = n; - - return count; -} - - -static int same_cats(int a, int b, int nl) -{ - long *cat_a, *cat_b; - - cat_a = Gstats[a].cats; - cat_b = Gstats[b].cats; - - while (nl-- >= 0) - if (*cat_a++ != *cat_b++) - return 0; - - return 1; -} diff --git 
a/raster/r.kappa/testsuite/test_r_kappa.py b/raster/r.kappa/testsuite/test_r_kappa.py index fc85a3c1a4e..d19a7a3b37d 100644 --- a/raster/r.kappa/testsuite/test_r_kappa.py +++ b/raster/r.kappa/testsuite/test_r_kappa.py @@ -11,11 +11,16 @@ import os import pathlib +import json + +from tempfile import NamedTemporaryFile from grass.script import read_command +from grass.script import decode from grass.script.core import tempname from grass.gunittest.case import TestCase from grass.gunittest.main import test +from grass.gunittest.checkers import keyvalue_equals class MatrixCorrectnessTest(TestCase): @@ -33,14 +38,12 @@ def setUpClass(cls): "r.in.ascii", input=os.path.join(cls.data_dir, "ref_1.ascii"), output=cls.ref_1, - quiet=True, ) cls.class_1 = tempname(10) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "class_1.ascii"), output=cls.class_1, - quiet=True, ) @classmethod @@ -91,14 +94,12 @@ def setUpClass(cls): "r.in.ascii", input=os.path.join(cls.data_dir, "ref_1.ascii"), output=cls.ref_1, - quiet=True, ) cls.class_1 = tempname(10) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "class_1.ascii"), output=cls.class_1, - quiet=True, ) cls.per_class = { "producer": [ @@ -204,14 +205,12 @@ def setUpClass(cls): "r.in.ascii", input=os.path.join(cls.data_dir, "ref_2.ascii"), output=cls.ref_1, - quiet=True, ) cls.class_1 = tempname(10) cls.runModule( "r.in.ascii", input=os.path.join(cls.data_dir, "class_2.ascii"), output=cls.class_1, - quiet=True, ) cls.per_class = { "producer": [ @@ -230,8 +229,8 @@ def setUpClass(cls): def tearDownClass(cls): """Remove temporary data""" cls.del_temp_region() - # cls.runModule("g.remove", flags="f", type="raster", name=cls.ref_1) - # cls.runModule("g.remove", flags="f", type="raster", name=cls.class_1) + cls.runModule("g.remove", flags="f", type="raster", name=cls.ref_1) + cls.runModule("g.remove", flags="f", type="raster", name=cls.class_1) def match(self, pat, ref): if pat == "NA" or ref == "NA": @@ -292,5 
+291,197 @@ def test_standard_output(self): self.assertTrue(self.match(vals[2], 0.0)) +class JSONOutputTest(TestCase): + """Test printing of parameters in JSON format""" + + @classmethod + def setUpClass(cls): + """Import sample maps with known properties""" + cls.use_temp_region() + cls.runModule("g.region", n=5, s=0, e=5, w=0, res=1) + + cls.data_dir = os.path.join(pathlib.Path(__file__).parent.absolute(), "data") + cls.references = [] + cls.classifications = [] + cls.expected_outputs = [] + # Normal case + cls.references.append(tempname(10)) + cls.runModule( + "r.in.ascii", + input=os.path.join(cls.data_dir, "ref_1.ascii"), + output=cls.references[0], + ) + cls.classifications.append(tempname(10)) + cls.runModule( + "r.in.ascii", + input=os.path.join(cls.data_dir, "class_1.ascii"), + output=cls.classifications[0], + ) + + cls.expected_outputs.append( + { + "reference": cls.references[0], + "classification": cls.classifications[0], + "observations": 18, + "correct": 11, + "overall_accuracy": 61.111111, + "kappa": 0.52091, + "kappa_variance": 0.016871, + "cats": [1, 2, 3, 4, 5, 6], + "matrix": [ + [4, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 1, 4, 0, 0, 0], + [3, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 1, 0], + [0, 2, 0, 0, 0, 2], + ], + "row_sum": [4, 0, 5, 4, 1, 4], + "col_sum": [7, 3, 4, 0, 1, 3], + "producers_accuracy": [57.1429, 0.0, 100.0, None, 100.0, 66.66666], + "users_accuracy": [100.0, None, 80.0, 0.0, 100.0, 50.0], + "conditional_kappa": [1.0, None, 0.742857, 0.0, 1.0, 0.400], + } + ) + + # Bad case with no correct matches + cls.references.append(tempname(10)) + cls.runModule( + "r.in.ascii", + input=os.path.join(cls.data_dir, "ref_2.ascii"), + output=cls.references[1], + ) + cls.classifications.append(tempname(10)) + cls.runModule( + "r.in.ascii", + input=os.path.join(cls.data_dir, "class_2.ascii"), + output=cls.classifications[1], + ) + cls.expected_outputs.append( + { + "reference": cls.references[1], + "classification": cls.classifications[1], + 
"observations": 25, + "correct": 0, + "overall_accuracy": 0.0, + "kappa": 0.0, + "kappa_variance": 0.0, + "cats": [0, 1, 2, 3, 4, 9], + "matrix": [ + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0], + [8, 8, 4, 1, 4, 0], + ], + "row_sum": [0, 0, 0, 0, 0, 25], + "col_sum": [8, 8, 4, 1, 4, 0], + "producers_accuracy": [0.0, 0.0, 0.0, 0.0, 0.0, None], + "users_accuracy": [None, None, None, None, None, 0.0], + "conditional_kappa": [None, None, None, None, None, 0.0], + } + ) + + # Degenerate case #1 + cls.references.append(tempname(10)) + cls.classifications.append(tempname(10)) + cls.runModule( + "r.mapcalc", + expression=f"{cls.references[2]}=null()", + ) + cls.runModule( + "r.mapcalc", + expression=f"{cls.classifications[2]}=null()", + ) + cls.expected_outputs.append( + { + "reference": cls.references[2], + "classification": cls.classifications[2], + "observations": 0, + "correct": 0, + "overall_accuracy": 0.0, + "kappa": None, + "kappa_variance": None, + "cats": [], + "matrix": [[]], + "row_sum": [], + "col_sum": [], + "producers_accuracy": [], + "users_accuracy": [], + "conditional_kappa": [], + } + ) + + # Degenerate case #2 + cls.references.append(tempname(10)) + cls.classifications.append(tempname(10)) + cls.runModule( + "r.mapcalc", + expression=f"{cls.references[3]}=1", + ) + cls.runModule( + "r.mapcalc", + expression=f"{cls.classifications[3]}=null()", + ) + cls.expected_outputs.append( + { + "reference": cls.references[3], + "classification": cls.classifications[3], + "observations": 0, + "correct": 0, + "overall_accuracy": 0.0, + "kappa": None, + "kappa_variance": None, + "cats": [], + "matrix": [[]], + "row_sum": [], + "col_sum": [], + "producers_accuracy": [], + "users_accuracy": [], + "conditional_kappa": [], + } + ) + + @classmethod + def tearDownClass(cls): + """Remove temporary data""" + cls.del_temp_region() + for reference in cls.references: + cls.runModule("g.remove", flags="f", 
type="raster", name=reference) + for classification in cls.classifications: + cls.runModule("g.remove", flags="f", type="raster", name=classification) + + def test_stdout(self): + for i in range(len(self.references)): + out = read_command( + "r.kappa", + reference=self.references[i], + classification=self.classifications[i], + format="json", + quiet=True, + ) + json_out = json.loads(decode(out)) + self.assertTrue( + keyvalue_equals(self.expected_outputs[i], json_out, precision=4) + ) + + def test_file(self): + for i in range(len(self.references)): + f = NamedTemporaryFile() + self.runModule( + "r.kappa", + reference=self.references[i], + classification=self.classifications[i], + output=f.name, + format="json", + overwrite=True, + ) + json_out = json.loads(f.read()) + self.assertTrue( + keyvalue_equals(self.expected_outputs[i], json_out, precision=4) + ) + + if __name__ == "__main__": test()