feat: add percent error measure display

jolibrain · Jan 12, 2023 · 1cc15d6 · 1cc15d6
1 parent a006615
commit 1cc15d6
Show file tree

Hide file tree

Showing 3 changed files with 72 additions and 4 deletions.
diff --git a/docs/api.md b/docs/api.md
@@ -669,7 +669,7 @@ No parameters
 Parameter         | Type   | Optional | Default | Description
 ---------         | ----   | -------- | ------- | -----------
 best              | int    | yes      | 1       | Number of top predictions returned by data URI (supervised)
-measure           | array  | yes      | empty   | Output measures requested, from `acc`: accuracy, `acc-k`: top-k accuracy, replace k with number (e.g. `acc-5`), `f1`: f1, precision and recall, `mcll`: multi-class log loss, `auc`: area under the curve, `cmdiag`: diagonal of confusion matrix (requires `f1`), `cmfull`: full confusion matrix (requires `f1`), `mcc`: Matthews correlation coefficient, `eucll`: euclidean distance (e.g. for regression tasks),`l1`: l1 distance (e.g. for regression tasks), `kl`: KL_divergence, `js`: JS divergence, `was`: Wasserstein, `ks`: Kolmogorov Smirnov, `dc`: distance correlation, `r2`: R2, `deltas`: delta scores, 'raw': ouput raw results, in case of predict call, this requires a special deploy.prototxt that is a test network (to have ground truth)
+measure           | array  | yes      | empty   | Output measures requested, from `acc`: accuracy, `acc-k`: top-k accuracy, replace k with number (e.g. `acc-5`), `f1`: f1, precision and recall, `mcll`: multi-class log loss, `auc`: area under the curve, `cmdiag`: diagonal of confusion matrix (requires `f1`), `cmfull`: full confusion matrix (requires `f1`), `mcc`: Matthews correlation coefficient, `eucll`: euclidean distance (e.g. for regression tasks), `l1`: l1 distance (e.g. for regression tasks), `percent`: mean relative error in percent, `kl`: KL divergence, `js`: JS divergence, `was`: Wasserstein, `ks`: Kolmogorov Smirnov, `dc`: distance correlation, `r2`: R2, `deltas`: delta scores, `raw`: output raw results; in case of a predict call, this requires a special deploy.prototxt that is a test network (to have ground truth)
 target_repository | string | yes      | empty   | target directory to which to copy the best model files once training has completed
 
 #### Machine learning libraries

diff --git a/src/supervisedoutputconnector.h b/src/supervisedoutputconnector.h
@@ -782,7 +782,10 @@ namespace dd
           find_presence_and_thres("eucll", measures, beucll, beucll_thres);
           bool bl1 = (std::find(measures.begin(), measures.end(), "l1")
                       != measures.end());
-          bool compute_all_distl = (beucll || bl1) && !autoencoder;
+          bool bpercent
+              = (std::find(measures.begin(), measures.end(), "percent")
+                 != measures.end());
+          bool compute_all_distl = (beucll || bl1 || bpercent) && !autoencoder;
 
           bool bmcc = (std::find(measures.begin(), measures.end(), "mcc")
                        != measures.end());
@@ -1111,6 +1114,16 @@ namespace dd
               for (unsigned int i = 0; i < all_ml1.size(); ++i)
                 meas_out.add("l1_" + std::to_string(i), all_ml1[i]);
             }
+          if (bpercent)
+            {
+              double mpercent;
+              std::vector<double> all_mpercent;
+              std::tie(mpercent, all_mpercent)
+                  = percentl(ad_res, compute_all_distl);
+              meas_out.add("percent", mpercent);
+              for (unsigned int i = 0; i < all_mpercent.size(); ++i)
+                meas_out.add("percent_" + std::to_string(i), all_mpercent[i]);
+            }
           if (bmcc)
             {
               double mmcc = mcc(ad_res);
@@ -2671,6 +2684,59 @@ namespace dd
       return std::make_tuple(eucl / static_cast<double>(batch_size), all_eucl);
     }
 
+    // measure: mean relative error in percent, |pred - target| / |target|
+    //
+    // ad: reads "batch_size", the optional "ignore_label", and for each
+    //     sample i in [0, batch_size) the object keyed by std::to_string(i)
+    //     with its "pred" vector and "target" (a vector when pred has more
+    //     than one entry, a single double otherwise).
+    // compute_all_distl: when true, per-dimension errors are accumulated too.
+    //
+    // Returns (mean error in percent over samples and dimensions,
+    //          per-dimension mean errors in percent); the vector is empty
+    //          when compute_all_distl is false.
+    // Targets equal to ignore_label are skipped, but the sums are still
+    // divided by batch_size.
+    static std::tuple<double, std::vector<double>>
+    percentl(const APIData &ad, bool compute_all_distl)
+    {
+      double percent = 0.0;
+      // the per-dimension accumulator is sized from the first sample
+      unsigned int psize = ad.getobj(std::to_string(0))
+                               .get("pred")
+                               .get<std::vector<double>>()
+                               .size();
+      std::vector<double> all_percent;
+      if (compute_all_distl)
+        all_percent.resize(psize, 0.0);
+      int batch_size = ad.get("batch_size").get<int>();
+      bool has_ignore = ad.has("ignore_label");
+
+      int ignore_label = -10000;
+      if (has_ignore)
+        ignore_label = ad.get("ignore_label").get<int>();
+
+      for (int i = 0; i < batch_size; i++)
+        {
+          APIData bad = ad.getobj(std::to_string(i));
+          std::vector<double> predictions
+              = bad.get("pred").get<std::vector<double>>();
+          std::vector<double> target;
+          if (predictions.size() > 1)
+            target = bad.get("target").get<std::vector<double>>();
+          else
+            target.push_back(bad.get("target").get<double>());
+          int reg_dim = predictions.size();
+          for (size_t j = 0; j < target.size(); j++)
+            {
+              // Keep the target as a double and compare by absolute
+              // difference: truncating to int, or omitting fabs, would also
+              // skip targets that merely round to or lie below ignore_label.
+              double t = target.at(j);
+              if (has_ignore
+                  && fabs(t - static_cast<double>(ignore_label)) < 1E-9)
+                continue;
+              // the 1E-9 term keeps the denominator non-zero when t == 0
+              double reldiff
+                  = fabs(predictions.at(j) - t) / (fabs(t) + 1E-9);
+              percent += reldiff / reg_dim;
+              // bounds-checked: a sample wider than the first one throws
+              // instead of writing past the end of all_percent
+              if (compute_all_distl)
+                all_percent.at(j) += reldiff;
+            }
+        }
+
+      // turn the accumulated ratios into batch means expressed in percent
+      if (compute_all_distl)
+        for (unsigned int i = 0; i < all_percent.size(); ++i)
+          {
+            all_percent[i] /= static_cast<double>(batch_size);
+            all_percent[i] *= 100.0;
+          }
+
+      return std::make_tuple(percent * 100.0 / static_cast<double>(batch_size),
+                             all_percent);
+    }
+
     // measure: gini coefficient
     static double comp_gini(const std::vector<double> &a,
                             const std::vector<double> &p)

diff --git a/tests/ut-torchapi.cc b/tests/ut-torchapi.cc
@@ -2049,7 +2049,7 @@ TEST(torchapi, service_train_images_split_regression_2dims_db_false)
   fileops::remove_dir(resnet50_train_repo + "test_0.lmdb");
 }
 
-TEST(torchapi, service_train_images_split_regression_2dims_db_false_l1)
+TEST(torchapi, service_train_images_split_regression_2dims_db_false_l1_percent)
 {
   setenv("CUBLAS_WORKSPACE_CONFIG", ":4096:8", true);
   torch::manual_seed(torch_seed);
@@ -2080,7 +2080,7 @@ TEST(torchapi, service_train_images_split_regression_2dims_db_false_l1)
           "\"resume\":false},"
           "\"input\":{\"seed\":12345,\"db\":false,\"shuffle\":true,\"test_"
           "split\":0.1},"
-          "\"output\":{\"measure\":[\"l1\"]}},\"data\":[\""
+          "\"output\":{\"measure\":[\"l1\",\"percent\"]}},\"data\":[\""
         + resnet50_train_data_reg2 + "\"]}";
   joutstr = japi.jrender(japi.service_train(jtrainstr));
   JDoc jd;
@@ -2091,6 +2091,8 @@ TEST(torchapi, service_train_images_split_regression_2dims_db_false_l1)
 
   ASSERT_TRUE(jd["body"]["measure"]["iteration"] == 200) << "iterations";
   ASSERT_TRUE(jd["body"]["measure"]["l1"].GetDouble() <= 15.0) << "l1";
+  ASSERT_TRUE(jd["body"]["measure"]["percent"].GetDouble() <= 200.0)
+      << "percent";
 
   std::unordered_set<std::string> lfiles;
   fileops::list_directory(resnet50_train_repo, true, false, false, lfiles);