Skip to content

Commit

Permalink
feat: add percent error measure display
Browse files Browse the repository at this point in the history
  • Loading branch information
fantes authored and mergify[bot] committed Jan 12, 2023
1 parent a006615 commit 1cc15d6
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 4 deletions.
2 changes: 1 addition & 1 deletion docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ No parameters
Parameter | Type | Optional | Default | Description
--------- | ---- | -------- | ------- | -----------
best | int | yes | 1 | Number of top predictions returned by data URI (supervised)
measure | array | yes | empty | Output measures requested, from `acc`: accuracy, `acc-k`: top-k accuracy, replace k with number (e.g. `acc-5`), `f1`: f1, precision and recall, `mcll`: multi-class log loss, `auc`: area under the curve, `cmdiag`: diagonal of confusion matrix (requires `f1`), `cmfull`: full confusion matrix (requires `f1`), `mcc`: Matthews correlation coefficient, `eucll`: euclidean distance (e.g. for regression tasks),`l1`: l1 distance (e.g. for regression tasks), `kl`: KL_divergence, `js`: JS divergence, `was`: Wasserstein, `ks`: Kolmogorov Smirnov, `dc`: distance correlation, `r2`: R2, `deltas`: delta scores, 'raw': ouput raw results, in case of predict call, this requires a special deploy.prototxt that is a test network (to have ground truth)
measure | array | yes | empty | Output measures requested, from `acc`: accuracy, `acc-k`: top-k accuracy, replace k with number (e.g. `acc-5`), `f1`: f1, precision and recall, `mcll`: multi-class log loss, `auc`: area under the curve, `cmdiag`: diagonal of confusion matrix (requires `f1`), `cmfull`: full confusion matrix (requires `f1`), `mcc`: Matthews correlation coefficient, `eucll`: euclidean distance (e.g. for regression tasks), `l1`: l1 distance (e.g. for regression tasks), `percent`: mean relative error in percent, `kl`: KL divergence, `js`: JS divergence, `was`: Wasserstein, `ks`: Kolmogorov Smirnov, `dc`: distance correlation, `r2`: R2, `deltas`: delta scores, `raw`: output raw results, in case of predict call, this requires a special deploy.prototxt that is a test network (to have ground truth)
target_repository | string | yes | empty | target directory to which to copy the best model files once training has completed

#### Machine learning libraries
Expand Down
68 changes: 67 additions & 1 deletion src/supervisedoutputconnector.h
Original file line number Diff line number Diff line change
Expand Up @@ -782,7 +782,10 @@ namespace dd
find_presence_and_thres("eucll", measures, beucll, beucll_thres);
bool bl1 = (std::find(measures.begin(), measures.end(), "l1")
!= measures.end());
bool compute_all_distl = (beucll || bl1) && !autoencoder;
bool bpercent
= (std::find(measures.begin(), measures.end(), "percent")
!= measures.end());
bool compute_all_distl = (beucll || bl1 || bpercent) && !autoencoder;

bool bmcc = (std::find(measures.begin(), measures.end(), "mcc")
!= measures.end());
Expand Down Expand Up @@ -1111,6 +1114,16 @@ namespace dd
for (unsigned int i = 0; i < all_ml1.size(); ++i)
meas_out.add("l1_" + std::to_string(i), all_ml1[i]);
}
if (bpercent)
{
double mpercent;
std::vector<double> all_mpercent;
std::tie(mpercent, all_mpercent)
= percentl(ad_res, compute_all_distl);
meas_out.add("percent", mpercent);
for (unsigned int i = 0; i < all_mpercent.size(); ++i)
meas_out.add("percent_" + std::to_string(i), all_mpercent[i]);
}
if (bmcc)
{
double mmcc = mcc(ad_res);
Expand Down Expand Up @@ -2671,6 +2684,59 @@ namespace dd
return std::make_tuple(eucl / static_cast<double>(batch_size), all_eucl);
}

// measure: mean relative error, in percent
//
// For every sample i in [0, batch_size) read from ad under the key
// std::to_string(i), compares the "pred" values against the "target"
// values and accumulates |pred - target| / (|target| + 1E-9).
//
// ad: batch results; read keys are "batch_size", the optional
//     "ignore_label", and one object per sample holding "pred"
//     (vector of doubles) and "target" (vector of doubles when the
//     prediction has more than one value, a single double otherwise).
// compute_all_distl: when true, also fill the per-dimension errors.
//
// returns a tuple made of
//  - the relative error averaged over the prediction dimensions and over
//    batch_size, multiplied by 100;
//  - the per-dimension relative errors averaged over batch_size and
//    multiplied by 100 (left empty when compute_all_distl is false).
//
// Targets equal to the ignore label are skipped, but the denominators
// (prediction size, batch_size) are not adjusted for skipped entries.
static std::tuple<double, std::vector<double>>
percentl(const APIData &ad, bool compute_all_distl)
{
  double percent = 0.0;
  // the per-dimension accumulator is sized from the first sample
  unsigned int psize = ad.getobj(std::to_string(0))
                           .get("pred")
                           .get<std::vector<double>>()
                           .size();
  std::vector<double> all_percent;
  if (compute_all_distl)
    all_percent.resize(psize, 0.0);
  int batch_size = ad.get("batch_size").get<int>();
  bool has_ignore = ad.has("ignore_label");

  // placeholder value, only consulted when has_ignore is true
  int ignore_label = -10000;
  if (has_ignore)
    ignore_label = ad.get("ignore_label").get<int>();

  for (int i = 0; i < batch_size; i++)
    {
      APIData bad = ad.getobj(std::to_string(i));
      std::vector<double> predictions
          = bad.get("pred").get<std::vector<double>>();
      std::vector<double> target;
      // a one-dimensional target is stored as a scalar, not as a vector
      if (predictions.size() > 1)
        target = bad.get("target").get<std::vector<double>>();
      else
        target.push_back(bad.get("target").get<double>());
      const double reg_dim = static_cast<double>(predictions.size());
      for (size_t j = 0; j < target.size(); j++)
        {
          const double t = target.at(j);
          // Skip only the targets that are equal to the ignore label: the
          // comparison is done on the untruncated value and on the
          // absolute difference, otherwise every target below the label
          // would be dropped as well.
          if (has_ignore
              && fabs(t - static_cast<double>(ignore_label)) < 1E-9)
            continue;
          // the 1E-9 term keeps the denominator non-zero when the target
          // is 0
          const double reldiff
              = fabs(predictions.at(j) - t) / (fabs(t) + 1E-9);
          percent += reldiff / reg_dim;
          if (compute_all_distl)
            all_percent[j] += reldiff;
        }
    }

  // turn the per-dimension sums into means expressed in percent
  if (compute_all_distl)
    for (unsigned int i = 0; i < all_percent.size(); ++i)
      {
        all_percent[i] /= static_cast<double>(batch_size);
        all_percent[i] *= 100.0;
      }

  return std::make_tuple(percent * 100.0 / static_cast<double>(batch_size),
                         all_percent);
}

// measure: gini coefficient
static double comp_gini(const std::vector<double> &a,
const std::vector<double> &p)
Expand Down
6 changes: 4 additions & 2 deletions tests/ut-torchapi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2049,7 +2049,7 @@ TEST(torchapi, service_train_images_split_regression_2dims_db_false)
fileops::remove_dir(resnet50_train_repo + "test_0.lmdb");
}

TEST(torchapi, service_train_images_split_regression_2dims_db_false_l1)
TEST(torchapi, service_train_images_split_regression_2dims_db_false_l1_percent)
{
setenv("CUBLAS_WORKSPACE_CONFIG", ":4096:8", true);
torch::manual_seed(torch_seed);
Expand Down Expand Up @@ -2080,7 +2080,7 @@ TEST(torchapi, service_train_images_split_regression_2dims_db_false_l1)
"\"resume\":false},"
"\"input\":{\"seed\":12345,\"db\":false,\"shuffle\":true,\"test_"
"split\":0.1},"
"\"output\":{\"measure\":[\"l1\"]}},\"data\":[\""
"\"output\":{\"measure\":[\"l1\",\"percent\"]}},\"data\":[\""
+ resnet50_train_data_reg2 + "\"]}";
joutstr = japi.jrender(japi.service_train(jtrainstr));
JDoc jd;
Expand All @@ -2091,6 +2091,8 @@ TEST(torchapi, service_train_images_split_regression_2dims_db_false_l1)

ASSERT_TRUE(jd["body"]["measure"]["iteration"] == 200) << "iterations";
ASSERT_TRUE(jd["body"]["measure"]["l1"].GetDouble() <= 15.0) << "l1";
ASSERT_TRUE(jd["body"]["measure"]["percent"].GetDouble() <= 200.0)
<< "percent";

std::unordered_set<std::string> lfiles;
fileops::list_directory(resnet50_train_repo, true, false, false, lfiles);
Expand Down

0 comments on commit 1cc15d6

Please sign in to comment.