added support for conformal set outputs to prediction container
Cortys committed Mar 14, 2022
1 parent 8a77445 commit 68d58d0
Showing 6 changed files with 82 additions and 33 deletions.
2 changes: 1 addition & 1 deletion Dockerfile.pred
@@ -7,7 +7,7 @@ RUN go get -d -v
RUN go build

# TF model container:
FROM tensorflow/tensorflow:2.5.0
FROM tensorflow/tensorflow:2.8.0

RUN apt-get update &&\
apt-get install -y git golang graphviz graphviz-dev &&\
65 changes: 38 additions & 27 deletions README.md
@@ -76,8 +76,11 @@ It takes the following arguments:
- **Cmds:**
- `show --format [json (default)|dot]`:
Outputs the CFG for the selected usage as JSON or in Graphviz dot format.
- `predict --model [model name] [--limit-id [limit id, default=v127_d127_f127_p127]] [--logits]`:
- `predict --model [model name] [--conformal-alpha [alpha, default=0]] [--limit-id [id, default=v127_d127_f127_p127]] [--logits]`:
Outputs the prediction of the selected model as JSON.
The `conformal-alpha` parameter controls whether conformal prediction sets are included in the output.
By default (`alpha = 0`), no conformal sets are produced.
To obtain conformal sets, an error threshold `0 < alpha < 1` has to be provided; the smaller the alpha value, the larger the prediction sets will be (`0.1` is a good default choice).
The `limit_id` specifies how the data associated with individual CFG nodes should be mapped to binary dimensions.
If the `--logits` flag is set, prediction logits will be returned instead of normalized probabilities.
Note that only combinations of models, limit ids and convert modes that were exported when building the prediction container will work.
@@ -103,35 +106,43 @@ An unsafe usage can be classified as follows:
./predict.sh \
--project elastic/beats --package go.elastic.co/apm --file config.go \
--line 413 --snippet "unsafe.Pointer(oldConfig)," \
predict -m WL2GNN 2>/dev/null \
predict -m WL2GNN -a 0.1 2>/dev/null \
| jq
```
Prediction output for both labels (exact probabilities might vary):
```json
[{
"cast-basic": 3.8024680293347046e-08,
"cast-bytes": 2.10747663764721e-09,
"cast-header": 4.1693176910939655e-08,
"cast-pointer": 2.197234172385265e-09,
"cast-struct": 5.247088097348751e-07,
"definition": 1.1479721706564305e-07,
"delegate": 0.9999991655349731,
"memory-access": 8.367572235101761e-08,
"pointer-arithmetic": 3.887335964236627e-08,
"syscall": 2.412203492507814e-10,
"unused": 1.5881319870292288e-10
}, {
"atomic": 0.9999879598617554,
"efficiency": 1.967955931547749e-08,
"ffi": 5.721917204937199e-06,
"generics": 1.3880583082936937e-06,
"hide-escape": 4.661455932364333e-06,
"layout": 1.2701431728601165e-07,
"no-gc": 1.7310886057941843e-09,
"reflect": 7.049543726544982e-10,
"serialization": 9.552078239494222e-08,
"types": 3.732756326257913e-09,
"unused": 1.0282862339394683e-09
}]
{
"probabilities": [{
"cast-basic": 0.000799796252977103,
"cast-bytes": 0.00023943622363731265,
"cast-header": 0.0008311063284054399,
"cast-pointer": 0.00024363627017010003,
"cast-struct": 0.0023890091106295586,
"definition": 0.0012677970807999372,
"delegate": 0.9921323657035828,
"memory-access": 0.001111199613660574,
"pointer-arithmetic": 0.0008071911288425326,
"syscall": 9.69868924585171e-05,
"unused": 8.147588232532144e-05
}, {
"atomic": 0.9911662936210632,
"efficiency": 0.00020463968394324183,
"ffi": 0.003083886345848441,
"generics": 0.0015664942329749465,
"hide-escape": 0.0027959353756159544,
"layout": 0.0004991954774595797,
"no-gc": 6.399328412953764e-05,
"reflect": 4.1643997974460945e-05,
"serialization": 0.0004356006102170795,
"types": 9.241054794983938e-05,
"unused": 4.988365981262177e-05
}],
"conformal_sets": [
["delegate"],
["atomic"]
]
}
```
[jq](https://stedolan.github.io/jq/) is of course optional here. Also, `2>/dev/null` might not be the right choice for production use 🙂.
Note that the output format differs if `--conformal-alpha 0` (`-a 0`) is used;
in this case, no conformal sets are produced, and the resulting JSON only contains the two probability maps (i.e. the value at `probabilities` in the above example output).
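For reference, here is a minimal sketch (not part of this commit) of how a downstream consumer might handle both output shapes; the helper name `parse_prediction` is made up for illustration:
```python
import json

def parse_prediction(raw_stdout):
    """Parse the JSON printed by the predict command (see the example above)."""
    out = json.loads(raw_stdout)
    if isinstance(out, list):
        # --conformal-alpha 0: just the two probability maps.
        label_probs, group_probs = out
        conformal_sets = None
    else:
        # alpha > 0: probability maps plus one conformal set per label dimension.
        label_probs, group_probs = out["probabilities"]
        conformal_sets = out["conformal_sets"]
    return label_probs, group_probs, conformal_sets
```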
1 change: 1 addition & 0 deletions requirements.pred.txt
@@ -1,5 +1,6 @@
networkx==2.5.1
pydot==1.4.2
pyyaml==6
pygraphviz==1.6
funcy==1.16
scikit-learn==0.24.2
9 changes: 8 additions & 1 deletion src/usgoc/evaluation/evaluate.py
@@ -440,6 +440,7 @@ def export_best(

model_name = hypermodel_builder.name
kwargs["return_model_paths"] = True
kwargs["return_calibration_configs"] = True
kwargs["return_metrics"] = True
kwargs["return_dims"] = True
kwargs["dry"] = True
@@ -450,13 +451,15 @@
target_dir = f"{models_dir}/{convert_mode}_{limit_id}/{model_name}"
best_fold_crit = None
best_fold_path = None
best_fold_calib = None
best_fold_dims = None
for fold in folds:
fold_crits = []
best_repeat_crit = None
best_repeat_path = None
best_repeat_calib = None
best_repeat_dims = None
for model_path, metrics, dims in fold:
for model_path, calib_configs, metrics, dims in fold:
if isinstance(criterion, str):
crit = metrics[criterion]
else:
@@ -465,12 +468,14 @@
if best_repeat_crit is None or crit > best_repeat_crit:
best_repeat_crit = crit
best_repeat_path = model_path
best_repeat_calib = calib_configs
best_repeat_dims = dims
# Use mean - 1std as fold performance criterion:
fold_crit = np.mean(fold_crits) - np.std(fold_crits)
if best_fold_crit is None or fold_crit > best_fold_crit:
best_fold_crit = fold_crit
best_fold_path = best_repeat_path
best_fold_calib = best_repeat_calib
best_fold_dims = best_repeat_dims

model_path = best_fold_path[len("file://"):]
@@ -480,6 +485,8 @@
best_fold_dims["in_enc"] = hypermodel_builder.in_enc
utils.cache_write(
f"{target_dir}/dims.json", best_fold_dims, "json")
utils.cache_write(
f"{target_dir}/conformal_calibration_configs.yml", best_fold_calib, "yaml")

def aggregate_confusion_matrices(cms, normalize=True):
cms = fy.lcat(cms)
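The exact structure of `best_fold_calib` (and thus of the exported `conformal_calibration_configs.yml`) is not visible in this diff; judging from how `run_prediction.py` consumes it below, it is presumably a mapping from alpha values to per-label temperature and quantile parameters. The numbers here are purely illustrative:
```python
# Illustrative shape only: run_prediction.py indexes the loaded YAML by alpha
# and reads the temperatures t1/t2 and the conformal quantiles qhat1/qhat2.
best_fold_calib = {
    0.05: {"t1": 1.2, "t2": 1.1, "qhat1": 0.97, "qhat2": 0.95},
    0.1: {"t1": 1.2, "t2": 1.1, "qhat1": 0.93, "qhat2": 0.90},
}
```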
33 changes: 29 additions & 4 deletions src/usgoc/run_prediction.py
@@ -1,5 +1,6 @@
import os
import json
import yaml
import click
import subprocess
import funcy as fy
@@ -9,6 +10,7 @@
import usgoc.utils as utils
import usgoc.datasets.unsafe_go as dataset
import usgoc.metrics.multi as mm
import usgoc.postprocessing.conformal as conf

PROJECTS_DIR = "/projects"
EXPORT_DIR = f"{utils.PROJECT_ROOT}/exported_models"
@@ -108,10 +110,14 @@ def show(obj, format):
"--limit-id", "-l",
type=click.STRING,
default="v127_d127_f127_p127")
@click.option(
"--conformal-alpha", "-a",
type=click.FloatRange(0.0, 1.0),
default=0)
@click.option("--logits", is_flag=True, default=False)
@click.pass_obj
def predict(
obj, model, limit_id, logits=False):
obj, model, limit_id, conformal_alpha=0.1, logits=False):
with utils.cache_env(use_cache=False):
convert_mode = obj["convert_mode"]
cfg = get_cfg_json(**obj)
@@ -122,16 +128,25 @@
assert os.path.isdir(dir), "Requested model does not exist."
with open(f"{EXPORT_DIR}/target_label_dims.json", "r") as f:
labels1, labels2 = json.load(f)
labels1_keys = labels1.keys()
labels2_keys = labels2.keys()
labels1_keys = list(labels1.keys())
labels2_keys = list(labels2.keys())
with open(f"{dir}/dims.json", "r") as f:
dims = json.load(f)
if conformal_alpha == 0.0:
calib_config = dict(t1=1, t2=1)
else:
with open(f"{dir}/conformal_calibration_configs.yml", "r") as f:
calib_configs = yaml.unsafe_load(f)
assert conformal_alpha in calib_configs, f"Alpha must be from {calib_configs.keys()}."
calib_config = calib_configs[conformal_alpha]
in_enc = dims["in_enc"]
encoder = dataset.dataset_encoders[in_enc]
ds = encoder(graphs, dims)
model = tf.keras.models.load_model(f"{dir}/model", custom_objects=dict(
SparseMultiAccuracy=mm.SparseMultiAccuracy))
l1_pred, l2_pred = model.predict(ds)
l1_pred /= calib_config["t1"]
l2_pred /= calib_config["t2"]
if logits:
prob1 = l1_pred[0]
prob2 = l2_pred[0]
@@ -140,7 +155,17 @@
prob2 = tf.nn.softmax(l2_pred, -1).numpy()[0]
l1_dict = fy.zipdict(labels1_keys, prob1)
l2_dict = fy.zipdict(labels2_keys, prob2)
print(json.dumps([l1_dict, l2_dict], cls=utils.NumpyEncoder))

if conformal_alpha == 0.0:
print(json.dumps([l1_dict, l2_dict], cls=utils.NumpyEncoder))
else:
set1_idx = conf.adaptive_sets(l1_pred, calib_config["qhat1"])[0]
set2_idx = conf.adaptive_sets(l2_pred, calib_config["qhat2"])[0]
set1 = [labels1_keys[i] for i in set1_idx]
set2 = [labels2_keys[i] for i in set2_idx]
print(json.dumps(dict(
probabilities=[l1_dict, l2_dict],
conformal_sets=[set1, set2]), cls=utils.NumpyEncoder))


if __name__ == "__main__":
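Neither `usgoc.postprocessing.conformal.adaptive_sets` nor the meaning of the calibration values is shown in this excerpt. The sketch below illustrates the two standard techniques the prediction code appears to rely on, temperature scaling of logits and adaptive prediction sets; the function bodies are assumptions for illustration, not the repository's implementation:
```python
import numpy as np

def temperature_scale(logits, t):
    # Divide logits by a calibrated temperature t before the softmax,
    # mirroring `l1_pred /= calib_config["t1"]` above; t > 1 softens
    # over-confident probabilities, t = 1 leaves them unchanged.
    z = (logits / t) - np.max(logits / t, axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

def adaptive_set(probs, qhat):
    # Adaptive prediction set: add labels in order of decreasing probability
    # until their cumulative mass reaches the calibrated quantile qhat.
    # Smaller alpha -> larger qhat -> larger (more conservative) sets.
    order = np.argsort(-probs)
    cutoff = int(np.searchsorted(np.cumsum(probs[order]), qhat)) + 1
    return order[:cutoff].tolist()

# Hypothetical three-label example:
probs = temperature_scale(np.array([2.0, 0.5, -1.0]), t=1.3)
print(adaptive_set(probs, qhat=0.9))  # -> [0, 1]
```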
5 changes: 5 additions & 0 deletions src/usgoc/utils.py
@@ -1,5 +1,6 @@
import os
import json
import yaml
import pickle
import inspect
import numbers
@@ -299,6 +300,10 @@ class CacheFormat:
fy.partial(json.load, cls=NumpyDecoder),
fy.partial(json.dump, indent="\t", cls=NumpyEncoder),
type="text"),
yaml=cache_format(
fy.partial(yaml.unsafe_load),
fy.partial(yaml.dump),
type="text"),
plot=cache_format(
lambda _: None,
lambda fig, file: fig.savefig(
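A quick usage sketch of the new `yaml` cache format added above; `export_best` uses the same call to store the calibration configs. The path and dict here are made up:
```python
import usgoc.utils as utils

# Write a dict as YAML through the cache helpers; the "yaml" format key
# selects the loader/dumper pair registered above.
utils.cache_write("/tmp/example_configs.yml", {0.1: {"t1": 1.0, "qhat1": 0.9}}, "yaml")
```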
