Add test matrix (#1308)

* add simple test matrix * add expected metrics to test matrix * update torch to 1.8.1 for test matrix * add overnight flag Co-authored-by: Jesse Swanson <js11133@nyu.edu>
nyu-mll · Apr 23, 2021 · 4d0f6a9 · 4d0f6a9
1 parent ee65662
commit 4d0f6a9
Showing 4 changed files with 45 additions and 5 deletions.
diff --git a/conftest.py b/conftest.py
@@ -8,6 +8,8 @@
 def pytest_addoption(parser):
     parser.addoption("--runslow", action="store_true", default=False, help="run slow tests")
     parser.addoption("--rungpu", action="store_true", default=False, help="run gpu tests")
+    # Opt-in flag: tests carrying the "overnight" marker are skipped unless this is passed.
+    parser.addoption("--runovernight", action="store_true",
+                     default=False, help="run overnight tests")
 
 
 def pytest_configure(config):
@@ -26,3 +28,8 @@ def pytest_collection_modifyitems(config, items):
         for item in items:
             if "gpu" in item.keywords:
                 item.add_marker(skip_gpu)
+    if not config.getoption("--runovernight"):
+        skip_overnight = pytest.mark.skip(reason="need --runovernight option to run")
+        for item in items:
+            if "overnight" in item.keywords:
+                item.add_marker(skip_overnight)
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,3 @@
 -r requirements-no-torch.txt
-torch>=1.5.0
-torchvision==0.6.0
+torch>=1.8.1
+torchvision==0.9.1
diff --git a/setup.py b/setup.py
@@ -73,10 +73,10 @@
         "scipy == 1.4.1",
         "sentencepiece == 0.1.86",
         "tokenizers == 0.8.1.rc2",
-        "torch >= 1.5.0",
+        "torch >= 1.8.1",
         "tqdm == 4.46.0",
         "transformers == 3.1.0",
-        "torchvision == 0.6.0",
+        "torchvision == 0.9.1",
     ],
     extras_require=extras,
     python_requires=">=3.6.0",

diff --git a/tests/proj/simple/test_runscript.py b/tests/proj/simple/test_runscript.py
@@ -1,14 +1,18 @@
 import os
 import pytest
 import torch
+import math
 
 import jiant.utils.python.io as py_io
 from jiant.proj.simple import runscript as run
 import jiant.scripts.download_data.runscript as downloader
 import jiant.utils.torch_utils as torch_utils
 
+# Aggregated validation metric expected for each (model, task) pair; looked up as
+# EXPECTED_AGG_VAL_METRICS[model_type][task_name] by the overnight test matrix.
+EXPECTED_AGG_VAL_METRICS = {
+    "bert-base-cased": {
+        "rte": 0.5740072202166066,
+        "commonsenseqa": 0.4258804258804259,
+        "squad_v1": 29.071789929086883,
+    },
+    "roberta-base": {
+        "rte": 0.49458483754512633,
+        "commonsenseqa": 0.23013923013923013,
+        "squad_v1": 48.222444172918955,
+    },
+    "xlm-roberta-base": {
+        "rte": 0.4729241877256318,
+        "commonsenseqa": 0.22686322686322685,
+        "squad_v1": 10.30104037978786,
+    },
+}
+
 
-@pytest.mark.gpu
 @pytest.mark.parametrize("task_name", ["copa"])
 @pytest.mark.parametrize("model_type", ["bert-base-cased"])
 def test_simple_runscript(tmpdir, task_name, model_type):
@@ -34,6 +38,35 @@ def test_simple_runscript(tmpdir, task_name, model_type):
     assert val_metrics["aggregated"] > 0
 
 
+@pytest.mark.overnight
+@pytest.mark.parametrize(
+    ("task_name", "train_examples_cap"),
+    [("rte", 1024), ("commonsenseqa", 1024), ("squad_v1", 2048)],
+)
+@pytest.mark.parametrize("model_type", ["bert-base-cased", "roberta-base", "xlm-roberta-base"])
+def test_simple_runscript_overnight(tmpdir, task_name, train_examples_cap, model_type):
+    """Overnight regression matrix: run the simple runscript and pin the aggregated metric.
+
+    Deliberately named differently from ``test_simple_runscript``: a second function
+    with that name would rebind the module attribute, so pytest would only collect
+    this one and the quick smoke test defined earlier would silently stop running.
+
+    Args:
+        tmpdir: pytest fixture; ``data`` and ``exp`` sub-directories are created in it.
+        task_name: task passed to the downloader and to the run configuration.
+        train_examples_cap: forwarded to ``RunConfiguration(train_examples_cap=...)``.
+        model_type: forwarded as ``hf_pretrained_model_name_or_path`` and used as the
+            first key into ``EXPECTED_AGG_VAL_METRICS``.
+    """
+    run_name = f"{test_simple_runscript_overnight.__name__}_{task_name}_{model_type}"
+    data_dir = str(tmpdir.mkdir("data"))
+    exp_dir = str(tmpdir.mkdir("exp"))
+
+    # The determinism switch is process-wide. Remember the previous value and restore
+    # it in ``finally`` so a failing run cannot leak the setting into later tests.
+    was_deterministic = torch.are_deterministic_algorithms_enabled()
+    torch.use_deterministic_algorithms(True)
+    try:
+        downloader.download_data([task_name], data_dir)
+        args = run.RunConfiguration(
+            run_name=run_name,
+            exp_dir=exp_dir,
+            data_dir=data_dir,
+            hf_pretrained_model_name_or_path=model_type,
+            tasks=task_name,
+            train_examples_cap=train_examples_cap,
+            train_batch_size=32,
+            seed=42,
+            no_cuda=False,
+        )
+        run.run_simple(args)
+
+        val_metrics = py_io.read_json(
+            os.path.join(exp_dir, "runs", run_name, "val_metrics.json")
+        )
+        expected = EXPECTED_AGG_VAL_METRICS[model_type][task_name]
+        # Default math.isclose tolerance (rel_tol=1e-09): effectively an exact match.
+        assert math.isclose(val_metrics["aggregated"], expected)
+    finally:
+        torch.use_deterministic_algorithms(was_deterministic)
+
+
 @pytest.mark.gpu
 @pytest.mark.parametrize("task_name", ["copa"])
 @pytest.mark.parametrize("model_type", ["roberta-large"])