From 4d0f6a95ed099043a7c9d0ecbb2609ae18deaec2 Mon Sep 17 00:00:00 2001 From: jeswan <57466294+jeswan@users.noreply.github.com> Date: Fri, 23 Apr 2021 12:52:25 -0400 Subject: [PATCH] Add test matrix (#1308) * add simple test matrix * add expected metrics to test matrix * update torch to 1.8.1 for test matrix * add overnight flag Co-authored-by: Jesse Swanson --- conftest.py | 7 ++++++ requirements.txt | 4 ++-- setup.py | 4 ++-- tests/proj/simple/test_runscript.py | 35 ++++++++++++++++++++++++++++- 4 files changed, 45 insertions(+), 5 deletions(-) diff --git a/conftest.py b/conftest.py index 094ad4576..51b07d922 100644 --- a/conftest.py +++ b/conftest.py @@ -8,6 +8,8 @@ def pytest_addoption(parser): parser.addoption("--runslow", action="store_true", default=False, help="run slow tests") parser.addoption("--rungpu", action="store_true", default=False, help="run gpu tests") + parser.addoption("--runovernight", action="store_true", + default=False, help="run overnight tests") def pytest_configure(config): @@ -26,3 +28,8 @@ def pytest_collection_modifyitems(config, items): for item in items: if "gpu" in item.keywords: item.add_marker(skip_gpu) + if not config.getoption("--runovernight"): + skip_overnight = pytest.mark.skip(reason="need --runovernight option to run") + for item in items: + if "overnight" in item.keywords: + item.add_marker(skip_overnight) diff --git a/requirements.txt b/requirements.txt index 99a76a076..b1bcf36ee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -r requirements-no-torch.txt -torch>=1.5.0 -torchvision==0.6.0 +torch>=1.8.1 +torchvision==0.9.1 diff --git a/setup.py b/setup.py index c151cea58..1e426a3ee 100644 --- a/setup.py +++ b/setup.py @@ -73,10 +73,10 @@ "scipy == 1.4.1", "sentencepiece == 0.1.86", "tokenizers == 0.8.1.rc2", - "torch >= 1.5.0", + "torch >= 1.8.1", "tqdm == 4.46.0", "transformers == 3.1.0", - "torchvision == 0.6.0", + "torchvision == 0.9.1", ], extras_require=extras, python_requires=">=3.6.0", diff 
--git a/tests/proj/simple/test_runscript.py b/tests/proj/simple/test_runscript.py index a79a975b3..92ee288db 100644 --- a/tests/proj/simple/test_runscript.py +++ b/tests/proj/simple/test_runscript.py @@ -1,14 +1,18 @@ import os import pytest import torch +import math import jiant.utils.python.io as py_io from jiant.proj.simple import runscript as run import jiant.scripts.download_data.runscript as downloader import jiant.utils.torch_utils as torch_utils +EXPECTED_AGG_VAL_METRICS = {"bert-base-cased": {"rte": 0.5740072202166066, "commonsenseqa": 0.4258804258804259, "squad_v1": 29.071789929086883}, + "roberta-base": {"rte": 0.49458483754512633, "commonsenseqa": 0.23013923013923013, "squad_v1": 48.222444172918955}, + "xlm-roberta-base": {"rte": 0.4729241877256318, "commonsenseqa": 0.22686322686322685, "squad_v1": 10.30104037978786}} + -@pytest.mark.gpu @pytest.mark.parametrize("task_name", ["copa"]) @pytest.mark.parametrize("model_type", ["bert-base-cased"]) def test_simple_runscript(tmpdir, task_name, model_type): @@ -34,6 +38,35 @@ def test_simple_runscript(tmpdir, task_name, model_type): assert val_metrics["aggregated"] > 0 +@pytest.mark.overnight +@pytest.mark.parametrize(("task_name", "train_examples_cap"), [("rte", 1024), ("commonsenseqa", 1024), ("squad_v1", 2048)]) +@pytest.mark.parametrize("model_type", ["bert-base-cased", "roberta-base", "xlm-roberta-base"]) +def test_simple_runscript_overnight(tmpdir, task_name, train_examples_cap, model_type): + RUN_NAME = f"{test_simple_runscript_overnight.__name__}_{task_name}_{model_type}" + data_dir = str(tmpdir.mkdir("data")) + exp_dir = str(tmpdir.mkdir("exp")) + + torch.use_deterministic_algorithms(True) + + downloader.download_data([task_name], data_dir) + args = run.RunConfiguration( + run_name=RUN_NAME, + exp_dir=exp_dir, + data_dir=data_dir, + hf_pretrained_model_name_or_path=model_type, + tasks=task_name, + train_examples_cap=train_examples_cap, + train_batch_size=32, + seed=42, + no_cuda=False, + ) + run.run_simple(args) + + 
 val_metrics = py_io.read_json(os.path.join(exp_dir, "runs", RUN_NAME, "val_metrics.json")) + assert math.isclose(val_metrics["aggregated"], EXPECTED_AGG_VAL_METRICS[model_type][task_name]) + torch.use_deterministic_algorithms(False) + + @pytest.mark.gpu @pytest.mark.parametrize("task_name", ["copa"]) @pytest.mark.parametrize("model_type", ["roberta-large"])