From 3e0a3d2582bb8f4045e68eff256f5c7bbde11a77 Mon Sep 17 00:00:00 2001 From: Maxime Gasse Date: Fri, 1 Nov 2024 16:08:28 -0400 Subject: [PATCH] ci tests --- tests/experiments/test_benchmark.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/experiments/test_benchmark.py b/tests/experiments/test_benchmark.py index d5fab6c..5a3c3ec 100644 --- a/tests/experiments/test_benchmark.py +++ b/tests/experiments/test_benchmark.py @@ -91,7 +91,7 @@ def test_benchmark_subset(): assert dict_1 == dict_2 -def test_miniwob_benchmark_reset(): +def test_prepare_backend_miniwob(): MINIWOB_URL = os.environ["MINIWOB_URL"] try: benchmark: Benchmark = DEFAULT_BENCHMARKS["miniwob"]() @@ -109,13 +109,13 @@ def test_miniwob_benchmark_reset(): os.environ["MINIWOB_URL"] = MINIWOB_URL -def test_assistantbench_benchmark_reset(): +def test_prepare_backend_assistantbench(): benchmark: Benchmark = DEFAULT_BENCHMARKS["assistantbench"]() benchmark.prepare_backends() @pytest.mark.skip -def test_webarena_benchmark_reset(): +def test_prepare_backend_webarena(): WA_FULL_RESET = os.environ["WA_FULL_RESET"] try: benchmark: Benchmark = DEFAULT_BENCHMARKS["webarena"]() @@ -134,7 +134,7 @@ def test_webarena_benchmark_reset(): @pytest.mark.skip -def test_visualwebarena_benchmark_reset(): +def test_prepare_backend_visualwebarena(): VWA_FULL_RESET = os.environ["VWA_FULL_RESET"] try: benchmark: Benchmark = DEFAULT_BENCHMARKS["visualwebarena"]() @@ -152,6 +152,22 @@ def test_visualwebarena_benchmark_reset(): os.environ["VWA_FULL_RESET"] = VWA_FULL_RESET +@pytest.mark.skip +def test_prepare_backend_weblinx(): + BROWSERGYM_WEBLINX_CACHE_DIR = os.environ["BROWSERGYM_WEBLINX_CACHE_DIR"] + try: + benchmark: Benchmark = DEFAULT_BENCHMARKS["weblinx"]() + + benchmark.prepare_backends() + + del os.environ["BROWSERGYM_WEBLINX_CACHE_DIR"] + with pytest.raises(Exception): + benchmark.prepare_backends() + + finally: + os.environ["BROWSERGYM_WEBLINX_CACHE_DIR"] = BROWSERGYM_WEBLINX_CACHE_DIR + + def test_run_mock_benchmark(): benchmark = Benchmark( name="miniwob_click_test",