From 2bd2e1617aa50729c99fa7a8ec90024e3f48b6e4 Mon Sep 17 00:00:00 2001 From: neeratyoy Date: Thu, 16 Nov 2023 20:54:49 +0100 Subject: [PATCH] Refactoring unit test + stochastic failure debug --- src/neps/status/status.py | 14 ++++---- tests/test_metahyper/test_locking.py | 48 +++++++++++++++------------- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/src/neps/status/status.py b/src/neps/status/status.py index efde8453..90832427 100644 --- a/src/neps/status/status.py +++ b/src/neps/status/status.py @@ -241,9 +241,9 @@ def _get_dataframes_from_summary( } df_run_data = pd.DataFrame.from_dict( - additional_data, orient="index", columns=["Value"] + additional_data, orient="index", columns=["value"] ) - df_run_data.index.name = "Description" + df_run_data.index.name = "description" return df_config_data, df_run_data @@ -272,9 +272,9 @@ def _save_data_to_csv( while not should_break: if locker.acquire_lock(): try: - pending_configs = run_data_df.loc["num_pending_configs", "Value"] + pending_configs = run_data_df.loc["num_pending_configs", "value"] pending_configs_with_worker = run_data_df.loc[ - "num_pending_configs_with_worker", "Value" + "num_pending_configs_with_worker", "value" ] # Represents the last worker if int(pending_configs) == 0 and int(pending_configs_with_worker) == 0: @@ -286,13 +286,13 @@ def _save_data_to_csv( if run_data_file_path.exists(): prev_run_data_df = pd.read_csv(run_data_file_path) - prev_run_data_df.set_index("Description", inplace=True) + prev_run_data_df.set_index("description", inplace=True) num_evaluated_configs_csv = prev_run_data_df.loc[ - "num_evaluated_configs", "Value" + "num_evaluated_configs", "value" ] num_evaluated_configs_run = run_data_df.loc[ - run_data_df.index == "num_evaluated_configs", "Value" + run_data_df.index == "num_evaluated_configs", "value" ] # checks if the current worker has more evaluated configs than the previous if int(num_evaluated_configs_csv) < int(num_evaluated_configs_run): diff --git a/tests/test_metahyper/test_locking.py b/tests/test_metahyper/test_locking.py index 98ab2a7c..2cd09499 100644 --- a/tests/test_metahyper/test_locking.py +++ b/tests/test_metahyper/test_locking.py @@ -8,8 +8,21 @@ from more_itertools import first_true +def launch_example_processes(n_workers: int=3) -> list: + processes = [] + for _ in range(n_workers): + processes.append( + subprocess.Popen( # pylint: disable=consider-using-with + "python -m neps_examples.basic_usage.hyperparameters && python -m neps_examples.basic_usage.analyse", + stdout=subprocess.PIPE, + shell=True, + text=True, + ) + ) + return processes + + @pytest.mark.metahyper -@pytest.mark.summary_csv def test_filelock() -> None: """Test that the filelocking method of parallelization works as intended.""" # Note: Not using tmpdir @@ -26,23 +39,9 @@ def test_filelock() -> None: results_dir = Path("results") / "hyperparameters_example" / "results" try: assert not results_dir.exists() - - # Launch both processes - p1 = subprocess.Popen( # pylint: disable=consider-using-with - "python -m neps_examples.basic_usage.hyperparameters && python -m neps_examples.basic_usage.analyse", - stdout=subprocess.PIPE, - shell=True, - text=True, - ) - p2 = subprocess.Popen( # pylint: disable=consider-using-with - "python -m neps_examples.basic_usage.hyperparameters && python -m neps_examples.basic_usage.analyse", - stdout=subprocess.PIPE, - shell=True, - text=True, - ) - # Wait for them - for p in (p1, p2): + p_list = launch_example_processes(n_workers=2) + for p in p_list: p.wait() out, _ = p.communicate() lines = out.splitlines() @@ -71,19 +70,22 @@ def test_filelock() -> None: raise e finally: if results_dir.exists(): - shutil.rmtree(results_dir) + shutil.rmtree(results_dir.parent) @pytest.mark.summary_csv def test_summary_csv(): # Testing the csv files output. + summary_dir = Path("results") / "hyperparameters_example" / "summary_csv" try: - summary_dir = Path("results") / "hyperparameters_example" / "summary_csv" + if not summary_dir.exists(): + p_list = launch_example_processes(n_workers=2) + for p in p_list: + p.wait() assert summary_dir.is_dir() - run_data_df = pd.read_csv(summary_dir / "run_status.csv") - run_data_df.set_index("Description", inplace=True) - num_evaluated_configs_csv = run_data_df.loc["num_evaluated_configs", "Value"] + run_data_df.set_index("description", inplace=True) + num_evaluated_configs_csv = run_data_df.loc["num_evaluated_configs", "value"] assert num_evaluated_configs_csv == 15 config_data_df = pd.read_csv(summary_dir / "config_data.csv") @@ -93,4 +95,4 @@ def test_summary_csv(): raise e finally: if summary_dir.exists(): - shutil.rmtree(summary_dir) + shutil.rmtree(summary_dir.parent)