From 2bd2e1617aa50729c99fa7a8ec90024e3f48b6e4 Mon Sep 17 00:00:00 2001
From: neeratyoy <neeratyoy@gmail.com>
Date: Thu, 16 Nov 2023 20:54:49 +0100
Subject: [PATCH] Refactoring unit test + stochastic failure debug

---
 src/neps/status/status.py            | 14 ++++----
 tests/test_metahyper/test_locking.py | 48 +++++++++++++++-------------
 2 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/src/neps/status/status.py b/src/neps/status/status.py
index efde8453..90832427 100644
--- a/src/neps/status/status.py
+++ b/src/neps/status/status.py
@@ -241,9 +241,9 @@ def _get_dataframes_from_summary(
     }
 
     df_run_data = pd.DataFrame.from_dict(
-        additional_data, orient="index", columns=["Value"]
+        additional_data, orient="index", columns=["value"]
     )
-    df_run_data.index.name = "Description"
+    df_run_data.index.name = "description"
 
     return df_config_data, df_run_data
 
@@ -272,9 +272,9 @@ def _save_data_to_csv(
     while not should_break:
         if locker.acquire_lock():
             try:
-                pending_configs = run_data_df.loc["num_pending_configs", "Value"]
+                pending_configs = run_data_df.loc["num_pending_configs", "value"]
                 pending_configs_with_worker = run_data_df.loc[
-                    "num_pending_configs_with_worker", "Value"
+                    "num_pending_configs_with_worker", "value"
                 ]
                 # Represents the last worker
                 if int(pending_configs) == 0 and int(pending_configs_with_worker) == 0:
@@ -286,13 +286,13 @@ def _save_data_to_csv(
 
                 if run_data_file_path.exists():
                     prev_run_data_df = pd.read_csv(run_data_file_path)
-                    prev_run_data_df.set_index("Description", inplace=True)
+                    prev_run_data_df.set_index("description", inplace=True)
 
                     num_evaluated_configs_csv = prev_run_data_df.loc[
-                        "num_evaluated_configs", "Value"
+                        "num_evaluated_configs", "value"
                     ]
                     num_evaluated_configs_run = run_data_df.loc[
-                        run_data_df.index == "num_evaluated_configs", "Value"
+                        run_data_df.index == "num_evaluated_configs", "value"
                     ]
                     # checks if the current worker has more evaluated configs than the previous
                     if int(num_evaluated_configs_csv) < int(num_evaluated_configs_run):
diff --git a/tests/test_metahyper/test_locking.py b/tests/test_metahyper/test_locking.py
index 98ab2a7c..2cd09499 100644
--- a/tests/test_metahyper/test_locking.py
+++ b/tests/test_metahyper/test_locking.py
@@ -8,8 +8,21 @@
 from more_itertools import first_true
 
 
+def launch_example_processes(n_workers: int = 3) -> list:
+    """Spawn ``n_workers`` shell processes (example run && analyse) and return their Popen handles."""
+    command = "python -m neps_examples.basic_usage.hyperparameters && python -m neps_examples.basic_usage.analyse"
+    return [
+        subprocess.Popen(  # pylint: disable=consider-using-with
+            command,
+            stdout=subprocess.PIPE,
+            shell=True,
+            text=True,
+        )
+        for _ in range(n_workers)
+    ]
+
+
 @pytest.mark.metahyper
-@pytest.mark.summary_csv
 def test_filelock() -> None:
     """Test that the filelocking method of parallelization works as intended."""
     # Note: Not using tmpdir
@@ -26,23 +39,9 @@ def test_filelock() -> None:
     results_dir = Path("results") / "hyperparameters_example" / "results"
     try:
         assert not results_dir.exists()
-
-        # Launch both processes
-        p1 = subprocess.Popen(  # pylint: disable=consider-using-with
-            "python -m neps_examples.basic_usage.hyperparameters && python -m neps_examples.basic_usage.analyse",
-            stdout=subprocess.PIPE,
-            shell=True,
-            text=True,
-        )
-        p2 = subprocess.Popen(  # pylint: disable=consider-using-with
-            "python -m neps_examples.basic_usage.hyperparameters && python -m neps_examples.basic_usage.analyse",
-            stdout=subprocess.PIPE,
-            shell=True,
-            text=True,
-        )
-
         # Wait for them
-        for p in (p1, p2):
+        p_list = launch_example_processes(n_workers=2)
+        for p in p_list:
             p.wait()
             out, _ = p.communicate()
             lines = out.splitlines()
@@ -71,19 +70,22 @@ def test_filelock() -> None:
         raise e
     finally:
         if results_dir.exists():
-            shutil.rmtree(results_dir)
+            shutil.rmtree(results_dir.parent)
 
 
 @pytest.mark.summary_csv
 def test_summary_csv():
     # Testing the csv files output.
+    summary_dir = Path("results") / "hyperparameters_example" / "summary_csv"
     try:
-        summary_dir = Path("results") / "hyperparameters_example" / "summary_csv"
+        if not summary_dir.exists():
+            p_list = launch_example_processes(n_workers=2)
+            for p in p_list:
+                p.wait()
         assert summary_dir.is_dir()
-
         run_data_df = pd.read_csv(summary_dir / "run_status.csv")
-        run_data_df.set_index("Description", inplace=True)
-        num_evaluated_configs_csv = run_data_df.loc["num_evaluated_configs", "Value"]
+        run_data_df.set_index("description", inplace=True)
+        num_evaluated_configs_csv = run_data_df.loc["num_evaluated_configs", "value"]
         assert num_evaluated_configs_csv == 15
 
         config_data_df = pd.read_csv(summary_dir / "config_data.csv")
@@ -93,4 +95,4 @@ def test_summary_csv():
         raise e
     finally:
         if summary_dir.exists():
-            shutil.rmtree(summary_dir)
+            shutil.rmtree(summary_dir.parent)