Skip to content

Commit

Permalink
Refactoring unit test + stochastic failure debug
Browse files Browse the repository at this point in the history
  • Loading branch information
Neeratyoy committed Nov 16, 2023
1 parent 74a5110 commit 2bd2e16
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 30 deletions.
14 changes: 7 additions & 7 deletions src/neps/status/status.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,9 @@ def _get_dataframes_from_summary(
}

df_run_data = pd.DataFrame.from_dict(
additional_data, orient="index", columns=["Value"]
additional_data, orient="index", columns=["value"]
)
df_run_data.index.name = "Description"
df_run_data.index.name = "description"

return df_config_data, df_run_data

Expand Down Expand Up @@ -272,9 +272,9 @@ def _save_data_to_csv(
while not should_break:
if locker.acquire_lock():
try:
pending_configs = run_data_df.loc["num_pending_configs", "Value"]
pending_configs = run_data_df.loc["num_pending_configs", "value"]
pending_configs_with_worker = run_data_df.loc[
"num_pending_configs_with_worker", "Value"
"num_pending_configs_with_worker", "value"
]
# Represents the last worker
if int(pending_configs) == 0 and int(pending_configs_with_worker) == 0:
Expand All @@ -286,13 +286,13 @@ def _save_data_to_csv(

if run_data_file_path.exists():
prev_run_data_df = pd.read_csv(run_data_file_path)
prev_run_data_df.set_index("Description", inplace=True)
prev_run_data_df.set_index("description", inplace=True)

num_evaluated_configs_csv = prev_run_data_df.loc[
"num_evaluated_configs", "Value"
"num_evaluated_configs", "value"
]
num_evaluated_configs_run = run_data_df.loc[
run_data_df.index == "num_evaluated_configs", "Value"
run_data_df.index == "num_evaluated_configs", "value"
]
# checks if the current worker has more evaluated configs than the previous
if int(num_evaluated_configs_csv) < int(num_evaluated_configs_run):
Expand Down
48 changes: 25 additions & 23 deletions tests/test_metahyper/test_locking.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,21 @@
from more_itertools import first_true


def launch_example_processes(n_workers: int = 3) -> list:
    """Start ``n_workers`` concurrent runs of the basic-usage example.

    Every worker is a shell pipeline that runs
    ``neps_examples.basic_usage.hyperparameters`` and, only if that exits
    successfully, ``neps_examples.basic_usage.analyse``.

    Args:
        n_workers: Number of shell processes to spawn. Zero or a negative
            value spawns nothing.

    Returns:
        The started ``subprocess.Popen`` objects, in launch order. Each one
        has its stdout captured through a pipe in text mode. The processes
        are NOT waited on here; callers should drain the pipe (for example
        with ``communicate()``) so a worker cannot block on a full pipe.
    """
    # shell=True is needed because the two modules are chained with `&&`.
    command = (
        "python -m neps_examples.basic_usage.hyperparameters"
        " && python -m neps_examples.basic_usage.analyse"
    )
    return [
        subprocess.Popen(  # pylint: disable=consider-using-with
            command,
            stdout=subprocess.PIPE,
            shell=True,
            text=True,
        )
        for _ in range(n_workers)
    ]


@pytest.mark.metahyper
@pytest.mark.summary_csv
def test_filelock() -> None:
"""Test that the filelocking method of parallelization works as intended."""
# Note: Not using tmpdir
Expand All @@ -26,23 +39,9 @@ def test_filelock() -> None:
results_dir = Path("results") / "hyperparameters_example" / "results"
try:
assert not results_dir.exists()

# Launch both processes
p1 = subprocess.Popen( # pylint: disable=consider-using-with
"python -m neps_examples.basic_usage.hyperparameters && python -m neps_examples.basic_usage.analyse",
stdout=subprocess.PIPE,
shell=True,
text=True,
)
p2 = subprocess.Popen( # pylint: disable=consider-using-with
"python -m neps_examples.basic_usage.hyperparameters && python -m neps_examples.basic_usage.analyse",
stdout=subprocess.PIPE,
shell=True,
text=True,
)

# Wait for them
for p in (p1, p2):
p_list = launch_example_processes(n_workers=2)
for p in p_list:
p.wait()
out, _ = p.communicate()
lines = out.splitlines()
Expand Down Expand Up @@ -71,19 +70,22 @@ def test_filelock() -> None:
raise e
finally:
if results_dir.exists():
shutil.rmtree(results_dir)
shutil.rmtree(results_dir.parent)


@pytest.mark.summary_csv
def test_summary_csv():
# Testing the csv files output.
summary_dir = Path("results") / "hyperparameters_example" / "summary_csv"
try:
summary_dir = Path("results") / "hyperparameters_example" / "summary_csv"
if not summary_dir.exists():
p_list = launch_example_processes(n_workers=2)
for p in p_list:
p.wait()
assert summary_dir.is_dir()

run_data_df = pd.read_csv(summary_dir / "run_status.csv")
run_data_df.set_index("Description", inplace=True)
num_evaluated_configs_csv = run_data_df.loc["num_evaluated_configs", "Value"]
run_data_df.set_index("description", inplace=True)
num_evaluated_configs_csv = run_data_df.loc["num_evaluated_configs", "value"]
assert num_evaluated_configs_csv == 15

config_data_df = pd.read_csv(summary_dir / "config_data.csv")
Expand All @@ -93,4 +95,4 @@ def test_summary_csv():
raise e
finally:
if summary_dir.exists():
shutil.rmtree(summary_dir)
shutil.rmtree(summary_dir.parent)

0 comments on commit 2bd2e16

Please sign in to comment.