From 014ba4e6a25988432b2828649db44a76ad6c44df Mon Sep 17 00:00:00 2001 From: bokajgd Date: Fri, 16 Jun 2023 13:08:40 +0200 Subject: [PATCH] fix: adding more flexibility --- src/psycop/common/model_evaluation/utils.py | 18 +++++++++--------- .../model_evaluation/config.py | 4 +--- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/psycop/common/model_evaluation/utils.py b/src/psycop/common/model_evaluation/utils.py index b0008374e..0d3e5e080 100644 --- a/src/psycop/common/model_evaluation/utils.py +++ b/src/psycop/common/model_evaluation/utils.py @@ -9,18 +9,17 @@ from collections.abc import Iterable, MutableMapping, Sequence from datetime import date, datetime from pathlib import Path -from typing import Any, Dict, Optional, Union +from typing import Any, Optional, Union import dill as pkl import numpy as np import pandas as pd +import wandb +from psycop.common.global_utils.paths import PSYCOP_PKG_ROOT from pydantic import BaseModel as PydanticBaseModel from pydantic import Extra from sklearn.pipeline import Pipeline -import wandb -from psycop.common.global_utils.paths import PSYCOP_PKG_ROOT - TEST_PLOT_PATH = PSYCOP_PKG_ROOT / "test_utils" / "test_outputs" / "plots_from_tests" TEST_PLOT_PATH.mkdir(parents=True, exist_ok=True) @@ -362,7 +361,7 @@ def find_best_run_in_dir( run_group: str, lookahead_window: int, dir_path: Path, -) -> Dict: +) -> dict: """Function for finding best performing model run in a directory containing multiple runs. Args: @@ -374,14 +373,15 @@ def find_best_run_in_dir( str: Name of best performing run. """ - dir = dir_path / run_group + dir = dir_path / run_group - parquet_files = [file for file in os.listdir(dir) if file.endswith(f"{lookahead_window}.parquet")] + parquet_files = [ + file for file in os.listdir(dir) if file.endswith(f"{lookahead_window}.parquet") + ] dfs = [] for file in parquet_files: - parquet_file = os.path.join(dir, file) df = pd.read_parquet(parquet_file) dfs.append(df) @@ -390,4 +390,4 @@ def find_best_run_in_dir( best_run = concatenated_df.loc[concatenated_df["roc_auc"].idxmax()].to_dict() - return best_run \ No newline at end of file + return best_run diff --git a/src/psycop/projects/forced_admission_inpatient/model_evaluation/config.py b/src/psycop/projects/forced_admission_inpatient/model_evaluation/config.py index 250c0898c..3fdb9f9f4 100644 --- a/src/psycop/projects/forced_admission_inpatient/model_evaluation/config.py +++ b/src/psycop/projects/forced_admission_inpatient/model_evaluation/config.py @@ -2,7 +2,6 @@ from pathlib import Path import plotnine as pn - from psycop.common.model_evaluation.utils import find_best_run_in_dir from psycop.projects.forced_admission_inpatient.utils.best_runs import Run, RunGroup @@ -20,7 +19,6 @@ class BestRun: POS_RATE = 0.05 - DEV_GROUP_NAME = "bonnetiere-coarrange" # Best model on structured features @@ -31,7 +29,7 @@ class BestRun: ) # Exteracting name of run -BEST_RUN_NAME = BEST_RUN['run_name'] +BEST_RUN_NAME = BEST_RUN["run_name"] DEVELOPMENT_GROUP = RunGroup(name=DEV_GROUP_NAME)