Skip to content

Commit

Permalink
refactor: remove wandb from feature generation (#759)
Browse files Browse the repository at this point in the history
<!--
Reviews go much faster if the reviewer knows what to focus on! Help them
out, e.g.:
Reviewers can skip X, but should pay attention to Y.
-->
  • Loading branch information
MartinBernstorff authored Jan 25, 2024
2 parents fe57ae4 + 5b9979d commit 0cb9c96
Show file tree
Hide file tree
Showing 26 changed files with 26 additions and 180 deletions.
11 changes: 0 additions & 11 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
"dependsOn": [
"Test",
"Submit PR",
"Types",
],
"presentation": {
"reveal": "never",
Expand Down Expand Up @@ -46,15 +45,5 @@
"showReuseMessage": false,
}
},
{
"label": "Types",
"type": "shell",
"command": "lefthook run types",
"presentation": {
"group": "pr",
"clear": true,
"showReuseMessage": false,
}
},
]
}
10 changes: 4 additions & 6 deletions lefthook.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,12 @@ pre-commit:
commands:
format:
glob: "*.{py}"
run: inv lint
run: ruff format {staged_files}
stage_fixed: true

types:
commands:
pyright:
lint:
glob: "*.{py}"
run: pyright {push_files}
run: ruff --fix --extend-select F401 --extend-select F841 --extend-select B007 {staged_files}
stage_fixed: true

test:
commands:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@
from timeseriesflattener.feature_specs.single_specs import StaticSpec, TemporalSpec
from timeseriesflattener.flattened_dataset import PredictorSpec

from psycop.common.feature_generation.application_modules.wandb_utils import (
wandb_alert_on_exception,
)
from psycop.common.feature_generation.data_checks.flattened.data_integrity import (
save_feature_set_integrity_checks_from_dir,
)
Expand All @@ -26,7 +23,6 @@
log = logging.getLogger(__name__)


@wandb_alert_on_exception
def save_flattened_dataset_description_to_disk(
project_info: ProjectInfo,
feature_set_dir: Path,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@
from psycop.common.feature_generation.application_modules.save_dataset_to_disk import (
split_and_save_dataset_to_disk,
)
from psycop.common.feature_generation.application_modules.wandb_utils import (
wandb_alert_on_exception,
)
from psycop.common.feature_generation.loaders.raw.load_demographic import birthdays

if TYPE_CHECKING:
Expand Down Expand Up @@ -52,7 +49,6 @@ def flatten_dataset_to_disk(
)


@wandb_alert_on_exception
def create_flattened_dataset(
project_info: ProjectInfo,
feature_specs: list[AnySpec],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,14 @@
create_flattened_dataset,
)
from psycop.common.feature_generation.application_modules.loggers import init_root_logger
from psycop.common.feature_generation.application_modules.project_setup import (
ProjectInfo,
init_wandb,
)
from psycop.common.feature_generation.application_modules.project_setup import ProjectInfo
from psycop.common.feature_generation.application_modules.save_dataset_to_disk import (
split_and_save_dataset_to_disk,
)
from psycop.common.feature_generation.application_modules.wandb_utils import (
wandb_alert_on_exception,
)

log = logging.getLogger()


@wandb_alert_on_exception
def generate_feature_set(
project_info: ProjectInfo,
eligible_prediction_times: pd.DataFrame,
Expand Down Expand Up @@ -88,39 +81,10 @@ def generate_feature_set(
return feature_set_dir


def init_wandb_and_generate_feature_set(
    project_info: ProjectInfo,
    eligible_prediction_times: pd.DataFrame,
    feature_specs: list[AnySpec],
    generate_in_chunks: bool = False,
    chunksize: int = 250,
    feature_set_name: str | None = None,
) -> Path:
    """Set up logging and wandb, then generate the feature set.

    Thin entry point: runs ``init_logger_and_wandb`` for the project and then
    forwards every argument unchanged to ``generate_feature_set``.

    Args:
        project_info: Project info, used for setup and passed on to generation.
        eligible_prediction_times: Passed through to ``generate_feature_set``.
        feature_specs: Passed through to ``generate_feature_set``.
        generate_in_chunks: Passed through to ``generate_feature_set``.
        chunksize: Passed through to ``generate_feature_set``.
        feature_set_name: Passed through to ``generate_feature_set``.

    Returns:
        Whatever ``generate_feature_set`` returns.
    """
    # Setup must happen before anything decorated with wandb_alert_on_exception
    # runs; the decorated generate_feature_set can then send a Slack alert on
    # failure, provided wandb alerts are configured in wandb.
    init_logger_and_wandb(project_info)

    generated_feature_set_dir = generate_feature_set(
        project_info=project_info,
        eligible_prediction_times=eligible_prediction_times,
        feature_specs=feature_specs,
        generate_in_chunks=generate_in_chunks,
        chunksize=chunksize,
        feature_set_name=feature_set_name,
    )
    return generated_feature_set_dir


def init_logger_and_wandb(project_info: ProjectInfo):
def init_logger(project_info: ProjectInfo):
init_root_logger(project_info=project_info)

log.info( # pylint: disable=logging-fstring-interpolation
f"Stdout level is {logging.getLevelName(log.level)}"
)
log.debug("Debugging is still captured in the log file")

# Use wandb to keep track of your dataset generations
# Makes it easier to find paths on wandb, as well as
# allows monitoring and automatic slack alert on failure
init_wandb(project_info=project_info) # allows monitoring and automatic slack alert on failure
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
"""Setup for the project."""
import logging
import sys
import tempfile
from pathlib import Path

import wandb
from timeseriesflattener.feature_specs.single_specs import BaseModel

from psycop.common.global_utils.paths import PSYCOP_PKG_ROOT

log = logging.getLogger(__name__)


Expand Down Expand Up @@ -38,27 +33,3 @@ class ProjectInfo(BaseModel):
@property
def flattened_dataset_dir(self) -> Path:
return self.project_path / "flattened_datasets"


def init_wandb(project_info: ProjectInfo) -> None:
    """Start a wandb run in offline mode for this project's feature generation.

    The run is named ``"<project_name>-feature-generation"`` and its config
    records the project's flattened dataset directory. Intended for tracking
    progress and logs and, where wandb alerts are set up, Slack notifications
    on failure.

    Args:
        project_info (ProjectInfo): Project info.
    """
    run_config = {"feature_set_path": project_info.flattened_dataset_dir}

    # The wandb tmp directory is not created automatically on the server
    # (Windows only, per the platform check), so create the "debug-cli.onerm"
    # folders ourselves: one under the system temp dir, one under the package's
    # wandb dir.
    if sys.platform == "win32":
        tmp_debug_dir = Path(tempfile.gettempdir()) / "debug-cli.onerm"
        tmp_debug_dir.mkdir(parents=True, exist_ok=True)

        pkg_debug_dir = PSYCOP_PKG_ROOT / "wandb" / "debug-cli.onerm"
        pkg_debug_dir.mkdir(parents=True, exist_ok=True)

    wandb.init(
        project=f"{project_info.project_name}-feature-generation",
        config=run_config,
        mode="offline",
    )
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
import pandas as pd

from psycop.common.feature_generation.application_modules.project_setup import ProjectInfo
from psycop.common.feature_generation.application_modules.wandb_utils import (
wandb_alert_on_exception,
)
from psycop.common.feature_generation.loaders.raw.load_ids import (
SplitName,
load_stratified_by_outcome_split_ids,
Expand Down Expand Up @@ -72,7 +69,6 @@ def get_split_id_df(split_name: SplitName) -> pd.DataFrame:
return split_id_df.frame.collect().to_pandas()


@wandb_alert_on_exception
def split_and_save_dataset_to_disk(
flattened_df: pd.DataFrame,
project_info: ProjectInfo,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import pandas as pd
import polars as pl
import pytest
import wandb
from polars.testing import assert_series_equal
from timeseriesflattener.aggregation_fns import mean
from timeseriesflattener.feature_specs.single_specs import PredictorSpec
Expand Down Expand Up @@ -96,8 +95,6 @@ def test_concatenation_assumptions(
):
"""assert that timeseriesflattener outputs are sorted as expected"""

wandb.init(project="test", mode="offline")

predictor_specs = [
PredictorSpec(
timeseries_df=synth_predictor_1,
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,8 @@
from psycop.common.model_training.training.train_and_predict import train_and_predict
from psycop.common.model_training.training_output.model_evaluator import ModelEvaluator
from psycop.common.model_training.utils.col_name_inference import get_col_names
from psycop.common.model_training.utils.decorators import (
wandb_alert_on_exception_return_terrible_auc,
)

from ...utils.decorators import return_terrible_auroc_on_exception


def get_eval_dir(cfg: FullConfigSchema) -> Path:
Expand All @@ -37,7 +36,7 @@ def get_eval_dir(cfg: FullConfigSchema) -> Path:
return eval_dir_path


@wandb_alert_on_exception_return_terrible_auc
@return_terrible_auroc_on_exception
def post_wandb_setup_train_model(
cfg: FullConfigSchema, override_output_dir: Optional[Path] = None
) -> float:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
"""Test that the model trains correctly."""


from pathlib import Path

import pytest

from psycop.common.model_training.application_modules.train_model.main import (
Expand Down Expand Up @@ -33,17 +31,6 @@ def test_crossvalidation(muteable_test_config: FullConfigSchema):
train_model(cfg)


def test_list_of_data_dirs(muteable_test_config: FullConfigSchema):
    """Test train model can resolve list of data dir paths."""
    synth_split_dir = "psycop/common/model_training/tests/test_data/synth_splits_subsampled/"

    cfg = muteable_test_config
    # Presumably unfreezes the data config so `dir` can be reassigned below.
    cfg.data.model_config["frozen"] = False
    # The same directory is listed twice to exercise the list-of-paths case.
    cfg.data.dir = [Path(synth_split_dir), Path(synth_split_dir)]  # type: ignore
    train_model(cfg)


def test_train_val_predict(muteable_test_config: FullConfigSchema):
"""Test train without crossvalidation."""
cfg = muteable_test_config
Expand Down
2 changes: 1 addition & 1 deletion psycop/common/model_training/utils/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def wrapper(*args, **kwargs): # noqa # type: ignore
return wrapper


def wandb_alert_on_exception_return_terrible_auc(func): # noqa # type: ignore
def return_terrible_auroc_on_exception(func): # noqa # type: ignore
"""Alerts wandb on exception."""

@wraps(func)
Expand Down
4 changes: 2 additions & 2 deletions psycop/projects/cancer/feature_generation/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pathlib import Path

from psycop.common.feature_generation.application_modules.generate_feature_set import (
init_wandb_and_generate_feature_set,
generate_feature_set,
)
from psycop.projects.cancer.cancer_config import (
get_cancer_feature_specifications,
Expand All @@ -20,7 +20,7 @@
exist_ok=True, parents=True
)

init_wandb_and_generate_feature_set(
generate_feature_set(
project_info=get_cancer_project_info(),
eligible_prediction_times=CancerCohortDefiner.get_filtered_prediction_times_bundle().prediction_times.frame.to_pandas(),
feature_specs=get_cancer_feature_specifications(),
Expand Down
4 changes: 2 additions & 2 deletions psycop/projects/cancer/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from psycop.common.feature_generation.application_modules.generate_feature_set import (
init_wandb_and_generate_feature_set,
generate_feature_set,
)
from psycop.projects.cancer.cancer_config import (
get_cancer_feature_specifications,
Expand All @@ -11,7 +11,7 @@
from psycop.projects.cancer.model_training.train_models_in_parallel import train_models_in_parallel

if __name__ == "__main__":
feature_set_path = init_wandb_and_generate_feature_set(
feature_set_path = generate_feature_set(
project_info=get_cancer_project_info(),
eligible_prediction_times=CancerCohortDefiner.get_filtered_prediction_times_bundle().prediction_times.frame.to_pandas(),
feature_specs=get_cancer_feature_specifications(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,13 @@
from pathlib import Path
from typing import Optional

from psycop.common.global_utils.wandb.wandb_try_except_decorator import wandb_alert_on_exception
from psycop.common.model_training.application_modules.get_search_space import SearchSpaceInferrer
from psycop.common.model_training.application_modules.process_manager_setup import setup
from psycop.common.model_training.application_modules.trainer_spawner import spawn_trainers
from psycop.common.model_training.config_schemas.full_config import FullConfigSchema
from psycop.common.model_training.data_loader.data_loader import DataLoader


@wandb_alert_on_exception
def main(
cfg: FullConfigSchema,
wandb_group: str,
Expand Down
11 changes: 1 addition & 10 deletions psycop/projects/clozapine/feature_generation/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,10 @@
save_flattened_dataset_description_to_disk,
)
from psycop.common.feature_generation.application_modules.loggers import init_root_logger
from psycop.common.feature_generation.application_modules.project_setup import (
ProjectInfo,
init_wandb,
)
from psycop.common.feature_generation.application_modules.project_setup import ProjectInfo
from psycop.common.feature_generation.application_modules.save_dataset_to_disk import (
split_and_save_dataset_to_disk,
)
from psycop.common.feature_generation.application_modules.wandb_utils import (
wandb_alert_on_exception,
)
from psycop.common.global_utils.paths import OVARTACI_SHARED_DIR
from psycop.projects.clozapine.feature_generation.cohort_definition.clozapine_cohort_definition import (
ClozapineCohortDefiner,
Expand All @@ -36,7 +30,6 @@
warnings.simplefilter(action="ignore", category=RuntimeWarning)


@wandb_alert_on_exception
def main(
add_text_features: bool = False,
min_set_for_debug: bool = False,
Expand Down Expand Up @@ -130,8 +123,6 @@ def main(
exist_ok=True, parents=True
)

init_wandb(project_info=project_info)

main(
add_text_features=False,
min_set_for_debug=True,
Expand Down
4 changes: 2 additions & 2 deletions psycop/projects/cvd/feature_generation/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


from psycop.common.feature_generation.application_modules.generate_feature_set import (
init_wandb_and_generate_feature_set,
generate_feature_set,
)
from psycop.common.feature_generation.application_modules.project_setup import ProjectInfo
from psycop.common.global_utils.paths import OVARTACI_SHARED_DIR
Expand All @@ -25,7 +25,7 @@ def get_cvd_project_info() -> ProjectInfo:
)
feature_specs = CVDFeatureSpecifier().get_feature_specs(layer=3)

init_wandb_and_generate_feature_set(
generate_feature_set(
project_info=project_info,
eligible_prediction_times=eligible_prediction_times,
feature_specs=feature_specs,
Expand Down
Loading

0 comments on commit 0cb9c96

Please sign in to comment.