Skip to content

Commit

Permalink
feat: examine LDL sampling (#883)
Browse files Browse the repository at this point in the history
<!--
Reviews go much faster if the reviewer knows what to focus on! Help them
out, e.g.:
Reviewers can skip X, but should pay attention to Y.
-->
  • Loading branch information
MartinBernstorff authored Apr 19, 2024
2 parents 0a5af40 + 4d0f690 commit 7aa0cc8
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 0 deletions.
134 changes: 134 additions & 0 deletions psycop/projects/cvd/cohort_examination/notebooks/ldl_sampling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import marimo

# Version string recorded in the notebook file (presumably the marimo release
# that generated it -- leave it for the tool to maintain).
__generated_with = "0.4.0"
# The notebook application object; every cell below registers itself on it
# through the ``@app.cell`` decorator. ``width="full"`` is passed to marimo.App.
app = marimo.App(width="full")


@app.cell
def __():
    # Shared imports: polars and the `ldl` loader from the raw lab-results
    # module. Both names are returned so that later cells can receive them
    # as parameters.
    import polars as pl

    from psycop.common.feature_generation.loaders.raw.load_lab_results import ldl

    return (ldl, pl)


@app.cell
def __(pl):
    # Load the frame returned by `physical_visits()` (a pandas frame, given
    # that it is handed to `pl.from_pandas`) and convert it to polars.
    from psycop.common.feature_generation.loaders.raw.load_visits import physical_visits

    visits = pl.from_pandas(data=physical_visits())
    return (physical_visits, visits)


@app.cell
def __(ldl, pl):
    # Keep only *when* each LDL result was recorded: the "value" column is
    # dropped and the timestamp is duplicated into "latest_ldl_timestamp",
    # so the timestamp itself can later be aggregated as a value.
    ldl_timestamps = (
        pl.from_pandas(data=ldl())
        .drop("value")
        .with_columns(latest_ldl_timestamp=pl.col("timestamp"))
    )
    # Trailing bare expression: the frame is the cell's displayed output.
    ldl_timestamps
    return (ldl_timestamps,)


@app.cell
def __(ldl_timestamps, visits):
    # For every row in `visits`, attach the latest LDL timestamp found within
    # the 10-year lookbehind window; rows without one get the fallback date.
    import datetime as dt

    from timeseriesflattener import Flattener, PredictionTimeFrame, PredictorSpec, ValueFrame
    from timeseriesflattener.aggregators import LatestAggregator

    # Underscore-prefixed names are intermediates and are not returned.
    _flattener = Flattener(
        predictiontime_frame=PredictionTimeFrame(
            init_df=visits,
            entity_id_col_name="dw_ek_borger",
            timestamp_col_name="timestamp",
        )
    )

    _latest_ldl_spec = PredictorSpec(
        value_frame=ValueFrame(
            init_df=ldl_timestamps,
            entity_id_col_name="dw_ek_borger",
            value_timestamp_col_name="timestamp",
        ),
        lookbehind_distances=[dt.timedelta(days=365 * 10)],
        aggregators=[LatestAggregator(timestamp_col_name="timestamp")],
        # Sentinel date used when no LDL value falls inside the window.
        fallback=dt.datetime(1979, 1, 1),
    )

    with_latest_ldl = _flattener.aggregate_timeseries(specs=[_latest_ldl_spec])
    return (
        Flattener,
        LatestAggregator,
        PredictionTimeFrame,
        PredictorSpec,
        ValueFrame,
        dt,
        with_latest_ldl,
    )


@app.cell
def __(pl, with_latest_ldl):
    # Materialise the flattened frame and derive, per row, the whole number
    # of days between the row's timestamp and its latest LDL timestamp.
    collected_df = with_latest_ldl.df.collect()

    # Name of the aggregated column in the flattened frame.
    _latest_ldl_col = (
        "pred_latest_ldl_timestamp_within_0_to_3650_days_latest_fallback_1979-01-01 00:00:00"
    )
    _ldl_age = (pl.col("timestamp") - pl.col(_latest_ldl_col)).dt.total_days()

    plot_df = collected_df.with_columns(_ldl_age.alias("latest_ldl_age_days"))
    # Trailing bare expression: the frame is the cell's displayed output.
    plot_df
    return (collected_df, plot_df)


@app.cell
def __(plot_df):
    # Build an empirical CDF of the age (in days) of the latest LDL
    # measurement, zoomed to the first five years on the x axis.
    import plotnine as pn

    plot = (
        pn.ggplot(data=plot_df)
        # Fix: the x aesthetic previously named "days_after_ldl", a column no
        # visible cell creates. The age column derived for `plot_df` is
        # "latest_ldl_age_days", so map that instead.
        + pn.stat_ecdf(mapping=pn.aes(x="latest_ldl_age_days"))
        + pn.scale_color_brewer(type="qual", palette=2)
        # coord_cartesian zooms the view; it does not filter rows out of the
        # ECDF computation.
        + pn.coord_cartesian(xlim=(0, 365 * 5))
        + pn.ylab("Proportion with at least 1 LDL measurement")
        + pn.xlab("Days from visit to LDL measurement")
        + pn.scale_x_continuous(expand=(0, 0))
        + pn.scale_y_continuous(expand=(0, 0))
        + pn.theme_bw()
        + pn.theme(legend_position="bottom")
        + pn.labs(color="")
    )
    # NOTE(review): the cell only builds the plot object; it has no trailing
    # expression, so nothing is rendered here -- confirm this is intended.
    return plot, pn


@app.cell
def __(plot_df):
    # Total number of rows in the plotting frame; used as the denominator
    # in the summary cell.
    n = plot_df.height
    return (n,)


@app.cell
def __():
    # Slider for the maximum accepted LDL age in days
    # (range 1-730, default 365, step 1, current value shown).
    import marimo as mo

    max_ldl_age_days = mo.ui.slider(start=1, stop=730, step=1, value=365, show_value=True)
    # Trailing bare expression: the slider widget is the cell's output.
    max_ldl_age_days
    return (max_ldl_age_days, mo)


@app.cell
def __(max_ldl_age_days, mo, n, pl, plot_df):
    # Count the rows whose latest LDL measurement is older than the slider
    # value and report that count as a share of all rows.
    n_needing_ldl_measurement = plot_df.filter(
        pl.col("latest_ldl_age_days") > max_ldl_age_days.value
    ).height

    # Guard against an empty frame: the original divided by `n`
    # unconditionally and raised ZeroDivisionError when there were no rows.
    _pct_needing = round(n_needing_ldl_measurement / n * 100, 0) if n else 0.0

    # Trailing expression: the rendered markdown is the cell's output.
    mo.md(
        f"Out of {n} visits, {n_needing_ldl_measurement} ({_pct_needing}%) could use a new LDL measurement"
    )
    return (n_needing_ldl_measurement,)


# Run the notebook as an app when this file is executed as a script.
if __name__ == "__main__":
    app.run()
1 change: 1 addition & 0 deletions psycop/timeseriesflattener
Submodule timeseriesflattener added at 4b89ec
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ exclude = [
"**/.*venv*",
".tox",
"build",
"**/notebooks/*", # Exclude marimo notebooks
# don't check files that import synthcity as it is not installed on the main environment
"psycop/projects/scz_bp/model_training/estimator_steps/synth_data_augmentation.py",
"psycop/projects/scz_bp/model_training/estimator_steps/test_synth_data_augmentation.py",
Expand Down Expand Up @@ -133,6 +134,7 @@ exclude = [
"venv",
"__init__.py",
"docs/conf.py",
"**/notebooks/*",
]
# Allow unused variables when underscore-prefixed.
dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
Expand Down

0 comments on commit 7aa0cc8

Please sign in to comment.