Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: argparse-min-wear #148

Merged
merged 2 commits into from
Dec 19, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 35 additions & 17 deletions src/stepcount/stepcount.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,20 +48,26 @@ def main():
"Default: 'time,x,y,z'"),
type=str, default="time,x,y,z")
parser.add_argument("--exclude-wear-below", "-w",
help=("Minimum wear time for a day to be considered valid, otherwise exclude it. "
"Pass values as strings, e.g.: '12H', '30min'. Default: None (no exclusion)"),
help="Exclude days with wear time below threshold. Pass values as strings, e.g.: '12H', '30min'. "
"Default: None (no exclusion)",
type=str, default=None)
parser.add_argument("--exclude-first-last", "-e",
help="Exclude first, last or both days of data. Default: None (no exclusion)",
type=str, choices=['first', 'last', 'both'], default=None)
parser.add_argument("--min-wear-per-day", help="The minimum required wear time (in minutes) for a day to be considered valid.",
type=float, default=21 * 60)
parser.add_argument("--min-wear-per-hour", help="The minimum required wear time (in minutes) for an hour bin to be considered valid.",
type=float, default=50)
parser.add_argument("--min-wear-per-minute", help="The minimum required wear time (in minutes) for a minute bin to be considered valid.",
type=float, default=0.5)
parser.add_argument("--peak1-min-walk-per-day",
help="The minimum amount of walking time per day required for peak1 calculation.",
help="The minimum required walking time (in minutes) in a day for peak1 calculation.",
type=int, default=10)
parser.add_argument("--peak30-min-walk-per-day",
help="The minimum amount of walking time per day for peak30 calculation.",
help="The minimum required walking time (in minutes) in a day for peak30 calculation.",
type=int, default=30)
parser.add_argument("--p95-min-walk-per-day",
help="The minimum amount of walking time per day for p95 calculation.",
help="The minimum required walking time (in minutes) in a day for p95 calculation.",
type=int, default=10)
parser.add_argument("--start",
help=("Specify a start time for the data to be processed (otherwise, process all). "
Expand Down Expand Up @@ -461,14 +467,20 @@ def load_model(

def summarize_enmo(
data: pd.DataFrame,
adjust_estimates: bool = False
adjust_estimates: bool = False,
min_wear_per_day: float = 21 * 60,
min_wear_per_hour: float = 50,
min_wear_per_minute: float = 0.5,
):
"""
Summarize ENMO information from raw accelerometer data, e.g. daily and hourly averages, percentiles, etc.

Parameters:
- data (pd.DataFrame): A pandas DataFrame of raw accelerometer data with columns 'x', 'y', 'z'.
- adjust_estimates (bool, optional): Whether to adjust estimates to account for missing data. Defaults to False.
- min_wear_per_day (float, optional): The minimum required wear time (in minutes) for a day to be considered valid. Defaults to 21 hours.
- min_wear_per_hour (float, optional): The minimum required wear time (in minutes) for an hour bin to be considered valid. Defaults to 50 minutes.
- min_wear_per_minute (float, optional): The minimum required wear time (in minutes) for a minute bin to be considered valid. Defaults to 0.5 minutes (30 seconds).

Returns:
- dict: A dictionary containing various summary ENMO statistics.
Expand Down Expand Up @@ -503,9 +515,9 @@ def _mean(x, min_wear=None, dt=None):

if adjust_estimates:
# adjusted estimates account for NAs
minutely = v.resample('T').agg(_mean, min_wear=0.5, dt=dt).rename('ENMO(mg)') # up to 30s/min missingness
hourly = v.resample('H').agg(_mean, min_wear=50, dt=dt).rename('ENMO(mg)') # up to 10min/h missingness
daily = v.resample('D').agg(_mean, min_wear=21 * 60, dt=dt).rename('ENMO(mg)') # up to 3h/d missingness
minutely = v.resample('T').agg(_mean, min_wear=min_wear_per_minute, dt=dt).rename('ENMO(mg)') # up to 30s/min missingness
hourly = v.resample('H').agg(_mean, min_wear=min_wear_per_hour, dt=dt).rename('ENMO(mg)') # up to 10min/h missingness
daily = v.resample('D').agg(_mean, min_wear=min_wear_per_day, dt=dt).rename('ENMO(mg)') # up to 3h/d missingness
# adjusted estimates first form a 7-day representative week before final aggregation
# TODO: 7-day padding for shorter recordings
day_of_week = utils.impute_days(daily).groupby(daily.index.weekday).mean()
Expand Down Expand Up @@ -542,7 +554,10 @@ def _mean(x, min_wear=None, dt=None):
def summarize_steps(
Y: pd.Series,
steptol: int = 3,
adjust_estimates: bool = False
adjust_estimates: bool = False,
min_wear_per_day: int = 21 * 60,
min_wear_per_hour: int = 50,
min_wear_per_minute: int = 0.5,
):
"""
Summarize a series of step counts, e.g. daily and hourly averages, percentiles, etc.
Expand All @@ -551,6 +566,9 @@ def summarize_steps(
- Y (pd.Series): A pandas Series of step counts.
- steptol (int, optional): The minimum number of steps per window for the window to be considered valid for calculation. Defaults to 3 steps per window.
- adjust_estimates (bool, optional): Whether to adjust estimates to account for missing data. Defaults to False.
- min_wear_per_day (float, optional): The minimum required wear time (in minutes) for a day to be considered valid. Defaults to 21 hours.
- min_wear_per_hour (float, optional): The minimum required wear time (in minutes) for an hour bin to be considered valid. Defaults to 50 minutes.
- min_wear_per_minute (float, optional): The minimum required wear time (in minutes) for a minute bin to be considered valid. Defaults to 0.5 minutes (30 seconds).

Returns:
- dict: A dictionary containing various summary step count statistics.
Expand Down Expand Up @@ -627,9 +645,9 @@ def _tdelta_to_str(tdelta):
# steps
if adjust_estimates:
# adjusted estimates account for NAs
minutely_steps = Y.resample('T').agg(_sum, min_wear=0.5, dt=dt).rename('Steps') # up to 30s/min missingness
hourly_steps = Y.resample('H').agg(_sum, min_wear=50, dt=dt).rename('Steps') # up to 10min/h missingness
daily_steps = Y.resample('D').agg(_sum, min_wear=21 * 60, dt=dt).rename('Steps') # up to 3h/d missingness
minutely_steps = Y.resample('T').agg(_sum, min_wear=min_wear_per_minute, dt=dt).rename('Steps') # up to 30s/min missingness
hourly_steps = Y.resample('H').agg(_sum, min_wear=min_wear_per_hour, dt=dt).rename('Steps') # up to 10min/h missingness
daily_steps = Y.resample('D').agg(_sum, min_wear=min_wear_per_day, dt=dt).rename('Steps') # up to 3h/d missingness
# adjusted estimates first form a 7-day representative week before final aggregation
# TODO: 7-day padding for shorter recordings
day_of_week = utils.impute_days(daily_steps).groupby(daily_steps.index.weekday).mean()
Expand Down Expand Up @@ -675,9 +693,9 @@ def _tdelta_to_str(tdelta):
# walking
if adjust_estimates:
# adjusted estimates account for NAs
# minutely_walk = (W.resample('T').agg(_sum, min_wear=0.5, dt=dt) * dt / 60).rename('Walk(mins)') # up to 30s/min missingness
hourly_walk = (W.resample('H').agg(_sum, min_wear=50, dt=dt) * dt / 60).rename('Walk(mins)') # up to 10min/h missingness
daily_walk = (W.resample('D').agg(_sum, min_wear=21 * 60, dt=dt) * dt / 60).rename('Walk(mins)') # up to 3h/d missingness
# minutely_walk = (W.resample('T').agg(_sum, min_wear=min_wear_per_minute, dt=dt) * dt / 60).rename('Walk(mins)') # up to 30s/min missingness
hourly_walk = (W.resample('H').agg(_sum, min_wear=min_wear_per_hour, dt=dt) * dt / 60).rename('Walk(mins)') # up to 10min/h missingness
daily_walk = (W.resample('D').agg(_sum, min_wear=min_wear_per_day, dt=dt) * dt / 60).rename('Walk(mins)') # up to 3h/d missingness
# adjusted estimates first form a 7-day representative week before final aggregation
# TODO: 7-day padding for shorter recordings
day_of_week_walk = utils.impute_days(daily_walk).groupby(daily_walk.index.weekday).mean()
Expand Down Expand Up @@ -723,7 +741,7 @@ def _tdelta_to_str(tdelta):
# time of accumulated steps
if adjust_estimates:
# adjusted estimates account for NAs
daily_ptile_at = Y.groupby(pd.Grouper(freq='D')).apply(_percentile_at, min_wear=21 * 60, dt=dt).unstack(1) # up to 3h/d missingness
daily_ptile_at = Y.groupby(pd.Grouper(freq='D')).apply(_percentile_at, min_wear=min_wear_per_day, dt=dt).unstack(1) # up to 3h/d missingness
else:
# crude (unadjusted) estimates ignore NAs
daily_ptile_at = Y.groupby(pd.Grouper(freq='D')).apply(_percentile_at).unstack(1)
Expand Down
Loading