From e13bf2211a137bebd03d6b142cec6d129bafa2f4 Mon Sep 17 00:00:00 2001 From: Shing Chan Date: Fri, 6 Sep 2024 23:23:03 +0100 Subject: [PATCH] feat: detect bouts of activity and calculate bout metrics. Result is a dataframe (each row is a bout) with the following columns: - StartTime: Bout start time. - EndTime: Bout end time. - Duration(mins): Bout duration (in minutes). - TimeSinceLast(mins): Time since end of last bout (in minutes). - Steps: Number of steps in the bout. - Cadence(steps/min): Bout cadence (steps per minute). - CadenceSD(steps/min): Bout cadence standard deviation (steps per minute). - Cadence25th(steps/min): 25th percentile cadence (steps per minute) in the bout. - Cadence50th(steps/min): Median cadence (steps per minute) in the bout. - Cadence75th(steps/min): 75th percentile cadence (steps per minute) in the bout. - ENMO(mg): Bout mean ENMO (milli-g). - ENMOMed(mg): Bout median ENMO (milli-g). Output file: -Bouts.csv.gz --- setup.py | 3 +- src/stepcount/stepcount.py | 178 +++++++++++++++++++++++++++++++++++++ 2 files changed, 180 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 674aad9..964b57c 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,8 @@ def get_string(string, rel_path="src/stepcount/__init__.py"): "hmmlearn==0.3.*", "torch==1.13.*", "torchvision==0.14.*", - "transforms3d==0.4.*" + "transforms3d==0.4.*", + "numba==0.58.*" ], extras_require={ "dev": [ diff --git a/src/stepcount/stepcount.py b/src/stepcount/stepcount.py index c7685b7..c4bc766 100644 --- a/src/stepcount/stepcount.py +++ b/src/stepcount/stepcount.py @@ -7,9 +7,11 @@ import argparse import json import re +from collections import defaultdict import numpy as np import pandas as pd import joblib +from numba import njit from stepcount import utils from stepcount import __version__ @@ -296,6 +298,9 @@ def main(): info['CadencePeak30Adjusted(steps/min)_Weekday'] = cadence_summary_adj['weekday_cadence_peak30'] info['Cadence95thAdjusted(steps/min)_Weekday'] = cadence_summary_adj['weekday_cadence_p95'] + # Bouts summary + bouts_summary = summarize_bouts(Y, W, data) + # Save Info.json with open(f"{outdir}/{basename}-Info.json", 'w') as f: json.dump(info, f, indent=4, cls=utils.NpEncoder) @@ -368,6 +373,9 @@ def main(): daily_adj.to_csv(f"{outdir}/{basename}-DailyAdjusted.csv.gz", index=False) # del daily_adj # still needed for printing + # Save bouts data + bouts_summary['bouts'].to_csv(f"{outdir}/{basename}-Bouts.csv.gz", index=False) + # Print print("\nSummary\n-------") print(json.dumps( @@ -907,6 +915,176 @@ def _cadence_p95(x, steptol, walktol=30): } +def summarize_bouts( + Y: pd.Series, + W: pd.Series, + data: pd.DataFrame +): + """ + Summarize bouts of walking activity. For each detected bout, it calculates + start and end times, duration, total steps, ENMO and cadence metrics. + + Parameters: + - Y (pd.Series): A pandas Series of step counts. + - W (pd.Series): A pandas Series indicating walking (1) and non-walking (0) windows, aligned with Y. + - data (pd.DataFrame): A pandas DataFrame containing raw acceleration data. + + Returns: + - dict: A dictionary containing summary information for each detected bout, with the following keys: + - 'StartTime': List of start times for each bout. + - 'EndTime': List of end times for each bout. + - 'Duration(mins)': List of durations (in minutes) for each bout. + - 'TimeSinceLast(mins)': List of time since the last bout (in minutes) for each bout. + - 'Steps': List of total steps for each bout. + - 'Cadence(steps/min)': List of average cadence (steps per minute) for each bout. + - 'CadenceSD(steps/min)': List of standard deviations of cadence (steps per minute) for each bout. + - 'Cadence25th(steps/min)': List of 25th percentile cadence (steps per minute) for each bout. + - 'Cadence50th(steps/min)': List of median cadence (steps per minute) for each bout. + - 'Cadence75th(steps/min)': List of 75th percentile cadence (steps per minute) for each bout. + - 'ENMO(mg)': Mean ENMO for each bout. + - 'ENMOMed(mg)': Median ENMO for each bout. + """ + + bouts = numba_detect_bouts(W.to_numpy()) + + if len(bouts) == 0: + return { + 'bouts': pd.DataFrame({ + 'StartTime': [], + 'EndTime': [], + 'Duration(mins)': [], + 'TimeSinceLast(mins)': [], + 'Steps': [], + 'Cadence(steps/min)': [], + 'CadenceSD(steps/min)': [], + 'Cadence25th(steps/min)': [], + 'Cadence50th(steps/min)': [], + 'Cadence75th(steps/min)': [], + 'ENMO(mg)': [], + 'ENMOMed(mg)': [], + }) + } + + bout_stats = defaultdict(list) + + dt = utils.infer_freq(Y.index) + one_min = pd.Timedelta('1min') + + # Truncated ENMO: Euclidean norm minus one and clipped at zero + v = np.sqrt(data['x'] ** 2 + data['y'] ** 2 + data['z'] ** 2) + v = np.clip(v - 1, a_min=0, a_max=None) + v *= 1000 # convert to mg + # resample to match Y + v = v.resample(dt).mean().reindex(Y.index, method='nearest', tolerance=dt) + + tlast = None + for i, n in bouts: + y = Y.iloc[i:i + n] + bout_steps = y.sum() + bout_duration = n * dt / one_min # in minutes + bout_cadence = bout_steps / bout_duration # steps per minute + # rescale to steps per minute + y *= one_min / dt + bout_cadence_sd = y.std() + bout_cadence_25th = y.quantile(0.25) + bout_cadence_50th = y.quantile(0.50) + bout_cadence_75th = y.quantile(0.75) + tstart, tend = y.index[0], y.index[-1] + if tlast is not None: + tsince = (tstart - tlast) / one_min + else: + tsince = np.nan + tlast = y.index[-1] + bout_enmo = v.loc[tstart:tend].mean() + bout_enmo_med = v.loc[tstart:tend].median() + bout_stats['StartTime'].append(tstart.strftime('%Y-%m-%d %H:%M:%S')) + bout_stats['EndTime'].append(tend.strftime('%Y-%m-%d %H:%M:%S')) + bout_stats['Duration(mins)'].append(bout_duration) + bout_stats['TimeSinceLast(mins)'].append(tsince) + bout_stats['Steps'].append(bout_steps) + bout_stats['Cadence(steps/min)'].append(bout_cadence) + bout_stats['CadenceSD(steps/min)'].append(bout_cadence_sd) + bout_stats['Cadence25th(steps/min)'].append(bout_cadence_25th) + bout_stats['Cadence50th(steps/min)'].append(bout_cadence_50th) + bout_stats['Cadence75th(steps/min)'].append(bout_cadence_75th) + bout_stats['ENMO(mg)'].append(bout_enmo) + bout_stats['ENMOMed(mg)'].append(bout_enmo_med) + + bout_stats = pd.DataFrame(bout_stats) + + return { + 'bouts': bout_stats, + } + + +@njit +def numba_detect_bouts( + arr: np.ndarray, + min_percent_ones: float = 0.8, + max_trailing_zeros: int = 3 +): + """ + For a series of 0s and 1s, find the start and duration of each bout. + A bout is a series of 0s and 1s where any expanding average is at least + `min_percent_ones`. If a bout has more than `max_trailing_zeros` trailing + 0s, the bout ends. Trailing 0s are not counted in the bout length. + + Parameters: + - arr (np.ndarray): An array of 0s and 1s representing activity data. + - min_percent_ones (float, optional): The minimum proportion of 1s required for a sequence + to be considered a bout. Default is 0.8. + - max_trailing_zeros (int, optional): The maximum number of trailing 0s allowed in a bout + before it is considered to have ended. Default is 3. + + Returns: + - list of tuple: A list of tuples where each tuple represents a detected bout. Each tuple + contains the start index and the length of the bout. + + Example: + arr: [0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0] + + min_percent_ones: 0.5 + max_trailing_zeros: 3 + Output: [(1, 1), (4, 2), (9, 10)] + + min_percent_ones: 0.5 + max_trailing_zeros: 2 + Output: [(1, 1), (4, 2), (9, 3), (15, 4)] + """ + + bouts = [] + bout_start = None + bout_length = 0 + bout_sum = 0 + trailing_zeros = 0 + + for i, a in enumerate(arr): + if a == 1: + if bout_start is None: # start new bout + bout_start = i + bout_sum += 1 + bout_length += 1 + trailing_zeros = 0 + else: + if bout_start is None: + continue # skip if no bout is ongoing + bout_length += 1 + trailing_zeros += 1 + # if too many trailing zeros or not enough ones, end the bout + if trailing_zeros > max_trailing_zeros or bout_sum / bout_length < min_percent_ones: + bouts.append((bout_start, bout_length - trailing_zeros)) + bout_start = None + bout_length = 0 + bout_sum = 0 + trailing_zeros = 0 + + # if the last bout is ongoing, add it to the list + if bout_start is not None: + bouts.append((bout_start, bout_length - trailing_zeros)) + + return bouts + + if __name__ == '__main__': main()