diff --git a/config.yaml b/config.yaml index 7d44b6b06..25fa84e24 100644 --- a/config.yaml +++ b/config.yaml @@ -258,6 +258,7 @@ PHONE_LOCATIONS: MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3 CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS RADIUS_FOR_HOME: 100 + THRESHOLD_MAX_SPEED: 250 #km/h; set to 0 to disable SRC_SCRIPT: src/features/phone_locations/doryab/main.py BARNETT: diff --git a/docs/change-log.md b/docs/change-log.md index 097fe72dc..d2f43724e 100644 --- a/docs/change-log.md +++ b/docs/change-log.md @@ -7,6 +7,7 @@ - Add parameter to exclude rows with implausible step count values from Fitbit steps intraday feature computation - Update tests for Fitbit steps intraday features - Fix bug of None/NULL values in parsed Fitbit heartrate summary JSON data +- Add parameter to PHONE_LOCATIONS DORYAB provider to exclude rows of locations data with implausible speed values from feature computation ## v1.9.4 diff --git a/docs/features/phone-locations.md b/docs/features/phone-locations.md index 34feee3a3..19a341206 100644 --- a/docs/features/phone-locations.md +++ b/docs/features/phone-locations.md @@ -136,6 +136,7 @@ Parameters description for `[PHONE_LOCATIONS][PROVIDERS][DORYAB]`: |`[MINIMUM_DAYS_TO_DETECT_HOME_CHANGES]` | The minimum number of consecutive days a new home location candidate has to repeat before it is considered the participant's new home. This parameter will be used only when `[INFER_HOME_LOCATION_STRATEGY]` is set to `SUN_LI_VEGA_STRATEGY`. | `[CLUSTERING_ALGORITHM]` | The original Doryab et al. implementation uses `DBSCAN`, `OPTICS` is also available with similar (but not identical) clustering results and lower memory consumption. | `[RADIUS_FOR_HOME]` | All location coordinates within this distance (meters) from the home location coordinates are considered a homestay (see `timeathome` feature). 
+| `[THRESHOLD_MAX_SPEED]` | Any rows of location data with a calculated speed greater than this threshold (in km/h) will be dropped prior to feature computation. Set to 0 to disable this filter and retain all rows. Features description for `[PHONE_LOCATIONS][PROVIDERS][DORYAB]`: diff --git a/src/features/phone_locations/doryab/main.py b/src/features/phone_locations/doryab/main.py index 19a7b8d52..568038e1c 100644 --- a/src/features/phone_locations/doryab/main.py +++ b/src/features/phone_locations/doryab/main.py @@ -127,6 +127,7 @@ def doryab_features(sensor_data_files, time_segment, provider, filter_data_by_se cluster_on = provider["CLUSTER_ON"] clustering_algorithm = provider["CLUSTERING_ALGORITHM"] radius_from_home = provider["RADIUS_FOR_HOME"] + threshold_max_speed = provider["THRESHOLD_MAX_SPEED"] if provider["MINUTES_DATA_USED"]: requested_features.append("minutesdataused") @@ -136,10 +137,15 @@ def doryab_features(sensor_data_files, time_segment, provider, filter_data_by_se # the subset of requested features this function can compute features_to_compute = list(set(requested_features) & set(base_features_names)) + # if not disabled (threshold_max_speed=0), drop any rows of data where speed is greater than the specified value in km/h prior to feature computation + if threshold_max_speed > 0: + location_data = location_data.drop(location_data[location_data.speed > threshold_max_speed].index) + location_data = apply_cluster_strategy(location_data, time_segment, clustering_algorithm, dbscan_eps, dbscan_minsamples, cluster_on, filter_data_by_segment) if location_data.empty: return pd.DataFrame(columns=["local_segment"] + features_to_compute) + location_features = pd.DataFrame() # update distance after chunk_episodes() function diff --git a/tests/settings/mtz_event_config.yaml b/tests/settings/mtz_event_config.yaml index 97dffef22..798e1338d 100644 --- a/tests/settings/mtz_event_config.yaml +++ b/tests/settings/mtz_event_config.yaml @@ -260,6 +260,7 @@ PHONE_LOCATIONS: 
MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3 CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS RADIUS_FOR_HOME: 100 + THRESHOLD_MAX_SPEED: 0 #km/h; set to 0 to disable SRC_SCRIPT: src/features/phone_locations/doryab/main.py BARNETT: diff --git a/tests/settings/mtz_frequency_config.yaml b/tests/settings/mtz_frequency_config.yaml index 487f10e6b..f3f8acf90 100644 --- a/tests/settings/mtz_frequency_config.yaml +++ b/tests/settings/mtz_frequency_config.yaml @@ -260,6 +260,7 @@ PHONE_LOCATIONS: MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3 CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS RADIUS_FOR_HOME: 100 + THRESHOLD_MAX_SPEED: 0 #km/h; set to 0 to disable SRC_SCRIPT: src/features/phone_locations/doryab/main.py BARNETT: diff --git a/tests/settings/mtz_periodic_config.yaml b/tests/settings/mtz_periodic_config.yaml index 442d71055..b95b0a2bb 100644 --- a/tests/settings/mtz_periodic_config.yaml +++ b/tests/settings/mtz_periodic_config.yaml @@ -260,6 +260,7 @@ PHONE_LOCATIONS: MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3 CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS RADIUS_FOR_HOME: 100 + THRESHOLD_MAX_SPEED: 0 #km/h; set to 0 to disable SRC_SCRIPT: src/features/phone_locations/doryab/main.py BARNETT: diff --git a/tests/settings/stz_event_config.yaml b/tests/settings/stz_event_config.yaml index a95d95d2f..39811fa2d 100644 --- a/tests/settings/stz_event_config.yaml +++ b/tests/settings/stz_event_config.yaml @@ -260,6 +260,7 @@ PHONE_LOCATIONS: MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3 CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS RADIUS_FOR_HOME: 100 + THRESHOLD_MAX_SPEED: 0 #km/h; set to 0 to disable SRC_SCRIPT: src/features/phone_locations/doryab/main.py BARNETT: diff --git a/tests/settings/stz_frequency_config.yaml b/tests/settings/stz_frequency_config.yaml index 0c8dc47ad..5468efb77 100644 --- a/tests/settings/stz_frequency_config.yaml +++ b/tests/settings/stz_frequency_config.yaml @@ -260,6 +260,7 @@ PHONE_LOCATIONS: MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3 CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, 
OPTICS RADIUS_FOR_HOME: 100 + THRESHOLD_MAX_SPEED: 0 #km/h; set to 0 to disable SRC_SCRIPT: src/features/phone_locations/doryab/main.py BARNETT: diff --git a/tests/settings/stz_periodic_config.yaml b/tests/settings/stz_periodic_config.yaml index aeed7b8db..901eb6d99 100644 --- a/tests/settings/stz_periodic_config.yaml +++ b/tests/settings/stz_periodic_config.yaml @@ -260,6 +260,7 @@ PHONE_LOCATIONS: MINIMUM_DAYS_TO_DETECT_HOME_CHANGES: 3 CLUSTERING_ALGORITHM: DBSCAN # DBSCAN, OPTICS RADIUS_FOR_HOME: 100 + THRESHOLD_MAX_SPEED: 0 #km/h; set to 0 to disable SRC_SCRIPT: src/features/phone_locations/doryab/main.py BARNETT: diff --git a/tools/config.schema.yaml b/tools/config.schema.yaml index 64605a3d4..fe7cae555 100644 --- a/tools/config.schema.yaml +++ b/tools/config.schema.yaml @@ -751,6 +751,9 @@ properties: RADIUS_FOR_HOME: type: integer exclusiveMinimum: 0 + THRESHOLD_MAX_SPEED: + type: integer + minimum: 0 BARNETT: allOf: