Skip to content

Commit

Permalink
Updating traffic barriers to include low pop threshold (#1889)
Browse files Browse the repository at this point in the history
Changing the traffic barriers to only be included for places with recorded population
  • Loading branch information
emma-nechamkin authored Sep 7, 2022
1 parent d41153d commit 426328e
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion data/data-pipeline/data_pipeline/etl/score/etl_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,8 @@ def _prepare_initial_df(self) -> pd.DataFrame:
), "Join against national tract list ADDED rows"
logger.info(
"Dropped %s tracts not in the 2010 tract data",
pre_join_len - census_tract_df[field_names.GEOID_TRACT_FIELD].nunique()
pre_join_len
- census_tract_df[field_names.GEOID_TRACT_FIELD].nunique(),
)

# Now sanity-check the merged df.
Expand Down Expand Up @@ -551,6 +552,9 @@ class ReversePercentile:
# For *Non-Natural Space*, we may only want to include tracts that have at least 35 acres, I think. This will
# get rid of tracts that we think are statistical aberrations. Right now, we have left this out
# pending ground-truthing.
#
# For *Traffic Barriers*, we want to exclude low-population tracts, which may show a high burden solely because
# of their low population. The low-population threshold is set in the matching branch of the if/elif chain.

for numeric_column in numeric_columns:
drop_tracts = []
Expand All @@ -575,6 +579,17 @@ class ReversePercentile:
f"Dropping {len(drop_tracts)} tracts from Linguistic Isolation"
)

elif numeric_column == field_names.DOT_TRAVEL_BURDEN_FIELD:
# Having very few people (at or below `low_population`) appears to be correlated with travel burden, but doesn't
# represent on-the-ground need. For now, we remove these tracts from the percentile calculation. (To be QAed live)
low_population = 20
drop_tracts = df_copy[
df_copy[field_names.TOTAL_POP_FIELD] <= low_population
][field_names.GEOID_TRACT_FIELD].to_list()
logger.info(
f"Dropping {len(drop_tracts)} tracts from DOT traffic burden"
)

df_copy = self._add_percentiles_to_df(
df=df_copy,
input_column_name=numeric_column,
Expand Down

0 comments on commit 426328e

Please sign in to comment.