Skip to content

Commit

Permalink
Apply feedback from review, linter (#1848)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mattbowen-usds committed Aug 30, 2022
1 parent ca22fdb commit d9697cf
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
9 changes: 5 additions & 4 deletions data/data-pipeline/data_pipeline/etl/score/etl_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,19 +395,20 @@ def _prepare_initial_df(self) -> pd.DataFrame:
census_tract_df = self._join_tract_dfs(census_tract_dfs)

# Drop tracts that don't exist in the 2010 tracts
pre_join_len = census_tract_df.shape[0]
pre_join_len = census_tract_df.shape[0]

census_tract_df = census_tract_df.merge(
self.national_tract_df,
on="GEOID10_TRACT",
how="inner",
)

assert (
census_tract_df.shape[0] <= pre_join_len
), "Join against national tract list ADDED rows"
logger.info(
"Dropped %s tracts not in the 2010 tract data",
pre_join_len - census_tract_df.shape[0],
) # If GEOID10s are read as numbers instead of strings, the initial 0 is dropped,
# and then we get too many CBG rows (one for 012345 and one for 12345).
)

# Now sanity-check the merged df.
self._census_tract_df_sanity_check(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def error_message(self) -> Optional[str]:
f"score_df: {self.final_score_dtype}, "
f"tile_df: {self.tile_dtype}"
)
return None


def test_for_column_fidelitiy_from_score(tiles_df, final_score_df):
Expand Down

0 comments on commit d9697cf

Please sign in to comment.