Apply feedback from review, linter (#1848)

usds · Aug 30, 2022 · d9697cf
1 parent ca22fdb
commit d9697cf
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 4 deletions.
diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -395,19 +395,20 @@ def _prepare_initial_df(self) -> pd.DataFrame:
         census_tract_df = self._join_tract_dfs(census_tract_dfs)
 
         # Drop tracts that don't exist in the 2010 tracts
-        pre_join_len = census_tract_df.shape[0] 
+        pre_join_len = census_tract_df.shape[0]
 
         census_tract_df = census_tract_df.merge(
             self.national_tract_df,
             on="GEOID10_TRACT",
             how="inner",
         )
-
+        assert (
+            census_tract_df.shape[0] <= pre_join_len
+        ), "Join against national tract list ADDED rows"
         logger.info(
             "Dropped %s tracts not in the 2010 tract data",
             pre_join_len - census_tract_df.shape[0],
-        )        # If GEOID10s are read as numbers instead of strings, the initial 0 is dropped,
-        # and then we get too many CBG rows (one for 012345 and one for 12345).
+        )
 
         # Now sanity-check the merged df.
         self._census_tract_df_sanity_check(

diff --git a/data/data-pipeline/data_pipeline/tests/score/test_tiles_smoketests.py b/data/data-pipeline/data_pipeline/tests/score/test_tiles_smoketests.py
@@ -136,6 +136,7 @@ def error_message(self) -> Optional[str]:
                 f"score_df: {self.final_score_dtype}, "
                 f"tile_df: {self.tile_dtype}"
             )
+        return None
 
 
 def test_for_column_fidelitiy_from_score(tiles_df, final_score_df):