diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 81e96329e..154b3589a 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -387,72 +387,3 @@ field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT: "TA_PERC", field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY: "TA_PERC_FE", } - -# columns to round floats to 2 decimals -# TODO refactor to use much smaller subset of fields we DON'T want to round -TILES_SCORE_FLOAT_COLUMNS = [ - field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.HOUSING_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.POVERTY_LESS_THAN_100_FPL_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - # Percentiles for Island areas' workforce columns - # To be clear: the island areas pull from 2009 census. PR does not. - field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009 - + field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009 - + field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - # Island areas HS degree attainment rate - field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009, - field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.DOT_TRAVEL_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.FUTURE_FLOOD_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.FUTURE_WILDFIRE_RISK_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME - + field_names.PERCENTILE_FIELD_SUFFIX, - # Include demographic data for sidebar -- as percents, NOT as percentiles. - field_names.PERCENT_BLACK_FIELD_NAME, - field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME, - field_names.PERCENT_ASIAN_FIELD_NAME, - field_names.PERCENT_HAWAIIAN_FIELD_NAME, - field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME, - field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME, - field_names.PERCENT_HISPANIC_FIELD_NAME, - field_names.PERCENT_OTHER_RACE_FIELD_NAME, - field_names.PERCENT_AGE_UNDER_10, - field_names.PERCENT_AGE_10_TO_64, - field_names.PERCENT_AGE_OVER_64, - # Geojson cannot support nulls in a boolean column when we create tiles; - # to preserve null character, we coerce to floats for all fields - # that use null to signify missing information in a boolean field. - field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME, - field_names.HISTORIC_REDLINING_SCORE_EXCEEDED, - field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT, -] diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py index 0296a9e32..87fbecda2 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py @@ -231,16 +231,15 @@ def _create_tile_data( score_tiles = score_tiles[ ~score_tiles[field_names.GEOID_TRACT_FIELD].isin(tracts_to_drop) ] - - score_tiles[constants.TILES_SCORE_FLOAT_COLUMNS] = score_tiles[ - constants.TILES_SCORE_FLOAT_COLUMNS - ].apply( - func=lambda series: floor_series( - series=series, - number_of_decimals=constants.TILES_ROUND_NUM_DECIMALS, - ), - axis=0, - ) + float_cols = [ + col + for col, col_dtype in score_tiles.dtypes.items() + if col_dtype == np.dtype("float64") + ] + scale_factor = 10**constants.TILES_ROUND_NUM_DECIMALS + score_tiles[float_cols] = ( + score_tiles[float_cols] * scale_factor + ).apply(np.floor) / scale_factor logger.info("Adding fields for island areas and Puerto Rico") # The below operation constructs variables for the front end. diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl index 7ec7b6f8e..2666b1512 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl differ