From f85208981f67b3abb9f70f9d371c62e44639203c Mon Sep 17 00:00:00 2001 From: Matt Bowen <83967628+mattbowen-usds@users.noreply.github.com> Date: Thu, 27 Oct 2022 11:44:56 -0400 Subject: [PATCH] Round ALL the float fields for the tiles (#2040) * Round ALL the float fields for the tiles (#2033) * Floor in a simpler way (#2033) Emma pointed out that all the stuff we're doing in floor_series is probably unnecessary for this case, so just use the built-in floor. * Update pickle I missed (#2033) --- .../data_pipeline/etl/score/constants.py | 69 ------------------ .../data_pipeline/etl/score/etl_score_post.py | 19 +++-- .../tests/snapshots/tile_data_expected.pkl | Bin 4442 -> 4439 bytes 3 files changed, 9 insertions(+), 79 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 81e96329e..154b3589a 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -387,72 +387,3 @@ field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT: "TA_PERC", field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY: "TA_PERC_FE", } - -# columns to round floats to 2 decimals -# TODO refactor to use much smaller subset of fields we DON'T want to round -TILES_SCORE_FLOAT_COLUMNS = [ - field_names.DIABETES_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.ASTHMA_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.HEART_DISEASE_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.DIESEL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.ENERGY_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.EXPECTED_AGRICULTURE_LOSS_RATE_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.EXPECTED_BUILDING_LOSS_RATE_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.EXPECTED_POPULATION_LOSS_RATE_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.HOUSING_BURDEN_FIELD + 
field_names.PERCENTILE_FIELD_SUFFIX, - field_names.LOW_LIFE_EXPECTANCY_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.LINGUISTIC_ISO_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.LOW_MEDIAN_INCOME_AS_PERCENT_OF_AMI_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.PM25_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.POVERTY_LESS_THAN_100_FPL_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.LEAD_PAINT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.NPL_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.RMP_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.TSDF_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.TRAFFIC_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.UNEMPLOYMENT_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - # Percentiles for Island areas' workforce columns - # To be clear: the island areas pull from 2009 census. PR does not. 
- field_names.LOW_CENSUS_DECENNIAL_AREA_MEDIAN_INCOME_PERCENT_FIELD_2009 - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009 - + field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009 - + field_names.ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - # Island areas HS degree attainment rate - field_names.CENSUS_DECENNIAL_HIGH_SCHOOL_ED_FIELD_2009, - field_names.WASTEWATER_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.DOT_TRAVEL_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.FUTURE_FLOOD_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.FUTURE_WILDFIRE_RISK_FIELD - + field_names.PERCENTILE_FIELD_SUFFIX, - field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME - + field_names.PERCENTILE_FIELD_SUFFIX, - # Include demographic data for sidebar -- as percents, NOT as percentiles. - field_names.PERCENT_BLACK_FIELD_NAME, - field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME, - field_names.PERCENT_ASIAN_FIELD_NAME, - field_names.PERCENT_HAWAIIAN_FIELD_NAME, - field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME, - field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME, - field_names.PERCENT_HISPANIC_FIELD_NAME, - field_names.PERCENT_OTHER_RACE_FIELD_NAME, - field_names.PERCENT_AGE_UNDER_10, - field_names.PERCENT_AGE_10_TO_64, - field_names.PERCENT_AGE_OVER_64, - # Geojson cannot support nulls in a boolean column when we create tiles; - # to preserve null character, we coerce to floats for all fields - # that use null to signify missing information in a boolean field. 
- field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME, - field_names.HISTORIC_REDLINING_SCORE_EXCEEDED, - field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT, -] diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py index 0296a9e32..87fbecda2 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score_post.py @@ -231,16 +231,15 @@ def _create_tile_data( score_tiles = score_tiles[ ~score_tiles[field_names.GEOID_TRACT_FIELD].isin(tracts_to_drop) ] - - score_tiles[constants.TILES_SCORE_FLOAT_COLUMNS] = score_tiles[ - constants.TILES_SCORE_FLOAT_COLUMNS - ].apply( - func=lambda series: floor_series( - series=series, - number_of_decimals=constants.TILES_ROUND_NUM_DECIMALS, - ), - axis=0, - ) + float_cols = [ + col + for col, col_dtype in score_tiles.dtypes.items() + if col_dtype == np.dtype("float64") + ] + scale_factor = 10**constants.TILES_ROUND_NUM_DECIMALS + score_tiles[float_cols] = ( + score_tiles[float_cols] * scale_factor + ).apply(np.floor) / scale_factor logger.info("Adding fields for island areas and Puerto Rico") # The below operation constructs variables for the front end. 
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl index 7ec7b6f8e6af8e9cba78534d0443b45f184d00c3..2666b1512f672e131b3617f38c4a8e1ef825efc3 100644 GIT binary patch delta 841 zcmZ{iO(;ZB6vyYjkI6inq4BZuwUCdJnq)Pd%tp$FEo(`su~ICaWENIa&M1^3Ha1gY zF;R#lqFCsm31bK|gO$mS_wGIKCgm*Nx%a;RJ@@xN_ukZ5b=rhB+r6%pDy#!5lqNQb zVWmZ33?s^*y>x9a6``hXv$l<-I?PxQ>-9!6+^Tv!$*O3sC;1NjvHj16{tpQ9$N`_m zvS3&%k&y8WBOzz>2I;XtqBEU^16v*2e*6dc*-3siHVtIxo~ zyg6@2nuL#3@KP3X!9?C6pa3I=$*lfgK+`&?t?ZYx*q23srlBt%p1#Wme;3VjNk zJ_ZsN-OwF0@~g^AUeVc05M|DZBTd|2rttA)y63t#AsRJBI-&aYrG39v{to{uAP!2% z!g>Be)(LP=J`Dl#B%hdQ;4#gWBfw?7)oBG@=;gj(G(Olh9iC$|#!#)FSV}0DQVviq r!*Qb>zx7JoGM2Ey>t~}_ubP8Z;_9YTG{9;{?>m0U=>YY delta 902 zcmcbvbW4e)fn}=yMiwC^Mu*AbOj?W$lbx9KCKoYf0Qt9pe1^$v%)2+AWKLsbnlcU9@rmIG0>Li zocx9@XmTHim>@^%lnl>KX9ocW2oRl|!6`pkol9V{J=YdS{mEarwCmmN-|Yx|-72@k z-Tr-y#@9prG4A%Sjlsa({#8F%$ld-qmus;>kZ$Xr8AyDd}`#O=Thd>da7LBim zfx3W(08Ij#15R`(TcK1asmmunTRG9E~aR80dDO2aw!a{}|?1xYKUH+z9vL3rO%GX?t@91kM6Q zU?$#!Ip`(K8*p>prGbIF{S=tT;M(8S!?eLI`v9{H%J72%cl$dqORs}Nex1AheOQdW z0mqysQnXx!2C)4FP=pmMakqztub&@Ml)tTqDQkhc4VqLK+{t6u<2J{B9n`OIlkGRY zwgtuh8+ZEyFjvBp%2Sxj;A!t9EM$*^W0A|xZ?ZhMhNM0?l{-~1GB5xU7%+j!$!j^X aCd+e5Ot$AVU^JcFz`2~Ud~*c%e^vmbYhly?