diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 5bb4374b6..de657f63a 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -123,7 +123,7 @@ # Controlling Tile user experience columns THRESHOLD_COUNT_TO_SHOW_FIELD_NAME = "THRHLD" TILES_ISLAND_AREAS_THRESHOLD_COUNT = 3 -TILES_PUERTO_RICO_THRESHOLD_COUNT = 4 +TILES_PUERTO_RICO_THRESHOLD_COUNT = 10 TILES_NATION_THRESHOLD_COUNT = 21 # Note that the FIPS code is a string diff --git a/data/data-pipeline/data_pipeline/score/score_narwhal.py b/data/data-pipeline/data_pipeline/score/score_narwhal.py index 2958f788c..fa05de68c 100644 --- a/data/data-pipeline/data_pipeline/score/score_narwhal.py +++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py @@ -551,7 +551,7 @@ def _workforce_factor(self) -> bool: # Where the percent of households at or below 100% of the federal poverty level # is above Xth percentile # or - # Where linguistic isolation is above Xth percentile + # Where linguistic isolation is above Xth percentile (except PR) # AND # Where the high school degree achievement rates for adults 25 years and older # is less than Y% @@ -566,6 +566,12 @@ def _workforce_factor(self) -> bool: field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD, ] + pr_workforce_eligibility_columns = [ + field_names.UNEMPLOYMENT_LOW_HS_EDUCATION_FIELD, + field_names.POVERTY_LOW_HS_EDUCATION_FIELD, + field_names.LOW_MEDIAN_INCOME_LOW_HS_EDUCATION_FIELD, + ] + self.df[field_names.LOW_HS_EDUCATION_FIELD] = ( self.df[field_names.HIGH_SCHOOL_ED_FIELD] >= self.LACK_OF_HIGH_SCHOOL_MINIMUM_THRESHOLD @@ -622,9 +628,41 @@ def _workforce_factor(self) -> bool: & self.df[field_names.LOW_HS_EDUCATION_FIELD] ) - workforce_combined_criteria_for_states = self.df[ - workforce_eligibility_columns - ].any(axis="columns") + self.df[field_names.WORKFORCE_THRESHOLD_EXCEEDED] = ( + ## First we calculate for the non-island areas + ( + ( + self.df[field_names.POVERTY_PCTILE_THRESHOLD] + | self.df[field_names.UNEMPLOYMENT_PCTILE_THRESHOLD] + ) + | self.df[field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD] + ) + | ( + self.df[field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD] + & (self.df[field_names.GEOID_TRACT_FIELD].str[:2] != constants.TILES_PUERTO_RICO_FIPS_CODE[0] ) + ) + ) + + # Use only PR combined criteria for rows with PR FIPS code; + # otherwise use all criteria. + workforce_combined_criteria_for_states = ( + ( + ( + self.df[field_names.GEOID_TRACT_FIELD].str[:2] == constants.TILES_PUERTO_RICO_FIPS_CODE[0] + ) + & + self.df[pr_workforce_eligibility_columns].any(axis="columns") + ) + | + ( + ( + self.df[field_names.GEOID_TRACT_FIELD].str[:2] != constants.TILES_PUERTO_RICO_FIPS_CODE[0] + ) + & self.df[ + workforce_eligibility_columns + ].any(axis="columns") + ) + ) self._increment_total_eligibility_exceeded( workforce_eligibility_columns @@ -742,17 +780,21 @@ def _workforce_factor(self) -> bool: # Because these criteria are calculated differently for the islands, we also calculate the # thresholds to pass to the FE slightly differently + # If it's PR, we don't use linguistic isolation. self.df[field_names.WORKFORCE_THRESHOLD_EXCEEDED] = ( ## First we calculate for the non-island areas ( ( self.df[field_names.POVERTY_PCTILE_THRESHOLD] - | self.df[field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD] + | self.df[field_names.UNEMPLOYMENT_PCTILE_THRESHOLD] ) | self.df[field_names.LOW_MEDIAN_INCOME_PCTILE_THRESHOLD] ) - | self.df[field_names.UNEMPLOYMENT_PCTILE_THRESHOLD] + | ( + self.df[field_names.LINGUISTIC_ISOLATION_PCTILE_THRESHOLD] + & ( self.df[field_names.GEOID_TRACT_FIELD].str[:2] != constants.TILES_PUERTO_RICO_FIPS_CODE[0] ) + ) ) | ( ## then we calculate just for the island areas ( @@ -777,7 +819,7 @@ def _workforce_factor(self) -> bool: ) def add_columns(self) -> pd.DataFrame: - logger.info("Adding Score M") + logger.info("Adding Score Narhwal") self.df[field_names.THRESHOLD_COUNT] = 0