Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixup TA_COUNT and TA_PERC #1991

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions data/data-pipeline/data_pipeline/etl/score/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,9 +394,10 @@
field_names.PERCENT_AGE_UNDER_10: "AGE_10",
field_names.PERCENT_AGE_10_TO_64: "AGE_MIDDLE",
field_names.PERCENT_AGE_OVER_64: "AGE_OLD",
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT: "TA_COUNT",
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK: "TA_COUNT_AK",
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS: "TA_COUNT_C",
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT: "TA_PERC",
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING: "TA_PERC_STR",
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY: "TA_PERC_FE",
}

# columns to round floats to 2 decimals
Expand Down
5 changes: 3 additions & 2 deletions data/data-pipeline/data_pipeline/etl/score/etl_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,15 +488,16 @@ def _prepare_initial_df(self) -> pd.DataFrame:
field_names.PERCENT_AGE_10_TO_64,
field_names.PERCENT_AGE_OVER_64,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
] + self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS

non_numeric_columns = [
self.GEOID_TRACT_FIELD_NAME,
field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD,
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING,
]

boolean_columns = [
Expand Down

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
Binary file not shown.
73 changes: 37 additions & 36 deletions data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Optional

import geopandas as gpd
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -51,12 +53,14 @@ class TribalOverlapETL(ExtractTransformLoad):
def __init__(self):
self.COLUMNS_TO_KEEP = [
self.GEOID_TRACT_FIELD_NAME,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
]

self.OVERALL_TRIBAL_COUNT = "OVERALL_TRIBAL_COUNT"
self.output_df: pd.DataFrame
self.census_tract_gdf: gpd.GeoDataFrame
self.tribal_gdf: gpd.GeoDataFrame
Expand All @@ -69,40 +73,18 @@ def _create_string_from_list(series: pd.Series) -> str:
return ", ".join(str_list)

@staticmethod
def _adjust_percentage_to_string(percentage_float: float) -> str:
"""Helper method that converts numeric floats to strings based on what-to-show rules.

What are these rules?
0. If None, return none
1. If the percentage is below 1%, produce 'less than 1%'
2. If the percentage is above 99.95%, produce '100%'
3. If the percentage is X.00 when rounded to two sig digits, display the integer of the percent
4. If the percentage has unique significant digits, report two digits
"""
# Rule 0
if not percentage_float:
# I believe we need to do this because JS will do weird things with a mix-type column?
return "No tribal areas"
# Rule 1
def _adjust_percentage_for_frontend(
percentage_float: float,
) -> Optional[float]:
"""Round numbers very close to 0 to 0 and very close to 1 to 1 for display"""
if percentage_float is None:
return None
if percentage_float < 0.01:
return "less than 1%"
# Rule 2
return 0.0
if percentage_float > 0.9995:
return "100%"

rounded_percentage_str = str(round(percentage_float, 4) * 100)
first_digits, last_digits = rounded_percentage_str.split(".")

# Rule 3 (this is a shorthand because round(4) will truncate repeated 0s)
if last_digits[-1] == "0":
return first_digits + "%"
return 1.0

# Rule 4
if last_digits != "00":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we dropping the "Convert 7.00% to 7" rule entirely?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're returning floats now, so that formatting will be up to the frontend.

return rounded_percentage_str + "%"

# There is something missing!
raise Exception("Yikes! The string conversion here failed!")
return percentage_float

def extract(self) -> None:
self.census_tract_gdf = get_tract_geojson()
Expand Down Expand Up @@ -130,7 +112,7 @@ def transform(self) -> None:

tribal_overlap_with_tracts = tribal_overlap_with_tracts.rename(
columns={
field_names.TRIBAL_ID: field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
field_names.TRIBAL_ID: self.OVERALL_TRIBAL_COUNT,
field_names.TRIBAL_LAND_AREA_NAME: field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
}
)
Expand Down Expand Up @@ -245,12 +227,31 @@ def transform(self) -> None:
merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT],
)

# Counting tribes in the lower 48 is different from counting in AK,
# so per request by the design and frontend team, we remove all the
# counts outside AK
merged_output_df[
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK
] = np.where(
# In Alaska
(merged_output_df_state_fips_code == "02"),
# Keep the counts
merged_output_df[self.OVERALL_TRIBAL_COUNT],
# Otherwise, null them
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

great comments!

None,
)

# TODO: Count tribal areas in the lower 48 correctly
merged_output_df[
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS
] = None

# The very final thing we want to do is produce a display-ready value for the front end to show
# We do this here so that all of the logic is included
merged_output_df[
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY
] = merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT].apply(
self._adjust_percentage_to_string
self._adjust_percentage_for_frontend
)

self.output_df = merged_output_df
9 changes: 7 additions & 2 deletions data/data-pipeline/data_pipeline/score/field_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,12 +355,17 @@
TRIBAL_LAND_AREA_NAME = "landAreaName"

# Tribal overlap variables
COUNT_OF_TRIBAL_AREAS_IN_TRACT = "Number of Tribal areas within Census tract"
COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS = (
"Number of Tribal areas within Census tract"
)
COUNT_OF_TRIBAL_AREAS_IN_TRACT_AK = (
"Number of Tribal areas within Census tract for Alaska"
)
NAMES_OF_TRIBAL_AREAS_IN_TRACT = "Names of Tribal areas within Census tract"
PERCENT_OF_TRIBAL_AREA_IN_TRACT = (
"Percent of the Census tract that is within Tribal areas"
)
PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY_STRING = (
PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY = (
"Percent of the Census tract that is within Tribal areas, for display"
)

Expand Down