Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 1944: Changing source of Tribal data file #1960

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,7 @@ def __init__(self):
# our other demographic data
self.NON_HISPANIC_WHITE_FIELD = "PCT086005" # Total!!White
self.HISPANIC_FIELD = "PCT086006" # Total!!Hispanic or Latino
self.OTHER_RACE_FIELD = (
"PCT086007" # Total!!Other Ethnic Origin or Race
)
self.OTHER_RACE_FIELD = "PCT086007" # Total!!Other Ethnic Origin or Race

self.TOTAL_RACE_POPULATION_VI_FIELD = "P003001" # Total
self.BLACK_VI_FIELD = (
Expand Down
76 changes: 51 additions & 25 deletions data/data-pipeline/data_pipeline/etl/sources/tribal/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import geopandas as gpd
import pandas as pd

from data_pipeline.config import settings
from data_pipeline.etl.base import ExtractTransformLoad
from data_pipeline.score import field_names
from data_pipeline.utils import get_module_logger, unzip_file_from_url
Expand All @@ -11,9 +12,13 @@

class TribalETL(ExtractTransformLoad):
def __init__(self):
self.GEOJSON_BASE_PATH = self.DATA_PATH / "tribal" / "geojson"
self.GEOGRAPHIC_BASE_PATH = (
self.DATA_PATH / "tribal" / "geographic_data"
)
self.CSV_BASE_PATH = self.DATA_PATH / "tribal" / "csv"
self.NATIONAL_TRIBAL_GEOJSON_PATH = self.GEOJSON_BASE_PATH / "usa.json"
self.NATIONAL_TRIBAL_GEOJSON_PATH = (
self.GEOGRAPHIC_BASE_PATH / "usa.json"
)
self.USA_TRIBAL_DF_LIST = []

def extract(self) -> None:
Expand All @@ -24,37 +29,57 @@ def extract(self) -> None:
"""
logger.info("Downloading Tribal Data")

bia_geojson_url = "https://justice40-data.s3.amazonaws.com/data-sources/BIA_National_LAR_json.zip"
alaska_geojson_url = "https://justice40-data.s3.amazonaws.com/data-sources/Alaska_Native_Villages_json.zip"
bia_shapefile_zip_url = (
settings.AWS_JUSTICE40_DATASOURCES_URL
+ "/BIA_National_LAR_updated_20220929.zip"
)

tsa_and_aian_geojson_zip_url = (
settings.AWS_JUSTICE40_DATASOURCES_URL
+ "/BIA_TSA_and_AIAN_json.zip"
)

alaska_geojson_url = (
settings.AWS_JUSTICE40_DATASOURCES_URL
+ "/Alaska_Native_Villages_json.zip"
)

unzip_file_from_url(
bia_geojson_url,
bia_shapefile_zip_url,
self.TMP_PATH,
self.DATA_PATH / "tribal" / "geojson" / "bia_national_lar",
self.GEOGRAPHIC_BASE_PATH / "bia_national_lar",
)

unzip_file_from_url(
tsa_and_aian_geojson_zip_url,
self.TMP_PATH,
self.GEOGRAPHIC_BASE_PATH / "tsa_and_aian",
)

unzip_file_from_url(
alaska_geojson_url,
self.TMP_PATH,
self.DATA_PATH / "tribal" / "geojson" / "alaska_native_villages",
self.GEOGRAPHIC_BASE_PATH / "alaska_native_villages",
)
pass

def _transform_bia_national_lar(self, tribal_geojson_path: Path) -> None:
def _transform_bia_national_lar(self, path: Path) -> None:
"""Transform the Tribal BIA National Lar Geodataframe and appends it to the
national Tribal Dataframe List

Args:
tribal_geojson_path (Path): the Path to the Tribal Geojson
path (Path): the Path to the directory containing the unzipped BIA National LAR shapefile

Returns:
None
"""

bia_national_lar_df = gpd.read_file(tribal_geojson_path)
bia_national_lar_df = gpd.read_file(path)

# TODO: delete this temporary debug logging of the column names
logger.info(f"Columns: {bia_national_lar_df.columns}\n")

bia_national_lar_df.drop(
["OBJECTID", "GISAcres", "Shape_Length", "Shape_Area"],
["GISAcres"],
axis=1,
inplace=True,
)
Expand Down Expand Up @@ -162,29 +187,30 @@ def transform(self) -> None:
"""
logger.info("Transforming Tribal Data")

# load the geojsons
bia_national_lar_geojson = (
self.GEOJSON_BASE_PATH
/ "bia_national_lar"
/ "BIA_National_LAR.json"
# Set the filepaths:
bia_national_lar_shapefile = (
self.GEOGRAPHIC_BASE_PATH / "bia_national_lar"
)

bia_aian_supplemental_geojson = (
self.GEOJSON_BASE_PATH
/ "bia_national_lar"
self.GEOGRAPHIC_BASE_PATH
/ "tsa_and_aian"
/ "BIA_AIAN_Supplemental.json"
)
bia_tsa_geojson_geojson = (
self.GEOJSON_BASE_PATH / "bia_national_lar" / "BIA_TSA.json"

bia_tsa_geojson = (
self.GEOGRAPHIC_BASE_PATH / "tsa_and_aian" / "BIA_TSA.json"
)

alaska_native_villages_geojson = (
self.GEOJSON_BASE_PATH
self.GEOGRAPHIC_BASE_PATH
/ "alaska_native_villages"
/ "AlaskaNativeVillages.gdb.geojson"
)

self._transform_bia_national_lar(bia_national_lar_geojson)
self._transform_bia_national_lar(bia_national_lar_shapefile)
self._transform_bia_aian_supplemental(bia_aian_supplemental_geojson)
self._transform_bia_tsa(bia_tsa_geojson_geojson)
self._transform_bia_tsa(bia_tsa_geojson)
self._transform_alaska_native_villages(alaska_native_villages_geojson)

def load(self) -> None:
Expand All @@ -194,13 +220,13 @@ def load(self) -> None:
None
"""
logger.info("Saving Tribal GeoJson and CSV")

usa_tribal_df = gpd.GeoDataFrame(
pd.concat(self.USA_TRIBAL_DF_LIST, ignore_index=True)
)
usa_tribal_df = usa_tribal_df.to_crs(
"+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
)

logger.info("Writing national geojson file")
usa_tribal_df.to_file(
self.NATIONAL_TRIBAL_GEOJSON_PATH, driver="GeoJSON"
Expand Down