Make import more obvious (#1835)
mattbowen-usds committed Sep 6, 2022
1 parent 17669c9 · commit 2d0a4aa
Showing 2 changed files with 26 additions and 27 deletions.
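
The change is the same in both files: a module-level alias is dropped in favor of importing the constant directly, so its origin is visible at the point of use. A minimal sketch of the before/after pattern, using the names from the diff below:

    # Before: the constant was re-bound under a second, longer name.
    from data_pipeline.score import field_names
    GEOID_TRACT_FIELD_NAME = field_names.GEOID_TRACT_FIELD

    # After: the constant is imported directly, so readers can see
    # where it is defined without chasing the alias.
    from data_pipeline.score.field_names import GEOID_TRACT_FIELD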
45 changes: 22 additions & 23 deletions data/data-pipeline/data_pipeline/tests/score/fixtures.py
@@ -2,16 +2,15 @@
 import pytest
 from data_pipeline.config import settings
 from data_pipeline.score import field_names
+from data_pipeline.score.field_names import GEOID_TRACT_FIELD
 from data_pipeline.etl.score import constants
 
-GEOID_TRACT_FIELD_NAME = field_names.GEOID_TRACT_FIELD
-
 
 @pytest.fixture(scope="session")
 def final_score_df():
     return pd.read_csv(
         settings.APP_ROOT / "data" / "score" / "csv" / "full" / "usa.csv",
-        dtype={GEOID_TRACT_FIELD_NAME: str},
+        dtype={GEOID_TRACT_FIELD: str},
         low_memory=False,
     )

@@ -21,7 +20,7 @@ def census_df():
     census_csv = constants.DATA_PATH / "dataset" / "census_acs_2019" / "usa.csv"
     return pd.read_csv(
         census_csv,
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -31,7 +30,7 @@ def ejscreen_df():
     ejscreen_csv = constants.DATA_PATH / "dataset" / "ejscreen" / "usa.csv"
     return pd.read_csv(
         ejscreen_csv,
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -43,7 +42,7 @@ def hud_housing_df():
     )
     return pd.read_csv(
         hud_housing_csv,
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -53,7 +52,7 @@ def cdc_places_df():
     cdc_places_csv = constants.DATA_PATH / "dataset" / "cdc_places" / "usa.csv"
     return pd.read_csv(
         cdc_places_csv,
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -68,7 +67,7 @@ def census_acs_median_incomes_df():
     )
     return pd.read_csv(
         census_acs_median_incomes_csv,
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -80,7 +79,7 @@ def cdc_life_expectancy_df():
     )
     return pd.read_csv(
         cdc_life_expectancy_csv,
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -92,7 +91,7 @@ def doe_energy_burden_df():
     )
     return pd.read_csv(
         doe_energy_burden_csv,
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -101,7 +100,7 @@ def doe_energy_burden_df():
 def national_risk_index_df():
     return pd.read_csv(
         constants.DATA_PATH / "dataset" / "national_risk_index" / "usa.csv",
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -110,7 +109,7 @@ def national_risk_index_df():
 def dot_travel_disadvantage_df():
     return pd.read_csv(
         constants.DATA_PATH / "dataset" / "travel_composite" / "usa.csv",
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -119,7 +118,7 @@ def dot_travel_disadvantage_df():
 def fsf_fire_df():
     return pd.read_csv(
         constants.DATA_PATH / "dataset" / "fsf_wildfire_risk" / "usa.csv",
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -128,7 +127,7 @@ def fsf_fire_df():
 def fsf_flood_df():
     return pd.read_csv(
         constants.DATA_PATH / "dataset" / "fsf_flood_risk" / "usa.csv",
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -137,7 +136,7 @@ def fsf_flood_df():
 def nature_deprived_df():
     return pd.read_csv(
         constants.DATA_PATH / "dataset" / "nlcd_nature_deprived" / "usa.csv",
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -146,7 +145,7 @@ def nature_deprived_df():
 def eamlis_df():
     return pd.read_csv(
         constants.DATA_PATH / "dataset" / "eamlis" / "usa.csv",
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -155,7 +154,7 @@ def eamlis_df():
 def fuds_df():
     return pd.read_csv(
         constants.DATA_PATH / "dataset" / "us_army_fuds" / "usa.csv",
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -167,7 +166,7 @@ def geocorr_urban_rural_df():
     )
     return pd.read_csv(
         geocorr_urban_rural_csv,
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -179,7 +178,7 @@ def census_decennial_df():
     )
     return pd.read_csv(
         census_decennial_csv,
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -191,7 +190,7 @@ def census_2010_df():
     )
     return pd.read_csv(
         census_2010_csv,
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -202,7 +201,7 @@ def hrs_df():
 
     return pd.read_csv(
         hrs_csv,
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
     )

@@ -212,8 +211,8 @@ def national_tract_df():
     national_tract_csv = constants.DATA_CENSUS_CSV_FILE_PATH
     return pd.read_csv(
         national_tract_csv,
-        names=[GEOID_TRACT_FIELD_NAME],
-        dtype={GEOID_TRACT_FIELD_NAME: "string"},
+        names=[GEOID_TRACT_FIELD],
+        dtype={GEOID_TRACT_FIELD: "string"},
         low_memory=False,
         header=None,
     )
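
For context, the fixtures above are session-scoped, so each CSV is read once per test run and injected into tests by parameter name. A hypothetical usage sketch (not a test from this commit; the 11-character check is an illustrative assumption about census tract GEOIDs):

    from data_pipeline.score.field_names import GEOID_TRACT_FIELD

    def test_tract_ids_are_well_formed(final_score_df):
        # pytest matches the parameter name to the fixture defined above;
        # dtype={GEOID_TRACT_FIELD: str} keeps tract IDs as strings, which
        # preserves leading zeros in the 11-digit census tract GEOIDs.
        assert final_score_df[GEOID_TRACT_FIELD].str.len().eq(11).all()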
8 changes: 4 additions & 4 deletions data/data-pipeline/data_pipeline/tests/score/test_output.py
@@ -6,6 +6,7 @@
 import pandas as pd
 import numpy as np
 from data_pipeline.score import field_names
+from data_pipeline.score.field_names import GEOID_TRACT_FIELD
 from .fixtures import (
     final_score_df,
     ejscreen_df,
@@ -30,7 +31,6 @@
 
 
 pytestmark = pytest.mark.smoketest
-GEOID_TRACT_FIELD_NAME = field_names.GEOID_TRACT_FIELD
 UNMATCHED_TRACK_THRESHOLD = 1000


@@ -258,7 +258,7 @@ def test_data_sources(
     final = "final_"
     df: pd.DataFrame = final_score_df.merge(
         data_source,
-        on=GEOID_TRACT_FIELD_NAME,
+        on=GEOID_TRACT_FIELD,
         indicator="MERGE",
         suffixes=(final, f"_{data_source_name}"),
         how="outer",
@@ -267,7 +267,7 @@ def test_data_sources(
     # Make our lists of columns for later comparison
     core_cols = data_source.columns.intersection(
         final_score_df.columns
-    ).drop(GEOID_TRACT_FIELD_NAME)
+    ).drop(GEOID_TRACT_FIELD)
     data_source_columns = [f"{col}_{data_source_name}" for col in core_cols]
     final_columns = [f"{col}{final}" for col in core_cols]
     assert (
@@ -312,7 +312,7 @@ def test_data_sources(
 def test_output_tracts(final_score_df, national_tract_df):
     df = final_score_df.merge(
         national_tract_df,
-        on=GEOID_TRACT_FIELD_NAME,
+        on=GEOID_TRACT_FIELD,
         how="outer",
         indicator="MERGE",
     )
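
The merges above use pandas' merge indicator: with indicator="MERGE", an outer merge adds a column recording whether each row came from the left frame, the right frame, or both, which the smoke tests can filter to count unmatched tracts. A self-contained sketch of the technique with toy data (not the project's columns):

    import pandas as pd

    left = pd.DataFrame({"tract": ["A", "B"]})
    right = pd.DataFrame({"tract": ["B", "C"]})

    # indicator="MERGE" adds a categorical column named "MERGE" holding
    # "left_only", "right_only", or "both" for each output row.
    df = left.merge(right, on="tract", how="outer", indicator="MERGE")
    unmatched = df[df["MERGE"] != "both"]  # rows present on only one side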
