Skip to content

Commit

Permalink
Add column names test (#1848)
Browse files Browse the repository at this point in the history
  • Loading branch information
mattbowen-usds committed Aug 26, 2022
1 parent f2964d0 commit 192ebe6
Showing 1 changed file with 26 additions and 0 deletions.
26 changes: 26 additions & 0 deletions data/data-pipeline/data_pipeline/tests/test_tiles_smoketests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
import pytest
from data_pipeline.utils import get_module_logger
from data_pipeline.config import settings
from data_pipeline.etl.score.constants import (
TILES_SCORE_COLUMNS,
THRESHOLD_COUNT_TO_SHOW_FIELD_NAME,
USER_INTERFACE_EXPERIENCE_FIELD_NAME,
)

# Module-level logger for this test module, created via the project's
# get_module_logger helper (presumably a configured logging.Logger — confirm).
logger = get_module_logger(__name__)

Expand All @@ -22,6 +27,7 @@ def tiles_df():
return pd.read_csv(
settings.APP_ROOT / "data" / "score" / "csv" / "tiles" / "usa.csv",
dtype={"GTF": str},
low_memory=False,
)


Expand Down Expand Up @@ -81,6 +87,26 @@ def test_count_of_fips_codes(tiles_df, states_count=56):
), "Some states do not have any percentile data"


def test_column_presence(tiles_df):
    """Check that tiles/usa.csv contains exactly the expected columns.

    The expected column names are the values of ``TILES_SCORE_COLUMNS`` plus
    ``THRESHOLD_COUNT_TO_SHOW_FIELD_NAME`` and
    ``USER_INTERFACE_EXPERIENCE_FIELD_NAME``. The test fails if the file has
    columns outside that set, or lacks any column in it.

    Args:
        tiles_df: the tiles data frame supplied by the ``tiles_df`` fixture.
    """
    expected_column_names = set(TILES_SCORE_COLUMNS.values()) | {
        THRESHOLD_COUNT_TO_SHOW_FIELD_NAME,
        USER_INTERFACE_EXPERIENCE_FIELD_NAME,
    }
    actual_column_names = set(tiles_df.columns)
    extra_columns = actual_column_names - expected_column_names
    # Missing means "expected but absent from the file". Subtracting the
    # expected set from itself always yields an empty set, which would make
    # the second assertion impossible to trip.
    missing_columns = expected_column_names - actual_column_names
    assert not (
        extra_columns
    ), f"tiles/usa.csv has columns not specified in TILES_SCORE_COLUMNS: {extra_columns}"
    assert not (
        missing_columns
    ), f"tiles/usa.csv is missing columns from TILES_SCORE_COLUMNS: {missing_columns}"


def test_colunmn_types_as_expected(tiles_df):
    """Placeholder for the column type checks; fails until implemented.

    Args:
        tiles_df: the tiles data frame supplied by the ``tiles_df`` fixture
            (not used yet).
    """
    # NOTE(review): the name misspells "column"; it is kept unchanged so that
    # any existing test selection by name keeps working.
    # The leftover breakpoint() debugger call was removed: it stops
    # non-interactive test runs in the debugger instead of reporting a result.
    raise AssertionError(
        "test_colunmn_types_as_expected is not implemented yet"
    )

# For each data point that we visualize, we want to confirm that
# (1) the column is represented in tiles_columns
# (2) the column values are of the TYPE they are supposed to be

0 comments on commit 192ebe6

Please sign in to comment.