From c74f68df001f2516309b576c0c1fdd2ae25f5866 Mon Sep 17 00:00:00 2001 From: matt bowen Date: Thu, 15 Sep 2022 13:31:07 -0400 Subject: [PATCH] Fix bad test assumption (#1848) --- .../historic_redlining/data/output.csv | 32 +++++++++--------- .../sources/historic_redlining/test_etl.py | 33 +++++++++++++++++++ 2 files changed, 49 insertions(+), 16 deletions(-) diff --git a/data/data-pipeline/data_pipeline/tests/sources/historic_redlining/data/output.csv b/data/data-pipeline/data_pipeline/tests/sources/historic_redlining/data/output.csv index 934ebc711..8b86f139b 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/historic_redlining/data/output.csv +++ b/data/data-pipeline/data_pipeline/tests/sources/historic_redlining/data/output.csv @@ -1,16 +1,16 @@ -GEOID10_TRACT,Tract-level redlining score -06027000800,3.3000000000 -06061021322,3.9900000000 -06069000802,3.7800000000 -15001021010,4.0000000000 -15001021101,4.0000000000 -15001021402,3.8600000000 -15001021800,4.0000000000 -15003010201,3.9600000000 -15007040603,3.9700000000 -15007040604,3.9400000000 -15007040700,3.2000000000 -15009030100,3.7700000000 -15009030201,3.2300000000 -15009030402,3.0000000000 -15009030800,3.4000000000 +GEOID10_TRACT,Tract-level redlining score,Tract-level redlining score meets or exceeds 3.25,Tract-level redlining score meets or exceeds 3.5,Tract-level redlining score meets or exceeds 3.75 +06027000800,3.3000000000,True,False,False +06061021322,3.9900000000,True,True,True +06069000802,3.7800000000,True,True,True +15001021010,4.0000000000,True,True,True +15001021101,4.0000000000,True,True,True +15001021402,3.8600000000,True,True,True +15001021800,4.0000000000,True,True,True +15003010201,3.9600000000,True,True,True +15007040603,3.9700000000,True,True,True +15007040604,3.9400000000,True,True,True +15007040700,3.2000000000,False,False,False +15009030100,3.7700000000,True,True,True +15009030201,3.2300000000,False,False,False +15009030402,3.0000000000,False,False,False +15009030800,3.4000000000,True,False,False diff --git a/data/data-pipeline/data_pipeline/tests/sources/historic_redlining/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/historic_redlining/test_etl.py index e28e371f0..edd5a00c4 100644 --- a/data/data-pipeline/data_pipeline/tests/sources/historic_redlining/test_etl.py +++ b/data/data-pipeline/data_pipeline/tests/sources/historic_redlining/test_etl.py @@ -1,3 +1,4 @@ +# pylint: disable=protected-access import pathlib import pandas as pd from data_pipeline.tests.sources.example.test_etl import TestETL @@ -31,3 +32,35 @@ def test_extract_produces_valid_data(self, snapshot, mock_etl, mock_paths): dtype={etl.GEOID_TRACT_FIELD_NAME: str}, ) assert tmp_df.shape == (15, 5) + + def test_load_base(self, snapshot, mock_etl, mock_paths): + """Test load method. + We need to run transform here for real to add + the dynamic cols to keep + """ + # setup - input variables + etl = self._setup_etl_instance_and_run_extract( + mock_etl=mock_etl, + mock_paths=mock_paths, + ) + etl.transform() + etl.load() + + # Make sure it creates the file. + actual_output_path = etl._get_output_file_path() + assert actual_output_path.exists() + + # Check COLUMNS_TO_KEEP remain + actual_output = pd.read_csv( + actual_output_path, dtype={etl.GEOID_TRACT_FIELD_NAME: str} + ) + + for col in etl.COLUMNS_TO_KEEP: + assert col in actual_output.columns, f"{col} is missing from output" + + # Check the snapshots + snapshot.snapshot_dir = self._DATA_DIRECTORY_FOR_TEST + snapshot.assert_match( + actual_output.to_csv(index=False, float_format=self._FLOAT_FORMAT), + self._OUTPUT_CSV_FILE_NAME, + )