Commit 5e378ae: Adding first street foundation data (#1823)

Adding FSF flood and wildfire risk datasets to the score.

1 parent: ebac552

Showing 21 changed files with 430 additions and 82 deletions.
data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
6 changes: 3 additions & 3 deletions (large diffs are not rendered by default)
data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
Binary file modified: +632 Bytes (100%). Binary file not shown.

data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
Binary file modified: -614 Bytes (97%). Binary file not shown.

data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
Binary file modified: -608 Bytes (97%). Binary file not shown.

data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
Binary file modified: +101 Bytes (100%). Binary file not shown.
data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/README.md
3 changes: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
# FSF flood risk data

Flood risk is computed as presence in the 1-in-100-year flood zone.
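For context on how this definition becomes a score field: the flood ETL below divides the count of at-risk properties by a left-clipped total property count. A minimal illustration with made-up numbers (only the clip value of 250, from `CLIP_PROPERTIES_COUNT` in the ETL below, comes from the code):

```python
# Hypothetical numbers illustrating the share-of-properties computation
# used by the flood ETL below; only the clip value (250) is from the code.
at_risk_today = 30      # properties in the 1-in-100-year flood zone
count_properties = 120  # total properties in the tract

# Left-clipping the denominator keeps shares in sparsely built tracts
# from being inflated by tiny property counts.
share_today = at_risk_today / max(count_properties, 250)
print(share_today)  # 0.12 rather than 30 / 120 = 0.25
```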
Empty file.
data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/etl.py
93 changes: 93 additions & 0 deletions

@@ -0,0 +1,93 @@
```python
# pylint: disable=unsubscriptable-object
# pylint: disable=unsupported-assignment-operation

import pandas as pd
from data_pipeline.config import settings

from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class FloodRiskETL(ExtractTransformLoad):
    """ETL class for the First Street Foundation flood risk dataset"""

    NAME = "fsf_flood_risk"
    SOURCE_URL = settings.AWS_JUSTICE40_DATASOURCES_URL + "/fsf_flood.zip"
    GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT

    # Output score variables (values set on datasets.yml) for linting purposes
    COUNT_PROPERTIES: str
    PROPERTIES_AT_RISK_FROM_FLOODING_TODAY: str
    PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS: str
    SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_TODAY: str
    SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS: str

    def __init__(self):
        # Define the full path for the input CSV file
        self.INPUT_CSV = (
            self.get_tmp_path() / "fsf_flood" / "flood_tract_2010.csv"
        )

        # This is the main dataframe
        self.df: pd.DataFrame

        # Start dataset-specific vars here
        self.COUNT_PROPERTIES_NATIVE_FIELD_NAME = "count_properties"
        self.COUNT_PROPERTIES_AT_RISK_TODAY = "mid_depth_100_year00"
        self.COUNT_PROPERTIES_AT_RISK_30_YEARS = "mid_depth_100_year30"
        self.CLIP_PROPERTIES_COUNT = 250

    def transform(self) -> None:
        """Reads the unzipped data file into memory and applies the following
        transformations to prepare it for the load() method:
        - Renames the Census Tract column to match the other datasets
        - Calculates the share of properties at risk, left-clipping the
          number of properties at 250
        """
        logger.info("Transforming First Street Foundation flood risk data")

        # Log the configured output columns for this dataset
        logger.info(self.COLUMNS_TO_KEEP)

        # Read in the unzipped CSV data source, then rename the
        # Census Tract column for merging
        df_fsf_flood_disagg: pd.DataFrame = pd.read_csv(
            self.INPUT_CSV,
            dtype={self.INPUT_GEOID_TRACT_FIELD_NAME: str},
            low_memory=False,
        )

        # Zero-pad tract GEOIDs to the canonical 11 characters
        df_fsf_flood_disagg[self.GEOID_TRACT_FIELD_NAME] = df_fsf_flood_disagg[
            self.INPUT_GEOID_TRACT_FIELD_NAME
        ].str.zfill(11)

        # Because some tracts are listed twice, we aggregate based on
        # GEOID10_TRACT. Note: this has not yet been confirmed with FSF. TODO.
        df_fsf_flood = (
            df_fsf_flood_disagg.groupby(self.GEOID_TRACT_FIELD_NAME)
            .sum()
            .reset_index()
        )

        # Left-clip the property count so shares in sparsely built tracts
        # are not inflated by tiny denominators
        df_fsf_flood[self.COUNT_PROPERTIES] = df_fsf_flood[
            self.COUNT_PROPERTIES_NATIVE_FIELD_NAME
        ].clip(lower=self.CLIP_PROPERTIES_COUNT)

        df_fsf_flood[self.SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_TODAY] = (
            df_fsf_flood[self.COUNT_PROPERTIES_AT_RISK_TODAY]
            / df_fsf_flood[self.COUNT_PROPERTIES]
        )
        df_fsf_flood[
            self.SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS
        ] = (
            df_fsf_flood[self.COUNT_PROPERTIES_AT_RISK_30_YEARS]
            / df_fsf_flood[self.COUNT_PROPERTIES]
        )

        # Assign the final df to the class's output_df for the load method,
        # renaming the raw count columns to their score variable names
        self.output_df = df_fsf_flood.rename(
            columns={
                self.COUNT_PROPERTIES_AT_RISK_TODAY: self.PROPERTIES_AT_RISK_FROM_FLOODING_TODAY,
                self.COUNT_PROPERTIES_AT_RISK_30_YEARS: self.PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS,
            }
        )
```
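A quick sketch of how this class would be exercised on its own. It assumes the `ExtractTransformLoad` base class supplies the usual `extract()` and `load()` steps, which are not part of this diff, so treat the method names outside `transform()` as assumptions:

```python
# Sketch only: assumes extract() and load() exist on the base class
# (they are not shown in this diff).
from data_pipeline.etl.sources.fsf_flood_risk.etl import FloodRiskETL

etl = FloodRiskETL()
etl.extract()    # presumably downloads and unzips SOURCE_URL into the tmp path
etl.transform()  # the method shown above: zfill, groupby-sum, clip, shares, rename
etl.load()       # presumably writes output_df to the dataset's output location
```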
data/data-pipeline/data_pipeline/etl/sources/fsf_wildfire_risk/README.md
3 changes: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
# FSF wildfire risk data

Fire risk is computed as a burn risk probability of at least 0.003.
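The wildfire etl.py itself is not rendered in this commit view, so as a hedged illustration of the README's cutoff above only: a property would count as at risk when its burn probability meets the 0.003 threshold. The column and function names here are hypothetical:

```python
import pandas as pd

# Hypothetical helper illustrating the README's >= 0.003 cutoff; the
# actual fsf_wildfire_risk/etl.py is not shown in this diff.
BURN_RISK_THRESHOLD = 0.003

def flag_at_risk(burn_probability: pd.Series) -> pd.Series:
    """True where a property's burn probability meets the FSF cutoff."""
    return burn_probability >= BURN_RISK_THRESHOLD
```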
Empty file.