Skip to content

Commit

Permalink
Adding first street foundation data (#1823)
Browse files Browse the repository at this point in the history
Adding FSF flood and wildfire risk datasets to the score.
  • Loading branch information
emma-nechamkin authored Aug 17, 2022
1 parent ebac552 commit 5e378ae
Show file tree
Hide file tree
Showing 21 changed files with 430 additions and 82 deletions.
18 changes: 18 additions & 0 deletions data/data-pipeline/data_pipeline/content/config/csv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,21 @@ fields:
- score_name: Leaky underground storage tanks
label: Leaky underground storage tanks
format: float
- score_name: Share of properties at risk of flood in 30 years
label: Share of properties at risk of flood in 30 years
format: float
- score_name: Share of properties at risk of fire in 30 years
label: Share of properties at risk of fire in 30 years
format: float
- score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?
label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?
format: bool
- score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?
label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?
format: bool
- score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years
label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years
format: bool
- score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years
label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years
format: bool
18 changes: 18 additions & 0 deletions data/data-pipeline/data_pipeline/content/config/excel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,21 @@ sheets:
- score_name: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?
label: Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?
format: bool
- score_name: Share of properties at risk of flood in 30 years
label: Share of properties at risk of flood in 30 years
format: float
- score_name: Share of properties at risk of fire in 30 years
label: Share of properties at risk of fire in 30 years
format: float
- score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?
label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?
format: bool
- score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?
label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?
format: bool
- score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years
label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years
format: bool
- score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years
label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years
format: bool
10 changes: 10 additions & 0 deletions data/data-pipeline/data_pipeline/etl/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@
"module_dir": "mapping_for_ej",
"class_name": "MappingForEJETL",
},
{
"name": "fsf_flood_risk",
"module_dir": "fsf_flood_risk",
"class_name": "FloodRiskETL",
},
{
"name": "fsf_wildfire_risk",
"module_dir": "fsf_wildfire_risk",
"class_name": "WildfireRiskETL",
},
{
"name": "ejscreen",
"module_dir": "ejscreen",
Expand Down
82 changes: 82 additions & 0 deletions data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,88 @@ datasets:
include_in_tiles: true
include_in_downloadable_files: true

- long_name: "First Street Foundation Flood Risk"
short_name: "FSF Flood Risk"
module_name: fsf_flood_risk
input_geoid_tract_field_name: "GEOID"
load_fields:
- short_name: "flood_eligible_properties"
df_field_name: "COUNT_PROPERTIES"
long_name: "Count of properties eligible for flood risk calculation within tract (floor of 250)"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
- short_name: "flood_risk_properties_today"
df_field_name: "PROPERTIES_AT_RISK_FROM_FLOODING_TODAY"
long_name: "Count of properties at risk of flood today"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
- short_name: "flood_risk_properties_30yrs"
df_field_name: "PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS"
long_name: "Count of properties at risk of flood in 30 years"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
- short_name: "flood_risk_share_today"
df_field_name: "SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_TODAY"
long_name: "Share of properties at risk of flood today"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: true
- short_name: "flood_risk_share_30yrs"
df_field_name: "SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS"
long_name: "Share of properties at risk of flood in 30 years"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: true

- long_name: "First Street Foundation Wildfire Risk"
short_name: "FSF Wildfire Risk"
module_name: fsf_wildfire_risk
input_geoid_tract_field_name: "GEOID"
load_fields:
- short_name: "fire_eligible_properties"
df_field_name: "COUNT_PROPERTIES"
long_name: "Count of properties eligible for wildfire risk calculation within tract (floor of 250)"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
- short_name: "fire_risk_properties_today"
df_field_name: "PROPERTIES_AT_RISK_FROM_FIRE_TODAY"
long_name: "Count of properties at risk of wildfire today"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
- short_name: "fire_risk_properties_30yrs"
df_field_name: "PROPERTIES_AT_RISK_FROM_FIRE_IN_30_YEARS"
long_name: "Count of properties at risk of wildfire in 30 years"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: false
- short_name: "fire_risk_share_today"
df_field_name: "SHARE_OF_PROPERTIES_AT_RISK_FROM_FIRE_TODAY"
long_name: "Share of properties at risk of fire today"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: true
- short_name: "fire_risk_share_30yrs"
df_field_name: "SHARE_OF_PROPERTIES_AT_RISK_FROM_FIRE_IN_30_YEARS"
long_name: "Share of properties at risk of fire in 30 years"
field_type: float
include_in_tiles: false
include_in_downloadable_files: true
create_percentile: true

- long_name: "DOT Travel Disadvantage Index"
short_name: "DOT"
module_name: "travel_composite"
Expand Down
13 changes: 11 additions & 2 deletions data/data-pipeline/data_pipeline/etl/score/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,12 +293,18 @@
field_names.WORKFORCE_THRESHOLD_EXCEEDED: "M_WKFC_EOMI",
# These are the booleans for socioeconomic indicators
## this measures low income boolean
field_names.FPL_200_SERIES: "FPL200S",
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED: "FPL200S",
## Low high school for t&wd
field_names.WORKFORCE_SOCIO_INDICATORS_EXCEEDED: "M_WKFC_EBSI",
field_names.DOT_BURDEN_PCTILE_THRESHOLD: "TD_ET",
field_names.DOT_TRAVEL_BURDEN_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "TD_PFS"
+ field_names.PERCENTILE_FIELD_SUFFIX: "TD_PFS",
field_names.FUTURE_FLOOD_RISK_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "FLD_PFS",
field_names.FUTURE_WILDFIRE_RISK_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS",
field_names.HIGH_FUTURE_FLOOD_RISK_FIELD: "FLD_ET",
field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD: "WF_ET",
## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
## FPL_200 (there is no higher ed in narwhal)
}
Expand Down Expand Up @@ -352,4 +358,7 @@
field_names.COLLEGE_NON_ATTENDANCE_FIELD,
field_names.COLLEGE_ATTENDANCE_FIELD,
field_names.DOT_TRAVEL_BURDEN_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.FUTURE_FLOOD_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX,
field_names.FUTURE_WILDFIRE_RISK_FIELD
+ field_names.PERCENTILE_FIELD_SUFFIX,
]
16 changes: 16 additions & 0 deletions data/data-pipeline/data_pipeline/etl/score/etl_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
from data_pipeline.etl.sources.dot_travel_composite.etl import (
TravelCompositeETL,
)
from data_pipeline.etl.sources.fsf_flood_risk.etl import (
FloodRiskETL,
)
from data_pipeline.etl.sources.fsf_wildfire_risk.etl import WildfireRiskETL
from data_pipeline.score.score_runner import ScoreRunner
from data_pipeline.score import field_names
from data_pipeline.etl.score import constants
Expand Down Expand Up @@ -41,6 +45,8 @@ def __init__(self):
self.child_opportunity_index_df: pd.DataFrame
self.hrs_df: pd.DataFrame
self.dot_travel_disadvantage_df: pd.DataFrame
self.fsf_flood_df: pd.DataFrame
self.fsf_fire_df: pd.DataFrame

def extract(self) -> None:
logger.info("Loading data sets from disk.")
Expand Down Expand Up @@ -122,6 +128,12 @@ def extract(self) -> None:
# Load DOT Travel Disadvantage
self.dot_travel_disadvantage_df = TravelCompositeETL.get_data_frame()

# Load fire risk data
self.fsf_fire_df = WildfireRiskETL.get_data_frame()

# Load flood risk data
self.fsf_flood_df = FloodRiskETL.get_data_frame()

# Load GeoCorr Urban Rural Map
geocorr_urban_rural_csv = (
constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv"
Expand Down Expand Up @@ -342,6 +354,8 @@ def _prepare_initial_df(self) -> pd.DataFrame:
self.child_opportunity_index_df,
self.hrs_df,
self.dot_travel_disadvantage_df,
self.fsf_flood_df,
self.fsf_fire_df,
]

# Sanity check each data frame before merging.
Expand Down Expand Up @@ -426,6 +440,8 @@ def _prepare_initial_df(self) -> pd.DataFrame:
field_names.UST_FIELD,
field_names.DOT_TRAVEL_BURDEN_FIELD,
field_names.AGRICULTURAL_VALUE_BOOL_FIELD,
field_names.FUTURE_FLOOD_RISK_FIELD,
field_names.FUTURE_WILDFIRE_RISK_FIELD,
field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
]

Expand Down

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# FSF flood risk data

Flood risk is computed as being in a 1-in-100-year flood zone.
Empty file.
93 changes: 93 additions & 0 deletions data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/etl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# pylint: disable=unsubscriptable-object
# pylint: disable=unsupported-assignment-operation

import pandas as pd
from data_pipeline.config import settings

from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class FloodRiskETL(ExtractTransformLoad):
    """ETL class for the First Street Foundation (FSF) flood risk dataset.

    Reads the tract-level FSF flood CSV, aggregates duplicate tract rows, and
    derives the share of properties at risk of flooding today and in 30 years.
    The result is stored on ``self.output_df`` for the load step.
    """

    NAME = "fsf_flood_risk"
    SOURCE_URL = settings.AWS_JUSTICE40_DATASOURCES_URL + "/fsf_flood.zip"
    GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT

    # Output score variables (values set on datasets.yml) for linting purposes
    COUNT_PROPERTIES: str
    PROPERTIES_AT_RISK_FROM_FLOODING_TODAY: str
    PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS: str
    SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_TODAY: str
    SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS: str

    def __init__(self):
        """Set the input file location and the source column names."""
        # define the full path for the input CSV file
        self.INPUT_CSV = (
            self.get_tmp_path() / "fsf_flood" / "flood_tract_2010.csv"
        )

        # this is the main dataframe
        self.df: pd.DataFrame

        # Start dataset-specific vars here
        # Column names as they appear in the source CSV.
        self.COUNT_PROPERTIES_NATIVE_FIELD_NAME = "count_properties"
        self.COUNT_PROPERTIES_AT_RISK_TODAY = "mid_depth_100_year00"
        self.COUNT_PROPERTIES_AT_RISK_30_YEARS = "mid_depth_100_year30"
        # Minimum denominator used when computing shares, so tracts with very
        # few properties do not produce outsized shares.
        self.CLIP_PROPERTIES_COUNT = 250

    def transform(self) -> None:
        """Reads the unzipped data file into memory and applies the following
        transformations to prepare it for the load() method:

        - Renames the Census Tract column to match the other datasets
        - Sums rows that share a tract ID, since some tracts are listed twice
        - Calculates share of properties at risk, left-clipping number of
          properties at 250
        - Renames the at-risk count columns to their output field names

        The result is assigned to ``self.output_df``; nothing is returned.
        """
        logger.info("Transforming FSF flood risk data")

        # NOTE(review): COLUMNS_TO_KEEP and the *_GEOID_TRACT_FIELD_NAME
        # attributes are not defined in this class; presumably they come from
        # the base class / datasets.yml -- confirm.
        logger.info(self.COLUMNS_TO_KEEP)
        # read in the unzipped csv data source then rename the
        # Census Tract column for merging
        df_fsf_flood_disagg: pd.DataFrame = pd.read_csv(
            self.INPUT_CSV,
            dtype={self.INPUT_GEOID_TRACT_FIELD_NAME: str},
            low_memory=False,
        )

        # Tract IDs are read as strings and left-padded to 11 characters so
        # that IDs which lost a leading zero are restored.
        df_fsf_flood_disagg[self.GEOID_TRACT_FIELD_NAME] = df_fsf_flood_disagg[
            self.INPUT_GEOID_TRACT_FIELD_NAME
        ].str.zfill(11)

        # Because we have some tracts that are listed twice, we aggregate
        # based on the tract ID column.
        # TODO: confirm with First Street Foundation that summing duplicate
        # tract rows is the correct way to combine them.
        df_fsf_flood = (
            df_fsf_flood_disagg.groupby(self.GEOID_TRACT_FIELD_NAME)
            .sum()
            .reset_index()
        )

        # Denominator for the shares: the property count with a floor of
        # CLIP_PROPERTIES_COUNT applied.
        df_fsf_flood[self.COUNT_PROPERTIES] = df_fsf_flood[
            self.COUNT_PROPERTIES_NATIVE_FIELD_NAME
        ].clip(lower=self.CLIP_PROPERTIES_COUNT)

        df_fsf_flood[self.SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_TODAY] = (
            df_fsf_flood[self.COUNT_PROPERTIES_AT_RISK_TODAY]
            / df_fsf_flood[self.COUNT_PROPERTIES]
        )
        df_fsf_flood[
            self.SHARE_OF_PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS
        ] = (
            df_fsf_flood[self.COUNT_PROPERTIES_AT_RISK_30_YEARS]
            / df_fsf_flood[self.COUNT_PROPERTIES]
        )

        # Assign the final df to the class' output_df for the load method with rename
        self.output_df = df_fsf_flood.rename(
            columns={
                self.COUNT_PROPERTIES_AT_RISK_TODAY: self.PROPERTIES_AT_RISK_FROM_FLOODING_TODAY,
                self.COUNT_PROPERTIES_AT_RISK_30_YEARS: self.PROPERTIES_AT_RISK_FROM_FLOODING_IN_30_YEARS,
            }
        )
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# FSF wildfire risk data

Fire risk is computed as a burn risk probability of at least 0.003 (>= 0.003).
Empty file.
Loading

0 comments on commit 5e378ae

Please sign in to comment.