-
Notifications
You must be signed in to change notification settings - Fork 48
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Abandoned Mine Land data (#1780)
Using a similar structure but simpler approach compared to FUDs, add an indicator for whether a tract has an abandoned mine.
- Loading branch information
1 parent
d7c28df
commit 878d1ea
Showing
11 changed files
with
280 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
63 changes: 63 additions & 0 deletions
63
data/data-pipeline/data_pipeline/etl/sources/eamlis/etl.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
from pathlib import Path | ||
import geopandas as gpd | ||
import pandas as pd | ||
from data_pipeline.config import settings | ||
|
||
from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel | ||
from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries | ||
from data_pipeline.utils import get_module_logger | ||
|
||
logger = get_module_logger(__name__) | ||
|
||
|
||
class AbandonedMineLandInventorySystem(ExtractTransformLoad):
    """ETL for the abandoned mine land inventory (eAMLIS).

    The source is the eAMLIS export from the Office Of Surface Mining
    Reclamation and Enforcement; its rows carry the latitude and longitude
    of abandoned mines. The transform keeps one row per census tract that
    was matched to at least one mine location.
    """

    # Metadata for the base class.
    NAME = "eamlis"
    GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT

    # Name of the indicator column written by `transform`. Only tracts that
    # have a mine are emitted, so the column is always True.
    BINARY_HAS_AMLIS: str = "Has abandoned mine"

    def __init__(self):
        # Instance attributes are spelled out here for easy code completion.
        zip_file_name = "/eAMLIS export of all data.tsv.zip"
        self.SOURCE_URL = settings.AWS_JUSTICE40_DATASOURCES_URL + zip_file_name

        self.TRACT_INPUT_COLUMN_NAME = self.INPUT_GEOID_TRACT_FIELD_NAME

        dataset_dir = self.DATA_PATH / "dataset"
        self.OUTPUT_PATH: Path = (
            dataset_dir / "abandoned_mine_land_inventory_system"
        )

        self.COLUMNS_TO_KEEP = [
            self.GEOID_TRACT_FIELD_NAME,
            self.BINARY_HAS_AMLIS,
        ]

        # Declared only; populated by `transform`.
        self.output_df: pd.DataFrame

    def transform(self) -> None:
        """Populate `output_df` with one row per tract that has a mine."""
        logger.info("Starting eAMLIS transforms.")

        tsv_path = self.get_tmp_path() / "eAMLIS export of all data.tsv"
        mines = pd.read_csv(tsv_path, sep="\t", low_memory=False)

        # Longitude is the x coordinate and latitude the y coordinate.
        mine_points = gpd.points_from_xy(
            x=mines["Longitude"],
            y=mines["Latitude"],
        )
        mines_gdf = gpd.GeoDataFrame(
            mines,
            geometry=mine_points,
            crs="epsg:4326",
        )

        # Records that share a location collapse to the last one listed.
        mines_gdf.drop_duplicates(
            subset=["geometry"], inplace=True, keep="last"
        )

        # NOTE(review): the helper presumably attaches the tract ID column
        # named by GEOID_TRACT_FIELD_NAME to each point -- confirm in
        # geo_utils.
        tracts_with_mines = add_tracts_for_geometries(mines_gdf)

        # Several mines may fall in one tract; keep a single row per tract.
        tracts_with_mines.drop_duplicates(
            self.GEOID_TRACT_FIELD_NAME, inplace=True
        )
        tracts_with_mines[self.BINARY_HAS_AMLIS] = True

        self.output_df = tracts_with_mines[self.COLUMNS_TO_KEEP]
|
Empty file.
Binary file added
BIN
+1 KB
data/data-pipeline/data_pipeline/tests/sources/eamlis/data/eAMLIS export of all data.tsv.zip
Binary file not shown.
16 changes: 16 additions & 0 deletions
16
data/data-pipeline/data_pipeline/tests/sources/eamlis/data/extract.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
AMLIS Key State/Tribe County Congressional District Quadrangle Name Watershed HUC Code FIPS Code Latitude Longitude Funding Source / Program Problem Area Name Problem Area Number Planning Unit Name Planning Unit Number Problem Priority Problem Type Mining Type Ore Types Date Prepared Date Revised Private Owner % State Owner % Other Federal Owner % Park Service Owner % Forest Service Owner % Indian Owner % BLM Owner % Unfunded Standard Units Unfunded Costs Unfunded GPRA Acres Unfunded Metric Units Funded Standard Units Funded Costs Funded GPRA Acres Funded Metric Units Completed Standard Units Completed Costs Completed GPRA Acres Completed Metric Units Unnamed: 40 | ||
CA000001 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 36.25161281807095 -117.11772856883819 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
CA000002 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 36.5498780497345 -121.0070599015156 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
CA000003 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 38.84602113669345 -121.40564726784282 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
HI000004 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.49784370888389 -155.10321769858746 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
HI000005 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.446650238354696 -154.89548634140738 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
HI000006 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.904412260968197 -159.43665201302525 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
HI000007 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.94208315793464 -159.52362041178708 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
HI000008 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.72796381691298 -156.14177664396527 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
HI000009 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.86486713282688 -156.2497797752935 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
HI000010 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.516629328900667 -155.91378867633992 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
HI000011 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.164406070883054 -155.81110884967674 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
HI000012 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.825369670478306 -156.33064622489087 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
HI000013 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.9170439162332 -156.54289869319305 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
HI000014 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.556464980367483 -157.89225964427064 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 | ||
HI000015 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.90754283544759 -159.48416846823164 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0 |
16 changes: 16 additions & 0 deletions
16
data/data-pipeline/data_pipeline/tests/sources/eamlis/data/output.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
GEOID10_TRACT,Is there at least one Abandoned Mine in this census tract? | ||
06027000800,True | ||
06069000802,True | ||
06061021322,True | ||
15001021010,True | ||
15001021101,True | ||
15007040603,True | ||
15007040700,True | ||
15009030100,True | ||
15009030201,True | ||
15001021402,True | ||
15001021800,True | ||
15009030402,True | ||
15009030800,True | ||
15003010201,True | ||
15007040604,True |
16 changes: 16 additions & 0 deletions
16
data/data-pipeline/data_pipeline/tests/sources/eamlis/data/transform.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
GEOID10_TRACT,Is there at least one Abandoned Mine in this census tract? | ||
06027000800,True | ||
06069000802,True | ||
06061021322,True | ||
15001021010,True | ||
15001021101,True | ||
15007040603,True | ||
15007040700,True | ||
15009030100,True | ||
15009030201,True | ||
15001021402,True | ||
15001021800,True | ||
15009030402,True | ||
15009030800,True | ||
15003010201,True | ||
15007040604,True |
152 changes: 152 additions & 0 deletions
152
data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
# pylint: disable=protected-access | ||
from unittest import mock | ||
import pathlib | ||
from data_pipeline.etl.base import ValidGeoLevel | ||
|
||
from data_pipeline.etl.sources.eamlis.etl import ( | ||
AbandonedMineLandInventorySystem, | ||
) | ||
from data_pipeline.tests.sources.example.test_etl import TestETL | ||
from data_pipeline.utils import get_module_logger | ||
|
||
logger = get_module_logger(__name__) | ||
|
||
|
||
def _fake_add_tracts_for_geometries(df): | ||
"""The actual geojoin is too slow for tests. Use precomputed results.""" | ||
lookups = { | ||
(-117.1177285688382, 36.25161281807095): "06027000800", | ||
(-121.0070599015156, 36.5498780497345): "06069000802", | ||
(-121.40564726784282, 38.84602113669345): "06061021322", | ||
(-155.10321769858746, 19.49784370888389): "15001021010", | ||
(-154.89548634140738, 19.446650238354696): "15001021101", | ||
(-159.43665201302525, 21.9044122609682): "15007040603", | ||
(-159.52362041178708, 21.94208315793464): "15007040700", | ||
(-156.14177664396527, 20.72796381691298): "15009030100", | ||
(-156.2497797752935, 20.86486713282688): "15009030201", | ||
(-155.91378867633992, 19.516629328900667): "15001021402", | ||
(-155.81110884967674, 20.164406070883054): "15001021800", | ||
(-156.33064622489087, 20.825369670478302): "15009030402", | ||
(-156.54289869319305, 20.9170439162332): "15009030800", | ||
(-157.89225964427064, 21.556464980367483): "15003010201", | ||
(-159.48416846823164, 21.90754283544759): "15007040604", | ||
} | ||
df["GEOID10_TRACT"] = df.geometry.apply( | ||
lambda point: lookups[(point.x, point.y)] | ||
) | ||
return df | ||
|
||
|
||
class TestAMLISETL(TestETL):
    """Tests the Abandoned Mine Land Inventory System (eAMLIS) ETL.

    This uses pytest-snapshot.
    To update individual snapshots: $ poetry run pytest
    data_pipeline/tests/sources/eamlis/test_etl.py::TestAMLISETL::<testname>
    --snapshot-update
    """

    _ETL_CLASS = AbandonedMineLandInventorySystem

    _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
    _SAMPLE_DATA_FILE_NAME = "eAMLIS export of all data.tsv"
    _SAMPLE_DATA_ZIP_FILE_NAME = "eAMLIS export of all data.tsv.zip"
    _EXTRACT_TMP_FOLDER_NAME = "AbandonedMineLandInventorySystem"

    def _patch_add_tracts(self):
        """Return a patcher that swaps the geojoin for the precomputed fake.

        The target is the name as imported into the ETL module, because that
        is the reference `transform` resolves at call time.
        """
        return mock.patch(
            "data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
            new=_fake_add_tracts_for_geometries,
        )

    def setup_method(self, _method, filename=__file__):
        """Invoke `setup_method` from Parent, but using the current file name.
        This code can be copied identically between all child classes.
        """
        super().setup_method(_method=_method, filename=filename)

    def test_init(self, mock_etl, mock_paths):
        """Tests that the AbandonedMineLandInventorySystem class instance was
        initialized correctly.
        """
        # setup
        etl = self._ETL_CLASS()
        # validation
        assert etl.GEOID_FIELD_NAME == "GEOID10"
        assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
        assert etl.NAME == "eamlis"
        assert etl.GEO_LEVEL == ValidGeoLevel.CENSUS_TRACT
        assert etl.COLUMNS_TO_KEEP == [
            etl.GEOID_TRACT_FIELD_NAME,
            etl.BINARY_HAS_AMLIS,
        ]

    def test_get_output_file_path(self, mock_etl, mock_paths):
        """Tests the right file name is returned."""
        etl = self._ETL_CLASS()
        data_path, _ = mock_paths

        output_file_path = etl._get_output_file_path()
        expected_output_file_path = (
            data_path / "dataset" / self._ETL_CLASS.NAME / "usa.csv"
        )
        assert output_file_path == expected_output_file_path

    # The inherited base tests below all run `transform`, so each one is
    # re-run with the slow geojoin replaced by the fake.

    def test_fixtures_contain_shared_tract_ids_base(self, mock_etl, mock_paths):
        with self._patch_add_tracts():
            return super().test_fixtures_contain_shared_tract_ids_base(
                mock_etl, mock_paths
            )

    def test_transform_base(self, snapshot, mock_etl, mock_paths):
        with self._patch_add_tracts():
            super().test_transform_base(
                snapshot=snapshot, mock_etl=mock_etl, mock_paths=mock_paths
            )

    def test_transform_sets_output_df_base(self, mock_etl, mock_paths):
        with self._patch_add_tracts():
            super().test_transform_sets_output_df_base(
                mock_etl=mock_etl, mock_paths=mock_paths
            )

    def test_validate_base(self, mock_etl, mock_paths):
        with self._patch_add_tracts():
            super().test_validate_base(mock_etl=mock_etl, mock_paths=mock_paths)

    def test_full_etl_base(self, mock_etl, mock_paths):
        with self._patch_add_tracts():
            return super().test_full_etl_base(mock_etl, mock_paths)

    def test_get_data_frame_base(self, mock_etl, mock_paths):
        with self._patch_add_tracts():
            return super().test_get_data_frame_base(mock_etl, mock_paths)

    def test_tracts_without_fuds_not_in_results(self, mock_etl, mock_paths):
        """Tests that only tracts containing a mine appear in the output.

        The method name is inherited from the FUDS test this was modeled on
        and is kept so existing test selectors continue to work.
        """
        with self._patch_add_tracts():
            etl = self._setup_etl_instance_and_run_extract(
                mock_etl=mock_etl, mock_paths=mock_paths
            )
            etl.transform()
            etl.validate()
            etl.load()
            df = etl.get_data_frame()
            assert len(df[etl.GEOID_TRACT_FIELD_NAME]) == len(
                self._FIXTURES_SHARED_TRACT_IDS
            )