Skip to content

Commit

Permalink
Add Abandoned Mine Land data (#1780)
Browse files Browse the repository at this point in the history
Using a similar structure but simpler approach compared to FUDs, add an
indicator for whether a tract has an abandoned mine.
  • Loading branch information
mattbowen-usds committed Aug 16, 2022
1 parent d7c28df commit 878d1ea
Show file tree
Hide file tree
Showing 11 changed files with 280 additions and 1 deletion.
6 changes: 5 additions & 1 deletion data/data-pipeline/data_pipeline/etl/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,11 @@
"module_dir": "us_army_fuds",
"class_name": "USArmyFUDS",
},
]
{
"name": "eamlis",
"module_dir": "eamlis",
"class_name": "AbandonedMineLandInventorySystem",
},]

CENSUS_INFO = {
"name": "census",
Expand Down
12 changes: 12 additions & 0 deletions data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,18 @@ datasets:
field_type: bool
include_in_tiles: false
include_in_downloadable_files: false
- long_name: "Abandoned Mine Land Inventory System"
short_name: "eAMLIS"
module_name: "eamlis"
load_fields:
- short_name: "has_amlis"
df_field_name: "BINARY_HAS_AMLIS"
long_name: "Is there at least one Abandoned Mine in this census tract?"
description_short:
"Whether the tract has an abandoned mine"
field_type: bool
include_in_tiles: false
include_in_downloadable_files: false
- long_name: "Example ETL"
short_name: "Example"
module_name: "example_dataset"
Expand Down
Empty file.
Empty file.
63 changes: 63 additions & 0 deletions data/data-pipeline/data_pipeline/etl/sources/eamlis/etl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from pathlib import Path
import geopandas as gpd
import pandas as pd
from data_pipeline.config import settings

from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries
from data_pipeline.utils import get_module_logger

# Module-level logger obtained from the project's logging helper, keyed by this module's name.
logger = get_module_logger(__name__)


class AbandonedMineLandInventorySystem(ExtractTransformLoad):
    """ETL that flags census tracts containing an abandoned mine.

    The input is an export from the Office Of Surface Mining Reclamation and
    Enforcement's eAMLIS, which lists the locations of abandoned mines. The
    output has one row per tract that contains at least one mine location.
    """

    # Metadata for the baseclass
    NAME = "eamlis"
    GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT

    # Name of the boolean output column produced by ``transform``.
    BINARY_HAS_AMLIS: str = "Has abandoned mine"

    def __init__(self):
        """Set the source location, output location and output columns."""
        datasources_root = settings.AWS_JUSTICE40_DATASOURCES_URL
        self.SOURCE_URL = datasources_root + "/eAMLIS export of all data.tsv.zip"

        self.TRACT_INPUT_COLUMN_NAME = self.INPUT_GEOID_TRACT_FIELD_NAME

        dataset_root = self.DATA_PATH / "dataset"
        self.OUTPUT_PATH: Path = (
            dataset_root / "abandoned_mine_land_inventory_system"
        )

        self.COLUMNS_TO_KEEP = [
            self.GEOID_TRACT_FIELD_NAME,
            self.BINARY_HAS_AMLIS,
        ]

        # Declared (without a value) for easy code completion; it is
        # assigned by ``transform``.
        self.output_df: pd.DataFrame

    def transform(self) -> None:
        """Build ``self.output_df`` from the extracted eAMLIS TSV export.

        Each row's ``Longitude``/``Latitude`` pair becomes a point
        (EPSG:4326), the points are passed to ``add_tracts_for_geometries``,
        and the result is reduced to one row per tract with
        ``BINARY_HAS_AMLIS`` set to ``True``.
        """
        logger.info("Starting eAMLIS transforms.")

        tsv_path = self.get_tmp_path() / "eAMLIS export of all data.tsv"
        mines = pd.read_csv(tsv_path, sep="\t", low_memory=False)

        mine_points = gpd.points_from_xy(
            x=mines["Longitude"],
            y=mines["Latitude"],
        )
        mines_gdf = gpd.GeoDataFrame(
            mines,
            geometry=mine_points,
            crs="epsg:4326",
        )

        # Several records can share one location; keep only the last record
        # for each distinct point before the join.
        mines_gdf.drop_duplicates(
            subset=["geometry"], inplace=True, keep="last"
        )

        # NOTE(review): presumably this attaches the tract ID column named by
        # GEOID_TRACT_FIELD_NAME to each point -- confirm against geo_utils.
        tracts_with_mines = add_tracts_for_geometries(mines_gdf)

        # Only presence matters, so one row per tract is enough.
        tracts_with_mines.drop_duplicates(
            self.GEOID_TRACT_FIELD_NAME, inplace=True
        )
        tracts_with_mines[self.BINARY_HAS_AMLIS] = True

        self.output_df = tracts_with_mines[self.COLUMNS_TO_KEEP]

Empty file.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
AMLIS Key State/Tribe County Congressional District Quadrangle Name Watershed HUC Code FIPS Code Latitude Longitude Funding Source / Program Problem Area Name Problem Area Number Planning Unit Name Planning Unit Number Problem Priority Problem Type Mining Type Ore Types Date Prepared Date Revised Private Owner % State Owner % Other Federal Owner % Park Service Owner % Forest Service Owner % Indian Owner % BLM Owner % Unfunded Standard Units Unfunded Costs Unfunded GPRA Acres Unfunded Metric Units Funded Standard Units Funded Costs Funded GPRA Acres Funded Metric Units Completed Standard Units Completed Costs Completed GPRA Acres Completed Metric Units Unnamed: 40
CA000001 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 36.25161281807095 -117.11772856883819 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
CA000002 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 36.5498780497345 -121.0070599015156 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
CA000003 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 38.84602113669345 -121.40564726784282 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
HI000004 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.49784370888389 -155.10321769858746 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
HI000005 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.446650238354696 -154.89548634140738 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
HI000006 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.904412260968197 -159.43665201302525 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
HI000007 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.94208315793464 -159.52362041178708 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
HI000008 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.72796381691298 -156.14177664396527 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
HI000009 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.86486713282688 -156.2497797752935 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
HI000010 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.516629328900667 -155.91378867633992 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
HI000011 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.164406070883054 -155.81110884967674 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
HI000012 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.825369670478306 -156.33064622489087 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
HI000013 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.9170439162332 -156.54289869319305 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
HI000014 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.556464980367483 -157.89225964427064 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
HI000015 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.90754283544759 -159.48416846823164 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
GEOID10_TRACT,Is there at least one Abandoned Mine in this census tract?
06027000800,True
06069000802,True
06061021322,True
15001021010,True
15001021101,True
15007040603,True
15007040700,True
15009030100,True
15009030201,True
15001021402,True
15001021800,True
15009030402,True
15009030800,True
15003010201,True
15007040604,True
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
GEOID10_TRACT,Is there at least one Abandoned Mine in this census tract?
06027000800,True
06069000802,True
06061021322,True
15001021010,True
15001021101,True
15007040603,True
15007040700,True
15009030100,True
15009030201,True
15001021402,True
15001021800,True
15009030402,True
15009030800,True
15003010201,True
15007040604,True
152 changes: 152 additions & 0 deletions data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# pylint: disable=protected-access
from unittest import mock
import pathlib
from data_pipeline.etl.base import ValidGeoLevel

from data_pipeline.etl.sources.eamlis.etl import (
AbandonedMineLandInventorySystem,
)
from data_pipeline.tests.sources.example.test_etl import TestETL
from data_pipeline.utils import get_module_logger

# Module-level logger obtained from the project's logging helper, keyed by this module's name.
logger = get_module_logger(__name__)


def _fake_add_tracts_for_geometries(df):
    """The actual geojoin is too slow for tests. Use precomputed results.

    Adds a ``GEOID10_TRACT`` column to ``df`` by looking up each point of
    ``df.geometry`` (via its ``x``/``y`` attributes) in a fixed table of
    (longitude, latitude) -> tract ID.

    Coordinates are compared after rounding to six decimal places rather
    than by exact float equality. The same coordinate text can parse to
    floats that differ in the last bits depending on the parser used, and an
    exact-match lookup would then fail with ``KeyError`` for a point that is
    in the table.

    Args:
        df: frame exposing a ``geometry`` column of point-like objects.

    Returns:
        The same ``df`` object, mutated in place with the new column.

    Raises:
        KeyError: if a point is not one of the precomputed locations.
    """
    # Six decimals is far finer than the spacing between the fixture points,
    # so rounding cannot merge two different locations.
    precision = 6
    lookups = {
        (-117.1177285688382, 36.25161281807095): "06027000800",
        (-121.0070599015156, 36.5498780497345): "06069000802",
        (-121.40564726784282, 38.84602113669345): "06061021322",
        (-155.10321769858746, 19.49784370888389): "15001021010",
        (-154.89548634140738, 19.446650238354696): "15001021101",
        (-159.43665201302525, 21.9044122609682): "15007040603",
        (-159.52362041178708, 21.94208315793464): "15007040700",
        (-156.14177664396527, 20.72796381691298): "15009030100",
        (-156.2497797752935, 20.86486713282688): "15009030201",
        (-155.91378867633992, 19.516629328900667): "15001021402",
        (-155.81110884967674, 20.164406070883054): "15001021800",
        (-156.33064622489087, 20.825369670478302): "15009030402",
        (-156.54289869319305, 20.9170439162332): "15009030800",
        (-157.89225964427064, 21.556464980367483): "15003010201",
        (-159.48416846823164, 21.90754283544759): "15007040604",
    }
    rounded_lookups = {
        (round(lon, precision), round(lat, precision)): tract_id
        for (lon, lat), tract_id in lookups.items()
    }
    df["GEOID10_TRACT"] = df.geometry.apply(
        lambda point: rounded_lookups[
            (round(point.x, precision), round(point.y, precision))
        ]
    )
    return df


class TestAMLISETL(TestETL):
    """Tests the Abandoned Mine Dataset ETL.

    This uses pytest-snapshot.
    To update individual snapshots: $ poetry run pytest
    data_pipeline/tests/sources/eamlis/test_etl.py::TestAMLISETL::<testname>
    --snapshot-update

    Every test that runs the transform step replaces the ETL module's
    ``add_tracts_for_geometries`` with ``_fake_add_tracts_for_geometries``
    (see ``_patch_geojoin``).
    """

    _ETL_CLASS = AbandonedMineLandInventorySystem

    _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"
    _SAMPLE_DATA_FILE_NAME = "eAMLIS export of all data.tsv"
    _SAMPLE_DATA_ZIP_FILE_NAME = "eAMLIS export of all data.tsv.zip"
    _EXTRACT_TMP_FOLDER_NAME = "AbandonedMineLandInventorySystem"

    # The name is patched where the ETL module looks it up (its own
    # namespace), not where it is defined.
    _GEOJOIN_PATCH_TARGET = (
        "data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries"
    )

    def _patch_geojoin(self):
        """Return a patcher that swaps the geojoin for the fake lookup."""
        return mock.patch(
            self._GEOJOIN_PATCH_TARGET,
            new=_fake_add_tracts_for_geometries,
        )

    def setup_method(self, _method, filename=__file__):
        """Invoke `setup_method` from Parent, but using the current file name.
        This code can be copied identically between all child classes.
        """
        super().setup_method(_method=_method, filename=filename)

    def test_init(self, mock_etl, mock_paths):
        """Tests that the AbandonedMineLandInventorySystem instance was
        initialized correctly.
        """
        # setup
        etl = self._ETL_CLASS()
        # validation
        assert etl.GEOID_FIELD_NAME == "GEOID10"
        assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
        assert etl.NAME == "eamlis"
        assert etl.GEO_LEVEL == ValidGeoLevel.CENSUS_TRACT
        assert etl.COLUMNS_TO_KEEP == [
            etl.GEOID_TRACT_FIELD_NAME,
            etl.BINARY_HAS_AMLIS,
        ]

    def test_get_output_file_path(self, mock_etl, mock_paths):
        """Tests the right file name is returned."""
        etl = self._ETL_CLASS()
        data_path, _ = mock_paths

        output_file_path = etl._get_output_file_path()
        expected_output_file_path = (
            data_path / "dataset" / self._ETL_CLASS.NAME / "usa.csv"
        )
        assert output_file_path == expected_output_file_path

    def test_fixtures_contain_shared_tract_ids_base(self, mock_etl, mock_paths):
        """Runs the parent test with the geojoin replaced by the fake."""
        with self._patch_geojoin():
            return super().test_fixtures_contain_shared_tract_ids_base(
                mock_etl, mock_paths
            )

    def test_transform_base(self, snapshot, mock_etl, mock_paths):
        """Runs the parent snapshot test with the geojoin replaced."""
        with self._patch_geojoin():
            super().test_transform_base(
                snapshot=snapshot, mock_etl=mock_etl, mock_paths=mock_paths
            )

    def test_transform_sets_output_df_base(self, mock_etl, mock_paths):
        """Runs the parent test with the geojoin replaced by the fake."""
        with self._patch_geojoin():
            super().test_transform_sets_output_df_base(
                mock_etl=mock_etl, mock_paths=mock_paths
            )

    def test_validate_base(self, mock_etl, mock_paths):
        """Runs the parent test with the geojoin replaced by the fake."""
        with self._patch_geojoin():
            super().test_validate_base(mock_etl=mock_etl, mock_paths=mock_paths)

    def test_full_etl_base(self, mock_etl, mock_paths):
        """Runs the parent test with the geojoin replaced by the fake."""
        with self._patch_geojoin():
            return super().test_full_etl_base(mock_etl, mock_paths)

    def test_get_data_frame_base(self, mock_etl, mock_paths):
        """Runs the parent test with the geojoin replaced by the fake."""
        with self._patch_geojoin():
            return super().test_get_data_frame_base(mock_etl, mock_paths)

    def test_tracts_without_fuds_not_in_results(self, mock_etl, mock_paths):
        """Checks the loaded output has as many tract rows as there are
        shared fixture tract IDs.

        NOTE(review): "fuds" in the name looks carried over from the FUDS
        tests; here it exercises the abandoned-mine ETL. The name is kept so
        existing test IDs do not change.
        """
        with self._patch_geojoin():
            etl = self._setup_etl_instance_and_run_extract(
                mock_etl=mock_etl, mock_paths=mock_paths
            )
            etl.transform()
            etl.validate()
            etl.load()
            df = etl.get_data_frame()
            assert len(df[etl.GEOID_TRACT_FIELD_NAME]) == len(
                self._FIXTURES_SHARED_TRACT_IDS
            )

0 comments on commit 878d1ea

Please sign in to comment.