Skip to content

Commit

Permalink
Add abandoned mine lands data (#1824)
Browse files Browse the repository at this point in the history
* Add notebook to generate test data (#1780)

* Add Abandoned Mine Land data (#1780)

Using a similar structure but simpler approach compared to FUDs, add an
indicator for whether a tract has an abandoned mine.

* Adding some detail to dataset readmes

Just a thought!

* Apply feedback from review (#1780)

* Fixup bad string that broke test (#1780)

* Update a string that I should have renamed (#1780)

* Reduce number of threads to reduce memory pressure (#1780)

* Try not running geo data (#1780)

* Run the high-memory sets separately (#1780)

* Actually deduplicate (#1780)

* Add flag for memory intensive ETLs (#1780)

* Document new flag for datasets (#1780)

* Add flag for new datasets from rebase (#1780)

Co-authored-by: Emma Nechamkin <97977170+emma-nechamkin@users.noreply.github.com>
  • Loading branch information
mattbowen-usds and emma-nechamkin authored Aug 17, 2022
1 parent 5e378ae commit 49623e4
Show file tree
Hide file tree
Showing 13 changed files with 2,815 additions and 1 deletion.
36 changes: 36 additions & 0 deletions data/data-pipeline/data_pipeline/etl/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,152 +3,188 @@
"name": "cdc_places",
"module_dir": "cdc_places",
"class_name": "CDCPlacesETL",
"is_memory_intensive": False,
},
{
"name": "national_risk_index",
"module_dir": "national_risk_index",
"class_name": "NationalRiskIndexETL",
"is_memory_intensive": False,
},
{
"name": "travel_composite",
"module_dir": "dot_travel_composite",
"class_name": "TravelCompositeETL",
"is_memory_intensive": False,
},
{
"name": "tree_equity_score",
"module_dir": "tree_equity_score",
"class_name": "TreeEquityScoreETL",
"is_memory_intensive": False,
},
{
"name": "census_decennial",
"module_dir": "census_decennial",
"class_name": "CensusDecennialETL",
"is_memory_intensive": False,
},
{
"name": "housing_and_transportation",
"module_dir": "housing_and_transportation",
"class_name": "HousingTransportationETL",
"is_memory_intensive": False,
},
{
"name": "mapping_for_ej",
"module_dir": "mapping_for_ej",
"class_name": "MappingForEJETL",
"is_memory_intensive": False,
},
{
"name": "fsf_flood_risk",
"module_dir": "fsf_flood_risk",
"class_name": "FloodRiskETL",
"is_memory_intensive": False,
},
{
"name": "fsf_wildfire_risk",
"module_dir": "fsf_wildfire_risk",
"class_name": "WildfireRiskETL",
"is_memory_intensive": False,
},
{
"name": "ejscreen",
"module_dir": "ejscreen",
"class_name": "EJSCREENETL",
"is_memory_intensive": False,
},
{
"name": "hud_housing",
"module_dir": "hud_housing",
"class_name": "HudHousingETL",
"is_memory_intensive": False,
},
{
"name": "census_acs_median_income",
"module_dir": "census_acs_median_income",
"class_name": "CensusACSMedianIncomeETL",
"is_memory_intensive": False,
},
{
"name": "cdc_life_expectancy",
"module_dir": "cdc_life_expectancy",
"class_name": "CDCLifeExpectancy",
"is_memory_intensive": False,
},
{
"name": "doe_energy_burden",
"module_dir": "doe_energy_burden",
"class_name": "DOEEnergyBurden",
"is_memory_intensive": False,
},
{
"name": "geocorr",
"module_dir": "geocorr",
"class_name": "GeoCorrETL",
"is_memory_intensive": False,
},
{
"name": "child_opportunity_index",
"module_dir": "child_opportunity_index",
"class_name": "ChildOpportunityIndex",
"is_memory_intensive": False,
},
{
"name": "mapping_inequality",
"module_dir": "mapping_inequality",
"class_name": "MappingInequalityETL",
"is_memory_intensive": False,
},
{
"name": "persistent_poverty",
"module_dir": "persistent_poverty",
"class_name": "PersistentPovertyETL",
"is_memory_intensive": False,
},
{
"name": "ejscreen_areas_of_concern",
"module_dir": "ejscreen_areas_of_concern",
"class_name": "EJSCREENAreasOfConcernETL",
"is_memory_intensive": False,
},
{
"name": "calenviroscreen",
"module_dir": "calenviroscreen",
"class_name": "CalEnviroScreenETL",
"is_memory_intensive": False,
},
{
"name": "hud_recap",
"module_dir": "hud_recap",
"class_name": "HudRecapETL",
"is_memory_intensive": False,
},
{
"name": "epa_rsei",
"module_dir": "epa_rsei",
"class_name": "EPARiskScreeningEnvironmentalIndicatorsETL",
"is_memory_intensive": False,
},
{
"name": "energy_definition_alternative_draft",
"module_dir": "energy_definition_alternative_draft",
"class_name": "EnergyDefinitionAlternativeDraft",
"is_memory_intensive": False,
},
{
"name": "michigan_ejscreen",
"module_dir": "michigan_ejscreen",
"class_name": "MichiganEnviroScreenETL",
"is_memory_intensive": False,
},
{
"name": "cdc_svi_index",
"module_dir": "cdc_svi_index",
"class_name": "CDCSVIIndex",
"is_memory_intensive": False,
},
{
"name": "maryland_ejscreen",
"module_dir": "maryland_ejscreen",
"class_name": "MarylandEJScreenETL",
"is_memory_intensive": False,
},
{
"name": "historic_redlining",
"module_dir": "historic_redlining",
"class_name": "HistoricRedliningETL",
"is_memory_intensive": False,
},
# This has to come after us.json exists
{
"name": "census_acs",
"module_dir": "census_acs",
"class_name": "CensusACSETL",
"is_memory_intensive": False,
},
{
"name": "census_acs_2010",
"module_dir": "census_acs_2010",
"class_name": "CensusACS2010ETL",
"is_memory_intensive": False,
},
{
"name": "us_army_fuds",
"module_dir": "us_army_fuds",
"class_name": "USArmyFUDS",
"is_memory_intensive": True,
},
{
"name": "eamlis",
"module_dir": "eamlis",
"class_name": "AbandonedMineETL",
"is_memory_intensive": True,
},
]

Expand Down
23 changes: 22 additions & 1 deletion data/data-pipeline/data_pipeline/etl/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,38 @@ def etl_runner(dataset_to_run: str = None) -> None:
"""
dataset_list = _get_datasets_to_run(dataset_to_run)

# Because we are memory constrained on our infrastructure,
# we split datasets into those that are not memory intensive
# (is_memory_intensive == False) and thereby can be safely
# run in parallel, and those that require more RAM and thus
# should be run sequentially. The is_memory_intensive flag is
# set manually in constants.py based on experience running
# the pipeline
concurrent_datasets = [
dataset
for dataset in dataset_list
if not dataset["is_memory_intensive"]
]
high_memory_datasets = [
dataset for dataset in dataset_list if dataset["is_memory_intensive"]
]

logger.info("Running concurrent jobs")
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = {
executor.submit(_run_one_dataset, dataset=dataset)
for dataset in dataset_list
for dataset in concurrent_datasets
}

for fut in concurrent.futures.as_completed(futures):
# Calling result will raise an exception if one occurred.
# Otherwise, the exceptions are silently ignored.
fut.result()

logger.info("Running high-memory jobs")
for dataset in high_memory_datasets:
_run_one_dataset(dataset=dataset)


def score_generate() -> None:
"""Generates the score and saves it on the local data directory
Expand Down
12 changes: 12 additions & 0 deletions data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,18 @@ datasets:
field_type: bool
include_in_tiles: false
include_in_downloadable_files: false
- long_name: "Abandoned Mine Land Inventory System"
short_name: "eAMLIS"
module_name: "eamlis"
load_fields:
- short_name: "has_aml"
df_field_name: "AML_BOOLEAN"
long_name: "Is there at least one abandoned mine in this census tract?"
description_short:
"Whether the tract has an abandoned mine"
field_type: bool
include_in_tiles: true
include_in_downloadable_files: true
- long_name: "Example ETL"
short_name: "Example"
module_name: "example_dataset"
Expand Down
40 changes: 40 additions & 0 deletions data/data-pipeline/data_pipeline/etl/sources/eamlis/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
The following is the description from eAMLIS as of August 16, 2022.

---

e-AMLIS is not a comprehensive database of all AML features or all AML grant activities. e-AMLIS is a national inventory that provides information about known abandoned mine land (AML) features including polluted waters. The majority of the data in e-AMLIS provides information about known coal AML features for the 25 states and 3 tribal SMCRA-approved AML Programs. e-AMLIS also provides limited information on non-coal AML features, and, non-coal reclamation projects as well as AML features for states and tribes that do not have an approved AML Program. Additionally, e-AMLIS only accounts for the direct construction cost to reclaim each AML feature that has been identified by states and Tribes. Other project costs such as planning, design, permitting, and construction oversight are not tracked in e-AMLIS.

The figures in e-AMLIS are further broken down into 3 cost categories:

- Unfunded Cost represents pre-construction estimates to reclaim the AML feature;
- Funded Cost indicates that construction has been approved by OSM and these figures may change during construction;
- Completed Cost is the actual cost to complete construction and reclamation of the AML feature.

DOI/OSMRE’s Financial Business & Management System is the system of record to obtain comprehensive information about all AML grant expenditures.

An inventory of land and water impacted by past mining (primarily coal mining) is maintained by OSMRE to provide information needed to implement the Surface Mining Control and Reclamation Act of 1977 (SMCRA). The inventory contains information on the location, type, and extent of AML impacts, as well as, information on the cost associated with the reclamation of those problems. The inventory is based upon field surveys by State, Tribal, and OSMRE program officials. It is dynamic to the extent that it is modified as new problems are identified and existing problems are reclaimed.

The Abandoned Mine Land Reclamation Act (AMRA) of 1990, amended SMCRA. The amended law expanded the scope of data OSMRE must collect regarding AML reclamation programs and progress. On December 20, 2006, SMCRA was amended under the Tax Relief and Health Care Act of 2006 to add sources of program funding, emphasize high priority coal reclamation, and expand OSMRE’s responsibilities towards implementation and management of the AML Inventory.

WHO MAINTAINS THE INFORMATION IN THE AML INVENTORY?
The information is developed and/or updated by the States and Indian Tribes managing their own AML programs under SMCRA or by the OSMRE office responsible for States and Indian Tribes not managing their own AML problems.

TYPES OF PROBLEMS
"High Priority"
The most serious AML problems are those posing a threat to health, safety and general welfare of people (Priority 1 and Priority 2, or "high priority"). These are the only problems which the law requires to be inventoried. There are 17 Priority 1 and 2 problem types.

Emergencies
Under the 2006 amendments to SMCRA, AML grants to states and tribes increased from $145 million in FY 2007 to $395 million in FY 2011. The increase in funding allowed states to take responsibility for their AML emergencies as part of their regular AML programs.

Until FY 2011, OSMRE provided Abandoned Mine Land (AML) State Emergency grants to the 15 states that manage their own emergency programs under the Abandoned Mine Land Reclamation Program. Thirteen other states and tribes that had approved AML programs did not receive emergency grants. OSMRE managed emergencies in those 13 states and tribes as well as in Federal Program States without AML programs.

OSMRE officially notified the state and tribal officials and Congressional delegations that, starting on October 1, 2010, they would fully assume responsibility for funding their emergency programs. OSMRE then worked with states and tribes to ensure a smooth transition to the states’ assumption of responsibility for administering state emergency programs. New funding and carryover balances were used during the transition to address immediate needs.

Overall, OSMRE successfully transitioned the financial responsibility to the states in FY 2011, and continues to provide technical and program assistance when needed. States with AML programs are now in a position to effectively handle emergency programs.

Environmental
AML problems impacting the environment are known as Priority 3 problems. While SMCRA does not require OSMRE to inventory every unreclaimed priority 3 problem, some program States and Indian tribes have chosen to submit such information. Information for priority 3 problem types is required when reclamation activities are funded and information on completed reclamation of priority 3 problems is kept in the inventory.

Other Coal Mine Related Problems
Information is also kept on lower priority coal related AML problems such as lower priority coal-related projects involving public facilities, and the development of publicly-owned land. The lower priority problems are also categorized-- Priority 4 and 5 problem types.

Non-coal Mine Related AML Problems
The non-coal problems are primarily problems reclaimed by States/Indian tribes that had "Certified" having addressed all known eligible coal related problems. States and Indian tribes managing their own AML programs reclaimed non-coal problems prior to addressing all their coal related problems under SMCRA SEC. 409-- FILLING VOIDS AND SEALING TUNNELS at the request of the Governor of the state or the governing body of the Indian tribe if the Secretary of the Department of the Interior determines such problems meet the criteria for a priority 1, extreme hazard, problems. This Program Area contains historical reclamation accomplishments for Certified Programs reclaiming Priority 1, 2, and 3 non-coal Problem Type features with pre-AML Reauthorization SMCRA funds distributed prior to October 1, 2007.
Empty file.
62 changes: 62 additions & 0 deletions data/data-pipeline/data_pipeline/etl/sources/eamlis/etl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from pathlib import Path
import geopandas as gpd
import pandas as pd
from data_pipeline.config import settings

from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class AbandonedMineETL(ExtractTransformLoad):
    """Data from Office Of Surface Mining Reclamation and Enforcement's
    eAMLIS. These are the locations of abandoned mines.

    ``transform`` reduces the point-level eAMLIS export to one row per
    census tract returned by ``add_tracts_for_geometries`` and sets the
    ``AML_BOOLEAN`` column to ``True`` on every one of those rows.
    """

    # Metadata for the baseclass
    NAME = "eamlis"
    GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT

    # Define these for easy code completion.
    # NOTE(review): only annotated here, never assigned in this class;
    # presumably the base class supplies the value — confirm.
    AML_BOOLEAN: str

    def __init__(self):
        """Set the source URL, output location and output columns."""
        # Zipped TSV export, located under the configured data-sources URL.
        self.SOURCE_URL = (
            settings.AWS_JUSTICE40_DATASOURCES_URL
            + "/eAMLIS export of all data.tsv.zip"
        )

        self.TRACT_INPUT_COLUMN_NAME = self.INPUT_GEOID_TRACT_FIELD_NAME

        self.OUTPUT_PATH: Path = (
            self.DATA_PATH / "dataset" / "abandoned_mine_land_inventory_system"
        )

        # The only columns that survive the transform.
        self.COLUMNS_TO_KEEP = [
            self.GEOID_TRACT_FIELD_NAME,
            self.AML_BOOLEAN,
        ]

        # Populated by transform().
        self.output_df: pd.DataFrame

    def transform(self) -> None:
        """Build ``self.output_df`` from the extracted eAMLIS TSV.

        Reads the mine coordinates, converts them to points, attaches
        tracts via ``add_tracts_for_geometries``, keeps a single row per
        tract and flags each remaining row with ``AML_BOOLEAN = True``.
        """
        logger.info("Starting eAMLIS transforms.")
        # Only the coordinates are read from the export below, and the
        # final frame keeps nothing but COLUMNS_TO_KEEP, so the remaining
        # export columns are never needed. Not loading them keeps the
        # memory footprint of this ETL down.
        df = pd.read_csv(
            self.get_tmp_path() / "eAMLIS export of all data.tsv",
            sep="\t",
            usecols=["Longitude", "Latitude"],
            low_memory=False,
        )
        gdf = gpd.GeoDataFrame(
            df,
            geometry=gpd.points_from_xy(
                x=df["Longitude"],
                y=df["Latitude"],
            ),
            crs="epsg:4326",
        )
        # Several records can share one location; one point per location
        # is enough to decide whether a tract contains a mine.
        gdf = gdf.drop_duplicates(subset=["geometry"], keep="last")
        # NOTE(review): assumes add_tracts_for_geometries returns the
        # input rows with a GEOID_TRACT_FIELD_NAME column added — confirm.
        gdf_tracts = add_tracts_for_geometries(gdf)
        # Collapse to one row per tract: the output is a per-tract flag,
        # not a count of mines.
        gdf_tracts = gdf_tracts.drop_duplicates(self.GEOID_TRACT_FIELD_NAME)
        gdf_tracts[self.AML_BOOLEAN] = True
        self.output_df = gdf_tracts[self.COLUMNS_TO_KEEP]
Loading

0 comments on commit 49623e4

Please sign in to comment.