diff --git a/data/data-pipeline/data_pipeline/etl/constants.py b/data/data-pipeline/data_pipeline/etl/constants.py
index 5f71af3aa..74b697b66 100644
--- a/data/data-pipeline/data_pipeline/etl/constants.py
+++ b/data/data-pipeline/data_pipeline/etl/constants.py
@@ -3,152 +3,188 @@
"name": "cdc_places",
"module_dir": "cdc_places",
"class_name": "CDCPlacesETL",
+ "is_memory_intensive": False,
},
{
"name": "national_risk_index",
"module_dir": "national_risk_index",
"class_name": "NationalRiskIndexETL",
+ "is_memory_intensive": False,
},
{
"name": "travel_composite",
"module_dir": "dot_travel_composite",
"class_name": "TravelCompositeETL",
+ "is_memory_intensive": False,
},
{
"name": "tree_equity_score",
"module_dir": "tree_equity_score",
"class_name": "TreeEquityScoreETL",
+ "is_memory_intensive": False,
},
{
"name": "census_decennial",
"module_dir": "census_decennial",
"class_name": "CensusDecennialETL",
+ "is_memory_intensive": False,
},
{
"name": "housing_and_transportation",
"module_dir": "housing_and_transportation",
"class_name": "HousingTransportationETL",
+ "is_memory_intensive": False,
},
{
"name": "mapping_for_ej",
"module_dir": "mapping_for_ej",
"class_name": "MappingForEJETL",
+ "is_memory_intensive": False,
},
{
"name": "fsf_flood_risk",
"module_dir": "fsf_flood_risk",
"class_name": "FloodRiskETL",
+ "is_memory_intensive": False,
},
{
"name": "fsf_wildfire_risk",
"module_dir": "fsf_wildfire_risk",
"class_name": "WildfireRiskETL",
+ "is_memory_intensive": False,
},
{
"name": "ejscreen",
"module_dir": "ejscreen",
"class_name": "EJSCREENETL",
+ "is_memory_intensive": False,
},
{
"name": "hud_housing",
"module_dir": "hud_housing",
"class_name": "HudHousingETL",
+ "is_memory_intensive": False,
},
{
"name": "census_acs_median_income",
"module_dir": "census_acs_median_income",
"class_name": "CensusACSMedianIncomeETL",
+ "is_memory_intensive": False,
},
{
"name": "cdc_life_expectancy",
"module_dir": "cdc_life_expectancy",
"class_name": "CDCLifeExpectancy",
+ "is_memory_intensive": False,
},
{
"name": "doe_energy_burden",
"module_dir": "doe_energy_burden",
"class_name": "DOEEnergyBurden",
+ "is_memory_intensive": False,
},
{
"name": "geocorr",
"module_dir": "geocorr",
"class_name": "GeoCorrETL",
+ "is_memory_intensive": False,
},
{
"name": "child_opportunity_index",
"module_dir": "child_opportunity_index",
"class_name": "ChildOpportunityIndex",
+ "is_memory_intensive": False,
},
{
"name": "mapping_inequality",
"module_dir": "mapping_inequality",
"class_name": "MappingInequalityETL",
+ "is_memory_intensive": False,
},
{
"name": "persistent_poverty",
"module_dir": "persistent_poverty",
"class_name": "PersistentPovertyETL",
+ "is_memory_intensive": False,
},
{
"name": "ejscreen_areas_of_concern",
"module_dir": "ejscreen_areas_of_concern",
"class_name": "EJSCREENAreasOfConcernETL",
+ "is_memory_intensive": False,
},
{
"name": "calenviroscreen",
"module_dir": "calenviroscreen",
"class_name": "CalEnviroScreenETL",
+ "is_memory_intensive": False,
},
{
"name": "hud_recap",
"module_dir": "hud_recap",
"class_name": "HudRecapETL",
+ "is_memory_intensive": False,
},
{
"name": "epa_rsei",
"module_dir": "epa_rsei",
"class_name": "EPARiskScreeningEnvironmentalIndicatorsETL",
+ "is_memory_intensive": False,
},
{
"name": "energy_definition_alternative_draft",
"module_dir": "energy_definition_alternative_draft",
"class_name": "EnergyDefinitionAlternativeDraft",
+ "is_memory_intensive": False,
},
{
"name": "michigan_ejscreen",
"module_dir": "michigan_ejscreen",
"class_name": "MichiganEnviroScreenETL",
+ "is_memory_intensive": False,
},
{
"name": "cdc_svi_index",
"module_dir": "cdc_svi_index",
"class_name": "CDCSVIIndex",
+ "is_memory_intensive": False,
},
{
"name": "maryland_ejscreen",
"module_dir": "maryland_ejscreen",
"class_name": "MarylandEJScreenETL",
+ "is_memory_intensive": False,
},
{
"name": "historic_redlining",
"module_dir": "historic_redlining",
"class_name": "HistoricRedliningETL",
+ "is_memory_intensive": False,
},
# This has to come after us.json exists
{
"name": "census_acs",
"module_dir": "census_acs",
"class_name": "CensusACSETL",
+ "is_memory_intensive": False,
},
{
"name": "census_acs_2010",
"module_dir": "census_acs_2010",
"class_name": "CensusACS2010ETL",
+ "is_memory_intensive": False,
},
{
"name": "us_army_fuds",
"module_dir": "us_army_fuds",
"class_name": "USArmyFUDS",
+ "is_memory_intensive": True,
+ },
+ {
+ "name": "eamlis",
+ "module_dir": "eamlis",
+ "class_name": "AbandonedMineETL",
+ "is_memory_intensive": True,
},
]
diff --git a/data/data-pipeline/data_pipeline/etl/runner.py b/data/data-pipeline/data_pipeline/etl/runner.py
index 5e9230bb3..6d98b1eca 100644
--- a/data/data-pipeline/data_pipeline/etl/runner.py
+++ b/data/data-pipeline/data_pipeline/etl/runner.py
@@ -77,10 +77,27 @@ def etl_runner(dataset_to_run: str = None) -> None:
"""
dataset_list = _get_datasets_to_run(dataset_to_run)
+ # Because we are memory constrained on our infrastructure,
+ # we split datasets into those that are not memory intensive
+ # (is_memory_intensive == False) and thereby can be safely
+ # run in parallel, and those that require more RAM and thus
+ # should be run sequentially. The is_memory_intensive flag is
+ # set manually in constants.py based on experience running
+ # the pipeline.
+ concurrent_datasets = [
+ dataset
+ for dataset in dataset_list
+ if not dataset["is_memory_intensive"]
+ ]
+ high_memory_datasets = [
+ dataset for dataset in dataset_list if dataset["is_memory_intensive"]
+ ]
+
+ logger.info("Running concurrent jobs")
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = {
executor.submit(_run_one_dataset, dataset=dataset)
- for dataset in dataset_list
+ for dataset in concurrent_datasets
}
for fut in concurrent.futures.as_completed(futures):
@@ -88,6 +105,10 @@ def etl_runner(dataset_to_run: str = None) -> None:
# Otherwise, the exceptions are silently ignored.
fut.result()
+ logger.info("Running high-memory jobs")
+ for dataset in high_memory_datasets:
+ _run_one_dataset(dataset=dataset)
+
def score_generate() -> None:
"""Generates the score and saves it on the local data directory
diff --git a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
index be07d6ccb..1b59b4d61 100644
--- a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
+++ b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml
@@ -145,6 +145,18 @@ datasets:
field_type: bool
include_in_tiles: false
include_in_downloadable_files: false
+ - long_name: "Abandoned Mine Land Inventory System"
+ short_name: "eAMLIS"
+ module_name: "eamlis"
+ load_fields:
+ - short_name: "has_aml"
+ df_field_name: "AML_BOOLEAN"
+ long_name: "Is there at least one abandoned mine in this census tract?"
+ description_short:
+ "Whether the tract has an abandoned mine"
+ field_type: bool
+ include_in_tiles: true
+ include_in_downloadable_files: true
- long_name: "Example ETL"
short_name: "Example"
module_name: "example_dataset"
diff --git a/data/data-pipeline/data_pipeline/etl/sources/eamlis/README.md b/data/data-pipeline/data_pipeline/etl/sources/eamlis/README.md
new file mode 100644
index 000000000..57892526f
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/etl/sources/eamlis/README.md
@@ -0,0 +1,40 @@
+The following is the description from eAMLIS as of August 16, 2022.
+
+---
+
+e-AMLIS is not a comprehensive database of all AML features or all AML grant activities. e-AMLIS is a national inventory that provides information about known abandoned mine land (AML) features including polluted waters. The majority of the data in e-AMLIS provides information about known coal AML features for the 25 states and 3 tribal SMCRA-approved AML Programs. e-AMLIS also provides limited information on non-coal AML features, and, non-coal reclamation projects as well as AML features for states and tribes that do not have an approved AML Program. Additionally, e-AMLIS only accounts for the direct construction cost to reclaim each AML feature that has been identified by states and Tribes. Other project costs such as planning, design, permitting, and construction oversight are not tracked in e-AMLIS.
+
+The figures in e-AMLIS are further broken down into 3 cost categories:
+
+- Unfunded Cost represents pre-construction estimates to reclaim the AML feature;
+- Funded Cost indicates that construction has been approved by OSM and these figures may change during construction;
+- Completed Cost is the actual cost to complete construction and reclamation of the AML feature.
+
+DOI/OSMRE’s Financial Business & Management System is the system of record to obtain comprehensive information about all AML grant expenditures.
+
+An inventory of land and water impacted by past mining (primarily coal mining) is maintained by OSMRE to provide information needed to implement the Surface Mining Control and Reclamation Act of 1977 (SMCRA). The inventory contains information on the location, type, and extent of AML impacts, as well as, information on the cost associated with the reclamation of those problems. The inventory is based upon field surveys by State, Tribal, and OSMRE program officials. It is dynamic to the extent that it is modified as new problems are identified and existing problems are reclaimed.
+
+The Abandoned Mine Land Reclamation Act (AMRA) of 1990, amended SMCRA. The amended law expanded the scope of data OSMRE must collect regarding AML reclamation programs and progress. On December 20, 2006, SMCRA was amended under the Tax Relief and Health Care Act of 2006 to add sources of program funding, emphasize high priority coal reclamation, and expand OSMRE’s responsibilities towards implementation and management of the AML Inventory.
+
+WHO MAINTAINS THE INFORMATION IN THE AML INVENTORY?
+
+The information is developed and/or updated by the States and Indian Tribes managing their own AML programs under SMCRA or by the OSMRE office responsible for States and Indian Tribes not managing their own AML problems.
+
+TYPES OF PROBLEMS
+
+"High Priority"
+
+The most serious AML problems are those posing a threat to health, safety and general welfare of people (Priority 1 and Priority 2, or "high priority"). These are the only problems which the law requires to be inventoried. There are 17 Priority 1 and 2 problem types.
+
+Emergencies
+
+Under the 2006 amendments to SMCRA, AML grants to states and tribes increased from $145 million in FY 2007 to $395 million in FY 2011. The increase in funding allowed states to take responsibility for their AML emergencies as part of their regular AML programs.
+
+Until FY 2011, OSMRE provided Abandoned Mine Land (AML) State Emergency grants to the 15 states that manage their own emergency programs under the Abandoned Mine Land Reclamation Program. Thirteen other states and tribes that had approved AML programs did not receive emergency grants. OSMRE managed emergencies in those 13 states and tribes as well as in Federal Program States without AML programs.
+
+OSMRE officially notified the state and tribal officials and Congressional delegations that, starting on October 1, 2010, they would fully assume responsibility for funding their emergency programs. OSMRE then worked with states and tribes to ensure a smooth transition to the states’ assumption of responsibility for administering state emergency programs. New funding and carryover balances were used during the transition to address immediate needs.
+
+Overall, OSMRE successfully transitioned the financial responsibility to the states in FY 2011, and continues to provide technical and program assistance when needed. States with AML programs are now in a position to effectively handle emergency programs.
+
+Environmental
+
+AML problems impacting the environment are known as Priority 3 problems. While SMCRA does not require OSMRE to inventory every unreclaimed priority 3 problem, some program States and Indian tribes have chosen to submit such information. Information for priority 3 problem types is required when reclamation activities are funded and information on completed reclamation of priority 3 problems is kept in the inventory.
+
+Other Coal Mine Related Problems
+
+Information is also kept on lower priority coal related AML problems such as lower priority coal-related projects involving public facilities, and the development of publicly-owned land. The lower priority problems are also categorized-- Priority 4 and 5 problem types.
+
+Non-coal Mine Related AML Problems
+
+The non-coal problems are primarily problems reclaimed by States/Indian tribes that had "Certified" having addressed all known eligible coal related problems. States and Indian tribes managing their own AML programs reclaimed non-coal problems prior to addressing all their coal related problems under SMCRA SEC. 409-- FILLING VOIDS AND SEALING TUNNELS at the request of the Governor of the state or the governing body of the Indian tribe if the Secretary of the Department of the Interior determines such problems meet the criteria for a priority 1, extreme hazard, problems. This Program Area contains historical reclamation accomplishments for Certified Programs reclaiming Priority 1, 2, and 3 non-coal Problem Type features with pre-AML Reauthorization SMCRA funds distributed prior to October 1, 2007.
diff --git a/data/data-pipeline/data_pipeline/etl/sources/eamlis/__init__.py b/data/data-pipeline/data_pipeline/etl/sources/eamlis/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/data/data-pipeline/data_pipeline/etl/sources/eamlis/etl.py b/data/data-pipeline/data_pipeline/etl/sources/eamlis/etl.py
new file mode 100644
index 000000000..0c09b7118
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/etl/sources/eamlis/etl.py
@@ -0,0 +1,62 @@
+from pathlib import Path
+import geopandas as gpd
+import pandas as pd
+from data_pipeline.config import settings
+
+from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
+from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries
+from data_pipeline.utils import get_module_logger
+
+logger = get_module_logger(__name__)
+
+
+class AbandonedMineETL(ExtractTransformLoad):
+ """Data from Office Of Surface Mining Reclamation and Enforcement's
+ eAMLIS. These are the locations of abandoned mines.
+ """
+
+ # Metadata for the baseclass
+ NAME = "eamlis"
+ GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT
+ AML_BOOLEAN: str
+
+ # Define these for easy code completion
+ def __init__(self):
+ self.SOURCE_URL = (
+ settings.AWS_JUSTICE40_DATASOURCES_URL
+ + "/eAMLIS export of all data.tsv.zip"
+ )
+
+ self.TRACT_INPUT_COLUMN_NAME = self.INPUT_GEOID_TRACT_FIELD_NAME
+
+ self.OUTPUT_PATH: Path = (
+ self.DATA_PATH / "dataset" / "abandoned_mine_land_inventory_system"
+ )
+
+ self.COLUMNS_TO_KEEP = [
+ self.GEOID_TRACT_FIELD_NAME,
+ self.AML_BOOLEAN,
+ ]
+
+ self.output_df: pd.DataFrame
+
+ def transform(self) -> None:
+ logger.info("Starting eAMLIS transforms.")
+ df = pd.read_csv(
+ self.get_tmp_path() / "eAMLIS export of all data.tsv",
+ sep="\t",
+ low_memory=False,
+ )
+ gdf = gpd.GeoDataFrame(
+ df,
+ geometry=gpd.points_from_xy(
+ x=df["Longitude"],
+ y=df["Latitude"],
+ ),
+ crs="epsg:4326",
+ )
+ gdf = gdf.drop_duplicates(subset=["geometry"], keep="last")
+ gdf_tracts = add_tracts_for_geometries(gdf)
+ gdf_tracts = gdf_tracts.drop_duplicates(self.GEOID_TRACT_FIELD_NAME)
+ gdf_tracts[self.AML_BOOLEAN] = True
+ self.output_df = gdf_tracts[self.COLUMNS_TO_KEEP]
diff --git a/data/data-pipeline/data_pipeline/ipython/explore_eamlis.ipynb b/data/data-pipeline/data_pipeline/ipython/explore_eamlis.ipynb
new file mode 100644
index 000000000..410853252
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/ipython/explore_eamlis.ipynb
@@ -0,0 +1,2443 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "24085186-3472-43d3-8b87-b5191c4f6ca6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import geopandas as gpd\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import os\n",
+ "import sys\n",
+ "\n",
+ "module_path = os.path.abspath(os.path.join(\"../..\"))\n",
+ "if module_path not in sys.path:\n",
+ " sys.path.append(module_path)\n",
+ "\n",
+ "from data_pipeline.config import settings\n",
+ "from data_pipeline.etl.base import ExtractTransformLoad\n",
+ "from data_pipeline.etl.sources.census.etl import CensusETL\n",
+ "from data_pipeline.etl.sources.geo_utils import add_tracts_for_geometries\n",
+ "from data_pipeline.utils import unzip_file_from_url"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "f36fe7ef-2717-48be-ae94-a3f0aa33acc1",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "%load_ext lab_black"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dbecd665-1c8a-40fe-a7fc-684ecf73f991",
+ "metadata": {},
+ "source": [
+ "# Grab the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "f451ea70-917c-45f9-adf9-9306436b955d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2022-08-16 11:50:57,573 [data_pipeline.utils] INFO Downloading https://justice40-data.s3.amazonaws.com/data-sources/eAMLIS export of all data.tsv.zip\n",
+ "2022-08-16 11:50:57,857 [data_pipeline.utils] INFO Extracting /home/matt/active/justice40-tool/data/data-pipeline/data_pipeline/data/tmp/abandoned_mine_lands/downloaded-af59fffe-aec2-48b4-a57f-716b8dc7e0a3.zip\n"
+ ]
+ }
+ ],
+ "source": [
+ "tmp_path = ExtractTransformLoad.DATA_PATH / \"tmp\" / \"abandoned_mine_lands\"\n",
+ "# Create directory if it doesn't exist\n",
+ "tmp_path.mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ "eamlis_path_in_s3 = (\n",
+ " settings.AWS_JUSTICE40_DATASOURCES_URL + \"/eAMLIS export of all data.tsv.zip\"\n",
+ ")\n",
+ "\n",
+ "unzip_file_from_url(\n",
+ " file_url=eamlis_path_in_s3,\n",
+ " download_path=tmp_path,\n",
+ " unzipped_file_path=tmp_path,\n",
+ ")\n",
+ "\n",
+ "eamlis_path = tmp_path / \"eAMLIS export of all data.tsv\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "5cee0a59-4f69-4678-a7ab-877e57d06f1b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'/home/matt/active/justice40-tool/data/data-pipeline/data_pipeline/data/tmp/abandoned_mine_lands/eAMLIS export of all data.tsv'"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "str(eamlis_path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "5b8119d8-315b-4e87-b286-3767158d63ab",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(eamlis_path, sep=\"\\t\", low_memory=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "4fbac263-5868-4fbc-bcb7-168ef479af53",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(57149, 42)"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "gdf = gpd.GeoDataFrame(\n",
+ " df,\n",
+ " geometry=gpd.points_from_xy(\n",
+ " x=df[\"Longitude\"],\n",
+ " y=df[\"Latitude\"],\n",
+ " ),\n",
+ " crs=\"epsg:4326\",\n",
+ ")\n",
+ "gdf.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "3b176f62-2d13-4bd2-9211-0ac7c2807146",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(3977, 42)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "gdf.drop_duplicates(subset=[\"geometry\"], inplace=True, keep=\"last\")\n",
+ "gdf.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "a0116c52-58f5-48a6-aa9e-c49873ebafa7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2022-08-16 11:51:28,795 [data_pipeline.etl.sources.geo_utils] DEBUG Appending tract data to dataframe\n",
+ "2022-08-16 11:51:28,796 [data_pipeline.etl.sources.geo_utils] INFO Loading tract geometry data from census ETL\n",
+ "2022-08-16 11:51:28,796 [data_pipeline.etl.sources.geo_utils] DEBUG Loading existing tract geojson\n",
+ "/home/matt/.cache/pypoetry/virtualenvs/justice40-data-pipeline-IwBjhw-4-py3.10/lib/python3.10/site-packages/IPython/core/interactiveshell.py:3553: FutureWarning: The `op` parameter is deprecated and will be removed in a future release. Please use the `predicate` parameter instead.\n",
+ " exec(code_obj, self.user_global_ns, self.user_ns)\n"
+ ]
+ }
+ ],
+ "source": [
+ "gdf_tracts = add_tracts_for_geometries(gdf)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "2a18ba4d-274b-4640-a83c-02ae1d02837c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " AMLIS Key | \n",
+ " State/Tribe | \n",
+ " County | \n",
+ " Congressional District | \n",
+ " Quadrangle Name | \n",
+ " Watershed | \n",
+ " HUC Code | \n",
+ " FIPS Code | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ " ... | \n",
+ " Funded GPRA Acres | \n",
+ " Funded Metric Units | \n",
+ " Completed Standard Units | \n",
+ " Completed Costs | \n",
+ " Completed GPRA Acres | \n",
+ " Completed Metric Units | \n",
+ " Unnamed: 40 | \n",
+ " geometry | \n",
+ " index_right | \n",
+ " GEOID10_TRACT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
0 rows × 44 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty GeoDataFrame\n",
+ "Columns: [AMLIS Key, State/Tribe, County, Congressional District, Quadrangle Name, Watershed, HUC Code, FIPS Code, Latitude, Longitude, Funding Source / Program, Problem Area Name, Problem Area Number, Planning Unit Name, Planning Unit Number, Problem Priority, Problem Type, Mining Type, Ore Types, Date Prepared, Date Revised, Private Owner %, State Owner %, Other Federal Owner %, Park Service Owner %, Forest Service Owner %, Indian Owner %, BLM Owner %, Unfunded Standard Units, Unfunded Costs, Unfunded GPRA Acres, Unfunded Metric Units, Funded Standard Units, Funded Costs, Funded GPRA Acres, Funded Metric Units, Completed Standard Units, Completed Costs, Completed GPRA Acres, Completed Metric Units, Unnamed: 40, geometry, index_right, GEOID10_TRACT]\n",
+ "Index: []\n",
+ "\n",
+ "[0 rows x 44 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "gdf_tracts[gdf_tracts.GEOID10_TRACT.isna()]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "e9aaa563-4c11-46b0-a64d-6ce053412e9c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " AMLIS Key | \n",
+ " State/Tribe | \n",
+ " County | \n",
+ " Congressional District | \n",
+ " Quadrangle Name | \n",
+ " Watershed | \n",
+ " HUC Code | \n",
+ " FIPS Code | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ " ... | \n",
+ " Funded GPRA Acres | \n",
+ " Funded Metric Units | \n",
+ " Completed Standard Units | \n",
+ " Completed Costs | \n",
+ " Completed GPRA Acres | \n",
+ " Completed Metric Units | \n",
+ " Unnamed: 40 | \n",
+ " geometry | \n",
+ " index_right | \n",
+ " GEOID10_TRACT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " AK000001 | \n",
+ " AK | \n",
+ " MATANUSKA-SUSITNA | \n",
+ " 1.0 | \n",
+ " ANCHORAGE C-8 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 02170 | \n",
+ " 61.6 | \n",
+ " -149.8 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 900.00 | \n",
+ " 33200.0 | \n",
+ " 12.86 | \n",
+ " 274.30 | \n",
+ " NaN | \n",
+ " POINT (-149.80000 61.60000) | \n",
+ " 9900 | \n",
+ " 02170000401 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " AK000003 | \n",
+ " AK | \n",
+ " VALDEZ-CORDOVA | \n",
+ " 1.0 | \n",
+ " Valdez C-1 | \n",
+ " 19050003 | \n",
+ " NaN | \n",
+ " 02-26 | \n",
+ " 61.6 | \n",
+ " -144.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.34 | \n",
+ " 9200.0 | \n",
+ " 0.03 | \n",
+ " 0.34 | \n",
+ " NaN | \n",
+ " POINT (-144.00000 61.60000) | \n",
+ " 9918 | \n",
+ " 02261000100 | \n",
+ "
\n",
+ " \n",
+ " 100 | \n",
+ " AK000080 | \n",
+ " AK | \n",
+ " VALDEZ-CORDOVA CENSU | \n",
+ " 1.0 | \n",
+ " MCCARTHY C-5 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 02261 | \n",
+ " 61.5 | \n",
+ " -142.8 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 4.00 | \n",
+ " 9924.0 | \n",
+ " 0.40 | \n",
+ " 4.00 | \n",
+ " NaN | \n",
+ " POINT (-142.80000 61.50000) | \n",
+ " 9918 | \n",
+ " 02261000100 | \n",
+ "
\n",
+ " \n",
+ " 113 | \n",
+ " AK000096 | \n",
+ " AK | \n",
+ " VALDEZ-CORDOVA | \n",
+ " 1.0 | \n",
+ " MCCARTHY C-6 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Alaska | \n",
+ " 61.6 | \n",
+ " -142.8 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 2.00 | \n",
+ " 29729.0 | \n",
+ " 0.20 | \n",
+ " 2.00 | \n",
+ " NaN | \n",
+ " POINT (-142.80000 61.60000) | \n",
+ " 9918 | \n",
+ " 02261000100 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " AK000006 | \n",
+ " AK | \n",
+ " MATANUSKA-SUSITNA | \n",
+ " 1.0 | \n",
+ " ANCHORAGE C-6 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Alaska | \n",
+ " 61.7 | \n",
+ " -149.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 3.00 | \n",
+ " 9225.0 | \n",
+ " 0.30 | \n",
+ " 3.00 | \n",
+ " NaN | \n",
+ " POINT (-149.00000 61.70000) | \n",
+ " 9938 | \n",
+ " 02170000200 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 44 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " AMLIS Key State/Tribe County Congressional District \\\n",
+ "2 AK000001 AK MATANUSKA-SUSITNA 1.0 \n",
+ "6 AK000003 AK VALDEZ-CORDOVA 1.0 \n",
+ "100 AK000080 AK VALDEZ-CORDOVA CENSU 1.0 \n",
+ "113 AK000096 AK VALDEZ-CORDOVA 1.0 \n",
+ "12 AK000006 AK MATANUSKA-SUSITNA 1.0 \n",
+ "\n",
+ " Quadrangle Name Watershed HUC Code FIPS Code Latitude Longitude ... \\\n",
+ "2 ANCHORAGE C-8 NaN NaN 02170 61.6 -149.8 ... \n",
+ "6 Valdez C-1 19050003 NaN 02-26 61.6 -144.0 ... \n",
+ "100 MCCARTHY C-5 NaN NaN 02261 61.5 -142.8 ... \n",
+ "113 MCCARTHY C-6 NaN NaN Alaska 61.6 -142.8 ... \n",
+ "12 ANCHORAGE C-6 NaN NaN Alaska 61.7 -149.0 ... \n",
+ "\n",
+ " Funded GPRA Acres Funded Metric Units Completed Standard Units \\\n",
+ "2 0.0 0.0 900.00 \n",
+ "6 0.0 0.0 0.34 \n",
+ "100 0.0 0.0 4.00 \n",
+ "113 0.0 0.0 2.00 \n",
+ "12 0.0 0.0 3.00 \n",
+ "\n",
+ " Completed Costs Completed GPRA Acres Completed Metric Units Unnamed: 40 \\\n",
+ "2 33200.0 12.86 274.30 NaN \n",
+ "6 9200.0 0.03 0.34 NaN \n",
+ "100 9924.0 0.40 4.00 NaN \n",
+ "113 29729.0 0.20 2.00 NaN \n",
+ "12 9225.0 0.30 3.00 NaN \n",
+ "\n",
+ " geometry index_right GEOID10_TRACT \n",
+ "2 POINT (-149.80000 61.60000) 9900 02170000401 \n",
+ "6 POINT (-144.00000 61.60000) 9918 02261000100 \n",
+ "100 POINT (-142.80000 61.50000) 9918 02261000100 \n",
+ "113 POINT (-142.80000 61.60000) 9918 02261000100 \n",
+ "12 POINT (-149.00000 61.70000) 9938 02170000200 \n",
+ "\n",
+ "[5 rows x 44 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "gdf_tracts.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "901f8c2f-b27a-4223-ad73-71daf51f6bd6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(2034, 44)"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "gdf_tracts.drop_duplicates(\"GEOID10_TRACT\").shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "65bfb3e7-d562-4d59-a2ef-f59d385a7c23",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "census_tracts = gpd.read_file(CensusETL.NATIONAL_TRACT_JSON_PATH)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "6b7011aa-49e8-4cf8-877f-b3b36805c18e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "census_tracts.set_index(\"GEOID10\", drop=False, inplace=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "4334eb8e-c8eb-486a-a4e0-a455f18c038c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(3976, 44)"
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "gdf_tracts.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "cbc84001-bb43-46dc-bc6a-306f0e169a20",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " AMLIS Key | \n",
+ " State/Tribe | \n",
+ " County | \n",
+ " Congressional District | \n",
+ " Quadrangle Name | \n",
+ " Watershed | \n",
+ " HUC Code | \n",
+ " FIPS Code | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ " ... | \n",
+ " Funded GPRA Acres | \n",
+ " Funded Metric Units | \n",
+ " Completed Standard Units | \n",
+ " Completed Costs | \n",
+ " Completed GPRA Acres | \n",
+ " Completed Metric Units | \n",
+ " Unnamed: 40 | \n",
+ " geometry | \n",
+ " index_right | \n",
+ " GEOID10_TRACT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 100 | \n",
+ " AK000080 | \n",
+ " AK | \n",
+ " VALDEZ-CORDOVA CENSU | \n",
+ " 1.0 | \n",
+ " MCCARTHY C-5 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 02261 | \n",
+ " 61.5 | \n",
+ " -142.8 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 4.0 | \n",
+ " 9924.0 | \n",
+ " 0.4 | \n",
+ " 4.0 | \n",
+ " NaN | \n",
+ " POINT (-142.80000 61.50000) | \n",
+ " 9918 | \n",
+ " 02261000100 | \n",
+ "
\n",
+ " \n",
+ " 113 | \n",
+ " AK000096 | \n",
+ " AK | \n",
+ " VALDEZ-CORDOVA | \n",
+ " 1.0 | \n",
+ " MCCARTHY C-6 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Alaska | \n",
+ " 61.6 | \n",
+ " -142.8 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 2.0 | \n",
+ " 29729.0 | \n",
+ " 0.2 | \n",
+ " 2.0 | \n",
+ " NaN | \n",
+ " POINT (-142.80000 61.60000) | \n",
+ " 9918 | \n",
+ " 02261000100 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " AK000015 | \n",
+ " AK | \n",
+ " MATANUSKA-SUSITNA | \n",
+ " 1.0 | \n",
+ " ANCHORAGE D-4 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 02170 | \n",
+ " 61.7 | \n",
+ " -148.2 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 7.0 | \n",
+ " 4100.0 | \n",
+ " 0.7 | \n",
+ " 7.0 | \n",
+ " NaN | \n",
+ " POINT (-148.20000 61.70000) | \n",
+ " 9938 | \n",
+ " 02170000200 | \n",
+ "
\n",
+ " \n",
+ " 45 | \n",
+ " AK000040 | \n",
+ " AK | \n",
+ " MATANUSKA-SUSITNA | \n",
+ " 1.0 | \n",
+ " ANCHORAGE C-6 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 02170 | \n",
+ " 61.7 | \n",
+ " -148.8 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 20284.0 | \n",
+ " 0.1 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " POINT (-148.80000 61.70000) | \n",
+ " 9938 | \n",
+ " 02170000200 | \n",
+ "
\n",
+ " \n",
+ " 117 | \n",
+ " AK000099 | \n",
+ " AK | \n",
+ " MATANUSKA-SUSITNA | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 02170 | \n",
+ " 61.7 | \n",
+ " -148.4 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " POINT (-148.40000 61.70000) | \n",
+ " 9938 | \n",
+ " 02170000200 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 57095 | \n",
+ " WY176742 | \n",
+ " WY | \n",
+ " Campbell County | \n",
+ " 1.0 | \n",
+ " Little Thunder Reservoir | \n",
+ " UPPER CHEYENNE | \n",
+ " 10120103.0 | \n",
+ " 56005 | \n",
+ " 43.7 | \n",
+ " -105.4 | \n",
+ " ... | \n",
+ " 3.0 | \n",
+ " 3.0 | \n",
+ " 8.6 | \n",
+ " 1407322.0 | \n",
+ " 8.6 | \n",
+ " 8.6 | \n",
+ " NaN | \n",
+ " POINT (-105.40000 43.70000) | \n",
+ " 28394 | \n",
+ " 56005000100 | \n",
+ "
\n",
+ " \n",
+ " 56861 | \n",
+ " WY082926 | \n",
+ " WY | \n",
+ " PLATTE | \n",
+ " 1.0 | \n",
+ " Guernsey Reservoir | \n",
+ " GLENDO RESERVOIR | \n",
+ " 10180008.0 | \n",
+ " 56031 | \n",
+ " 42.3 | \n",
+ " -104.7 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 293122.0 | \n",
+ " 1.0 | \n",
+ " 0.4 | \n",
+ " NaN | \n",
+ " POINT (-104.70000 42.30000) | \n",
+ " 28402 | \n",
+ " 56031959100 | \n",
+ "
\n",
+ " \n",
+ " 56864 | \n",
+ " WY086744 | \n",
+ " WY | \n",
+ " PLATTE | \n",
+ " 1.0 | \n",
+ " HELL GAP | \n",
+ " GLENDO RESERVOIR | \n",
+ " 10180008.0 | \n",
+ " 56031 | \n",
+ " 42.4 | \n",
+ " -104.7 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 18848.0 | \n",
+ " 0.1 | \n",
+ " 1.0 | \n",
+ " NaN | \n",
+ " POINT (-104.70000 42.40000) | \n",
+ " 28402 | \n",
+ " 56031959100 | \n",
+ "
\n",
+ " \n",
+ " 56930 | \n",
+ " WY102624 | \n",
+ " WY | \n",
+ " FREMONT | \n",
+ " 1.0 | \n",
+ " Lookout Butte | \n",
+ " LOWER WIND | \n",
+ " 10080005.0 | \n",
+ " 56013 | \n",
+ " 43.3 | \n",
+ " -108.7 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " POINT (-108.70000 43.30000) | \n",
+ " 28442 | \n",
+ " 56013940201 | \n",
+ "
\n",
+ " \n",
+ " 57021 | \n",
+ " WY132533 | \n",
+ " WY | \n",
+ " FREMONT | \n",
+ " 1.0 | \n",
+ " Eagle Point | \n",
+ " LOWER WIND | \n",
+ " 10080005.0 | \n",
+ " 56013 | \n",
+ " 43.4 | \n",
+ " -108.7 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " NaN | \n",
+ " POINT (-108.70000 43.40000) | \n",
+ " 28442 | \n",
+ " 56013940201 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1942 rows × 44 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " AMLIS Key State/Tribe County Congressional District \\\n",
+ "100 AK000080 AK VALDEZ-CORDOVA CENSU 1.0 \n",
+ "113 AK000096 AK VALDEZ-CORDOVA 1.0 \n",
+ "30 AK000015 AK MATANUSKA-SUSITNA 1.0 \n",
+ "45 AK000040 AK MATANUSKA-SUSITNA 1.0 \n",
+ "117 AK000099 AK MATANUSKA-SUSITNA 1.0 \n",
+ "... ... ... ... ... \n",
+ "57095 WY176742 WY Campbell County 1.0 \n",
+ "56861 WY082926 WY PLATTE 1.0 \n",
+ "56864 WY086744 WY PLATTE 1.0 \n",
+ "56930 WY102624 WY FREMONT 1.0 \n",
+ "57021 WY132533 WY FREMONT 1.0 \n",
+ "\n",
+ " Quadrangle Name Watershed HUC Code FIPS Code \\\n",
+ "100 MCCARTHY C-5 NaN NaN 02261 \n",
+ "113 MCCARTHY C-6 NaN NaN Alaska \n",
+ "30 ANCHORAGE D-4 NaN NaN 02170 \n",
+ "45 ANCHORAGE C-6 NaN NaN 02170 \n",
+ "117 NaN NaN NaN 02170 \n",
+ "... ... ... ... ... \n",
+ "57095 Little Thunder Reservoir UPPER CHEYENNE 10120103.0 56005 \n",
+ "56861 Guernsey Reservoir GLENDO RESERVOIR 10180008.0 56031 \n",
+ "56864 HELL GAP GLENDO RESERVOIR 10180008.0 56031 \n",
+ "56930 Lookout Butte LOWER WIND 10080005.0 56013 \n",
+ "57021 Eagle Point LOWER WIND 10080005.0 56013 \n",
+ "\n",
+ " Latitude Longitude ... Funded GPRA Acres Funded Metric Units \\\n",
+ "100 61.5 -142.8 ... 0.0 0.0 \n",
+ "113 61.6 -142.8 ... 0.0 0.0 \n",
+ "30 61.7 -148.2 ... 0.0 0.0 \n",
+ "45 61.7 -148.8 ... 0.0 0.0 \n",
+ "117 61.7 -148.4 ... 0.0 0.0 \n",
+ "... ... ... ... ... ... \n",
+ "57095 43.7 -105.4 ... 3.0 3.0 \n",
+ "56861 42.3 -104.7 ... 0.0 0.0 \n",
+ "56864 42.4 -104.7 ... 0.0 0.0 \n",
+ "56930 43.3 -108.7 ... 0.0 0.0 \n",
+ "57021 43.4 -108.7 ... 0.0 0.0 \n",
+ "\n",
+ " Completed Standard Units Completed Costs Completed GPRA Acres \\\n",
+ "100 4.0 9924.0 0.4 \n",
+ "113 2.0 29729.0 0.2 \n",
+ "30 7.0 4100.0 0.7 \n",
+ "45 1.0 20284.0 0.1 \n",
+ "117 0.0 0.0 0.0 \n",
+ "... ... ... ... \n",
+ "57095 8.6 1407322.0 8.6 \n",
+ "56861 1.0 293122.0 1.0 \n",
+ "56864 1.0 18848.0 0.1 \n",
+ "56930 0.0 0.0 0.0 \n",
+ "57021 0.0 0.0 0.0 \n",
+ "\n",
+ " Completed Metric Units Unnamed: 40 geometry \\\n",
+ "100 4.0 NaN POINT (-142.80000 61.50000) \n",
+ "113 2.0 NaN POINT (-142.80000 61.60000) \n",
+ "30 7.0 NaN POINT (-148.20000 61.70000) \n",
+ "45 1.0 NaN POINT (-148.80000 61.70000) \n",
+ "117 0.0 NaN POINT (-148.40000 61.70000) \n",
+ "... ... ... ... \n",
+ "57095 8.6 NaN POINT (-105.40000 43.70000) \n",
+ "56861 0.4 NaN POINT (-104.70000 42.30000) \n",
+ "56864 1.0 NaN POINT (-104.70000 42.40000) \n",
+ "56930 0.0 NaN POINT (-108.70000 43.30000) \n",
+ "57021 0.0 NaN POINT (-108.70000 43.40000) \n",
+ "\n",
+ " index_right GEOID10_TRACT \n",
+ "100 9918 02261000100 \n",
+ "113 9918 02261000100 \n",
+ "30 9938 02170000200 \n",
+ "45 9938 02170000200 \n",
+ "117 9938 02170000200 \n",
+ "... ... ... \n",
+ "57095 28394 56005000100 \n",
+ "56861 28402 56031959100 \n",
+ "56864 28402 56031959100 \n",
+ "56930 28442 56013940201 \n",
+ "57021 28442 56013940201 \n",
+ "\n",
+ "[1942 rows x 44 columns]"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "gdf_tracts[gdf_tracts.GEOID10_TRACT.duplicated()]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bdf572df-8a20-4711-812c-376887de8d30",
+ "metadata": {},
+ "source": [
+ "# Assemble test data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c6820da8-d61d-4da7-870a-d1e648152ae2",
+ "metadata": {},
+ "source": [
+ "## Get some test tracts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "78cf15fc-a9a6-41bc-ab90-9c94a32e0fcd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "example_geoids = pd.read_csv(\n",
+ " \"../tests/sources/example/data/extract.csv\", dtype=\"object\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "c0129782-bc75-4cb5-b53f-4a99d4473aa6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(15, 2)"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "example_geoids.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "2647d12f-3eab-4cbc-a656-a514a2c06084",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "tract_df = gdf_tracts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "7ac85550-c8b9-4978-869b-1a233f89a1b2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "merged_exaple_data = pd.merge(\n",
+ " example_geoids[\"GEOID10_TRACT\"],\n",
+ " tract_df,\n",
+ " on=\"GEOID10_TRACT\",\n",
+ " how=\"left\",\n",
+ " indicator=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "e3e36b6f-8edc-4459-9703-83b790778ce6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " GEOID10_TRACT | \n",
+ " AMLIS Key | \n",
+ " State/Tribe | \n",
+ " County | \n",
+ " Congressional District | \n",
+ " Quadrangle Name | \n",
+ " Watershed | \n",
+ " HUC Code | \n",
+ " FIPS Code | \n",
+ " Latitude | \n",
+ " ... | \n",
+ " Funded GPRA Acres | \n",
+ " Funded Metric Units | \n",
+ " Completed Standard Units | \n",
+ " Completed Costs | \n",
+ " Completed GPRA Acres | \n",
+ " Completed Metric Units | \n",
+ " Unnamed: 40 | \n",
+ " geometry | \n",
+ " index_right | \n",
+ " _merge | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 06027000800 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 06069000802 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 06061021322 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 15001021010 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 15001021101 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 15007040603 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 15007040700 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 15009030100 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 15009030201 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 15001021402 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 15001021800 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 15009030402 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 15009030800 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 15003010201 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 15007040604 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " NaN | \n",
+ " left_only | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
15 rows × 45 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " GEOID10_TRACT AMLIS Key State/Tribe County Congressional District \\\n",
+ "0 06027000800 NaN NaN NaN NaN \n",
+ "1 06069000802 NaN NaN NaN NaN \n",
+ "2 06061021322 NaN NaN NaN NaN \n",
+ "3 15001021010 NaN NaN NaN NaN \n",
+ "4 15001021101 NaN NaN NaN NaN \n",
+ "5 15007040603 NaN NaN NaN NaN \n",
+ "6 15007040700 NaN NaN NaN NaN \n",
+ "7 15009030100 NaN NaN NaN NaN \n",
+ "8 15009030201 NaN NaN NaN NaN \n",
+ "9 15001021402 NaN NaN NaN NaN \n",
+ "10 15001021800 NaN NaN NaN NaN \n",
+ "11 15009030402 NaN NaN NaN NaN \n",
+ "12 15009030800 NaN NaN NaN NaN \n",
+ "13 15003010201 NaN NaN NaN NaN \n",
+ "14 15007040604 NaN NaN NaN NaN \n",
+ "\n",
+ " Quadrangle Name Watershed HUC Code FIPS Code Latitude ... \\\n",
+ "0 NaN NaN NaN NaN NaN ... \n",
+ "1 NaN NaN NaN NaN NaN ... \n",
+ "2 NaN NaN NaN NaN NaN ... \n",
+ "3 NaN NaN NaN NaN NaN ... \n",
+ "4 NaN NaN NaN NaN NaN ... \n",
+ "5 NaN NaN NaN NaN NaN ... \n",
+ "6 NaN NaN NaN NaN NaN ... \n",
+ "7 NaN NaN NaN NaN NaN ... \n",
+ "8 NaN NaN NaN NaN NaN ... \n",
+ "9 NaN NaN NaN NaN NaN ... \n",
+ "10 NaN NaN NaN NaN NaN ... \n",
+ "11 NaN NaN NaN NaN NaN ... \n",
+ "12 NaN NaN NaN NaN NaN ... \n",
+ "13 NaN NaN NaN NaN NaN ... \n",
+ "14 NaN NaN NaN NaN NaN ... \n",
+ "\n",
+ " Funded GPRA Acres Funded Metric Units Completed Standard Units \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "5 NaN NaN NaN \n",
+ "6 NaN NaN NaN \n",
+ "7 NaN NaN NaN \n",
+ "8 NaN NaN NaN \n",
+ "9 NaN NaN NaN \n",
+ "10 NaN NaN NaN \n",
+ "11 NaN NaN NaN \n",
+ "12 NaN NaN NaN \n",
+ "13 NaN NaN NaN \n",
+ "14 NaN NaN NaN \n",
+ "\n",
+ " Completed Costs Completed GPRA Acres Completed Metric Units Unnamed: 40 \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN \n",
+ "3 NaN NaN NaN NaN \n",
+ "4 NaN NaN NaN NaN \n",
+ "5 NaN NaN NaN NaN \n",
+ "6 NaN NaN NaN NaN \n",
+ "7 NaN NaN NaN NaN \n",
+ "8 NaN NaN NaN NaN \n",
+ "9 NaN NaN NaN NaN \n",
+ "10 NaN NaN NaN NaN \n",
+ "11 NaN NaN NaN NaN \n",
+ "12 NaN NaN NaN NaN \n",
+ "13 NaN NaN NaN NaN \n",
+ "14 NaN NaN NaN NaN \n",
+ "\n",
+ " geometry index_right _merge \n",
+ "0 None NaN left_only \n",
+ "1 None NaN left_only \n",
+ "2 None NaN left_only \n",
+ "3 None NaN left_only \n",
+ "4 None NaN left_only \n",
+ "5 None NaN left_only \n",
+ "6 None NaN left_only \n",
+ "7 None NaN left_only \n",
+ "8 None NaN left_only \n",
+ "9 None NaN left_only \n",
+ "10 None NaN left_only \n",
+ "11 None NaN left_only \n",
+ "12 None NaN left_only \n",
+ "13 None NaN left_only \n",
+ "14 None NaN left_only \n",
+ "\n",
+ "[15 rows x 45 columns]"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "merged_exaple_data[merged_exaple_data[\"_merge\"] == \"left_only\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "76f66b32-ce2e-4ad1-93f2-eb566be2e04f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'AMLIS Key': 'AK000001',\n",
+ " 'State/Tribe': 'AK',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 61.6,\n",
+ " 'Longitude': -149.8,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan,\n",
+ " 'geometry': }"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dict(df.iloc[0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "3caf8290-8248-44e5-8f3b-c67c54e834de",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['AMLIS Key', 'BLM Owner %', 'Completed Costs', 'Completed GPRA Acres',\n",
+ " 'Completed Metric Units', 'Completed Standard Units',\n",
+ " 'Congressional District', 'County', 'Date Prepared', 'Date Revised',\n",
+ " 'FIPS Code', 'Forest Service Owner %', 'Funded Costs',\n",
+ " 'Funded GPRA Acres', 'Funded Metric Units', 'Funded Standard Units',\n",
+ " 'Funding Source / Program', 'HUC Code', 'Indian Owner %', 'Latitude',\n",
+ " 'Longitude', 'Mining Type', 'Ore Types', 'Other Federal Owner %',\n",
+ " 'Park Service Owner %', 'Planning Unit Name', 'Planning Unit Number',\n",
+ " 'Private Owner %', 'Problem Area Name', 'Problem Area Number',\n",
+ " 'Problem Priority', 'Problem Type', 'Quadrangle Name', 'State Owner %',\n",
+ " 'State/Tribe', 'Unfunded Costs', 'Unfunded GPRA Acres',\n",
+ " 'Unfunded Metric Units', 'Unfunded Standard Units', 'Unnamed: 40',\n",
+ " 'Watershed', '_merge', 'geometry', 'index_right'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "merged_exaple_data.columns.difference([\"GEOID10_TRACT\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "2f0cd691-069e-4367-8f42-e77efd649e77",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def generate_fake_eamlis_row(tract: str, state: str) -> dict:\n",
+ " generate_fake_eamlis_row._row_id += 1\n",
+ " centroid = census_tracts.loc[tract].geometry.centroid\n",
+ " return {\n",
+ " \"AMLIS Key\": f\"{state}{str(generate_fake_eamlis_row._row_id).zfill(6)}\",\n",
+ " \"State/Tribe\": state,\n",
+ " \"County\": \"MATANUSKA-SUSITNA\",\n",
+ " \"Congressional District\": 1.0,\n",
+ " \"Quadrangle Name\": \"ANCHORAGE C-8\",\n",
+ " \"Watershed\": np.nan,\n",
+ " \"HUC Code\": np.nan,\n",
+ " \"FIPS Code\": \"02170\",\n",
+ " \"Latitude\": centroid.y,\n",
+ " \"Longitude\": centroid.x,\n",
+ " \"Funding Source / Program\": \"FRA\",\n",
+ " \"Problem Area Name\": \"EAST HOUSTON MINE\",\n",
+ " \"Problem Area Number\": 1,\n",
+ " \"Planning Unit Name\": \"HOUSTON\",\n",
+ " \"Planning Unit Number\": \"1\",\n",
+ " \"Problem Priority\": \"1\",\n",
+ " \"Problem Type\": \"VO\",\n",
+ " \"Mining Type\": \"S\",\n",
+ " \"Ore Types\": np.nan,\n",
+ " \"Date Prepared\": \"12/3/1986 12:00:00 AM\",\n",
+ " \"Date Revised\": \"4/23/2014 6:40:28 PM\",\n",
+ " \"Private Owner %\": 0.0,\n",
+ " \"State Owner %\": 0.0,\n",
+ " \"Other Federal Owner %\": 0.0,\n",
+ " \"Park Service Owner %\": 0.0,\n",
+ " \"Forest Service Owner %\": 0.0,\n",
+ " \"Indian Owner %\": 0.0,\n",
+ " \"BLM Owner %\": 0.0,\n",
+ " \"Unfunded Standard Units\": 0.0,\n",
+ " \"Unfunded Costs\": 0.0,\n",
+ " \"Unfunded GPRA Acres\": 0.0,\n",
+ " \"Unfunded Metric Units\": 0.0,\n",
+ " \"Funded Standard Units\": 0.0,\n",
+ " \"Funded Costs\": 0.0,\n",
+ " \"Funded GPRA Acres\": 0.0,\n",
+ " \"Funded Metric Units\": 0.0,\n",
+ " \"Completed Standard Units\": 2.0,\n",
+ " \"Completed Costs\": 10000.0,\n",
+ " \"Completed GPRA Acres\": 0.2,\n",
+ " \"Completed Metric Units\": 2.0,\n",
+ " \"Unnamed: 40\": np.nan,\n",
+ " }\n",
+ "\n",
+ "\n",
+ "generate_fake_eamlis_row._row_id = 0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "6ab61dda-8642-4706-9b67-b03ed27713fd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rows = []\n",
+ "for tract in merged_exaple_data[\n",
+ " merged_exaple_data[\"_merge\"] == \"left_only\"\n",
+ "].GEOID10_TRACT:\n",
+ " state = \"HI\"\n",
+ " if tract.startswith(\"06\"):\n",
+ " state = \"CA\"\n",
+ " rows.append(generate_fake_eamlis_row(tract, state))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "93714c4d-3e1d-49c1-9bac-4e553e0bae08",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pd.DataFrame(rows).to_csv(\n",
+ " \"../tests/sources/eamlis/data/eAMLIS export of all data.tsv.zip\",\n",
+ " index=False,\n",
+ " sep=\"\\t\",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "776d4dab-4067-4d36-9ce3-d7c7094ea8c6",
+ "metadata": {},
+ "source": [
+ "## Get the points for the geolocation mock"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "a630a0f8-e922-4eb3-9937-6c4b590fd527",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lookup_table = {}\n",
+ "for tract in merged_exaple_data[\n",
+ " merged_exaple_data[\"_merge\"] == \"left_only\"\n",
+ "].GEOID10_TRACT:\n",
+ " centroid = census_tracts.loc[tract].geometry.centroid\n",
+ " lookup_table[(centroid.x, centroid.y)] = tract"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "d828b2e0-424e-4040-b167-da46734bcf4d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[{'AMLIS Key': 'CA000001',\n",
+ " 'State/Tribe': 'CA',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 36.25161281807095,\n",
+ " 'Longitude': -117.11772856883819,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'CA000002',\n",
+ " 'State/Tribe': 'CA',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 36.5498780497345,\n",
+ " 'Longitude': -121.0070599015156,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'CA000003',\n",
+ " 'State/Tribe': 'CA',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 38.84602113669345,\n",
+ " 'Longitude': -121.40564726784282,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'HI000004',\n",
+ " 'State/Tribe': 'HI',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 19.49784370888389,\n",
+ " 'Longitude': -155.10321769858746,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'HI000005',\n",
+ " 'State/Tribe': 'HI',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 19.446650238354696,\n",
+ " 'Longitude': -154.89548634140738,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'HI000006',\n",
+ " 'State/Tribe': 'HI',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 21.904412260968197,\n",
+ " 'Longitude': -159.43665201302525,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'HI000007',\n",
+ " 'State/Tribe': 'HI',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 21.94208315793464,\n",
+ " 'Longitude': -159.52362041178708,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'HI000008',\n",
+ " 'State/Tribe': 'HI',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 20.72796381691298,\n",
+ " 'Longitude': -156.14177664396527,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'HI000009',\n",
+ " 'State/Tribe': 'HI',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 20.86486713282688,\n",
+ " 'Longitude': -156.2497797752935,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'HI000010',\n",
+ " 'State/Tribe': 'HI',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 19.516629328900667,\n",
+ " 'Longitude': -155.91378867633992,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'HI000011',\n",
+ " 'State/Tribe': 'HI',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 20.164406070883054,\n",
+ " 'Longitude': -155.81110884967674,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'HI000012',\n",
+ " 'State/Tribe': 'HI',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 20.825369670478306,\n",
+ " 'Longitude': -156.33064622489087,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'HI000013',\n",
+ " 'State/Tribe': 'HI',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 20.9170439162332,\n",
+ " 'Longitude': -156.54289869319305,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'HI000014',\n",
+ " 'State/Tribe': 'HI',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 21.556464980367483,\n",
+ " 'Longitude': -157.89225964427064,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan},\n",
+ " {'AMLIS Key': 'HI000015',\n",
+ " 'State/Tribe': 'HI',\n",
+ " 'County': 'MATANUSKA-SUSITNA',\n",
+ " 'Congressional District': 1.0,\n",
+ " 'Quadrangle Name': 'ANCHORAGE C-8',\n",
+ " 'Watershed': nan,\n",
+ " 'HUC Code': nan,\n",
+ " 'FIPS Code': '02170',\n",
+ " 'Latitude': 21.90754283544759,\n",
+ " 'Longitude': -159.48416846823164,\n",
+ " 'Funding Source / Program': 'FRA',\n",
+ " 'Problem Area Name': 'EAST HOUSTON MINE',\n",
+ " 'Problem Area Number': 1,\n",
+ " 'Planning Unit Name': 'HOUSTON',\n",
+ " 'Planning Unit Number': '1',\n",
+ " 'Problem Priority': '1',\n",
+ " 'Problem Type': 'VO',\n",
+ " 'Mining Type': 'S',\n",
+ " 'Ore Types': nan,\n",
+ " 'Date Prepared': '12/3/1986 12:00:00 AM',\n",
+ " 'Date Revised': '4/23/2014 6:40:28 PM',\n",
+ " 'Private Owner %': 0.0,\n",
+ " 'State Owner %': 0.0,\n",
+ " 'Other Federal Owner %': 0.0,\n",
+ " 'Park Service Owner %': 0.0,\n",
+ " 'Forest Service Owner %': 0.0,\n",
+ " 'Indian Owner %': 0.0,\n",
+ " 'BLM Owner %': 0.0,\n",
+ " 'Unfunded Standard Units': 0.0,\n",
+ " 'Unfunded Costs': 0.0,\n",
+ " 'Unfunded GPRA Acres': 0.0,\n",
+ " 'Unfunded Metric Units': 0.0,\n",
+ " 'Funded Standard Units': 0.0,\n",
+ " 'Funded Costs': 0.0,\n",
+ " 'Funded GPRA Acres': 0.0,\n",
+ " 'Funded Metric Units': 0.0,\n",
+ " 'Completed Standard Units': 2.0,\n",
+ " 'Completed Costs': 10000.0,\n",
+ " 'Completed GPRA Acres': 0.2,\n",
+ " 'Completed Metric Units': 2.0,\n",
+ " 'Unnamed: 40': nan}]"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "d6b1d835-deb2-4bbd-b33e-460ba94cf192",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{(-117.11772856883819, 36.25161281807095): '06027000800',\n",
+ " (-121.0070599015156, 36.5498780497345): '06069000802',\n",
+ " (-121.40564726784282, 38.84602113669345): '06061021322',\n",
+ " (-155.10321769858746, 19.49784370888389): '15001021010',\n",
+ " (-154.89548634140738, 19.446650238354696): '15001021101',\n",
+ " (-159.43665201302525, 21.904412260968197): '15007040603',\n",
+ " (-159.52362041178708, 21.94208315793464): '15007040700',\n",
+ " (-156.14177664396527, 20.72796381691298): '15009030100',\n",
+ " (-156.2497797752935, 20.86486713282688): '15009030201',\n",
+ " (-155.91378867633992, 19.516629328900667): '15001021402',\n",
+ " (-155.81110884967674, 20.164406070883054): '15001021800',\n",
+ " (-156.33064622489087, 20.825369670478306): '15009030402',\n",
+ " (-156.54289869319305, 20.9170439162332): '15009030800',\n",
+ " (-157.89225964427064, 21.556464980367483): '15003010201',\n",
+ " (-159.48416846823164, 21.90754283544759): '15007040604'}"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "lookup_table"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/data/data-pipeline/data_pipeline/tests/sources/eamlis/__init__.py b/data/data-pipeline/data_pipeline/tests/sources/eamlis/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/data/data-pipeline/data_pipeline/tests/sources/eamlis/data/eAMLIS export of all data.tsv.zip b/data/data-pipeline/data_pipeline/tests/sources/eamlis/data/eAMLIS export of all data.tsv.zip
new file mode 100644
index 000000000..d4729a9f1
Binary files /dev/null and b/data/data-pipeline/data_pipeline/tests/sources/eamlis/data/eAMLIS export of all data.tsv.zip differ
diff --git a/data/data-pipeline/data_pipeline/tests/sources/eamlis/data/extract.csv b/data/data-pipeline/data_pipeline/tests/sources/eamlis/data/extract.csv
new file mode 100644
index 000000000..8e8f79062
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/tests/sources/eamlis/data/extract.csv
@@ -0,0 +1,16 @@
+AMLIS Key State/Tribe County Congressional District Quadrangle Name Watershed HUC Code FIPS Code Latitude Longitude Funding Source / Program Problem Area Name Problem Area Number Planning Unit Name Planning Unit Number Problem Priority Problem Type Mining Type Ore Types Date Prepared Date Revised Private Owner % State Owner % Other Federal Owner % Park Service Owner % Forest Service Owner % Indian Owner % BLM Owner % Unfunded Standard Units Unfunded Costs Unfunded GPRA Acres Unfunded Metric Units Funded Standard Units Funded Costs Funded GPRA Acres Funded Metric Units Completed Standard Units Completed Costs Completed GPRA Acres Completed Metric Units Unnamed: 40
+CA000001 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 36.25161281807095 -117.11772856883819 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+CA000002 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 36.5498780497345 -121.0070599015156 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+CA000003 CA MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 38.84602113669345 -121.40564726784282 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+HI000004 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.49784370888389 -155.10321769858746 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+HI000005 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.446650238354696 -154.89548634140738 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+HI000006 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.904412260968197 -159.43665201302525 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+HI000007 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.94208315793464 -159.52362041178708 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+HI000008 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.72796381691298 -156.14177664396527 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+HI000009 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.86486713282688 -156.2497797752935 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+HI000010 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 19.516629328900667 -155.91378867633992 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+HI000011 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.164406070883054 -155.81110884967674 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+HI000012 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.825369670478306 -156.33064622489087 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+HI000013 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 20.9170439162332 -156.54289869319305 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+HI000014 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.556464980367483 -157.89225964427064 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
+HI000015 HI MATANUSKA-SUSITNA 1.0 ANCHORAGE C-8 02170 21.90754283544759 -159.48416846823164 FRA EAST HOUSTON MINE 1 HOUSTON 1 1 VO S 12/3/1986 12:00:00 AM 4/23/2014 6:40:28 PM 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.0 10000.0 0.2 2.0
diff --git a/data/data-pipeline/data_pipeline/tests/sources/eamlis/data/output.csv b/data/data-pipeline/data_pipeline/tests/sources/eamlis/data/output.csv
new file mode 100644
index 000000000..87d5ca8d0
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/tests/sources/eamlis/data/output.csv
@@ -0,0 +1,16 @@
+GEOID10_TRACT,Is there at least one abandoned mine in this census tract?
+06027000800,True
+06069000802,True
+06061021322,True
+15001021010,True
+15001021101,True
+15007040603,True
+15007040700,True
+15009030100,True
+15009030201,True
+15001021402,True
+15001021800,True
+15009030402,True
+15009030800,True
+15003010201,True
+15007040604,True
diff --git a/data/data-pipeline/data_pipeline/tests/sources/eamlis/data/transform.csv b/data/data-pipeline/data_pipeline/tests/sources/eamlis/data/transform.csv
new file mode 100644
index 000000000..87d5ca8d0
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/tests/sources/eamlis/data/transform.csv
@@ -0,0 +1,16 @@
+GEOID10_TRACT,Is there at least one abandoned mine in this census tract?
+06027000800,True
+06069000802,True
+06061021322,True
+15001021010,True
+15001021101,True
+15007040603,True
+15007040700,True
+15009030100,True
+15009030201,True
+15001021402,True
+15001021800,True
+15009030402,True
+15009030800,True
+15003010201,True
+15007040604,True
diff --git a/data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py b/data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py
new file mode 100644
index 000000000..e4c7d8ac1
--- /dev/null
+++ b/data/data-pipeline/data_pipeline/tests/sources/eamlis/test_etl.py
@@ -0,0 +1,152 @@
+# pylint: disable=protected-access
+from unittest import mock
+import pathlib
+from data_pipeline.etl.base import ValidGeoLevel
+
+from data_pipeline.etl.sources.eamlis.etl import (
+ AbandonedMineETL,
+)
+from data_pipeline.tests.sources.example.test_etl import TestETL
+from data_pipeline.utils import get_module_logger
+
+logger = get_module_logger(__name__)
+
+
+def _fake_add_tracts_for_geometries(df):  # patched in for add_tracts_for_geometries by the tests below
+ """The actual geojoin is too slow for tests. Use precomputed results."""
+ lookups = {  # exact (point.x, point.y) pair -> tract ID string
+ (-117.1177285688382, 36.25161281807095): "06027000800",
+ (-121.0070599015156, 36.5498780497345): "06069000802",
+ (-121.40564726784282, 38.84602113669345): "06061021322",
+ (-155.10321769858746, 19.49784370888389): "15001021010",
+ (-154.89548634140738, 19.446650238354696): "15001021101",
+ (-159.43665201302525, 21.9044122609682): "15007040603",
+ (-159.52362041178708, 21.94208315793464): "15007040700",
+ (-156.14177664396527, 20.72796381691298): "15009030100",
+ (-156.2497797752935, 20.86486713282688): "15009030201",
+ (-155.91378867633992, 19.516629328900667): "15001021402",
+ (-155.81110884967674, 20.164406070883054): "15001021800",
+ (-156.33064622489087, 20.825369670478302): "15009030402",
+ (-156.54289869319305, 20.9170439162332): "15009030800",
+ (-157.89225964427064, 21.556464980367483): "15003010201",
+ (-159.48416846823164, 21.90754283544759): "15007040604",
+ }  # NOTE(review): 3 keys differ in their last digits from extract.csv's coordinates; confirm they equal the parsed values
+ df["GEOID10_TRACT"] = df.geometry.apply(  # column is written onto the frame that was passed in
+ lambda point: lookups[(point.x, point.y)]  # a point not listed above raises KeyError
+ )
+ return df
+
+
+class TestAbandondedLandMineETL(TestETL):
+ """Tests the Abandoned Mine Dataset ETL.
+
+ This uses pytest-snapshot. Overrides re-run parent tests with the geojoin mocked.
+ To update individual snapshots: $ poetry run pytest
+ data_pipeline/tests/sources/eamlis/test_etl.py::TestAbandondedLandMineETL::
+ --snapshot-update
+ """
+
+ _ETL_CLASS = AbandonedMineETL  # ETL class instantiated by the tests below
+
+ _SAMPLE_DATA_PATH = pathlib.Path(__file__).parents[0] / "data"  # "data" directory beside this file
+ _SAMPLE_DATA_FILE_NAME = "eAMLIS export of all data.tsv"
+ _SAMPLE_DATA_ZIP_FILE_NAME = "eAMLIS export of all data.tsv.zip"
+ _EXTRACT_TMP_FOLDER_NAME = "AbandonedMineETL"  # NOTE(review): these names are presumably consumed by TestETL; confirm
+
+ def setup_method(self, _method, filename=__file__):
+ """Invoke `setup_method` from Parent, but using the current file name.
+
+ This code can be copied identically between all child classes.
+ """
+ super().setup_method(_method=_method, filename=filename)
+
+ def test_init(self, mock_etl, mock_paths):
+ """Tests that the mock AbandonedMineETL class instance was
+ initialized correctly.
+ """
+ # setup
+ etl = self._ETL_CLASS()
+ # validation
+ assert etl.GEOID_FIELD_NAME == "GEOID10"
+ assert etl.GEOID_TRACT_FIELD_NAME == "GEOID10_TRACT"
+ assert etl.NAME == "eamlis"
+ assert etl.GEO_LEVEL == ValidGeoLevel.CENSUS_TRACT
+ assert etl.COLUMNS_TO_KEEP == [
+ etl.GEOID_TRACT_FIELD_NAME,
+ etl.AML_BOOLEAN,
+ ]
+
+ def test_get_output_file_path(self, mock_etl, mock_paths):
+ """Tests the right file name is returned."""
+ etl = self._ETL_CLASS()
+ data_path, tmp_path = mock_paths
+
+ output_file_path = etl._get_output_file_path()
+ expected_output_file_path = (
+ data_path / "dataset" / self._ETL_CLASS.NAME / "usa.csv"
+ )
+ assert output_file_path == expected_output_file_path
+
+ def test_fixtures_contain_shared_tract_ids_base(self, mock_etl, mock_paths):
+ with mock.patch(  # parent test, with the geojoin replaced by the precomputed lookup
+ "data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
+ new=_fake_add_tracts_for_geometries,
+ ):
+ return super().test_fixtures_contain_shared_tract_ids_base(
+ mock_etl, mock_paths
+ )
+
+ def test_transform_base(self, snapshot, mock_etl, mock_paths):
+ with mock.patch(  # parent test, with the geojoin replaced by the precomputed lookup
+ "data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
+ new=_fake_add_tracts_for_geometries,
+ ):
+ super().test_transform_base(
+ snapshot=snapshot, mock_etl=mock_etl, mock_paths=mock_paths
+ )
+
+ def test_transform_sets_output_df_base(self, mock_etl, mock_paths):
+ with mock.patch(  # parent test, with the geojoin replaced by the precomputed lookup
+ "data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
+ new=_fake_add_tracts_for_geometries,
+ ):
+ super().test_transform_sets_output_df_base(
+ mock_etl=mock_etl, mock_paths=mock_paths
+ )
+
+ def test_validate_base(self, mock_etl, mock_paths):
+ with mock.patch(  # parent test, with the geojoin replaced by the precomputed lookup
+ "data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
+ new=_fake_add_tracts_for_geometries,
+ ):
+ super().test_validate_base(mock_etl=mock_etl, mock_paths=mock_paths)
+
+ def test_full_etl_base(self, mock_etl, mock_paths):
+ with mock.patch(  # parent test, with the geojoin replaced by the precomputed lookup
+ "data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
+ new=_fake_add_tracts_for_geometries,
+ ):
+ return super().test_full_etl_base(mock_etl, mock_paths)
+
+ def test_get_data_frame_base(self, mock_etl, mock_paths):
+ with mock.patch(  # parent test, with the geojoin replaced by the precomputed lookup
+ "data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
+ new=_fake_add_tracts_for_geometries,
+ ):
+ return super().test_get_data_frame_base(mock_etl, mock_paths)
+
+ def test_tracts_without_fuds_not_in_results(self, mock_etl, mock_paths):  # NOTE(review): "fuds" looks carried over from another dataset's tests; confirm
+ with mock.patch(  # full extract/transform/validate/load run with the geojoin faked
+ "data_pipeline.etl.sources.eamlis.etl.add_tracts_for_geometries",
+ new=_fake_add_tracts_for_geometries,
+ ):
+ etl = self._setup_etl_instance_and_run_extract(
+ mock_etl=mock_etl, mock_paths=mock_paths
+ )
+ etl.transform()
+ etl.validate()
+ etl.load()
+ df = etl.get_data_frame()
+ assert len(df[etl.GEOID_TRACT_FIELD_NAME]) == len(  # row count must equal the number of shared fixture tract IDs
+ self._FIXTURES_SHARED_TRACT_IDS
+ )