From f8f05aa593797072883305a08eb53214367e3c65 Mon Sep 17 00:00:00 2001
From: matt bowen <matthew.r.bowen@omb.eop.gov>
Date: Sun, 18 Sep 2022 23:18:26 -0400
Subject: [PATCH] Use projected CRSes, ignore geom types (#1900)

I looked into this a bit, and in general the geometry type mismatch
changes very little about the calculation; we have a mix of
multipolygons and polygons. The fastest thing to do is just not keep
geom type; I did some runs with it set to both True and False, and
they're the same within 9 digits of precision. Logically we just want to
overlaps, regardless of how the actual geometries are encoded between
the frames, so we can in this case ignore the geom types and feel OKAY.

I also moved to projected CRSes, since we are actually trying to do area
calculations and so like, we should. Again, the change is small in
magnitude but logically more sound.
---
 .../etl/sources/tribal_overlap/etl.py         | 58 +++++++++----------
 1 file changed, 28 insertions(+), 30 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py b/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py
index 9653060d3..9582f5d75 100644
--- a/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py
@@ -1,8 +1,6 @@
-import warnings
 import geopandas as gpd
 import numpy as np
 import pandas as pd
-
 from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
 from data_pipeline.etl.sources.geo_utils import (
     add_tracts_for_geometries,
@@ -114,8 +112,8 @@ def transform(self) -> None:
             f" {tribal_gdf_without_points.geom_type.unique()}"
         )
         logger.info(
-            f"Tribal without points GDF geom types: \
-            n{tribal_gdf_without_points.geom_type.value_counts()}"
+            f"Tribal without points GDF geom types: \n"
+            f"{tribal_gdf_without_points.geom_type.value_counts()}"
         )
         logger.info(
             f"Tract geom types:\n"
@@ -125,35 +123,35 @@ def transform(self) -> None:
             f"Tract geom types:\n"
             f" {self.census_tract_gdf.geom_type.value_counts()}"
         )
-        
-        # Create a measure for the entire census tract area
-        with warnings.catch_warnings():
-            # Note: geopandas will (helpfully) warn that area is not a useful metric
-            # without being converted to a CRS. However, since we are simply using a
-            # percentage, dividing two area values from the same CRS, this warning is
-            # not relevant. Therefore we suppress it.
-            warnings.filterwarnings(
-                action="ignore",
-                category=UserWarning,
-                message="Geometry is in a geographic CRS.",
-            )
 
-            self.census_tract_gdf["area_tract"] = self.census_tract_gdf.area
+        # Switch from geographic to projected CRSes
+        # because logically that's right
+        self.census_tract_gdf = self.census_tract_gdf.to_crs(crs=3857)
+        tribal_gdf_without_points = tribal_gdf_without_points.to_crs(crs=3857)
 
-            # Performing overlay funcion
-            gdf_joined = gpd.overlay(
-                self.census_tract_gdf,
-                tribal_gdf_without_points,
-                how="intersection",
-            )
-            # Calculating the areas of the newly-created overlapping geometries
-            gdf_joined["area_joined"] = gdf_joined.area
+        # Create a measure for the entire census tract area
+        self.census_tract_gdf["area_tract"] = self.census_tract_gdf.area
+
+        # Performing overlay funcion
+        # We have a mix of polygons and multipolygons, and we just want the overlaps
+        # without caring a ton about the specific types, so we ignore geom type.
+        # Realistically, this changes almost nothing in the calculation; True and False
+        # are the same within 9 digits of precision
+        gdf_joined = gpd.overlay(
+            self.census_tract_gdf,
+            tribal_gdf_without_points,
+            how="intersection",
+            keep_geom_type=False,
+        )
 
-            # Calculating the areas of the newly-created geometries in relation
-            # to the original tract geometries
-            gdf_joined[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT] = (
-                gdf_joined["area_joined"] / gdf_joined["area_tract"]
-            )
+        # Calculating the areas of the newly-created overlapping geometries
+        gdf_joined["area_joined"] = gdf_joined.area
+
+        # Calculating the areas of the newly-created geometries in relation
+        # to the original tract geometries
+        gdf_joined[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT] = (
+            gdf_joined["area_joined"] / gdf_joined["area_tract"]
+        )
 
         # Aggregate the results
         percentage_results = gdf_joined.groupby(