From ddb331d7b7b977330da7ef3ad6446db9fa6e0d4a Mon Sep 17 00:00:00 2001 From: chinandrew Date: Wed, 14 Oct 2020 17:09:03 -0700 Subject: [PATCH 1/3] Refactor to use GeoMapper for fips to state --- safegraph/delphi_safegraph/__init__.py | 1 - safegraph/delphi_safegraph/geo.py | 64 -------------------------- safegraph/delphi_safegraph/process.py | 13 ++++-- 3 files changed, 10 insertions(+), 68 deletions(-) delete mode 100644 safegraph/delphi_safegraph/geo.py diff --git a/safegraph/delphi_safegraph/__init__.py b/safegraph/delphi_safegraph/__init__.py index fc2d5818d..70cd6a34d 100644 --- a/safegraph/delphi_safegraph/__init__.py +++ b/safegraph/delphi_safegraph/__init__.py @@ -8,5 +8,4 @@ from __future__ import absolute_import -from . import geo from . import process diff --git a/safegraph/delphi_safegraph/geo.py b/safegraph/delphi_safegraph/geo.py deleted file mode 100644 index 67969630c..000000000 --- a/safegraph/delphi_safegraph/geo.py +++ /dev/null @@ -1,64 +0,0 @@ -# -*- coding: utf-8 -*- - -# https://code.activestate.com/recipes/577775-state-fips-codes-dict/ -STATE_TO_FIPS = { - "AS": "60", # American Samoa - "GU": "66", # Guam - "MP": "69", # Northern Mariana Islands - "VI": "78", # Virgin Islands - "WA": "53", - "DE": "10", - "DC": "11", - "WI": "55", - "WV": "54", - "HI": "15", - "FL": "12", - "WY": "56", - "PR": "72", - "NJ": "34", - "NM": "35", - "TX": "48", - "LA": "22", - "NC": "37", - "ND": "38", - "NE": "31", - "TN": "47", - "NY": "36", - "PA": "42", - "AK": "02", - "NV": "32", - "NH": "33", - "VA": "51", - "CO": "08", - "CA": "06", - "AL": "01", - "AR": "05", - "VT": "50", - "IL": "17", - "GA": "13", - "IN": "18", - "IA": "19", - "MA": "25", - "AZ": "04", - "ID": "16", - "CT": "09", - "ME": "23", - "MD": "24", - "OK": "40", - "OH": "39", - "UT": "49", - "MO": "29", - "MN": "27", - "MI": "26", - "RI": "44", - "KS": "20", - "MT": "30", - "MS": "28", - "SC": "45", - "KY": "21", - "OR": "41", - "SD": "46", -} - -FIPS_TO_STATE = {v: k.lower() for k, v in STATE_TO_FIPS.items()} - diff --git a/safegraph/delphi_safegraph/process.py b/safegraph/delphi_safegraph/process.py index 0da4be880..235b1d7d5 100644 --- a/safegraph/delphi_safegraph/process.py +++ b/safegraph/delphi_safegraph/process.py @@ -2,8 +2,9 @@ import numpy as np import pandas as pd +from delphi_utils import GeoMapper + from .constants import HOME_DWELL, COMPLETELY_HOME, FULL_TIME_WORK, PART_TIME_WORK -from .geo import FIPS_TO_STATE # Magic number for modular arithmetic; CBG -> FIPS MOD = 10000000 @@ -132,11 +133,17 @@ def aggregate(df, signal_names, geo_resolution='county'): ''' # Prepare geo resolution GEO_RESOLUTION = ('county', 'state') + if geo_resolution == 'county': df['geo_id'] = df['county_fips'] elif geo_resolution == 'state': - df['geo_id'] = df['county_fips'].apply(lambda x: - FIPS_TO_STATE[x[:2]]) + gmpr = GeoMapper() + df = gmpr.add_geocode(df, + from_col='county_fips', + from_code='fips', + new_code='state_id', + new_col='geo_id') + else: raise ValueError(f'`geo_resolution` must be one of {GEO_RESOLUTION}.') From 0e1bd78f536b50a455d51fb3589e4e08ae8c09e1 Mon Sep 17 00:00:00 2001 From: chinandrew Date: Wed, 14 Oct 2020 17:15:52 -0700 Subject: [PATCH 2/3] Change join behavior to match original --- safegraph/delphi_safegraph/process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/safegraph/delphi_safegraph/process.py b/safegraph/delphi_safegraph/process.py index 235b1d7d5..ada343af2 100644 --- a/safegraph/delphi_safegraph/process.py +++ b/safegraph/delphi_safegraph/process.py @@ -142,8 +142,8 @@ def aggregate(df, signal_names, geo_resolution='county'): from_col='county_fips', from_code='fips', new_code='state_id', - new_col='geo_id') - + new_col='geo_id', + dropna=False) else: raise ValueError(f'`geo_resolution` must be one of {GEO_RESOLUTION}.') From ca7ccd0d598628aa9d41ab2807ad455febdd7664 Mon Sep 17 00:00:00 2001 From: chinandrew Date: Wed, 14 Oct 2020 17:29:17 -0700 Subject: [PATCH 3/3] Remove newline --- safegraph/delphi_safegraph/process.py | 1 - 1 file changed, 1 deletion(-) diff --git a/safegraph/delphi_safegraph/process.py b/safegraph/delphi_safegraph/process.py index ada343af2..0c09bbb9d 100644 --- a/safegraph/delphi_safegraph/process.py +++ b/safegraph/delphi_safegraph/process.py @@ -133,7 +133,6 @@ def aggregate(df, signal_names, geo_resolution='county'): ''' # Prepare geo resolution GEO_RESOLUTION = ('county', 'state') - if geo_resolution == 'county': df['geo_id'] = df['county_fips'] elif geo_resolution == 'state':