Skip to content

Commit

Permalink
process-documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
aseemdeodhar committed Jul 15, 2022
1 parent 2d65208 commit 7128868
Show file tree
Hide file tree
Showing 15 changed files with 161 additions and 91 deletions.
154 changes: 77 additions & 77 deletions .Rhistory
Original file line number Diff line number Diff line change
@@ -1,80 +1,3 @@
NAME = gsub(pattern = " city.*", "", NAME),
NAME = gsub(pattern = " town.*", "", NAME)) %>% view()
acs_name <- function(acs_year, acs_geoid){
tidycensus::get_acs(geography = "county subdivision",
table = "B01001",
year = acs_year,
state = 25,
cache_table = TRUE) %>%
filter(variable == "B01001_001", !str_detect(NAME, "County subdivisions not defined")) %>%
mutate(NAME = gsub(pattern = " Town.*", "", NAME),
NAME = gsub(pattern = " city.*", "", NAME),
NAME = gsub(pattern = " town.*", "", NAME)) %>%
rename(acs_geoid = GEOID,
muni_name = NAME)
}
acs_name(acs_year = 2020, acs_geoid = cosub_5y20)
acs_name <- function(acs_year, acs_geoid){
tidycensus::get_acs(geography = "county subdivision",
table = "B01001",
year = acs_year,
state = 25,
cache_table = TRUE) %>%
filter(variable == "B01001_001", !str_detect(NAME, "County subdivisions not defined")) %>%
mutate(NAME = gsub(pattern = " Town.*", "", NAME),
NAME = gsub(pattern = " city.*", "", NAME),
NAME = gsub(pattern = " town.*", "", NAME)) %>%
rename({{acs_geoid}} = GEOID,
muni_name = NAME)
}
tidycensus::get_acs(geography = "county subdivision",
table = "B01001",
year = acs_year,
state = 25,
cache_table = TRUE) %>%
filter(variable == "B01001_001", !str_detect(NAME, "County subdivisions not defined")) %>%
mutate(NAME = gsub(pattern = " Town.*", "", NAME),
NAME = gsub(pattern = " city.*", "", NAME),
NAME = gsub(pattern = " town.*", "", NAME)) %>%
rename(acs_geoid := GEOID,
muni_name = NAME)
acs_name <- function(acs_year, acs_geoid){
tidycensus::get_acs(geography = "county subdivision",
table = "B01001",
year = acs_year,
state = 25,
cache_table = TRUE) %>%
filter(variable == "B01001_001", !str_detect(NAME, "County subdivisions not defined")) %>%
mutate(NAME = gsub(pattern = " Town.*", "", NAME),
NAME = gsub(pattern = " city.*", "", NAME),
NAME = gsub(pattern = " town.*", "", NAME)) %>%
rename(acs_geoid := GEOID,
muni_name = NAME)
}
acs_name(acs_year = 2020, acs_geoid = cosub_5y20)
acs_name <- function(acs_year, acs_geoid){
tidycensus::get_acs(geography = "county subdivision",
table = "B01001",
year = acs_year,
state = 25,
cache_table = TRUE) %>%
filter(variable == "B01001_001", !str_detect(NAME, "County subdivisions not defined")) %>%
mutate(NAME = gsub(pattern = " Town.*", "", NAME),
NAME = gsub(pattern = " city.*", "", NAME),
NAME = gsub(pattern = " town.*", "", NAME)) %>%
rename({{acs_geoid}} = GEOID,
muni_name = NAME)
}
acs_name <- function(acs_year, acs_geoid){
tidycensus::get_acs(geography = "county subdivision",
table = "B01001",
year = acs_year,
state = 25,
cache_table = TRUE) %>%
filter(variable == "B01001_001", !str_detect(NAME, "County subdivisions not defined")) %>%
mutate(NAME = gsub(pattern = " Town.*", "", NAME),
NAME = gsub(pattern = " city.*", "", NAME),
NAME = gsub(pattern = " town.*", "", NAME)) %>%
rename({{acs_geoid}} = GEOID,
muni_name = NAME)
}
Expand Down Expand Up @@ -510,3 +433,80 @@ devtools::install()
mapcdatakeys::all_muni_data_keys
devtools::document()
devtools::install()
data-raw/2010_block_to_geo_crosswalk.csv
read_csv("data-raw/2010_block_to_geo_crosswalk.csv")
library(tidyverse)
read_csv("data-raw/2010_block_to_geo_crosswalk.csv")
read_csv("data-raw/2020_block_to_geo_crosswalk.csv")
load("D:/Work/00_MAPC/mapcdatakeys/data/geog_xw_2010.rda")
geog_xw_2010 %>% write_csv("data-raw/2010_block_to_geo_crosswalk.csv")
geog_xw_2010 <- read_csv("data-raw/2010_block_to_geo_crosswalk.csv")
library(tidyverse)
# Build a lookup of county-subdivision GEOID -> municipality name from the ACS
# for one year. The GEOID column is renamed to 'cosub_5y' plus the last two
# digits of acs_year (e.g. acs_year = 2020 gives cosub_5y20).
acs_name <- function(acs_year){
  # Target name for the GEOID column, derived from characters 3-4 of the year.
  geoid_col <- paste0('cosub_5y', str_sub(as.character(acs_year), start = 3, end = 4))

  # Table B01001 at county-subdivision level for state FIPS code 25.
  raw_acs <- tidycensus::get_acs(geography = "county subdivision", table = "B01001",
                                 year = acs_year, state = 25)

  # Keep only the B01001_001 rows and drop the placeholder
  # "County subdivisions not defined" entries.
  munis <- filter(raw_acs, variable == "B01001_001",
                  !str_detect(NAME, "County subdivisions not defined"))

  # Cut NAME off at the first " Town", " city" or " town" suffix, in that order.
  munis <- mutate(munis,
                  NAME = gsub(pattern = " Town.*", "", NAME),
                  NAME = gsub(pattern = " city.*", "", NAME),
                  NAME = gsub(pattern = " town.*", "", NAME))

  munis %>%
    select(GEOID, NAME) %>%
    rename(muni_name = NAME) %>%
    rename_with(.cols = GEOID, .fn = function(x){geoid_col})
}
acs_name(acs_year = 2020)
# Parent table: municipal keys read from CSV, joined to the 2020 lookup returned by acs_name().
# NOTE(review): left_join() is called without `by`, so it joins on whichever columns the two
# tables share -- presumably muni_name; confirm against the CSV header.
all_muni_data_keys <- read_csv("data-raw/muni_data_keys_new.csv") %>%
left_join(acs_name(acs_year = 2020)) %>%
# Column order: muni_id, muni_name, cosub_5y20, any other cosub_5y* columns, then the
# remaining columns sorted by name (select() keeps the first position of repeated columns).
select(muni_id, muni_name, cosub_5y20, starts_with("cosub_5y"), sort(tidyselect::peek_vars())) %>% arrange(muni_id)
# Subset tables: each keeps muni_id and muni_name plus one group of related columns.
census_muni_keys <- all_muni_data_keys %>% select(muni_id, muni_name, starts_with("cosub"))
community_type <- all_muni_data_keys %>% select(muni_id, muni_name, cmtyp08_id, cmtyp08, cmsbt08_id, cmsbt08)
rpa_data_keys <- all_muni_data_keys %>% select(muni_id, muni_name, rpa_id, rpa_acr, rpa_name, rpa_alt)
mapc_data_keys <- all_muni_data_keys %>% select(muni_id, muni_name, mapc, mmc, nsc, subrg_id, subrg_nm, subrg_acr, subrg_alt)
mbta_data_keys <- all_muni_data_keys %>% select(muni_id, muni_name, rta_acr, rta_name, mbta, mbta14, mbta51, mbta_other, mbta_cmtyp)
# Helper: rename the hu_chng* / pop_chng* columns of a crosswalk table to the
# huch1020* / popch1020* names. Shared by the block, block-group and tract
# tables below, which previously each repeated this rename() call verbatim.
rename_change_cols <- function(xw){
  rename(xw,
         huch1020p = hu_chng_p,
         popch1020p = pop_chng_p,
         huch1020 = hu_chng,
         popch1020 = pop_chng)
}

# 2010 Geographies ----
# Block table: drop ct_area and weight, and rename the id column.
census_xw_bl10 <- read_csv("data-raw/bl10_2020xw.csv") %>%
  select(!c(ct_area, weight)) %>%
  rename(bl10_id = geoid10) %>%
  rename_change_cols()

census_xw_bg10 <- read_csv("data-raw/bg10_2020xw.csv") %>%
  rename_change_cols()

census_xw_ct10 <- read_csv("data-raw/ct10_2020xw.csv") %>%
  rename_change_cols()

# Municipal table: its columns differ from the tables above, so it is renamed
# separately after janitor::clean_names() has normalised the header.
census_xw_mu <- read_csv("data-raw/muni_2020xw.csv") %>%
  select(!seq_id) %>%
  janitor::clean_names() %>%
  rename(muni_name = basename,
         cosub_cn10 = geoid_2010,
         cosub_cn20 = geoid_2020,
         huch1020p = huch1020pc)

# 2020 Geographies ----
# Block table: drop ct_area and weight, and rename the id column.
census_xw_bl20 <- read_csv("data-raw/bl20_2010xw.csv") %>%
  select(!c(ct_area, weight)) %>%
  rename(bl20_id = geoid20) %>%
  rename_change_cols()

census_xw_bg20 <- read_csv("data-raw/bg20_2010xw.csv") %>%
  rename_change_cols()

census_xw_ct20 <- read_csv("data-raw/ct20_2010xw.csv") %>%
  rename_change_cols()
# Block-to-geography crosswalk tables for 2010 and 2020, read from CSV.
geog_xw_2010 <- read_csv("data-raw/2010_block_to_geo_crosswalk.csv")
geog_xw_2020 <- read_csv("data-raw/2020_block_to_geo_crosswalk.csv")
# Store every table defined above as package data via usethis::use_data();
# overwrite = TRUE replaces any previously saved copies.
usethis::use_data(census_muni_keys, all_muni_data_keys, community_type, rpa_data_keys, mapc_data_keys, mbta_data_keys,
census_xw_bl10, census_xw_bg10, census_xw_ct10,
census_xw_bl20, census_xw_bg20, census_xw_ct20,
geog_xw_2010, geog_xw_2020, census_xw_mu,
overwrite = TRUE)
devtools::document()
devtools::install()
mapcdatakeys::geog_xw_2010
2 changes: 1 addition & 1 deletion .Rproj.user/1D9B2893/pcs/files-pane.pper
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@
"ascending": false
}
],
"path": "D:/Work/00_MAPC/mapcdatakeys"
"path": "D:/Work/00_MAPC/mapcdatakeys/data-raw"
}
2 changes: 1 addition & 1 deletion .Rproj.user/1D9B2893/pcs/source-pane.pper
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"activeTab": 12,
"activeTab": 2,
"activeTabSourceWindow0": 0
}
2 changes: 1 addition & 1 deletion .Rproj.user/1D9B2893/pcs/windowlayoutstate.pper
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"left": {
"splitterpos": 428,
"splitterpos": 308,
"topwindowstate": "NORMAL",
"panelheight": 870,
"windowheight": 908
Expand Down
3 changes: 2 additions & 1 deletion .Rproj.user/1D9B2893/sources/prop/6AAAACD7
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
"source_window_id": "",
"Source": "Source",
"cursorPosition": "9,0",
"scrollLine": "0"
"scrollLine": "0",
"docOutlineVisible": "0"
}
4 changes: 2 additions & 2 deletions .Rproj.user/1D9B2893/sources/prop/84450EC6
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"tempName": "Untitled1",
"source_window_id": "",
"Source": "Source",
"cursorPosition": "70,28",
"scrollLine": "55",
"cursorPosition": "22,75",
"scrollLine": "0",
"useRainbowParens": "true"
}
4 changes: 2 additions & 2 deletions .Rproj.user/1D9B2893/sources/prop/ACF584DB
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"source_window_id": "",
"Source": "Source",
"cursorPosition": "32,37",
"scrollLine": "20"
"cursorPosition": "16,0",
"scrollLine": "0"
}
26 changes: 20 additions & 6 deletions data-raw/muni_data_keys.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
library(tidyverse)

# acs_name() returns the ACS municipal GEOID (a.k.a. cosub id) for the given ACS year.
# When a new ACS year is released, append one more left_join() for that year after the
# one already used in the definition of *all_muni_data_keys*. Each new year's values
# are left-joined on the muni_name variable, for example:
# left_join(acs_name(acs_year = 2020)) %>%
# left_join(acs_name(acs_year = 2021)) %>%
# left_join(acs_name(acs_year = 2022)) etc.

# Run this entire script before moving on to the next steps listed in keys_process_documentation.txt.

acs_name <- function(acs_year){
tidycensus::get_acs(geography = "county subdivision", table = "B01001", year = acs_year, state = 25) %>%
Expand All @@ -10,14 +18,18 @@ acs_name <- function(acs_year){
select(GEOID, NAME) %>% rename(muni_name = NAME) %>%
rename_with(., .cols = GEOID, .fn = function(x){paste0('cosub_5y', str_sub(as.character(acs_year),
start = 3, end = 4))})}
acs_name(acs_year = 2020)



all_muni_data_keys <- read_csv("data-raw/muni_data_keys_new.csv") %>%
# All Muni Data Keys ------------------------------------------------------
# Parent Table
all_muni_data_keys <-
read_csv("data-raw/muni_data_keys_new.csv") %>%
left_join(acs_name(acs_year = 2020)) %>%
select(muni_id, muni_name, cosub_5y20, starts_with("cosub_5y"), sort(tidyselect::peek_vars())) %>% arrange(muni_id)
#left_join(acs_name(acs_year = 2021)) %>%
select(muni_id, muni_name, starts_with("cosub_5y"), sort(tidyselect::peek_vars())) %>% arrange(muni_id) %>%
mutate(across(.fns = as.character))

# Subset Tables which are collections of similarly linked variables
census_muni_keys <- all_muni_data_keys %>% select(muni_id, muni_name, starts_with("cosub"))

community_type <- all_muni_data_keys %>% select(muni_id, muni_name, cmtyp08_id, cmtyp08, cmsbt08_id, cmsbt08)
Expand All @@ -29,7 +41,7 @@ mapc_data_keys <- all_muni_data_keys %>% select(muni_id, muni_name, mapc, mmc, n
mbta_data_keys <- all_muni_data_keys %>% select(muni_id, muni_name, rta_acr, rta_name, mbta, mbta14, mbta51, mbta_other, mbta_cmtyp)


# 2010 Geographies
# 2010 Geographies ----
census_xw_bl10 <- read_csv("data-raw/bl10_2020xw.csv") %>%
select(!c(ct_area, weight)) %>% rename(bl10_id = geoid10) %>%
rename(huch1020p = hu_chng_p,
Expand All @@ -53,7 +65,7 @@ census_xw_mu <- read_csv("data-raw/muni_2020xw.csv") %>% select(!seq_id) %>%
cosub_cn20 = geoid_2020,
huch1020p = huch1020pc)

# 2020 Geographies
# 2020 Geographies ----
census_xw_bl20 <- read_csv("data-raw/bl20_2010xw.csv") %>%
select(!c(ct_area, weight)) %>% rename(bl20_id = geoid20) %>%
rename(huch1020p = hu_chng_p,
Expand All @@ -71,10 +83,12 @@ census_xw_ct20 <- read_csv("data-raw/ct20_2010xw.csv") %>%
huch1020 = hu_chng,
popch1020 = pop_chng)

# Geographic Crosswalks ----
geog_xw_2010 <- read_csv("data-raw/2010_block_to_geo_crosswalk.csv")
geog_xw_2020 <- read_csv("data-raw/2020_block_to_geo_crosswalk.csv")


# If you define any new or additional tables, please add them to the list below.
usethis::use_data(census_muni_keys, all_muni_data_keys, community_type, rpa_data_keys, mapc_data_keys, mbta_data_keys,
census_xw_bl10, census_xw_bg10, census_xw_ct10,
census_xw_bl20, census_xw_bg20, census_xw_ct20,
Expand Down
Binary file modified data/all_muni_data_keys.rda
Binary file not shown.
Binary file modified data/census_muni_keys.rda
Binary file not shown.
Binary file modified data/community_type.rda
Binary file not shown.
Binary file modified data/mapc_data_keys.rda
Binary file not shown.
Binary file modified data/mbta_data_keys.rda
Binary file not shown.
Binary file modified data/rpa_data_keys.rda
Binary file not shown.
55 changes: 55 additions & 0 deletions keys_process_documentation.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
Creating and Updating {mapcdatakeys} package

2022-07-13

Aseem Deodhar

Introduction
The {mapcdatakeys} package contains various municipal- and census-geography-level keys in an official, standardized format that is easily accessible without depending on static files. This document describes the process for the annual update: municipal-level GEOIDs *may* change with each year's ACS release, and the package needs to be updated to include them.

While general package development requires a lot of steps and checks, this document will only highlight the steps required for this annual update. Please refer to chapter 12 of the rstudio4edu book developed by Alison Hill and Desiree De Leon for a detailed description on developing and maintaining data packages for R: https://rstudio4edu.github.io/rstudio4edu-book/data-pkg.html

Step 0: Required packages:

Make sure the following packages are installed in your R environment before proceeding:

#install.packages(c("usethis", "devtools", "roxygen2"))
library(usethis)
library(roxygen2)
library(devtools)


Step 1: Generate New Data
The R script muni_data_keys.R in the data-raw folder creates the data tables included in the package. It contains instructions on how to edit and run it to add the latest annual municipal GEOID values.

Run this entire file and follow instructions if you want to add or make any changes.

Step 2: Check for Errors and Warnings:
Run the following code in the Console:

devtools::check(document=FALSE)

This will take a bit of time. If there are no errors or warnings, proceed. You may get some notes; these can be ignored. If there are any errors or warnings, read the message, fix the problem, and run the check again. Repeat until no errors or warnings remain.

Step 3: Documentation

Unless you're creating a new package, this step is not necessary.

Step 4: Installation

This step installs the package in your system.
Run the following code in the Console:

devtools::install()

You can test it by calling mapcdatakeys::all_muni_data_keys and checking that the most recently added column is present.

Step 5: Github

Push to the MAPC/mapcdatakeys Github repo!

Step 6: Remote Installation
To install this package on any system, run the following code in that system:

# install.packages("remotes")
remotes::install_github("MAPC/mapcdatakeys")

0 comments on commit 7128868

Please sign in to comment.