-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path02_used_addresses.R
118 lines (94 loc) · 4.14 KB
/
02_used_addresses.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#########################################################################
# Name of file - 02_used_addresses.R
#
# Type - Reproducible Analytical Pipeline (RAP)
# Written/run on - RStudio Desktop
# Version of R - 4.2.2
#
# Description - Imports all addresses sampled in the last four years
# and exports them in a single file.
#########################################################################
# Start from an empty workspace: remove all objects currently defined
rm(list = ls())

### 0 - Setup ----

# The setup script loads all required packages and functions and
# executes the config.R script.
source(here::here("scripts", "00_setup.R"))

# Tell the user which script is being executed
message(title("Execute used addresses script"))
### 1 - Import files ----

# Tell the user that the import stage has started
message(normal("Import data"))

# Collector for the data frames of previously sampled addresses;
# every import step appends its result to this list
list_used_addresses <- list()

# Previous samples stored as SAS files: read only the UDPRN column of
# each file and record the path it was read from in a `filename` column.
# The step is skipped when `previous.sas.samples.path` is not defined.
if (exists("previous.sas.samples.path")) {
  prev.sas.samples <- pblapply(
    seq_along(previous.sas.samples.path),
    function(i) {
      transform(
        haven::read_sas(previous.sas.samples.path[i], col_select = "UDPRN"),
        filename = previous.sas.samples.path[i]
      )
    }
  )

  # Stack the per-file data frames, then pass the result through the
  # project helper clean_names_modified()
  prev.sas.samples <- do.call(rbind, prev.sas.samples)
  prev.sas.samples <- clean_names_modified(prev.sas.samples)

  list_used_addresses <- append(list_used_addresses, list(prev.sas.samples))
}
# Previous samples listed in `prev.csv` (described in the original note as
# overwritten SAS files): import every entry with
# import_multiple_files_csv() and stack the results row-wise.
# The step is skipped when `prev.csv` is not defined.
if (exists("prev.csv")) {
  prev.csv.samples <- do.call(
    "rbind",
    pblapply(prev.csv, import_multiple_files_csv)
  )
  list_used_addresses <- append(list_used_addresses, list(prev.csv.samples))
}
# Import rds files (i.e., samples drawn with RAP).
# The step is skipped when `previous.rap.samples.path` is not defined.
if (exists("previous.rap.samples.path")) {
  # Search the data share recursively for every file whose name matches
  # any of the configured patterns; the patterns are joined into a single
  # regular-expression alternation ("a|b|c") and matched case-insensitively
  files_prev_rap <- list.files(
    path = datashare.path,
    pattern = paste(previous.rap.samples.path, collapse = "|"),
    full.names = TRUE,
    recursive = TRUE,
    ignore.case = TRUE
  )

  # Fail with a clear message when nothing matched; otherwise the row bind
  # below yields NULL and select() stops with an unrelated-looking error
  if (length(files_prev_rap) == 0) {
    stop(
      "No RAP sample files matching 'previous.rap.samples.path' were ",
      "found in 'datashare.path'.",
      call. = FALSE
    )
  }

  # Read each matched file, tag its rows with the file path, stack the
  # results and keep only the UDPRN and the source file name
  prev.rap.samples.data <- do.call(
    rbind,
    pblapply(
      seq_along(files_prev_rap),
      function(x) {
        transform(
          readRDS(files_prev_rap[x]),
          filename = files_prev_rap[x]
        )
      }
    )
  ) %>%
    select(udprn, filename)

  list_used_addresses <- c(list_used_addresses, list(prev.rap.samples.data))
}
# Combine UDPRNs from all sources into a single data frame.
# If none of the import steps ran, the list is empty and rbind would
# return NULL, which only fails later inside mutate(); stop here with an
# explicit message instead.
if (length(list_used_addresses) == 0) {
  stop(
    "No previously sampled addresses were imported: none of ",
    "'previous.sas.samples.path', 'prev.csv' or ",
    "'previous.rap.samples.path' is defined.",
    call. = FALSE
  )
}
usedaddresses <- do.call("rbind", list_used_addresses)
### 2 - Process data ----

# Tell the user that the processing stage has started
message(normal("Process data"))

# Derive the survey name from the source file name. The patterns are
# tested case-insensitively and in the order listed, so the first match
# wins; rows matching none of the patterns get NA.
usedaddresses <- mutate(
  usedaddresses,
  survey = case_when(
    str_detect(filename, regex("scjs", ignore_case = TRUE)) ~ "scjs",
    str_detect(filename, regex("shes", ignore_case = TRUE)) ~ "shes",
    str_detect(filename, regex("shs", ignore_case = TRUE)) ~ "shs",
    str_detect(filename, regex("ssas", ignore_case = TRUE)) ~ "ssas"
  )
)
# Keep one row per UDPRN and survey combination and select the required
# variables. count() returns a result with the same grouping as its input,
# so ungroup() is needed to export a plain tibble rather than one that is
# still grouped by udprn and survey (one group per row).
usedaddresses <- usedaddresses %>%
  group_by(udprn, survey) %>%
  count() %>%
  ungroup() %>%
  select(udprn, survey)
### 3 - Export used addresses ----

# Tell the user that the export stage has started
message(normal("Export used addresses"))

# Save the used addresses as a gzip-compressed rds file in the lookups
# folder; the file name is prefixed with the current date
write_rds(usedaddresses,
          here("lookups", paste0(Sys.Date(), "_usedaddresses.rds")),
          compress = "gz")
### END OF SCRIPT ####

# Remove all objects from the workspace once the export has finished
rm(list = ls())