Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A few fixes for some issues #244

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 16 additions & 11 deletions R/dl.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,23 +45,26 @@ dl_stats19 = function(year = NULL,
ask = FALSE,
silent = FALSE,
timeout = 600) {

# download what the user wanted
current_timeout = getOption("timeout")
if (current_timeout < timeout) {
options(timeout = timeout)
on.exit(options(timeout = current_timeout))
}
## generate file name if one is not specified
if (is.null(file_name)) {
fnames = find_file_name(years = year, type = type)
nfiles_found = length(fnames)
## test for multliple files
many_found = nfiles_found > 1
## decide which one to import, as the escooters file seems to be unique for 2020 it defaults to the file names that matches 2017-2022
if (many_found) {
if (interactive()) {
if (isTRUE(ask)) {
fnames = select_file(fnames)
} else {
if (isFALSE(silent)) {
message("More than one file found, selecting the first.")
}
fnames = fnames[1]
message(paste0("More than one file found, selecting ", fnames[2], " ignoring ", fnames[-2]))
fnames = fnames[2]
}
}
zip_url = get_url(fnames)
Expand Down Expand Up @@ -96,11 +99,11 @@ dl_stats19 = function(year = NULL,
resp = ""
}
if (resp != "" &
!grepl(
pattern = "yes|y",
x = resp,
ignore.case = TRUE
)) {
!grepl(
pattern = "yes|y",
x = resp,
ignore.case = TRUE
)) {
stop("Stopping as requested", call. = FALSE)
}
}
Expand All @@ -114,6 +117,8 @@ dl_stats19 = function(year = NULL,
if (isFALSE(silent)) {
message("Data saved at ", destfile)
}
return(NULL)

}

return(destfile)
}
79 changes: 53 additions & 26 deletions R/get.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,8 @@
#' @examples
#' \donttest{
#' if(curl::has_internet()) {
#' col = get_stats19(year = 2022, type = "collision")
#' cas2 = get_stats19(year = 2022, type = "casualty")
#' veh = get_stats19(year = 2022, type = "vehicle")
#' class(col)
#' x = get_stats19(2022, silent = TRUE, format = TRUE)
#' class(x)
#' # data.frame output
#' x = get_stats19(2022, silent = TRUE, output_format = "data.frame")
#' class(x)
Expand Down Expand Up @@ -90,6 +88,7 @@
#' }
#' }
#' }

get_stats19 = function(year = NULL,
type = "collision",
data_dir = get_data_directory(),
Expand All @@ -99,9 +98,9 @@ get_stats19 = function(year = NULL,
silent = FALSE,
output_format = "tibble",
...) {
# Set type to "collision" if it's "accident" or similar:
if (grepl("acc", x = type, ignore.case = TRUE)) {
type = "collision"

if(!exists("type")) {
stop("Type is required", call. = FALSE)
}
if (!output_format %in% c("tibble", "data.frame", "sf", "ppp")) {
warning(
Expand All @@ -113,7 +112,7 @@ get_stats19 = function(year = NULL,
)
output_format = "tibble"
}
if (grepl(type, "casualties", ignore.case = TRUE) && output_format %in% c("sf", "ppp")) {
if (grepl("casualties", type, ignore.case = TRUE) && output_format %in% c("sf", "ppp")) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (grepl("casualties", type, ignore.case = TRUE) && output_format %in% c("sf", "ppp")) {
if (grepl("cas", type, ignore.case = TRUE) && output_format %in% c("sf", "ppp")) {

And save for "veh" and "acc|col" so more things match.

warning(
"You cannot select output_format = 'sf' or output_format = 'ppp' when type = 'casualties'.\n",
"Casualties do not have a spatial dimension.\n",
Expand All @@ -125,31 +124,37 @@ get_stats19 = function(year = NULL,
}

# download what the user wanted
dl_stats19(year = year,
# this is saved in the directory defined by data_dir
file_path <- dl_stats19(year = year,
type = type,
data_dir = data_dir,
file_name = file_name,
ask = ask,
silent = silent)

## read in file
ve = read_ve_ca(path = file_path)
## read in set to NULL
read_in = NULL
# read in
if(grepl("veh", x = type, ignore.case = TRUE)){
read_in = read_vehicles(
year = year,
data_dir = data_dir,
format = format)
} else if(grepl("cas", x = type, ignore.case = TRUE)) {
read_in = read_casualties(
year = year,
data_dir = data_dir,
format = format)
# read in from the file path defined above
if(grepl("vehicles", type, ignore.case = TRUE)){
if(format) {
read_in = format_vehicles(ve)
} else {
read_in = ve
}
} else if(grepl("casualty", type, ignore.case = TRUE)) {
if(format) {
read_in = format_casualties(ve)
} else {
read_in = ve
}
} else { # inline with type = "collision" by default
read_in = read_collisions(
year = year,
data_dir = data_dir,
format = format,
silent = silent)
}
if(format) {
read_in = format_collisions(ve)
} else {
read_in = ve
}

# transform read_in into the desired format
if (output_format != "tibble") {
Expand All @@ -162,5 +167,27 @@ get_stats19 = function(year = NULL,
}

read_in
}

}

#' Get data download dir
#' @examples
#' # get_data_directory()
get_data_directory = function() {
data_directory = Sys.getenv("STATS19_DOWNLOAD_DIRECTORY")
if(data_directory != "") {
return(data_directory)
}
tempdir()
}

#' Set data download dir
#'
#' Handy function to manage `stats19` package underlying environment
#' variable. If run interactively it makes sure user does not change
#' directory by mistatke.
#'
#' @param data_path valid existing path to save downloaded files in.
#' @examples
#' # set_data_directory("MY_PATH")
33 changes: 13 additions & 20 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
get_url = function(file_name = "",
domain = "https://data.dft.gov.uk",
directory = "road-accidents-safety-data"
) {
) {
path = file.path(domain, directory, file_name)
path
}
Expand All @@ -43,11 +43,12 @@ current_year = function() as.integer(format(format(Sys.Date(), "%Y")))
find_file_name = function(years = NULL, type = NULL) {
result = unlist(stats19::file_names, use.names = FALSE)
if(!is.null(years)) {
if(min(years) >= 2016) {
result = result[!grepl(pattern = "1979", x = result)]
if(min(years) >= 2018) {
result = result[grepl(pattern = years, x = result)]
}
if(min(years) <= 2017) {
result = result[!grepl(pattern = "adjust", x = result)]
result = result[grepl(pattern = years, x = result)]
result = result[grepl(pattern = "1979", x = result)]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

}

# see https://github.com/ITSLeeds/stats19/issues/21
Expand All @@ -69,6 +70,7 @@ find_file_name = function(years = NULL, type = NULL) {
message("No files found. Check the stats19 website on data.gov.uk")
}
unique(result)
}
}

#' Locate a file on disk
Expand Down Expand Up @@ -97,11 +99,12 @@ locate_files = function(data_dir = get_data_directory(),
file_names = tools::file_path_sans_ext(file_names)
dir_files = list.dirs(data_dir)
# check is any file names match those on disk
files_on_disk = vapply(file_names, function(i) any(grepl(i, dir_files)),
logical(1))
if(any(files_on_disk)) { # return those on disk which match file names
files_on_disk = names(files_on_disk[files_on_disk])
}
files_on_disk <- list.files(dir_files, pattern = file_names, full.names = TRUE)
# files_on_disk = vapply(file_names, function(i) any(grepl(i, dir_files)),
# logical(1))
# if(any(files_on_disk)) { # return those on disk which match file names
# files_on_disk = names(files_on_disk[files_on_disk])
# }
return(files_on_disk)
}

Expand Down Expand Up @@ -152,6 +155,7 @@ locate_one_file = function(filename = NULL,
return("More than one csv file found.")
return(res)
}

utils::globalVariables(
c("stats19_variables", "stats19_schema", "skip", "accidents_sample",
"accidents_sample_raw", "casualties_sample", "casualties_sample_raw",
Expand Down Expand Up @@ -183,17 +187,6 @@ select_file = function(fnames) {
fnames[selection]
}

#' Get data download dir
#' @examples
#' # get_data_directory()
get_data_directory = function() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why remove this? It's needed in other functions.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I meant to move to get.R (as it took me a while to find where it was).
Have added to get.R

data_directory = Sys.getenv("STATS19_DOWNLOAD_DIRECTORY")
if(data_directory != "") {
return(data_directory)
}
tempdir()
}

#' Set data download dir
#'
#' Handy function to manage `stats19` package underlying environment
Expand Down
Loading