Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

changes relating to issue 237 #238

Closed
wants to merge 13 commits into from
Closed
26 changes: 15 additions & 11 deletions R/dl.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,23 +45,25 @@ dl_stats19 = function(year = NULL,
ask = FALSE,
silent = FALSE,
timeout = 600) {
# download what the user wanted
current_timeout = getOption("timeout")
if (current_timeout < timeout) {
options(timeout = timeout)
on.exit(options(timeout = current_timeout))
}
## generate file name if one is not specified
if (is.null(file_name)) {
fnames = find_file_name(years = year, type = type)
nfiles_found = length(fnames)
## test for multliple files
many_found = nfiles_found > 1
## decide which one to import, as the escooters file seems to be unique for 2020 it defaults to the file names that matches 2017-2022
if (many_found) {
if (interactive()) {
if (isTRUE(ask)) {
fnames = select_file(fnames)
} else {
if (isFALSE(silent)) {
message("More than one file found, selecting the first.")
}
fnames = fnames[1]
message(paste0("More than one file found, selecting ", fnames[2], " ignoring ", fnames[-2]))
fnames = fnames[2]
}
}
zip_url = get_url(fnames)
Expand Down Expand Up @@ -96,11 +98,11 @@ dl_stats19 = function(year = NULL,
resp = ""
}
if (resp != "" &
!grepl(
pattern = "yes|y",
x = resp,
ignore.case = TRUE
)) {
!grepl(
pattern = "yes|y",
x = resp,
ignore.case = TRUE
)) {
stop("Stopping as requested", call. = FALSE)
}
}
Expand All @@ -114,6 +116,8 @@ dl_stats19 = function(year = NULL,
if (isFALSE(silent)) {
message("Data saved at ", destfile)
}
return(NULL)

}

return(destfile)
}
73 changes: 57 additions & 16 deletions R/get.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
#' }
#' }
#' }

get_stats19 = function(year = NULL,
type = "collision",
data_dir = get_data_directory(),
Expand All @@ -97,6 +98,7 @@ get_stats19 = function(year = NULL,
silent = FALSE,
output_format = "tibble",
...) {

if(!exists("type")) {
stop("Type is required", call. = FALSE)
}
Expand All @@ -122,31 +124,35 @@ get_stats19 = function(year = NULL,
}

# download what the user wanted
dl_stats19(year = year,
# this is saved in the directory defined by data_dir
file_path <- dl_stats19(year = year,
type = type,
data_dir = data_dir,
file_name = file_name,
ask = ask,
silent = silent)

# make sure read_in doesn't have any left over variables
read_in = NULL
# read in
# read in from the file path defined above
if(grepl(type, "vehicles", ignore.case = TRUE)){
read_in = read_vehicles(
year = year,
data_dir = data_dir,
format = format)
if(format) {
ve = format_vehicles(ve)
} else {
ve
}
} else if(grepl(type, "casualty", ignore.case = TRUE)) {
read_in = read_casualties(
year = year,
data_dir = data_dir,
format = format)
if(format) {
ve = format_casualties(ve)
} else {
ve
}
} else { # inline with type = "collision" by default
read_in = read_collisions(
year = year,
data_dir = data_dir,
format = format,
silent = silent)
}
if(format) {
ve = format_collisions(ve)
} else {
ve
}

# transform read_in into the desired format
if (output_format != "tibble") {
Expand All @@ -159,5 +165,40 @@ get_stats19 = function(year = NULL,
}

read_in
}

}

#' Convert file names to urls
#'
#' @details
#' This function returns urls that allow data to be downloaded from the pages:
#'
#' https://data.dft.gov.uk/road-accidents-safety-data/RoadSafetyData_2015.zip
#'
#' Last updated: October 2020.
#' Files available from the s3 url in the default `domain` argument.
#'
#' @param file_name Optional file name to add to the url returned (empty by default)
#' @param domain The domain from where the data will be downloaded
#' @param directory The subdirectory of the url
#' @examples
#' # get_url(find_file_name(1985))
get_url = function(file_name = "",
domain = "https://data.dft.gov.uk",
directory = "road-accidents-safety-data"
) {
path = file.path(domain, directory, file_name)
path
}

#' Get data download dir
#' @examples
#' # get_data_directory()
get_data_directory = function() {
data_directory = Sys.getenv("STATS19_DOWNLOAD_DIRECTORY")
if(data_directory != "") {
return(data_directory)
}
tempdir()
}
48 changes: 9 additions & 39 deletions R/utils.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,4 @@
#' Convert file names to urls
#'
#' @details
#' This function returns urls that allow data to be downloaded from the pages:
#'
#' https://data.dft.gov.uk/road-accidents-safety-data/RoadSafetyData_2015.zip
#'
#' Last updated: October 2020.
#' Files available from the s3 url in the default `domain` argument.
#'
#' @param file_name Optional file name to add to the url returned (empty by default)
#' @param domain The domain from where the data will be downloaded
#' @param directory The subdirectory of the url
#' @examples
#' # get_url(find_file_name(1985))
get_url = function(file_name = "",
domain = "https://data.dft.gov.uk",
directory = "road-accidents-safety-data"
) {
path = file.path(domain, directory, file_name)
path
}


# current_year()
current_year = function() as.integer(format(format(Sys.Date(), "%Y")))
Expand Down Expand Up @@ -47,7 +26,7 @@ find_file_name = function(years = NULL, type = NULL) {
result = result[!grepl(pattern = "1979", x = result)]
}
result = result[!grepl(pattern = "adjust", x = result)]
result = result[grepl(pattern = years, x = result)]
result = result[grepl(pattern = "1979", x = result)]
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 to that, although will it still work for non 1979 (all) years? I guess so but would need to test..


# see https://github.com/ITSLeeds/stats19/issues/21
Expand Down Expand Up @@ -97,11 +76,12 @@ locate_files = function(data_dir = get_data_directory(),
file_names = tools::file_path_sans_ext(file_names)
dir_files = list.dirs(data_dir)
# check is any file names match those on disk
files_on_disk = vapply(file_names, function(i) any(grepl(i, dir_files)),
logical(1))
if(any(files_on_disk)) { # return those on disk which match file names
files_on_disk = names(files_on_disk[files_on_disk])
}
files_on_disk <- list.files(dir_files, pattern = file_names, full.names = TRUE)
# files_on_disk = vapply(file_names, function(i) any(grepl(i, dir_files)),
# logical(1))
# if(any(files_on_disk)) { # return those on disk which match file names
# files_on_disk = names(files_on_disk[files_on_disk])
# }
return(files_on_disk)
}

Expand Down Expand Up @@ -152,6 +132,7 @@ locate_one_file = function(filename = NULL,
return("More than one csv file found.")
return(res)
}

utils::globalVariables(
c("stats19_variables", "stats19_schema", "skip", "accidents_sample",
"accidents_sample_raw", "casualties_sample", "casualties_sample_raw",
Expand Down Expand Up @@ -183,17 +164,6 @@ select_file = function(fnames) {
fnames[selection]
}

#' Get data download dir
#' @examples
#' # get_data_directory()
get_data_directory = function() {
data_directory = Sys.getenv("STATS19_DOWNLOAD_DIRECTORY")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 to moving everything into a more accessible place.

if(data_directory != "") {
return(data_directory)
}
tempdir()
}

#' Set data download dir
#'
#' Handy function to manage `stats19` package underlying environment
Expand Down
Loading