Skip to content

Commit

Permalink
Merge pull request #15 from MattCowgill/dev
Browse files Browse the repository at this point in the history
stable 0.0.2
  • Loading branch information
MattCowgill authored Oct 30, 2020
2 parents 9bd0042 + 578cc1c commit 7528fed
Show file tree
Hide file tree
Showing 65 changed files with 3,173 additions and 320 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/R-CMD-check-windows.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
# Trigger a check on every push and every pull request, on all branches.
on: [push, pull_request]

name: R-CMD-check-windows

jobs:
  R-CMD-check:
    # Windows-only companion to the main R-CMD-check workflow.
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v2
      - uses: r-lib/actions/setup-r@master
      # Install the package's dependencies plus the tooling needed to run
      # R CMD check; the run block is executed by Rscript, not the shell.
      - name: Install dependencies
        run: |
          install.packages(c("remotes", "rcmdcheck"))
          remotes::install_deps(dependencies = TRUE)
        shell: Rscript {0}
      # Fail the job only on check *errors* (warnings/notes are tolerated).
      - name: Check
        run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
        shell: Rscript {0}
8 changes: 1 addition & 7 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
on:
push:
branches:
- master
pull_request:
branches:
- master
on: [push, pull_request]

name: R-CMD-check

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.Rproj.user
.Rhistory
.RData
inst/doc
14 changes: 8 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: readrba
Title: Download and Tidy Statistical Tables from the Reserve Bank of Australia
Version: 0.0.1
Version: 0.0.2
Authors@R:
person(given = "Matt",
family = "Cowgill",
Expand All @@ -18,19 +18,21 @@ URL: https://github.com/MattCowgill/readrba
BugReports: https://github.com/MattCowgill/readrba/issues
Suggests:
covr,
testthat
testthat,
knitr,
rmarkdown,
ggplot2
Imports:
readxl,
tidyr,
tidyr (>= 1.0.0),
dplyr,
purrr,
rlang,
xml2,
rvest,
tibble,
stringr,
curl,
httr,
lubridate
Depends:
R (>= 2.10)
R (>= 3.5.0)
VignetteBuilder: knitr
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(browse_rba_series)
export(browse_rba_tables)
export(read_rba)
export(read_rba_local)
export(read_rba_seriesid)
export(tidy_rba)
importFrom(dplyr,"%>%")
importFrom(rlang,.data)
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# readrba 0.0.2
* Can now use `series_id` argument to `read_rba()` to fetch based on series ID(s)
* Examine available RBA data using `browse_rba_series()` and `browse_rba_tables()`
* `cur_hist = "all"` no longer allowed
* Some non-standard tables now able to be tidied
* Added a `NEWS.md` file to track changes to the package.
56 changes: 56 additions & 0 deletions R/browse_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#' Browse available RBA data series
#'
#' @param search_string Word or phrase to search for, such as "gold" or "commodity" or "labour".
#' If left as `""`, the function will return all series.
#' @return A `data.frame` (`tbl_df`) containing RBA data series/tables that match the `search_string`.
#'
#' @export
#' @examples
#'
#' # Find series that contain 'unemployment'
#' browse_rba_series("unemployment")
#'
#' # Or all labour-related series
#' browse_rba_series("labour")
#'
#' # Or those related to commodities
#' browse_rba_series("commodities")
#'
#' # Or all series
#' browse_rba_series()
#'
#' # Or just look for tables that contain the word 'labour'
#' browse_rba_tables("labour")
#'
#' # Or all tables
#' browse_rba_tables()
#' @rdname browse_rba
browse_rba_series <- function(search_string = "") {
  # Delegate to the shared browse helper, pointed at the internal
  # lookup table of individual RBA data series.
  do_rba_browse(search_string, lookup_table = series_list)
}

#' @export
#' @rdname browse_rba
browse_rba_tables <- function(search_string = "") {
  # Delegate to the shared browse helper, pointed at the internal
  # lookup table of RBA statistical tables.
  do_rba_browse(search_string, lookup_table = table_list)
}

#' @noRd
#' @keywords internal
#' @noRd
#' @keywords internal
do_rba_browse <- function(search_string, lookup_table) {
  # Case-insensitive regex match of `search_string` against one column.
  matches_search <- function(column) {
    grepl(search_string, column, ignore.case = TRUE)
  }

  # TRUE for each row in which at least one column matched.
  any_hit_in_row <- function(hit_matrix) {
    rowSums(hit_matrix) > 0
  }

  dplyr::filter(
    lookup_table,
    any_hit_in_row(dplyr::across(dplyr::everything(), matches_search))
  )
}
2 changes: 2 additions & 0 deletions R/download_rba.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ download_rba <- function(urls, path = tempdir()) {
url = urls,
mode = "wb",
destfile = filenames_with_path,
quiet = FALSE,
method = "libcurl",
cacheOK = FALSE
)
Expand All @@ -29,6 +30,7 @@ download_rba <- function(urls, path = tempdir()) {
.x = urls,
.y = filenames,
.f = utils::download.file,
quiet = FALSE,
mode = "wb",
cacheOK = FALSE
)
Expand Down
7 changes: 5 additions & 2 deletions R/get_rba_urls.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
#' @param table_no Character vector of RBA table number(s), such as `"A1"`
#' or `c("A1", "g1")`.
#' Not case-sensitive.
#' @param cur_hist Either `"current"` (the default) or `"historical"`.
#' @param cur_hist Either `"current"` (the default) or `"historical"`. Must be
#' length 1.
#' Choose between getting
#' the URLs corresponding to current RBA tables, or historical tables.
#' @return Vector of URL(s) corresponding to the supplied `table_no`
#' @noRd
#' @keywords internal
get_rba_urls <- function(table_no, cur_hist = "current") {
stopifnot(length(cur_hist) == 1)
stopifnot(cur_hist %in% c("current", "historical"))

table_no <- tolower(table_no)
Expand All @@ -27,7 +30,7 @@ get_rba_urls <- function(table_no, cur_hist = "current") {

urls_work <- url_exists(urls)

if (any(is.na(urls)) | any(urls_work == FALSE)) {
if (any(is.na(urls)) || any(urls_work == FALSE)) {
# Re-scrape the list of URLs if some cannot be matched
new_table_list <- scrape_table_list(cur_hist)
urls <- get_urls(new_table_list, table_no)
Expand Down
152 changes: 152 additions & 0 deletions R/prelim_tidy_rba.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# Functions to prepare Excel sheets with non-standard formatting prior
# to tidying them using the `tidy_rba_normal()` function

#' Function to wrangle historical yields data to get it in the standard format
#' Called indirectly from tidy_rba()
#' @param excel_sheet Excel sheet with no tidying done
#' @keywords internal

prelim_tidy_old_f16 <- function(excel_sheet) {
  # Rebuild the first ten header rows of the historical F16 sheet into the
  # standard RBA metadata layout (Title / Description / Frequency / Type /
  # Units / Source / Publication date / Series ID) so the result can be
  # passed to the normal tidying routine.
  n_col <- ncol(excel_sheet)

  # Row 3 of the raw sheet holds the bond issue ID (e.g. "TB33");
  # column 1 is the date column, so metadata spans columns 2..n_col.
  issue_id <- as.character(excel_sheet[3, 2:n_col])

  # The first two characters of the issue ID encode the bond type.
  # The trailing spaces are deliberate: paste0() below relies on them.
  bond_type <- dplyr::case_when(
    substr(issue_id, 1, 2) == "TB" ~
      "Treasury Bonds ",
    substr(issue_id, 1, 2) == "TI" ~
      "Treasury Indexed Bonds ",
    TRUE ~ NA_character_
  )

  # as.character() above converts missing cells to the literal string "NA",
  # hence the comparison against "NA" rather than is.na().
  bond_num <- ifelse(issue_id == "NA",
    NA_character_,
    substr(issue_id, 3, nchar(issue_id))
  )

  # Fixed rows of the old F16 layout: coupon (row 4), maturity (row 5),
  # last-updated date (row 8), source (row 9), series mnemonic (row 10).
  coupon <- as.character(excel_sheet[4, 2:n_col])
  maturity <- as.character(excel_sheet[5, 2:n_col])
  last_updated <- as.character(excel_sheet[8, 2:n_col])
  source <- as.character(excel_sheet[9, 2:n_col])
  mnemonic <- as.character(excel_sheet[10, 2:n_col])

  new_title <- c(
    "Title",
    bond_type
  )

  # Convert an Excel date serial number (held as text) to "dd-Mon-yyyy".
  # Excel's day-zero on Windows is 1899-12-30.
  excel_date_to_string <- function(x) {
    x <- ifelse(x == "NA", NA_character_, x)
    x <- as.numeric(x)
    x <- as.Date(x, origin = "1899-12-30")
    x <- format(x, "%d-%b-%Y")
  }

  # Description combines bond type, number, coupon (as a percentage) and
  # maturity date, separated by newlines.
  new_description <- c(
    "Description",
    paste0(
      bond_type,
      bond_num, "\n",
      suppressWarnings(as.numeric(coupon)) * 100, "%\n",
      excel_date_to_string(maturity)
    )
  )

  # Any missing component leaves a literal "NA" in the pasted string;
  # blank out those descriptions entirely.
  new_description <- ifelse(grepl("NA", new_description),
    NA_character_,
    new_description
  )

  new_frequency <- c("Frequency", rep("Daily", n_col - 1))
  new_type <- c("Type", rep("Original", n_col - 1))
  new_units <- c("Units", rep("Units", n_col - 1))
  new_source <- c("Source", source)
  new_pub_date <- c("Publication date", last_updated)
  new_series_id <- c("Series ID", mnemonic)

  # Stack the eight metadata vectors as rows: give each vector identical
  # temporary names ("V0".."V<n-1>") so bind_rows() aligns them by column.
  new_metadata <- purrr::map(
    list(
      new_title, new_description, new_frequency, new_type,
      new_units, new_source, new_pub_date, new_series_id
    ),
    ~ setNames(.x, paste0("V", 0:(n_col - 1)))
  ) %>%
    dplyr::bind_rows()

  names(new_metadata) <- names(excel_sheet)

  # Swap the original ten header rows for the standardised metadata block.
  new_sheet <- rbind(new_metadata, excel_sheet[-(1:10), ])

  new_sheet
}

#' Function to wrangle historical F2 table to get it in the standard format
#' Called indirectly from tidy_rba()
#' @param excel_sheet Excel sheet with no tidying done
#' @keywords internal

prelim_tidy_old_f2 <- function(excel_sheet) {
  # Rebuild the first ten header rows of the historical F2 sheet into the
  # standard RBA metadata layout (Title / Description / Frequency / Type /
  # Units / Source / Publication date / Series ID) so the result can be
  # passed to the normal tidying routine.

  # fill_blanks() adapted from {zoo} - carries the last non-NA value
  # forward; note that this version also removes leading NAs.
  fill_blanks <- function(x) {
    L <- !is.na(x)
    c(x[L])[cumsum(L)]
  }

  # Row 3 holds the issuer name only above the first column of each span
  # (merged cells); fill it rightwards across the gaps.
  issuer <- as.character(excel_sheet[3, ])
  issuer <- fill_blanks(issuer)
  # fixed = TRUE: plain string replacement, not a regex.
  # (Was `fixed = T`; T is a reassignable variable, TRUE is not.)
  issuer <- gsub("Australian Government", "Commonwealth Government", issuer,
    fixed = TRUE
  )

  # Row 4 holds the maturity, e.g. "2 yrs" -> "2 years".
  maturity <- as.character(excel_sheet[4, ])
  maturity <- maturity[!is.na(maturity)]
  maturity <- gsub(" yrs", " years", maturity)

  # Titles read e.g. "Commonwealth Government 2 year bond".
  title <- paste(issuer, maturity, "bond", sep = " ")
  title <- gsub("years", "year", title)
  new_title <- c("Title", title)

  description <- paste("Yields on",
    issuer, "bonds,",
    maturity, "maturity",
    sep = " "
  )
  new_description <- c("Description", description)

  # Infer frequency from the average spacing of the date column
  # (Excel serials; day-zero on Windows is 1899-12-30): roughly one day
  # per row means daily data, otherwise monthly.
  n_rows <- nrow(excel_sheet)
  n_col <- ncol(excel_sheet)
  max_date <- as.Date(as.numeric(excel_sheet[n_rows, 1]), origin = "1899-12-30")
  min_date <- as.Date(as.numeric(excel_sheet[11, 1]), origin = "1899-12-30")
  approx_days_per_row <- trunc(as.numeric(max_date - min_date) / n_rows)

  frequency <- ifelse(approx_days_per_row == 1, "Daily", "Monthly")
  new_frequency <- c("Frequency", rep(frequency, n_col - 1))

  new_type <- c("Type", rep("Original", n_col - 1))

  new_units <- c("Units", rep("Per cent per annum", n_col - 1))

  # Rows 8-10 of the old layout: publication date, source, mnemonic.
  new_source <- as.character(excel_sheet[9, ])

  pub_date <- as.character(excel_sheet[8, ])
  new_pub_date <- gsub("Last updated:", "Publish date", pub_date)

  series_id <- as.character(excel_sheet[10, ])
  new_series_id <- gsub("Mnemonic", "Series ID", series_id)

  # Stack the eight metadata vectors as rows: give each vector identical
  # temporary names ("V0".."V<n-1>") so bind_rows() aligns them by column.
  new_metadata <- purrr::map(
    list(
      new_title, new_description, new_frequency, new_type,
      new_units, new_source, new_pub_date, new_series_id
    ),
    ~ setNames(.x, paste0("V", 0:(n_col - 1)))
  ) %>%
    dplyr::bind_rows()

  names(new_metadata) <- names(excel_sheet)

  # Swap the original ten header rows for the standardised metadata block.
  new_sheet <- rbind(new_metadata, excel_sheet[-(1:10), ])

  new_sheet
}
Loading

0 comments on commit 7528fed

Please sign in to comment.