Skip to content

Commit

Permalink
Merge pull request #15 from MattCowgill/dev
Browse files Browse the repository at this point in the history
stable 0.0.2
  • Loading branch information
MattCowgill authored Oct 30, 2020
2 parents 9bd0042 + 578cc1c commit 7528fed
Show file tree
Hide file tree
Showing 65 changed files with 3,173 additions and 320 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/R-CMD-check-windows.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
# Trigger a check on every push and every pull request, on all branches.
on: [push, pull_request]

name: R-CMD-check-windows

jobs:
  R-CMD-check:
    # Windows-only companion to the main R-CMD-check workflow.
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v2
      - uses: r-lib/actions/setup-r@master
      # Install the package's dependencies plus the tooling needed to run
      # R CMD check; the run block is executed by Rscript, not the shell.
      - name: Install dependencies
        run: |
          install.packages(c("remotes", "rcmdcheck"))
          remotes::install_deps(dependencies = TRUE)
        shell: Rscript {0}
      # Fail the job only on check *errors* (warnings/notes are tolerated).
      - name: Check
        run: rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
        shell: Rscript {0}
8 changes: 1 addition & 7 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
on:
push:
branches:
- master
pull_request:
branches:
- master
on: [push, pull_request]

name: R-CMD-check

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.Rproj.user
.Rhistory
.RData
inst/doc
14 changes: 8 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: readrba
Title: Download and Tidy Statistical Tables from the Reserve Bank of Australia
Version: 0.0.1
Version: 0.0.2
Authors@R:
person(given = "Matt",
family = "Cowgill",
Expand All @@ -18,19 +18,21 @@ URL: https://github.com/MattCowgill/readrba
BugReports: https://github.com/MattCowgill/readrba/issues
Suggests:
covr,
testthat
testthat,
knitr,
rmarkdown,
ggplot2
Imports:
readxl,
tidyr,
tidyr (>= 1.0.0),
dplyr,
purrr,
rlang,
xml2,
rvest,
tibble,
stringr,
curl,
httr,
lubridate
Depends:
R (>= 2.10)
R (>= 3.5.0)
VignetteBuilder: knitr
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(browse_rba_series)
export(browse_rba_tables)
export(read_rba)
export(read_rba_local)
export(read_rba_seriesid)
export(tidy_rba)
importFrom(dplyr,"%>%")
importFrom(rlang,.data)
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# readrba 0.0.2
* Can now use `series_id` argument to `read_rba()` to fetch based on series ID(s)
* Examine available RBA data using `browse_rba_series()` and `browse_rba_tables()`
* `cur_hist = "all"` no longer allowed
* Some non-standard tables now able to be tidied
* Added a `NEWS.md` file to track changes to the package.
56 changes: 56 additions & 0 deletions R/browse_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#' Browse available RBA data series
#'
#' @param search_string Word or phrase to search for, such as "gold" or "commodity" or "labour".
#' If left as `""`, the function will return all series.
#' @return A `data.frame` (`tbl_df`) containing RBA data series/tables that match the `search_string`.
#'
#' @export
#' @examples
#'
#' # Find series that contain 'unemployment'
#' browse_rba_series("unemployment")
#'
#' # Or all labour-related series
#' browse_rba_series("labour")
#'
#' # Or those related to commodities
#' browse_rba_series("commodities")
#'
#' # Or all series
#' browse_rba_series()
#'
#' # Or just look for tables that contain the word 'labour'
#' browse_rba_tables("labour")
#'
#' # Or all tables
#' browse_rba_tables()
#' @rdname browse_rba
browse_rba_series <- function(search_string = "") {
  # Delegate to the shared browse helper, pointed at the internal
  # lookup table of individual RBA data series.
  do_rba_browse(search_string, lookup_table = series_list)
}

#' @export
#' @rdname browse_rba
browse_rba_tables <- function(search_string = "") {
  # Delegate to the shared browse helper, pointed at the internal
  # lookup table of RBA statistical tables.
  do_rba_browse(search_string, lookup_table = table_list)
}

#' @noRd
#' @keywords internal
#' @noRd
#' @keywords internal
do_rba_browse <- function(search_string, lookup_table) {
  # Case-insensitive regex match of `search_string` against one column.
  matches_search <- function(column) {
    grepl(search_string, column, ignore.case = TRUE)
  }

  # TRUE for each row in which at least one column matched.
  any_hit_in_row <- function(hit_matrix) {
    rowSums(hit_matrix) > 0
  }

  dplyr::filter(
    lookup_table,
    any_hit_in_row(dplyr::across(dplyr::everything(), matches_search))
  )
}
2 changes: 2 additions & 0 deletions R/download_rba.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ download_rba <- function(urls, path = tempdir()) {
url = urls,
mode = "wb",
destfile = filenames_with_path,
quiet = FALSE,
method = "libcurl",
cacheOK = FALSE
)
Expand All @@ -29,6 +30,7 @@ download_rba <- function(urls, path = tempdir()) {
.x = urls,
.y = filenames,
.f = utils::download.file,
quiet = FALSE,
mode = "wb",
cacheOK = FALSE
)
Expand Down
7 changes: 5 additions & 2 deletions R/get_rba_urls.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
#' @param table_no Character vector of RBA table number(s), such as `"A1"`
#' or `c("A1", "g1")`.
#' Not case-sensitive.
#' @param cur_hist Either `"current"` (the default) or `"historical"`.
#' @param cur_hist Either `"current"` (the default) or `"historical"`. Must be
#' length 1.
#' Choose between getting
#' the URLs corresponding to current RBA tables, or historical tables.
#' @return Vector of URL(s) corresponding to the supplied `table_no`
#' @noRd
#' @keywords internal
get_rba_urls <- function(table_no, cur_hist = "current") {
stopifnot(length(cur_hist) == 1)
stopifnot(cur_hist %in% c("current", "historical"))

table_no <- tolower(table_no)
Expand All @@ -27,7 +30,7 @@ get_rba_urls <- function(table_no, cur_hist = "current") {

urls_work <- url_exists(urls)

if (any(is.na(urls)) | any(urls_work == FALSE)) {
if (any(is.na(urls)) || any(urls_work == FALSE)) {
# Re-scrape the list of URLs if some cannot be matched
new_table_list <- scrape_table_list(cur_hist)
urls <- get_urls(new_table_list, table_no)
Expand Down
152 changes: 152 additions & 0 deletions R/prelim_tidy_rba.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# Functions to prepare Excel sheets with non-standard formatting prior
# to tidying them using the `tidy_rba_normal()` function

#' Function to wrangle historical yields data to get it in the standard format
#' Called indirectly from tidy_rba()
#' @param excel_sheet Excel sheet with no tidying done
#' @keywords internal

prelim_tidy_old_f16 <- function(excel_sheet) {
  # Rebuild the first ten header rows of the historical F16 sheet into the
  # standard RBA metadata layout (Title / Description / Frequency / Type /
  # Units / Source / Publication date / Series ID) so the result can be
  # passed to the normal tidying routine.
  n_col <- ncol(excel_sheet)

  # Row 3 of the raw sheet holds the bond issue ID (e.g. "TB33");
  # column 1 is the date column, so metadata spans columns 2..n_col.
  issue_id <- as.character(excel_sheet[3, 2:n_col])

  # The first two characters of the issue ID encode the bond type.
  # The trailing spaces are deliberate: paste0() below relies on them.
  bond_type <- dplyr::case_when(
    substr(issue_id, 1, 2) == "TB" ~
      "Treasury Bonds ",
    substr(issue_id, 1, 2) == "TI" ~
      "Treasury Indexed Bonds ",
    TRUE ~ NA_character_
  )

  # as.character() above converts missing cells to the literal string "NA",
  # hence the comparison against "NA" rather than is.na().
  bond_num <- ifelse(issue_id == "NA",
    NA_character_,
    substr(issue_id, 3, nchar(issue_id))
  )

  # Fixed rows of the old F16 layout: coupon (row 4), maturity (row 5),
  # last-updated date (row 8), source (row 9), series mnemonic (row 10).
  coupon <- as.character(excel_sheet[4, 2:n_col])
  maturity <- as.character(excel_sheet[5, 2:n_col])
  last_updated <- as.character(excel_sheet[8, 2:n_col])
  source <- as.character(excel_sheet[9, 2:n_col])
  mnemonic <- as.character(excel_sheet[10, 2:n_col])

  new_title <- c(
    "Title",
    bond_type
  )

  # Convert an Excel date serial number (held as text) to "dd-Mon-yyyy".
  # Excel's day-zero on Windows is 1899-12-30.
  excel_date_to_string <- function(x) {
    x <- ifelse(x == "NA", NA_character_, x)
    x <- as.numeric(x)
    x <- as.Date(x, origin = "1899-12-30")
    x <- format(x, "%d-%b-%Y")
  }

  # Description combines bond type, number, coupon (as a percentage) and
  # maturity date, separated by newlines.
  new_description <- c(
    "Description",
    paste0(
      bond_type,
      bond_num, "\n",
      suppressWarnings(as.numeric(coupon)) * 100, "%\n",
      excel_date_to_string(maturity)
    )
  )

  # Any missing component leaves a literal "NA" in the pasted string;
  # blank out those descriptions entirely.
  new_description <- ifelse(grepl("NA", new_description),
    NA_character_,
    new_description
  )

  new_frequency <- c("Frequency", rep("Daily", n_col - 1))
  new_type <- c("Type", rep("Original", n_col - 1))
  new_units <- c("Units", rep("Units", n_col - 1))
  new_source <- c("Source", source)
  new_pub_date <- c("Publication date", last_updated)
  new_series_id <- c("Series ID", mnemonic)

  # Stack the eight metadata vectors as rows: give each vector identical
  # temporary names ("V0".."V<n-1>") so bind_rows() aligns them by column.
  new_metadata <- purrr::map(
    list(
      new_title, new_description, new_frequency, new_type,
      new_units, new_source, new_pub_date, new_series_id
    ),
    ~ setNames(.x, paste0("V", 0:(n_col - 1)))
  ) %>%
    dplyr::bind_rows()

  names(new_metadata) <- names(excel_sheet)

  # Swap the original ten header rows for the standardised metadata block.
  new_sheet <- rbind(new_metadata, excel_sheet[-(1:10), ])

  new_sheet
}

#' Function to wrangle historical F2 table to get it in the standard format
#' Called indirectly from tidy_rba()
#' @param excel_sheet Excel sheet with no tidying done
#' @keywords internal

prelim_tidy_old_f2 <- function(excel_sheet) {
  # Rebuild the first ten header rows of the historical F2 sheet into the
  # standard RBA metadata layout (Title / Description / Frequency / Type /
  # Units / Source / Publication date / Series ID) so the result can be
  # passed to the normal tidying routine.

  # fill_blanks() adapted from {zoo} - carries the last non-NA value
  # forward; note that this version also removes leading NAs.
  fill_blanks <- function(x) {
    L <- !is.na(x)
    c(x[L])[cumsum(L)]
  }

  # Row 3 holds the issuer name only above the first column of each span
  # (merged cells); fill it rightwards across the gaps.
  issuer <- as.character(excel_sheet[3, ])
  issuer <- fill_blanks(issuer)
  # fixed = TRUE: plain string replacement, not a regex.
  # (Was `fixed = T`; T is a reassignable variable, TRUE is not.)
  issuer <- gsub("Australian Government", "Commonwealth Government", issuer,
    fixed = TRUE
  )

  # Row 4 holds the maturity, e.g. "2 yrs" -> "2 years".
  maturity <- as.character(excel_sheet[4, ])
  maturity <- maturity[!is.na(maturity)]
  maturity <- gsub(" yrs", " years", maturity)

  # Titles read e.g. "Commonwealth Government 2 year bond".
  title <- paste(issuer, maturity, "bond", sep = " ")
  title <- gsub("years", "year", title)
  new_title <- c("Title", title)

  description <- paste("Yields on",
    issuer, "bonds,",
    maturity, "maturity",
    sep = " "
  )
  new_description <- c("Description", description)

  # Infer frequency from the average spacing of the date column
  # (Excel serials; day-zero on Windows is 1899-12-30): roughly one day
  # per row means daily data, otherwise monthly.
  n_rows <- nrow(excel_sheet)
  n_col <- ncol(excel_sheet)
  max_date <- as.Date(as.numeric(excel_sheet[n_rows, 1]), origin = "1899-12-30")
  min_date <- as.Date(as.numeric(excel_sheet[11, 1]), origin = "1899-12-30")
  approx_days_per_row <- trunc(as.numeric(max_date - min_date) / n_rows)

  frequency <- ifelse(approx_days_per_row == 1, "Daily", "Monthly")
  new_frequency <- c("Frequency", rep(frequency, n_col - 1))

  new_type <- c("Type", rep("Original", n_col - 1))

  new_units <- c("Units", rep("Per cent per annum", n_col - 1))

  # Rows 8-10 of the old layout: publication date, source, mnemonic.
  new_source <- as.character(excel_sheet[9, ])

  pub_date <- as.character(excel_sheet[8, ])
  new_pub_date <- gsub("Last updated:", "Publish date", pub_date)

  series_id <- as.character(excel_sheet[10, ])
  new_series_id <- gsub("Mnemonic", "Series ID", series_id)

  # Stack the eight metadata vectors as rows: give each vector identical
  # temporary names ("V0".."V<n-1>") so bind_rows() aligns them by column.
  new_metadata <- purrr::map(
    list(
      new_title, new_description, new_frequency, new_type,
      new_units, new_source, new_pub_date, new_series_id
    ),
    ~ setNames(.x, paste0("V", 0:(n_col - 1)))
  ) %>%
    dplyr::bind_rows()

  names(new_metadata) <- names(excel_sheet)

  # Swap the original ten header rows for the standardised metadata block.
  new_sheet <- rbind(new_metadata, excel_sheet[-(1:10), ])

  new_sheet
}
Loading

0 comments on commit 7528fed

Please sign in to comment.