diff --git a/backfill_corrections/delphiBackfillCorrection/DESCRIPTION b/backfill_corrections/delphiBackfillCorrection/DESCRIPTION index 5c83c72f7..b68d27cef 100644 --- a/backfill_corrections/delphiBackfillCorrection/DESCRIPTION +++ b/backfill_corrections/delphiBackfillCorrection/DESCRIPTION @@ -25,7 +25,6 @@ Imports: tidyr, zoo, utils, - rlang, parallel Suggests: knitr (>= 1.15), diff --git a/backfill_corrections/delphiBackfillCorrection/NAMESPACE b/backfill_corrections/delphiBackfillCorrection/NAMESPACE index b9bc4b89b..45bec9a37 100644 --- a/backfill_corrections/delphiBackfillCorrection/NAMESPACE +++ b/backfill_corrections/delphiBackfillCorrection/NAMESPACE @@ -33,9 +33,7 @@ importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,group_split) importFrom(dplyr,if_else) -importFrom(dplyr,mutate) importFrom(dplyr,pull) -importFrom(dplyr,rename) importFrom(dplyr,select) importFrom(dplyr,starts_with) importFrom(dplyr,summarize) @@ -50,9 +48,6 @@ importFrom(lubridate,year) importFrom(parallel,detectCores) importFrom(quantgen,quantile_lasso) importFrom(readr,write_csv) -importFrom(rlang,":=") -importFrom(rlang,.data) -importFrom(rlang,.env) importFrom(stats,coef) importFrom(stats,nlm) importFrom(stats,pbeta) diff --git a/backfill_corrections/delphiBackfillCorrection/R/beta_prior_estimation.R b/backfill_corrections/delphiBackfillCorrection/R/beta_prior_estimation.R index 088c4f91e..10b13ce99 100644 --- a/backfill_corrections/delphiBackfillCorrection/R/beta_prior_estimation.R +++ b/backfill_corrections/delphiBackfillCorrection/R/beta_prior_estimation.R @@ -53,7 +53,7 @@ objective <- function(theta, x, prob, ...) { #' @param model_save_dir directory containing trained models #' #' @importFrom stats nlm predict -#' @importFrom dplyr %>% filter +#' @importFrom dplyr filter #' @importFrom quantgen quantile_lasso #' est_priors <- function(train_data, prior_test_data, geo, value_type, dw, taus, @@ -63,8 +63,8 @@ est_priors <- function(train_data, prior_test_data, geo, value_type, dw, taus, model_save_dir, start=c(0, log(10)), base_pseudo_denom=1000, base_pseudo_num=10, train_models = TRUE, make_predictions = TRUE) { - sub_train_data <- train_data %>% filter(train_data[[dw]] == 1) - sub_test_data <- prior_test_data %>% filter(prior_test_data[[dw]] == 1) + sub_train_data <- filter(train_data, train_data[[dw]] == 1) + sub_test_data <- filter(prior_test_data, prior_test_data[[dw]] == 1) if (nrow(sub_test_data) == 0) { pseudo_denom <- base_pseudo_denom pseudo_num <- base_pseudo_num diff --git a/backfill_corrections/delphiBackfillCorrection/R/io.R b/backfill_corrections/delphiBackfillCorrection/R/io.R index aa7b543ee..217d9c458 100644 --- a/backfill_corrections/delphiBackfillCorrection/R/io.R +++ b/backfill_corrections/delphiBackfillCorrection/R/io.R @@ -13,18 +13,15 @@ read_data <- function(input_file) { #' Make sure data contains a `geo_value` field #' #' @template df-template -#' -#' @importFrom dplyr rename select -#' @importFrom rlang .data fips_to_geovalue <- function(df) { if ( !("geo_value" %in% colnames(df)) ) { if ( !("fips" %in% colnames(df)) ) { stop("Either `fips` or `geo_value` field must be available") } - df <- rename(df, geo_value = .data$fips) + df$geo_value <- df$fips } if ( "fips" %in% colnames(df) ) { - df <- select(df, -.data$fips) + df$fips <- NULL } return(df) } @@ -63,10 +60,10 @@ export_test_result <- function(test_data, coef_data, indicator, signal, dir.create(file.path(export_dir, signal_dir), showWarnings = FALSE) if (nrow(test_data) == 0) { - warning(str_interp("No test data available for ${signal_info}")) + warning("No test data available for ", signal_info) } else { - msg_ts(str_interp("Saving predictions to disk for ${signal_info} ")) - pred_output_file <- str_interp("prediction_${base_name}") + msg_ts("Saving predictions to disk for ", signal_info) + pred_output_file <- paste0("prediction_", base_name) prediction_col <- colnames(test_data)[grepl("^predicted", colnames(test_data))] expected_col <- c("time_value", "issue_date", "lag", "geo_value", @@ -75,10 +72,10 @@ export_test_result <- function(test_data, coef_data, indicator, signal, } if (nrow(coef_data) == 0) { - warning(str_interp("No coef data available for ${signal_info}")) + warning("No coef data available for ", signal_info) } else { - msg_ts(str_interp("Saving coefficients to disk for ${signal_info}")) - coef_output_file <- str_interp("coefs_${base_name}") + msg_ts("Saving coefficients to disk for ", signal_info) + coef_output_file <- paste0("coefs_", base_name) write_csv(coef_data, file.path(export_dir, signal_dir, coef_output_file)) } } diff --git a/backfill_corrections/delphiBackfillCorrection/R/main.R b/backfill_corrections/delphiBackfillCorrection/R/main.R index 1faefe899..f687c1f01 100644 --- a/backfill_corrections/delphiBackfillCorrection/R/main.R +++ b/backfill_corrections/delphiBackfillCorrection/R/main.R @@ -9,16 +9,14 @@ #' @template indicator-template #' @template signal-template #' -#' @importFrom dplyr %>% filter select group_by summarize across everything group_split ungroup +#' @importFrom dplyr %>% filter group_by summarize across everything group_split ungroup #' @importFrom tidyr drop_na -#' @importFrom rlang .data .env -#' @importFrom stringr str_interp #' #' @export run_backfill <- function(df, params, refd_col = "time_value", lag_col = "lag", issued_col = "issue_date", signal_suffixes = c(""), indicator = "", signal = "") { - df <- filter(df, .data$lag < params$ref_lag + 30) # a rough filtration to save memory + df <- filter(df, lag < params$ref_lag + 30) # a rough filtration to save memory geo_levels <- params$geo_levels if ("state" %in% geo_levels) { @@ -28,15 +26,16 @@ run_backfill <- function(df, params, } for (geo_level in geo_levels) { - msg_ts(str_interp("geo level ${geo_level}")) + msg_ts("geo level ", geo_level) # Get full list of interested locations if (geo_level == "state") { # Drop county field and make new "geo_value" field from "state_id". # Aggregate counties up to state level agg_cols <- c("geo_value", issued_col, refd_col, lag_col) # Sum all non-agg columns. Summarized columns keep original names + df$geo_value <- df$state_id + df$state_id <- NULL df <- df %>% - select(-.data$geo_value, geo_value = .data$state_id) %>% group_by(across(agg_cols)) %>% summarize(across(everything(), sum)) %>% ungroup() @@ -44,7 +43,7 @@ run_backfill <- function(df, params, if (geo_level == "county") { # Keep only 200 most populous (within the US) counties top_200_geos <- get_populous_counties() - df <- filter(df, .data$geo_value %in% top_200_geos) + df <- filter(df, geo_value %in% top_200_geos) } test_data_list <- list() @@ -59,13 +58,13 @@ run_backfill <- function(df, params, } msg_ts("Splitting data into geo groups") - group_dfs <- group_split(df, .data$geo_value) + group_dfs <- group_split(df, geo_value) # Build model for each location for (subdf in group_dfs) { geo <- subdf$geo_value[1] - msg_ts(str_interp("Processing ${geo} geo group")) + msg_ts("Processing ", geo, " geo group") min_refd <- min(subdf[[refd_col]]) max_refd <- max(subdf[[refd_col]]) @@ -78,7 +77,7 @@ run_backfill <- function(df, params, # process again. Main use case is for quidel which has overall and # age-based signals. if (signal_suffix != "") { - msg_ts(str_interp("signal suffix ${signal_suffix}")) + msg_ts("signal suffix ", signal_suffix) num_col <- paste(params$num_col, signal_suffix, sep = "_") denom_col <- paste(params$denom_col, signal_suffix, sep = "_") } else { @@ -87,7 +86,7 @@ run_backfill <- function(df, params, } for (value_type in params$value_types) { - msg_ts(str_interp("value type ${value_type}")) + msg_ts("value type ", value_type) # Handle different signal types if (value_type == "count") { # For counts data only combined_df <- fill_missing_updates(subdf, num_col, refd_col, lag_col) @@ -113,15 +112,17 @@ run_backfill <- function(df, params, ) } combined_df <- add_params_for_dates(combined_df, refd_col, lag_col) - combined_df <- combined_df %>% filter(.data$lag < params$ref_lag) + combined_df <- filter(combined_df, lag < params$ref_lag) - geo_train_data <- combined_df %>% - filter(.data$issue_date < params$training_end_date) %>% - filter(.data$target_date <= params$training_end_date) %>% - filter(.data$target_date > params$training_start_date) %>% + geo_train_data <- filter(combined_df, + issue_date < params$training_end_date, + target_date <= params$training_end_date, + target_date > params$training_start_date, + ) %>% drop_na() - geo_test_data <- combined_df %>% - filter(.data$issue_date %in% params$test_dates) %>% + geo_test_data <- filter(combined_df, + issue_date %in% params$test_dates + ) %>% drop_na() if (nrow(geo_test_data) == 0) { @@ -135,9 +136,10 @@ run_backfill <- function(df, params, if (value_type == "fraction") { # Use beta prior approach to adjust fractions - geo_prior_test_data = combined_df %>% - filter(.data$issue_date > min(params$test_dates) - 7) %>% - filter(.data$issue_date <= max(params$test_dates)) + geo_prior_test_data = filter(combined_df, + issue_date > min(params$test_dates) - 7, + issue_date <= max(params$test_dates) + ) updated_data <- frac_adj(geo_train_data, geo_test_data, geo_prior_test_data, indicator = indicator, signal = signal, geo_level = geo_level, signal_suffix = signal_suffix, @@ -154,16 +156,15 @@ run_backfill <- function(df, params, } max_raw = sqrt(max(geo_train_data$value_raw)) for (test_lag in params$test_lags) { - msg_ts(str_interp("test lag ${test_lag}")) + msg_ts("test lag ", test_lag) filtered_data <- data_filteration(test_lag, geo_train_data, geo_test_data, params$lag_pad) train_data <- filtered_data[[1]] test_data <- filtered_data[[2]] if (nrow(train_data) == 0 || nrow(test_data) == 0) { - msg_ts(str_interp( - "Not enough data to either train or test for test_lag ${test_lag}, skipping" - )) + msg_ts("Not enough data to either train or test for test_lag ", + test_lag, ", skipping") next } @@ -238,9 +239,8 @@ run_backfill <- function(df, params, #' @template lag_col-template #' @template issued_col-template #' -#' @importFrom dplyr bind_rows mutate %>% +#' @importFrom dplyr bind_rows %>% #' @importFrom parallel detectCores -#' @importFrom rlang .data := #' @importFrom stringr str_interp #' #' @export @@ -253,7 +253,7 @@ main <- function(params, indicators_subset <- INDICATORS_AND_SIGNALS if (params$indicators != "all") { - indicators_subset <- filter(indicators_subset, .data$indicator == params$indicators) + indicators_subset <- filter(indicators_subset, indicator == params$indicators) } if (nrow(indicators_subset) == 0) { stop("no indicators to process") @@ -288,25 +288,20 @@ main <- function(params, params$training_start_date <- result$training_start_date params$training_end_date <- result$training_end_date - msg_ts(paste0( - str_interp("training_start_date is ${params$training_start_date}, "), - str_interp("training_end_date is ${params$training_end_date}") - )) + msg_ts("training_start_date is ", params$training_start_date, + ", training_end_date is ", params$training_end_date) # Loop over every indicator + signal combination. for (group_i in seq_len(nrow(indicators_subset))) { input_group <- indicators_subset[group_i,] - msg_ts(str_interp( - "Processing indicator ${input_group$indicator} signal ${input_group$signal}" - )) + msg_ts("Processing indicator ", input_group$indicator, " signal ", input_group$signal) files_list <- get_files_list( input_group$indicator, input_group$signal, params, input_group$sub_dir ) if (length(files_list) == 0) { - warning(str_interp( - "No files found for indicator ${input_group$indicator} signal ${input_group$signal}, skipping" - )) + warning("No files found for indicator indicator ", input_group$indicator, + " signal ", input_group$signal, ", skipping") next } @@ -314,36 +309,30 @@ main <- function(params, input_data <- lapply( files_list, function(file) { + # refd_col and issued_col read in as strings read_data(file) %>% - fips_to_geovalue() %>% - mutate( - # Use `glue` syntax to construct a new field by variable, - # from https://stackoverflow.com/a/26003971/14401472 - "{refd_col}" := as.Date(.data[[refd_col]], "%Y-%m-%d"), - "{issued_col}" := as.Date(.data[[issued_col]], "%Y-%m-%d") - ) + fips_to_geovalue() } ) %>% bind_rows() if (nrow(input_data) == 0) { - warning(str_interp( - "No data available for indicator ${input_group$indicator} signal ${input_group$signal}, skipping" - )) + warning("No data available for indicator ", input_group$indicator, + " signal ", input_group$signal, ", skipping") next } # Check data type and required columns msg_ts("Validating input data") - for (value_type in params$value_types) { - msg_ts(str_interp("for ${value_type}")) - result <- validity_checks( - input_data, value_type, - params$num_col, params$denom_col, input_group$name_suffix, - refd_col = refd_col, lag_col = lag_col, issued_col = issued_col - ) - input_data <- result[["df"]] - } + # Validate while date fields still stored as strings for speed. + input_data <- validity_checks( + input_data, params$value_types, + params$num_col, params$denom_col, input_group$name_suffix, + refd_col = refd_col, lag_col = lag_col, issued_col = issued_col + ) + + input_data[[refd_col]] <- as.Date(input_data[[refd_col]], "%Y-%m-%d") + input_data[[issued_col]] <- as.Date(input_data[[issued_col]], "%Y-%m-%d") # Check available training days training_days_check(input_data[[issued_col]], params$training_days) diff --git a/backfill_corrections/delphiBackfillCorrection/R/model.R b/backfill_corrections/delphiBackfillCorrection/R/model.R index a86540375..93fc5a2bd 100644 --- a/backfill_corrections/delphiBackfillCorrection/R/model.R +++ b/backfill_corrections/delphiBackfillCorrection/R/model.R @@ -5,7 +5,7 @@ #' @param geo_train_data training data for a certain location #' @param geo_test_data testing data for a certain location #' -#' @importFrom rlang .data .env +#' @importFrom dplyr filter #' #' @export data_filteration <- function(test_lag, geo_train_data, geo_test_data, lag_pad) { @@ -22,12 +22,14 @@ data_filteration <- function(test_lag, geo_train_data, geo_test_data, lag_pad) { test_lag_pad1=8 test_lag_pad2=9 } - train_data = geo_train_data %>% - filter(.data$lag >= .env$test_lag - .env$test_lag_pad ) %>% - filter(.data$lag <= .env$test_lag + .env$test_lag_pad ) - test_data = geo_test_data %>% - filter(.data$lag >= .env$test_lag - .env$test_lag_pad1 ) %>% - filter(.data$lag <= .env$test_lag + .env$test_lag_pad2) + train_data = filter(geo_train_data, + lag >= test_lag - test_lag_pad, + lag <= test_lag + test_lag_pad + ) + test_data = filter(geo_test_data, + lag >= test_lag - test_lag_pad1, + lag <= test_lag + test_lag_pad2 + ) return (list(train_data, test_data)) } @@ -93,7 +95,6 @@ add_sqrtscale<- function(train_data, test_data, max_raw, value_col) { #' @template training_start_date-template #' #' @importFrom stats predict coef -#' @importFrom stringr str_interp #' #' @export model_training_and_testing <- function(train_data, test_data, taus, covariates, @@ -130,7 +131,7 @@ model_training_and_testing <- function(train_data, test_data, taus, covariates, success = success + 1 }, - error=function(e) {msg_ts(str_interp("Training failed for ${model_path}"))} + error=function(e) {msg_ts("Training failed for ", model_path)} ) } if (success < length(taus)) {return (NULL)} @@ -202,12 +203,11 @@ exponentiate_preds <- function(test_data, taus) { #' @template train_models-template #' #' @importFrom quantgen quantile_lasso -#' @importFrom stringr str_interp get_model <- function(model_path, train_data, covariates, tau, lambda, lp_solver, train_models) { if (train_models || !file.exists(model_path)) { if (!train_models && !file.exists(model_path)) { - warning(str_interp("user requested use of cached model but file {model_path}"), + warning("user requested use of cached model but file ", model_path, " does not exist; training new model") } # Quantile regression @@ -221,7 +221,7 @@ get_model <- function(model_path, train_data, covariates, tau, } else { # Load model from cache invisibly. Object has the same name as the original # model object, `obj`. - msg_ts(str_interp("Loading from ${model_path}")) + msg_ts("Loading from ", model_path) load(model_path) } @@ -248,21 +248,19 @@ get_model <- function(model_path, train_data, covariates, tau, #' #' @return path to file containing model object #' -#' @importFrom stringr str_interp -#' generate_filename <- function(indicator, signal, geo_level, signal_suffix, lambda, training_end_date, training_start_date, geo="", value_type = "", test_lag="", tau="", dw="", beta_prior_mode = FALSE, model_mode = TRUE) { if (lambda != "") { - lambda <- str_interp("lambda${lambda}") + lambda <- paste0("lambda", lambda) } if (test_lag != "") { - test_lag <- str_interp("lag${test_lag}") + test_lag <- paste0("lag", test_lag) } if (tau != "") { - tau <- str_interp("tau${tau}") + tau <- paste0("tau", tau) } if (beta_prior_mode) { beta_prior <- "beta_prior" diff --git a/backfill_corrections/delphiBackfillCorrection/R/preprocessing.R b/backfill_corrections/delphiBackfillCorrection/R/preprocessing.R index d366df7eb..094c92cb2 100644 --- a/backfill_corrections/delphiBackfillCorrection/R/preprocessing.R +++ b/backfill_corrections/delphiBackfillCorrection/R/preprocessing.R @@ -27,8 +27,10 @@ fill_rows <- function(df, refd_col, lag_col, min_refd, max_refd, ref_lag) { # +30 to have values for calculating 7-day averages lags <- min(df[[lag_col]]): (ref_lag + 30) refds <- seq(min_refd, max_refd, by="day") # Full list reference date - row_inds_df <- as.data.frame(crossing(refds, lags)) %>% - setNames(c(refd_col, lag_col)) + row_inds_df <- setNames( + as.data.frame(crossing(refds, lags)), + c(refd_col, lag_col) + ) df_new = merge(x=df, y=row_inds_df, by=c(refd_col, lag_col), all.y=TRUE) return (df_new) @@ -54,13 +56,14 @@ fill_missing_updates <- function(df, value_col, refd_col, lag_col) { if (any(diff(pivot_df[[lag_col]]) != 1)) { stop("Risk exists in forward filling") } - pivot_df <- pivot_df %>% fill(everything(), .direction="down") + pivot_df <- fill(pivot_df, everything(), .direction="down") # Fill NAs with 0s pivot_df[is.na(pivot_df)] <- 0 - backfill_df <- pivot_df %>% - pivot_longer(-lag_col, values_to="value_raw", names_to=refd_col) + backfill_df <- pivot_longer(pivot_df, + -lag_col, values_to="value_raw", names_to=refd_col + ) backfill_df[[refd_col]] = as.Date(backfill_df[[refd_col]]) return (as.data.frame(backfill_df)) @@ -80,8 +83,9 @@ get_7dav <- function(pivot_df, refd_col) { if (col == refd_col) next pivot_df[, col] <- rollmeanr(pivot_df[, col], 7, align="right", fill=NA) } - backfill_df <- pivot_df %>% - pivot_longer(-refd_col, values_to="value_raw", names_to="issue_date") + backfill_df <- pivot_longer(pivot_df, + -refd_col, values_to="value_raw", names_to="issue_date" + ) backfill_df[[refd_col]] = as.Date(backfill_df[[refd_col]]) backfill_df[["issue_date"]] = as.Date(backfill_df[["issue_date"]]) return (as.data.frame(backfill_df)) @@ -205,7 +209,7 @@ add_7davs_and_target <- function(df, value_col, refd_col, lag_col, ref_lag) { backfill_df$log_7dav_slope = backfill_df$log_value_7dav - backfill_df$log_value_prev_7dav # Remove invalid rows - backfill_df <- backfill_df %>% drop_na(c(lag_col)) + backfill_df <- drop_na(backfill_df, c(lag_col)) return (as.data.frame(backfill_df)) } diff --git a/backfill_corrections/delphiBackfillCorrection/R/utils.R b/backfill_corrections/delphiBackfillCorrection/R/utils.R index 9d12fb7f0..9cf7d581f 100644 --- a/backfill_corrections/delphiBackfillCorrection/R/utils.R +++ b/backfill_corrections/delphiBackfillCorrection/R/utils.R @@ -122,7 +122,8 @@ create_dir_not_exist <- function(path) #' Check input data for validity #' #' @template df-template -#' @template value_type-template +#' @param value_types character vector of all signal types to process. Either +#' or both of "count" and "fraction". #' @template num_col-template #' @template denom_col-template #' @template signal_suffixes-template @@ -133,7 +134,7 @@ create_dir_not_exist <- function(path) #' @return list of input dataframe augmented with lag column, if it #' didn't already exist, and character vector of one or two value #' column names, depending on requested `value_type` -validity_checks <- function(df, value_type, num_col, denom_col, signal_suffixes, +validity_checks <- function(df, value_types, num_col, denom_col, signal_suffixes, refd_col = "time_value", lag_col = "lag", issued_col = "issue_date") { if (!missing(signal_suffixes) && !is.na(signal_suffixes) && !all(signal_suffixes == "") && !all(is.na(signal_suffixes))) { num_col <- paste(num_col, signal_suffixes, sep = "_") @@ -141,10 +142,12 @@ validity_checks <- function(df, value_type, num_col, denom_col, signal_suffixes, } # Check data type and required columns - if (value_type == "count") { - if ( all(num_col %in% colnames(df)) ) { value_cols=c(num_col) } - else { stop("No valid column name detected for the count values!") } - } else if (value_type == "fraction") { + if ("count" %in% value_types) { + if ( !all(num_col %in% colnames(df)) ) { + stop("No valid column name detected for the count values!") + } + } + if ("fraction" %in% value_types) { value_cols = c(num_col, denom_col) if ( !all(value_cols %in% colnames(df)) ) { stop("No valid column name detected for the fraction values!") @@ -156,19 +159,11 @@ validity_checks <- function(df, value_type, num_col, denom_col, signal_suffixes, stop("No reference date column detected for the reference date!") } - if (!(inherits(df[[refd_col]], "Date"))) { - stop("Reference date column must be of `Date` type") - } - # issue_date and lag should exist in the dataset - if ( !(lag_col %in% colnames(df)) || !(issued_col %in% colnames(df)) ) { + if ( !all(c(lag_col, issued_col) %in% colnames(df)) ) { stop("Issue date and lag fields must exist in the input data") } - if (!(inherits(df[[issued_col]], "Date"))) { - stop("Issue date column must be of `Date` type") - } - if ( any(is.na(df[[lag_col]])) || any(is.na(df[[issued_col]])) || any(is.na(df[[refd_col]])) ) { stop("Issue date, lag, or reference date fields contain missing values") @@ -186,7 +181,7 @@ validity_checks <- function(df, value_type, num_col, denom_col, signal_suffixes, " least one reference date-issue date-location combination") } - return(list(df = df, value_cols = value_cols)) + return(df) } #' Check available training days @@ -203,28 +198,28 @@ training_days_check <- function(issue_date, training_days) { #' Subset list of counties to those included in the 200 most populous in the US #' #' @importFrom dplyr select %>% arrange desc pull -#' @importFrom rlang .data #' @importFrom utils head #' @import covidcast get_populous_counties <- function() { return( covidcast::county_census %>% - dplyr::select(pop = .data$POPESTIMATE2019, fips = .data$FIPS) %>% + dplyr::select(pop = POPESTIMATE2019, fips = FIPS) %>% # Drop megacounties (states) - filter(!endsWith(.data$fips, "000")) %>% - arrange(desc(.data$pop)) %>% - pull(.data$fips) %>% + filter(!endsWith(fips, "000")) %>% + arrange(desc(pop)) %>% + pull(fips) %>% head(n=200) ) } #' Write a message to the console with the current time #' -#' @param text the body of the message to display +#' @param ... the body of the message to display. Objects should be strings or +#' coercible to strings and are pasted together with no separator. #' #' @export -msg_ts <- function(text) { - message(sprintf("%s --- %s", format(Sys.time()), text)) +msg_ts <- function(...) { + message(sprintf("%s --- %s", format(Sys.time()), .makeMessage(...))) } #' Generate key for identifying a value_type-signal combo diff --git a/backfill_corrections/delphiBackfillCorrection/man/msg_ts.Rd b/backfill_corrections/delphiBackfillCorrection/man/msg_ts.Rd index 2b47fc134..1a3078524 100644 --- a/backfill_corrections/delphiBackfillCorrection/man/msg_ts.Rd +++ b/backfill_corrections/delphiBackfillCorrection/man/msg_ts.Rd @@ -4,10 +4,11 @@ \alias{msg_ts} \title{Write a message to the console with the current time} \usage{ -msg_ts(text) +msg_ts(...) } \arguments{ -\item{text}{the body of the message to display} +\item{...}{the body of the message to display. Objects should be strings or +coercible to strings and are pasted together with no separator.} } \description{ Write a message to the console with the current time diff --git a/backfill_corrections/delphiBackfillCorrection/man/validity_checks.Rd b/backfill_corrections/delphiBackfillCorrection/man/validity_checks.Rd index d162b338b..c14b5af96 100644 --- a/backfill_corrections/delphiBackfillCorrection/man/validity_checks.Rd +++ b/backfill_corrections/delphiBackfillCorrection/man/validity_checks.Rd @@ -6,7 +6,7 @@ \usage{ validity_checks( df, - value_type, + value_types, num_col, denom_col, signal_suffixes, @@ -19,7 +19,8 @@ validity_checks( \item{df}{Data Frame of aggregated counts within a single location reported for each reference date and issue date.} -\item{value_type}{string describing signal type. Either "count" or "fraction".} +\item{value_types}{character vector of all signal types to process. Either +or both of "count" and "fraction".} \item{num_col}{name of numerator column in the input dataframe} diff --git a/backfill_corrections/delphiBackfillCorrection/unit-tests/testthat/test-model.R b/backfill_corrections/delphiBackfillCorrection/unit-tests/testthat/test-model.R index 461cd455d..e7911569c 100644 --- a/backfill_corrections/delphiBackfillCorrection/unit-tests/testthat/test-model.R +++ b/backfill_corrections/delphiBackfillCorrection/unit-tests/testthat/test-model.R @@ -1,5 +1,7 @@ context("Testing the helper functions for modeling") +library(dplyr) + # Constants indicator <- "chng" signal <- "outpatient" diff --git a/backfill_corrections/delphiBackfillCorrection/unit-tests/testthat/test-utils.R b/backfill_corrections/delphiBackfillCorrection/unit-tests/testthat/test-utils.R index 73ed9f322..6529e4e5e 100644 --- a/backfill_corrections/delphiBackfillCorrection/unit-tests/testthat/test-utils.R +++ b/backfill_corrections/delphiBackfillCorrection/unit-tests/testthat/test-utils.R @@ -158,7 +158,7 @@ test_that("validity_checks alerts appropriately", { geo_value = rep("01001", 3) check_wrapper <- function(df, value_type, signal_suffixes = "") { - validity_checks(df, value_type = value_type, num_col = "num", + validity_checks(df, value_types = value_type, num_col = "num", denom_col = "den", signal_suffixes = signal_suffixes) } @@ -179,8 +179,6 @@ test_that("validity_checks alerts appropriately", { expect_error(check_wrapper(data.frame(num, den), "count"), "No reference date column detected for the reference date!") - expect_error(check_wrapper(data.frame(num, den, time_value = as.character(time_value)), "count"), - "Reference date column must be of `Date` type") issued_lag_error <- "Issue date and lag fields must exist in the input data" @@ -205,10 +203,6 @@ test_that("validity_checks alerts appropriately", { expect_error(check_wrapper(bind_rows(df, new_row), "count"), missing_val_error) - expect_error(check_wrapper(data.frame(num, den, time_value, lag, issue_date = as.character(issue_date)), "count"), - "Issue date column must be of `Date` type") - - df <- data.frame(num, den, time_value, issue_date, lag, geo_value, state_id) expect_warning(check_wrapper(df[rep(1, 3), ], "count"), "Data contains duplicate rows, dropping")