diff --git a/NAMESPACE b/NAMESPACE index 4e2af41..2122a38 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -79,6 +79,7 @@ export(save_experiment) export(set_doc_options) export(set_rmd_options) export(set_save_dir) +export(simplify_tibble) export(summarize_feature_importance) export(summarize_feature_selection_curve) export(summarize_feature_selection_err) diff --git a/R/experiment-utils.R b/R/experiment-utils.R index e426602..fdaa47f 100644 --- a/R/experiment-utils.R +++ b/R/experiment-utils.R @@ -70,7 +70,7 @@ maybe_add_debug_data <- function(tbl, debug = FALSE) { #' Distribute simulation computation by replicates. #' #' @keywords internal -compute_rep <- function(n_reps, +compute_rep <- function(reps, future.globals, future.packages, future.seed, @@ -113,11 +113,18 @@ compute_rep <- function(n_reps, } # progress updates - total_reps <- n_reps * length(dgp_params_list) * length(method_params_list) + total_reps <- length(reps) * length(dgp_params_list) * length(method_params_list) p <- maybe_progressr(steps = total_reps, envir = parent.frame()) - results <- future.apply::future_replicate(n_reps, { + vary_param_names <- purrr::map( + c(dgp_params_list, method_params_list), + ~ names(.x) + ) |> + purrr::reduce(c) |> + unique() + + results <- future.apply::future_lapply(as.character(reps), function(i) { # make a local binding to error_state error_state <- error_state @@ -143,6 +150,15 @@ compute_rep <- function(n_reps, gc() }) + save_file <- file.path( + save_dir, "fit_results", sprintf("fit_result%s.rds", i) + ) + if (use_cached && file.exists(save_file) && !save_in_bulk) { + cached_results <- readRDS(save_file) + } else { + cached_results <- NULL + } + dgp_res <- purrr::list_rbind(purrr::map( dgp_params_list, function(dgp_params) { @@ -198,7 +214,8 @@ compute_rep <- function(n_reps, } return( - list(.dgp = dgp_list[[dgp_name]], + list(.rep = i, + .dgp = dgp_list[[dgp_name]], .dgp_name = dgp_name, .dgp_params = dgp_params, .method = NULL, @@ -207,6 +224,7 @@ compute_rep <- 
function(n_reps, .method_output = NULL, .err = data_list) |> list_to_tibble_row() |> + simplify_tibble(cols = c(".rep", ".dgp_name", ".method_name")) |> maybe_add_debug_data(TRUE) ) } @@ -240,7 +258,8 @@ compute_rep <- function(n_reps, method_params = method_params, duplicate_param_names = duplicate_param_names ) |> - list_to_tibble_row() + list_to_tibble_row() |> + simplify_tibble(cols = c(".rep", ".dgp_name", ".method_name")) # param_df$.seed <- seed @@ -248,6 +267,29 @@ compute_rep <- function(n_reps, method_params$data_list <- data_list method_params$.simplify <- FALSE + if (use_cached && file.exists(save_file) && !save_in_bulk) { + is_cached <- compare_tibble_rows( + param_df, + cached_results |> + dplyr::select(tidyselect::all_of(colnames(param_df))), + op = "contained_in" + ) && + compare_tibble_rows( + param_df, + cached_fit_params |> + dplyr::select(tidyselect::all_of(colnames(param_df))), + op = "contained_in" + ) + if (is_cached) { + # if (verbose >= 1) { + # inform(sprintf("Found cached results for rep=%s for", i)) + # inform(str(simplify_tibble(param_df))) + # } + return(NULL) + } + } + + fit_start_time <- Sys.time() result <- do_call_wrapper( method_name, method_list[[method_name]]$fit, @@ -256,6 +298,7 @@ compute_rep <- function(n_reps, # hard-coded method fun call for error messages call = rlang::call2(paste0(method_name, "$method_fun(...)")) ) + fit_time <- difftime(Sys.time(), fit_start_time, units = "mins") if ("error" %in% class(result)) { @@ -270,7 +313,8 @@ compute_rep <- function(n_reps, method_params$data_list <- NULL return( - list(.dgp = dgp_list[[dgp_name]], + list(.rep = i, + .dgp = dgp_list[[dgp_name]], .dgp_name = dgp_name, .dgp_params = dgp_params, .method = method_list[[method_name]], @@ -279,6 +323,7 @@ compute_rep <- function(n_reps, .method_output = NULL, .err = result) |> list_to_tibble_row() |> + simplify_tibble(cols = c(".rep", ".dgp_name", ".method_name")) |> maybe_add_debug_data(TRUE) ) } @@ -304,7 +349,8 @@ compute_rep <- 
function(n_reps, method_params$data_list <- NULL return( - list(.dgp = dgp_list[[dgp_name]], + list(.rep = i, + .dgp = dgp_list[[dgp_name]], .dgp_name = dgp_name, .dgp_params = dgp_params, .method = method_list[[method_name]], @@ -313,12 +359,18 @@ compute_rep <- function(n_reps, .method_output = result, .err = names_check) |> list_to_tibble_row() |> + simplify_tibble(cols = c(".rep", ".dgp_name", ".method_name")) |> maybe_add_debug_data(TRUE) ) } result <- result |> - tibble::add_column(param_df, .before = 1) + tibble::add_column(param_df, .before = 1) |> + tibble::add_column(.rep = i, .before = 1) + + if (record_time) { + result$.time_taken <- fit_time + } p("of total reps") @@ -332,16 +384,41 @@ compute_rep <- function(n_reps, } )) # dgp_res <- purrr::list_rbind(purrr::map( + if (use_cached && file.exists(save_file) && !save_in_bulk) { + dgp_res <- get_matching_rows( + id = cached_fit_params, x = cached_results + ) |> + dplyr::bind_rows(dgp_res) + } + + if (save_per_rep) { + if (".err" %in% colnames(dgp_res)) { + saveRDS( + dgp_res, + stringr::str_replace(save_file, "\\.rds$", "_error.rds") + ) + } else { + saveRDS(dgp_res, save_file) + } + dgp_res <- dgp_res |> + dplyr::select(tidyselect::any_of(unique(c( + ".rep", ".dgp", ".dgp_name", ".dgp_params", + ".method", ".method_name", ".method_params", + ".method_output", + vary_param_names, duplicate_param_names, + ".err", ".pid", ".gc" + )))) + } + return(dgp_res) }, - simplify = FALSE, future.globals = future.globals, future.packages = future.packages, future.seed = future.seed, ...) # results <- future.apply::future_replicate( - results <- dplyr::bind_rows(results, .id = ".rep") + results <- dplyr::bind_rows(results) if (debug) { @@ -377,7 +454,7 @@ compute_rep <- function(n_reps, #' Distribute simulation computation by DGPs. 
#' #' @keywords internal -compute_dgp <- function(n_reps, +compute_dgp <- function(reps, future.globals, future.packages, future.seed, @@ -389,7 +466,7 @@ compute_dgp <- function(n_reps, #' Distribute simulation computation by Methods. #' #' @keywords internal -compute_method <- function(n_reps, +compute_method <- function(reps, future.globals, future.packages, future.seed, @@ -401,7 +478,7 @@ compute_method <- function(n_reps, #' Doubly nested distributed simulation computation nested by DGPs and reps. #' #' @keywords internal -compute_dgp_rep <- function(n_reps, +compute_dgp_rep <- function(reps, future.globals, future.packages, future.seed, @@ -413,7 +490,7 @@ compute_dgp_rep <- function(n_reps, #' Doubly nested distributed simulation computation nested by Methods and reps. #' #' @keywords internal -compute_method_rep <- function(n_reps, +compute_method_rep <- function(reps, future.globals, future.packages, future.seed, @@ -425,7 +502,7 @@ compute_method_rep <- function(n_reps, #' Doubly nested distributed simulation computation nested by DGPs and Methods. #' #' @keywords internal -compute_dgp_method <- function(n_reps, +compute_dgp_method <- function(reps, future.globals, future.packages, future.seed, @@ -438,7 +515,7 @@ compute_dgp_method <- function(n_reps, #' reps. #' #' @keywords internal -compute_dgp_method_reps <- function(n_reps, +compute_dgp_method_reps <- function(reps, future.globals, future.packages, future.seed, diff --git a/R/experiment.R b/R/experiment.R index b0734cd..18cd186 100644 --- a/R/experiment.R +++ b/R/experiment.R @@ -45,6 +45,7 @@ Experiment <- R6::R6Class( .fit_params = tibble::tibble(), .future.globals = TRUE, .future.packages = NULL, + .save_in_bulk = c(fit = TRUE, eval = TRUE, viz = TRUE), # private methods .add_obj = function(field_name, obj, obj_name, ...) 
{ @@ -336,7 +337,7 @@ Experiment <- R6::R6Class( .get_fit_params = function(cached_params = NULL, type = c("all", "cached", "new"), - n_reps = NULL, simplify = FALSE) { + n_reps = NULL, wide_params = FALSE) { # get all/new/cached (dgp, method) fit parameter combinations type <- match.arg(type) fit_params <- private$.fit_params @@ -365,7 +366,7 @@ Experiment <- R6::R6Class( } } - if (simplify) { + if (wide_params && (nrow(out_params) > 0)) { duplicate_param_names <- private$.get_duplicate_param_names() for (param_name in private$.get_vary_params("dgp")) { # fix naming if also in method vary across @@ -385,9 +386,12 @@ Experiment <- R6::R6Class( } out_params <- out_params |> dplyr::select(-.dgp, -.dgp_fun, -.dgp_params, - -.method, -.method_fun, -.method_params) |> - simplify_tibble() + -.method, -.method_fun, -.method_params) } + + out_params <- simplify_tibble( + out_params, cols = c(".rep", ".dgp_name", ".method_name") + ) return(out_params) }, @@ -537,33 +541,57 @@ Experiment <- R6::R6Class( save_dir <- private$.get_vary_across_dir() } if (results_type %in% c("fit", "eval", "viz")) { - save_file <- file.path(save_dir, paste0(results_type, "_results.rds")) - if (results_type == "fit") { - save_file2 <- file.path(save_dir, - paste0(results_type, - "_results_extra_cached_reps.rds")) - } + save_in_bulk <- private$.save_in_bulk[[results_type]] } else { - save_file <- file.path(save_dir, paste0(results_type, ".rds")) + save_in_bulk <- TRUE } - if (file.exists(save_file)) { - res <- readRDS(save_file) - if (results_type == "fit") { - if (file.exists(save_file2)) { - res <- dplyr::bind_rows(res, readRDS(save_file2)) + if (save_in_bulk || !(results_type %in% c("fit", "eval", "viz"))) { + if (results_type %in% c("fit", "eval", "viz")) { + save_file <- file.path(save_dir, paste0(results_type, "_results.rds")) + if (results_type == "fit") { + save_file2 <- file.path( + save_dir, + paste0(results_type,"_results_extra_cached_reps.rds") + ) + } + } else { + save_file <- 
file.path(save_dir, paste0(results_type, ".rds")) + } + if (file.exists(save_file)) { + res <- readRDS(save_file) + if (results_type == "fit") { + if (file.exists(save_file2)) { + res <- dplyr::bind_rows(res, readRDS(save_file2)) + } } + return(res) } - return(res) } else { - if (verbose >= 1) { - if (results_type %in% c("fit", "eval", "viz")) { - inform(sprintf("Cannot find cached %s results.", results_type)) + save_files <- list.files( + file.path(save_dir, sprintf("%s_results", results_type)), + pattern = ".rds", full.names = TRUE + ) + if (length(save_files) > 0) { + if (results_type == "fit") { + res <- purrr::map(save_files, ~ readRDS(.x)) |> + dplyr::bind_rows() } else { - inform("Cannot find cache.") + res <- purrr::map(save_files, ~ readRDS(.x)) |> + setNames( + stringr::str_remove(basename(save_files), "\\.rds$") + ) } + return(res) } - return(NULL) } + if (verbose >= 1) { + if (results_type %in% c("fit", "eval", "viz")) { + inform(sprintf("Cannot find cached %s results.", results_type)) + } else { + inform("Cannot find cache.") + } + } + return(NULL) }, .clear_cache = function() { @@ -698,6 +726,27 @@ Experiment <- R6::R6Class( } }, + .save_result = function(result, + results_type = c("fit", "eval", "viz"), + fname) { + results_type <- match.arg(results_type) + if (!private$.has_vary_across()) { + save_dir <- private$.save_dir + } else { + save_dir <- private$.get_vary_across_dir() + } + # save non-bulk result + save_file <- file.path( + save_dir, + sprintf("%s_results", results_type), + sprintf("%s.rds", fname) + ) + if (!dir.exists(dirname(save_file))) { + dir.create(dirname(save_file), recursive = TRUE) + } + saveRDS(result, save_file) + }, + .get_vary_across_dir = function() { obj_names <- purrr::map(private$.vary_across_list, names) |> purrr::reduce(c) |> @@ -752,6 +801,14 @@ Experiment <- R6::R6Class( #' in a directory called "results" with a sub-directory named after #' `Experiment$name` when using [run_experiment()] or [fit_experiment()] #' with 
`save=TRUE`. + #' @param save_in_bulk A logical, indicating whether or not to save the + #' fit, evaluator, and visualizer outputs, each as a single bulk .rds file + #' (i.e., as `fit_results.rds`, `eval_results.rds`, `viz_results.rds`). + #' Default is `TRUE`. If `FALSE`, each fit replicate is saved as a + #' separate .rds file while each evaluator/visualizer is saved as a + #' separate .rds file. One can alternatively specify a character vector + #' with some subset of "fit", "eval", and/or "viz", indicating the + #' elements to save in bulk to disk. #' @param ... Not used. #' #' @return A new instance of `Experiment`. @@ -759,7 +816,8 @@ Experiment <- R6::R6Class( dgp_list = list(), method_list = list(), evaluator_list = list(), visualizer_list = list(), future.globals = TRUE, future.packages = NULL, - clone_from = NULL, save_dir = NULL, ...) { + clone_from = NULL, save_dir = NULL, + save_in_bulk = TRUE, ...) { if (!is.null(clone_from)) { private$.check_obj(clone_from, "Experiment") clone <- clone_from$clone(deep = TRUE) @@ -782,6 +840,16 @@ Experiment <- R6::R6Class( save_dir <- file.path("results", name) } private$.save_dir <- R.utils::getAbsolutePath(save_dir) + if (!is.logical(save_in_bulk)) { + save_in_bulk <- c("fit", "eval", "viz") %in% save_in_bulk + } else { + if (length(save_in_bulk) > 1) { + warn("The input save_in_bulk is a logical vector of length > 1. Only the first element of save is used.") + } + save_in_bulk <- rep(save_in_bulk[1], 3) + } + private$.save_in_bulk <- save_in_bulk + names(private$.save_in_bulk) <- c("fit", "eval", "viz") }, #' @description Run the full `Experiment` pipeline (fitting, evaluating, @@ -811,7 +879,15 @@ Experiment <- R6::R6Class( #' `Experiment`. Note that even if `return_all_cached_reps = TRUE`, #' only the `n_reps` replicates are used when evaluating and visualizing #' the `Experiment`. - #' @param save If `TRUE`, save outputs to disk. 
+ #' @param save A logical, indicating whether or not to save the fit, + #' evaluator, and visualizer outputs to disk. Alternatively, one can + #' specify a character vector with some subset of "fit", "eval", and/or + #' "viz", indicating the elements to save to disk. + #' @param record_time A logical, indicating whether or not to record the + #' time taken to run each `Method`, `Evaluator`, and `Visualizer` in the + #' `Experiment`. Alternatively, one can specify a character vector with + #' some subset of "fit", "eval", and/or "viz", indicating the elements for + #' which to record the time taken. #' @param checkpoint_n_reps The number of experiment replicates to compute #' before saving results to disk. If 0 (the default), no checkpoints are #' saved. @@ -827,7 +903,7 @@ Experiment <- R6::R6Class( #' \describe{ #' \item{fit_results}{A tibble containing results from the `fit` #' method. In addition to results columns, has columns named '.rep', '.dgp_name', - #' '.method_name', and the `vary_across` parameter names if applicable.} + #' '.method_name', '.time_taken', and the `vary_across` parameter names if applicable.} #' \item{eval_results}{A list of tibbles containing results from the #' `evaluate` method, which evaluates each `Evaluator` in #' the `Experiment`. Length of list is equivalent to the number of @@ -841,6 +917,7 @@ Experiment <- R6::R6Class( future.globals = NULL, future.packages = NULL, future.seed = TRUE, use_cached = FALSE, return_all_cached_reps = FALSE, save = FALSE, + record_time = FALSE, checkpoint_n_reps = 0, verbose = 1, ...) { if (!is.logical(save)) { @@ -851,6 +928,14 @@ Experiment <- R6::R6Class( } save <- rep(save[1], 3) } + if (!is.logical(record_time)) { + record_time <- c("fit", "eval", "viz") %in% record_time + } else { + if (length(record_time) > 1) { + warn("The input record_time is a logical vector of length > 1. 
Only the first element of save_time is used.") + } + record_time <- rep(record_time[1], 3) + } fit_results <- self$fit(n_reps, parallel_strategy = parallel_strategy, future.globals = future.globals, @@ -859,18 +944,21 @@ Experiment <- R6::R6Class( use_cached = use_cached, return_all_cached_reps = return_all_cached_reps, save = save[1], + record_time = record_time[1], checkpoint_n_reps = checkpoint_n_reps, verbose = verbose, ...) eval_results <- self$evaluate(fit_results = fit_results |> dplyr::filter(as.numeric(.rep) <= !!n_reps), use_cached = use_cached, save = save[2], + record_time = record_time[2], verbose = verbose, ...) viz_results <- self$visualize(fit_results = fit_results |> dplyr::filter(as.numeric(.rep) <= !!n_reps), eval_results = eval_results, use_cached = use_cached, save = save[3], + record_time = record_time[3], verbose = verbose, ...) return(list(fit_results = fit_results, @@ -955,7 +1043,9 @@ Experiment <- R6::R6Class( #' returns fit results for the requested `n_reps` plus any additional #' cached replicates from the (`DGP`, `Method`) combinations in the #' `Experiment`. - #' @param save If `TRUE`, save outputs to disk. + #' @param save Logical. If `TRUE`, save outputs to disk. + #' @param record_time Logical. If `TRUE`, record the amount of time taken to + #' fit each `Method` per replicate. #' @param checkpoint_n_reps The number of experiment replicates to compute #' before saving results to disk. If 0 (the default), no checkpoints are #' saved. @@ -970,12 +1060,14 @@ Experiment <- R6::R6Class( #' #' @return A tibble containing the results from fitting all `Methods` #' across all `DGPs` for `n_reps` repetitions. In addition to - #' results columns, has columns named '.rep', '.dgp_name', '.method_name', and the + #' results columns, has columns named '.rep', '.dgp_name', '.method_name', + #' '.time_taken' (if `record_time = TRUE`), and the #' `vary_across` parameter names if applicable. 
fit = function(n_reps = 1, parallel_strategy = "reps", future.globals = NULL, future.packages = NULL, future.seed = TRUE, use_cached = FALSE, return_all_cached_reps = FALSE, save = FALSE, + record_time = FALSE, checkpoint_n_reps = 0, verbose = 1, ...) { parallel_strategy <- unique(parallel_strategy) @@ -1022,6 +1114,17 @@ Experiment <- R6::R6Class( ) } + save_in_bulk <- private$.save_in_bulk[["fit"]] + save_per_rep <- save && !save_in_bulk + if (!private$.has_vary_across()) { + save_dir <- private$.save_dir + } else { + save_dir <- private$.get_vary_across_dir() + } + if (!dir.exists(file.path(save_dir, "fit_results"))) { + dir.create(file.path(save_dir, "fit_results"), recursive = TRUE) + } + dgp_list <- private$.get_obj_list("dgp") method_list <- private$.get_obj_list("method") @@ -1035,7 +1138,7 @@ Experiment <- R6::R6Class( private$.update_fit_params() - if (!is.numeric(checkpoint_n_reps)) { + if (!is.numeric(checkpoint_n_reps) || !save_in_bulk) { checkpoint <- FALSE } else { checkpoint <- isTRUE(checkpoint_n_reps > 0) @@ -1056,13 +1159,16 @@ Experiment <- R6::R6Class( if (n_reps_cached > 0) { results <- private$.get_cached_results("fit", verbose = verbose) - fit_params <- private$.get_fit_params(simplify = TRUE) + fit_params <- private$.get_fit_params(wide_params = TRUE) fit_results <- get_matching_rows(id = fit_params, x = results) |> - dplyr::select(.rep, tidyselect::everything()) |> + dplyr::select( + .rep, .dgp_name, .method_name, private$.get_vary_params(), + tidyselect::everything() + ) |> dplyr::arrange(as.numeric(.rep), .dgp_name, .method_name) - if (save) { + if (save && save_in_bulk) { n_reps_cached <- min(n_reps_total, n_reps_cached) private$.save_results(fit_results, "fit", n_reps_cached, verbose) } @@ -1071,14 +1177,32 @@ Experiment <- R6::R6Class( if (verbose >= 1) { inform("==============================") } - if (use_cached && return_all_cached_reps) { - return(fit_results) + return(simplify_tibble(fit_results)) } else { - return(fit_results |> 
- dplyr::filter(as.numeric(.rep) <= !!n_reps_total)) + fit_results <- fit_results |> + dplyr::filter(as.numeric(.rep) <= !!n_reps_total) + return(simplify_tibble(fit_results)) } } + + if (!save_in_bulk) { + purrr::walk( + unique(fit_results$.rep), + function(i) { + rep_results <- fit_results |> + dplyr::filter(as.numeric(.rep) == !!i) + private$.save_result( + rep_results, "fit", sprintf("fit_result%s", i) + ) + } + ) + cached_params_tmp <- private$.update_cache(results_type = "fit", + n_reps = n_reps_cached) + saveRDS(cached_params_tmp, + file.path(save_dir, "experiment_cached_params.rds")) + saveRDS(self, file.path(save_dir, "experiment.rds")) + } } } @@ -1102,6 +1226,7 @@ Experiment <- R6::R6Class( # combos of (dgp_params_list, method_params_list) need to be rerun so need # to check cache ids when fitting new_fit_params <- NULL + cached_fit_params <- tibble::tibble() if (use_cached) { @@ -1112,6 +1237,12 @@ Experiment <- R6::R6Class( n_params <- nrow(new_fit_params) new_fit <- n_params == nrow(private$.get_fit_params()) + if (!save_in_bulk) { + cached_fit_params <- private$.get_fit_params( + cached_params, "cached", 0, wide_params = TRUE + ) + } + if (!new_fit) { # get only the new dgps and methods that are not cached @@ -1147,9 +1278,16 @@ Experiment <- R6::R6Class( dgp_list = dgp_list, method_list = method_list, new_fit_params = new_fit_params, + cached_fit_params = cached_fit_params, dgp_params_list = dgp_params_list, method_params_list = method_params_list, duplicate_param_names = duplicate_param_names, + record_time = record_time, + save_in_bulk = save_in_bulk, + save_per_rep = save_per_rep, + use_cached = use_cached && (nrow(cached_fit_params) > 0), + save_dir = save_dir, + simplify_tibble = simplify_tibble, do_call_wrapper = function(name, fun, params, @@ -1181,7 +1319,7 @@ Experiment <- R6::R6Class( environment(compute_fun) <- workenv # compute the experiment - compute_fun(n_reps, + compute_fun((1:n_reps) + n_reps_cached, future.globals, future.packages, 
future.seed) @@ -1190,10 +1328,7 @@ Experiment <- R6::R6Class( gc() new_fit_results <- new_fit_results |> - dplyr::mutate( - .rep = as.character(as.numeric(.rep) + n_reps_cached) - ) |> - simplify_tibble() + simplify_tibble(cols = c(".rep", ".dgp_name", ".method_name")) if (".err" %in% colnames(new_fit_results)) { @@ -1203,10 +1338,10 @@ Experiment <- R6::R6Class( .err, ~!is.null(.x) ) ) |> - dplyr::select(.dgp, .dgp_name, .dgp_params, + dplyr::select(.rep, .dgp, .dgp_name, .dgp_params, .method, .method_name, .method_params, .method_output, .err, .pid, .gc) |> - dplyr::arrange(.dgp_name, .method_name) + dplyr::arrange(as.numeric(.rep), .dgp_name, .method_name) # filter out errors new_fit_results <- new_fit_results |> @@ -1232,8 +1367,8 @@ Experiment <- R6::R6Class( "along with the params,\n `DGP`, `Method`, and ", "inputs/outputs before the error occurred." ), - partial_results = new_fit_results, - errors = errors + partial_results = simplify_tibble(new_fit_results), + errors = simplify_tibble(errors) ) } @@ -1248,37 +1383,45 @@ Experiment <- R6::R6Class( new_fit_results[[col]] <- NA } - fit_results <- new_fit_results |> - dplyr::select(.rep, .dgp_name, .method_name, - private$.get_vary_params(), - tidyselect::everything()) |> - dplyr::bind_rows(fit_results) |> - dplyr::arrange(as.numeric(.rep), .dgp_name, .method_name) + fit_results <- dplyr::bind_rows(new_fit_results, fit_results) - if (use_cached && !new_fit) { - fit_params_cached <- private$.get_fit_params(cached_params, "cached", - n_reps_total, TRUE) - fit_results_cached <- private$.get_cached_results( - "fit", verbose = verbose - ) - fit_results_cached <- get_matching_rows( - id = fit_params_cached, x = fit_results_cached - ) - if (verbose >= 1) { - inform("Appending cached results to the new fit results...") + if (save_in_bulk) { + if (use_cached && !new_fit) { + fit_params_cached <- private$.get_fit_params(cached_params, "cached", + n_reps_total, TRUE) + fit_results_cached <- private$.get_cached_results( + 
"fit", verbose = verbose + ) + fit_results_cached <- get_matching_rows( + id = fit_params_cached, x = fit_results_cached + ) + if (verbose >= 1) { + inform("Appending cached results to the new fit results...") + } + fit_params <- private$.get_fit_params(wide_params = TRUE) + fit_results <- dplyr::bind_rows(fit_results, fit_results_cached) + fit_results <- get_matching_rows(id = fit_params, x = fit_results) |> + dplyr::select( + .rep, .dgp_name, .method_name, private$.get_vary_params(), + tidyselect::everything() + ) |> + dplyr::arrange(as.numeric(.rep), .dgp_name, .method_name) } - fit_params <- private$.get_fit_params(simplify = TRUE) - fit_results <- dplyr::bind_rows(fit_results, fit_results_cached) - fit_results <- get_matching_rows(id = fit_params, x = fit_results) |> - dplyr::arrange(as.numeric(.rep), .dgp_name, .method_name) |> - dplyr::select(.rep, tidyselect::everything()) - } - if (save || checkpoint) { - private$.save_results( - fit_results, "fit", n_reps_cached, verbose, - checkpoint && n_reps_cached < n_reps_total - ) + if (save || checkpoint) { + private$.save_results( + fit_results, "fit", n_reps_cached, verbose, + checkpoint && n_reps_cached < n_reps_total + ) + } + } else { + if (save) { + cached_params_tmp <- private$.update_cache(results_type = "fit", + n_reps = n_reps_cached) + saveRDS(cached_params_tmp, + file.path(save_dir, "experiment_cached_params.rds")) + saveRDS(self, file.path(save_dir, "experiment.rds")) + } } if (verbose >= 1) { @@ -1293,10 +1436,22 @@ Experiment <- R6::R6Class( inform("==============================") } + if (save && !save_in_bulk) { + fit_results <- private$.get_cached_results("fit", verbose = 0) + } + + fit_results <- fit_results |> + dplyr::select( + .rep, .dgp_name, .method_name, private$.get_vary_params(), + tidyselect::everything() + ) |> + dplyr::arrange(as.numeric(.rep), .dgp_name, .method_name) if (use_cached && return_all_cached_reps) { - return(fit_results) + return(simplify_tibble(fit_results)) } else { - 
return(fit_results |> dplyr::filter(as.numeric(.rep) <= !!n_reps_total)) + fit_results <- fit_results |> + dplyr::filter(as.numeric(.rep) <= !!n_reps_total) + return(simplify_tibble(fit_results)) } }, @@ -1308,7 +1463,9 @@ Experiment <- R6::R6Class( #' @param use_cached Logical. If `TRUE`, find and return previously saved #' results. If cached results cannot be found, continue as if `use_cached` was #' `FALSE`. - #' @param save If `TRUE`, save outputs to disk. + #' @param save Logical. If `TRUE`, save outputs to disk. + #' @param record_time Logical. If `TRUE`, record the amount of time taken to + #' evaluate each `Evaluator`. #' @param verbose Level of verbosity. Default is 1, which prints out messages #' after major checkpoints in the experiment. If 2, prints additional #' debugging information for warnings and messages from user-defined functions @@ -1319,7 +1476,7 @@ Experiment <- R6::R6Class( #' @return A list of evaluation result tibbles, one for each #' `Evaluator`. evaluate = function(fit_results, use_cached = FALSE, save = FALSE, - verbose = 1, ...) { + record_time = FALSE, verbose = 1, ...) 
{ evaluator_list <- private$.get_obj_list("evaluator") evaluator_names <- names(evaluator_list) if (length(evaluator_list) == 0) { @@ -1330,6 +1487,13 @@ Experiment <- R6::R6Class( return(NULL) } + save_in_bulk <- private$.save_in_bulk[["eval"]] + if (!private$.has_vary_across()) { + save_dir <- private$.save_dir + } else { + save_dir <- private$.get_vary_across_dir() + } + n_reps <- max(as.numeric(fit_results$.rep)) private$.update_fit_params() if (use_cached) { @@ -1337,12 +1501,18 @@ Experiment <- R6::R6Class( is_cached <- private$.is_fully_cached(cached_params, "eval", n_reps) if (isTRUE(is_cached)) { results <- private$.get_cached_results("eval", verbose = verbose) - results <- results[names(private$.get_obj_list("evaluator"))] - if (save) { - if (!setequal(names(private$.get_obj_list("evaluator")), - names(results))) { + cached_eval_names <- names(results) + results <- results[evaluator_names] + if (save && save_in_bulk) { + if (!setequal(evaluator_names, cached_eval_names)) { private$.save_results(results, "eval", n_reps, verbose) } + } else if (save && !save_in_bulk) { + for (fname in list.files(file.path(save_dir, "eval_results"), pattern = ".rds")) { + if (!(basename(fname) %in% paste0(cached_eval_names, ".rds"))) { + file.remove(file.path(save_dir, "eval_results", fname)) + } + } } if (verbose >= 1) { inform("==============================") @@ -1356,6 +1526,15 @@ Experiment <- R6::R6Class( } } + cached_eval_names <- setdiff(evaluator_names, names(evaluator_list)) + if (save && !save_in_bulk) { + for (fname in list.files(file.path(save_dir, "eval_results"), pattern = ".rds")) { + if (!(basename(fname) %in% paste0(cached_eval_names, ".rds"))) { + file.remove(file.path(save_dir, "eval_results", fname)) + } + } + } + if (verbose >= 1) { inform(sprintf("Evaluating %s...", self$name)) start_time <- Sys.time() @@ -1363,28 +1542,59 @@ Experiment <- R6::R6Class( eval_results <- purrr::map2( names(evaluator_list), evaluator_list, function(name, evaluator) { - 
do_call_handler( + eval_start_time <- Sys.time() + eval_result <- do_call_handler( name, evaluator$evaluate, list(fit_results = fit_results, vary_params = private$.get_vary_params()), verbose ) + eval_time <- difftime(Sys.time(), eval_start_time, units = "mins") + if (record_time) { + attr(eval_result, ".time_taken") <- eval_time + } + if (save && !save_in_bulk) { + private$.save_result(eval_result, "eval", name) + return(NULL) + } else { + return(eval_result) + } } ) - names(eval_results) <- names(evaluator_list) - if (use_cached && !setequal(names(evaluator_list), evaluator_names)) { - eval_results_cached <- private$.get_cached_results("eval", - verbose = verbose) - if (verbose >= 1) { - inform("Appending cached results to the new evaluation results...") + if (verbose >= 1) { + if (length(cached_eval_names) > 0) { + inform( + sprintf( + "Using cached eval results for: %s", + paste0(cached_eval_names, collapse = ", ") + ) + ) + } + } + if (save && !save_in_bulk) { + eval_results <- private$.get_cached_results("eval", verbose = 0)[ + evaluator_names + ] + cached_params <- private$.update_cache("eval", n_reps = n_reps) + saveRDS(cached_params, + file.path(save_dir, "experiment_cached_params.rds")) + saveRDS(self, file.path(save_dir, "experiment.rds")) + } else { + names(eval_results) <- names(evaluator_list) + if (use_cached && !setequal(names(evaluator_list), evaluator_names)) { + eval_results_cached <- private$.get_cached_results("eval", + verbose = verbose) + if (verbose >= 1) { + inform("Appending cached results to the new evaluation results...") + } + eval_results <- c(eval_results, eval_results_cached)[evaluator_names] } - eval_results <- c(eval_results, eval_results_cached)[evaluator_names] } if (verbose >= 1) { inform(sprintf("Evaluation completed | time taken: %f minutes", difftime(Sys.time(), start_time, units = "mins"))) } - if (save) { + if (save && save_in_bulk) { private$.save_results(eval_results, "eval", n_reps, verbose) } if (verbose >= 1) { @@ 
-1404,7 +1614,9 @@ Experiment <- R6::R6Class( #' @param use_cached Logical. If `TRUE`, find and return previously saved #' results. If cached results cannot be found, continue as if `use_cached` was #' `FALSE`. - #' @param save If `TRUE`, save outputs to disk. + #' @param save Logical. If `TRUE`, save outputs to disk. + #' @param record_time Logical. If `TRUE`, record the amount of time taken to + #' visualize each `Visualizer`. #' @param verbose Level of verbosity. Default is 1, which prints out messages #' after major checkpoints in the experiment. If 2, prints additional #' debugging information for warnings and messages from user-defined functions @@ -1414,7 +1626,9 @@ Experiment <- R6::R6Class( #' #' @return A list of visualizations, one for each `Visualizer`. visualize = function(fit_results, eval_results = NULL, - use_cached = FALSE, save = FALSE, verbose = 1, ...) { + use_cached = FALSE, save = FALSE, + record_time = FALSE, + verbose = 1, ...) { visualizer_list <- private$.get_obj_list("visualizer") visualizer_names <- names(visualizer_list) @@ -1426,6 +1640,13 @@ Experiment <- R6::R6Class( return(NULL) } + save_in_bulk <- private$.save_in_bulk[["viz"]] + if (!private$.has_vary_across()) { + save_dir <- private$.save_dir + } else { + save_dir <- private$.get_vary_across_dir() + } + n_reps <- max(as.numeric(fit_results$.rep)) private$.update_fit_params() if (use_cached) { @@ -1433,12 +1654,18 @@ Experiment <- R6::R6Class( is_cached <- private$.is_fully_cached(cached_params, "viz", n_reps) if (isTRUE(is_cached)) { results <- private$.get_cached_results("viz", verbose = verbose) - results <- results[names(private$.get_obj_list("visualizer"))] - if (save) { - if (!setequal(names(private$.get_obj_list("visualizer")), - names(results))) { + cached_viz_names <- names(results) + results <- results[visualizer_names] + if (save && save_in_bulk) { + if (!setequal(visualizer_names, cached_viz_names)) { private$.save_results(results, "viz", n_reps, verbose) } + } else 
if (save && !save_in_bulk) { + for (fname in list.files(file.path(save_dir, "viz_results"), pattern = ".rds")) { + if (!(basename(fname) %in% paste0(cached_viz_names, ".rds"))) { + file.remove(file.path(save_dir, "viz_results", fname)) + } + } } if (verbose >= 1) { inform("==============================") @@ -1452,6 +1679,15 @@ Experiment <- R6::R6Class( } } + cached_viz_names <- setdiff(visualizer_names, names(visualizer_list)) + if (save && !save_in_bulk) { + for (fname in list.files(file.path(save_dir, "viz_results"), pattern = ".rds")) { + if (!(basename(fname) %in% paste0(cached_viz_names, ".rds"))) { + file.remove(file.path(save_dir, "viz_results", fname)) + } + } + } + if (verbose >= 1) { inform(sprintf("Visualizing %s...", self$name)) start_time <- Sys.time() @@ -1459,30 +1695,60 @@ Experiment <- R6::R6Class( viz_results <- purrr::map2( names(visualizer_list), visualizer_list, function(name, visualizer) { - do_call_handler( + viz_start_time <- Sys.time() + viz_result <- do_call_handler( name, visualizer$visualize, list(fit_results = fit_results, eval_results = eval_results, vary_params = private$.get_vary_params()), verbose ) + viz_time <- difftime(Sys.time(), viz_start_time, units = "mins") + if (record_time) { + attr(viz_result, ".time_taken") <- viz_time + } + if (save && !save_in_bulk) { + private$.save_result(viz_result, "viz", name) + return(NULL) + } else { + return(viz_result) + } } ) - names(viz_results) <- names(visualizer_list) - if (use_cached && !setequal(names(visualizer_list), visualizer_names)) { - viz_results_cached <- private$.get_cached_results("viz", - verbose = verbose) - if (verbose >= 1) { - inform("Appending cached results to the new visualization results...") + if (verbose >= 1) { + if (length(cached_viz_names) > 0) { + inform( + sprintf( + "Using cached viz results for: %s", + paste0(cached_viz_names, collapse = ", ") + ) + ) + } + } + if (save && !save_in_bulk) { + viz_results <- private$.get_cached_results("viz", verbose = 0)[
+ visualizer_names + ] + cached_params <- private$.update_cache("viz", n_reps = n_reps) + saveRDS(cached_params, + file.path(save_dir, "experiment_cached_params.rds")) + saveRDS(self, file.path(save_dir, "experiment.rds")) + } else { + names(viz_results) <- names(visualizer_list) + if (use_cached && !setequal(names(visualizer_list), visualizer_names)) { + viz_results_cached <- private$.get_cached_results("viz", + verbose = verbose) + if (verbose >= 1) { + inform("Appending cached results to the new visualization results...") + } + viz_results <- c(viz_results, viz_results_cached)[visualizer_names] } - viz_results <- c(viz_results, - viz_results_cached)[visualizer_names] } if (verbose >= 1) { inform(sprintf("Visualization completed | time taken: %f minutes", difftime(Sys.time(), start_time, units = "mins"))) } - if (save) { + if (save && save_in_bulk) { private$.save_results(viz_results, "viz", n_reps, verbose) } if (verbose >= 1) { @@ -1903,8 +2169,18 @@ Experiment <- R6::R6Class( #' `results_type = "viz"`, and the experiment parameters used in #' the cache if `results_type = "experiment_cached_params"`. 
get_cached_results = function(results_type, verbose = 0) { - return(private$.get_cached_results(results_type = results_type, - verbose = verbose)) + cached_results <- private$.get_cached_results( + results_type = results_type, verbose = verbose + ) + if (results_type == "fit") { + cached_results <- simplify_tibble(cached_results) |> + dplyr::select( + .rep, .dgp_name, .method_name, private$.get_vary_params(), + tidyselect::everything() + ) |> + dplyr::arrange(as.numeric(.rep), .dgp_name, .method_name) + } + return(cached_results) }, #' @description Set R Markdown options for `Evaluator` or `Visualizer` diff --git a/R/globals.R b/R/globals.R index 305d10c..6c7d670 100644 --- a/R/globals.R +++ b/R/globals.R @@ -2,7 +2,10 @@ utils::globalVariables( c( "where", ":=", "!!", "verbose", "dgp_list", "method_list", - "new_fit_params", "dgp_params_list", "method_params_list", - "duplicate_param_names", "do_call_wrapper" + "new_fit_params", "cached_fit_params", + "dgp_params_list", "method_params_list", + "duplicate_param_names", "record_time", + "save_in_bulk", "save_per_rep", "use_cached", "save_dir", + "do_call_wrapper" ) ) diff --git a/R/utils.R b/R/utils.R index ca4c95f..a81390e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -99,17 +99,22 @@ list_to_tibble <- function(lst) { #' the list is a scalar value. #' #' @param tbl `tibble::tibble` to simplify. +#' @param cols Character vector of column names to simplify. If NULL (default), +#' all columns are eligible to be simplified. #' @param empty_as_na If TRUE (default), 0-length values will be treated as NA. #' #' @return A tibble that has been "simplified". 
-#' @keywords internal -simplify_tibble <- function(tbl, empty_as_na = TRUE) { +#' @export +simplify_tibble <- function(tbl, cols = NULL, empty_as_na = TRUE) { + if (is.null(cols)) { + cols <- colnames(tbl) + } tbl_list <- purrr::imap( tbl, function(col, col_name) { - if (!is.list(col)) { + if (!is.list(col) || !(col_name %in% cols)) { # only list cols need simplification tbl_col <- tibble::tibble(col) colnames(tbl_col) <- col_name @@ -330,7 +335,7 @@ compare_tibble_rows <- function(x, y, op = c("equal", "contained_in")) { return(FALSE) } } - duplicated_rows <- dplyr::bind_rows(x, y) |> + duplicated_rows <- rbind(x, y) |> duplicated(fromLast = TRUE) return(all(duplicated_rows[1:nrow(x)])) } @@ -357,6 +362,9 @@ get_matching_rows <- function(id, x) { if ((!tibble::is_tibble(id)) || (!tibble::is_tibble(x))) { abort("id and x must be tibbles.") } + if (nrow(id) == 0) { + return(tibble::tibble()) + } id_cols <- colnames(id) id_coltypes <- purrr::map_chr(id, class) if (anyDuplicated(id)) { diff --git a/_pkgdown.yml b/_pkgdown.yml index 3b95587..0b730b3 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -137,3 +137,4 @@ reference: - list_col_to_chr - load_all - add_na_counts + - simplify_tibble diff --git a/man/Experiment.Rd b/man/Experiment.Rd index f396b4d..8685149 100644 --- a/man/Experiment.Rd +++ b/man/Experiment.Rd @@ -90,6 +90,7 @@ Initialize a new \code{Experiment} object. future.packages = NULL, clone_from = NULL, save_dir = NULL, + save_in_bulk = TRUE, ... )}\if{html}{\out{}} } @@ -128,6 +129,15 @@ in a directory called "results" with a sub-directory named after \code{Experiment$name} when using \code{\link[=run_experiment]{run_experiment()}} or \code{\link[=fit_experiment]{fit_experiment()}} with \code{save=TRUE}.} +\item{\code{save_in_bulk}}{A logical, indicating whether or not to save the +fit, evaluator, and visualizer outputs, each as a single bulk .rds file +(i.e., as \code{fit_results.rds}, \code{eval_results.rds}, \code{viz_results.rds}). 
+Default is \code{TRUE}. If \code{FALSE}, each fit replicate is saved as a +separate .rds file while each evaluator/visualizer is saved as a +separate .rds file. One can alternatively specify a character vector +with some subset of "fit", "eval", and/or "viz", indicating the +elements to save in bulk to disk.} + \item{\code{...}}{Not used.} } \if{html}{\out{}} @@ -152,6 +162,7 @@ and visualizing). use_cached = FALSE, return_all_cached_reps = FALSE, save = FALSE, + record_time = FALSE, checkpoint_n_reps = 0, verbose = 1, ... @@ -192,7 +203,16 @@ cached replicates from the (\code{DGP}, \code{Method}) combinations in the only the \code{n_reps} replicates are used when evaluating and visualizing the \code{Experiment}.} -\item{\code{save}}{If \code{TRUE}, save outputs to disk.} +\item{\code{save}}{A logical, indicating whether or not to save the fit, +evaluator, and visualizer outputs to disk. Alternatively, one can +specify a character vector with some subset of "fit", "eval", and/or +"viz", indicating the elements to save to disk.} + +\item{\code{record_time}}{A logical, indicating whether or not to record the +time taken to run each \code{Method}, \code{Evaluator}, and \code{Visualizer} in the +\code{Experiment}. Alternatively, one can specify a character vector with +some subset of "fit", "eval", and/or "viz", indicating the elements for +which to record the time taken.} \item{\code{checkpoint_n_reps}}{The number of experiment replicates to compute before saving results to disk. If 0 (the default), no checkpoints are @@ -214,7 +234,7 @@ following entries: \describe{ \item{fit_results}{A tibble containing results from the \code{fit} method. 
In addition to results columns, has columns named '.rep', '.dgp_name', -'.method_name', and the \code{vary_across} parameter names if applicable.} +'.method_name', '.time_taken', and the \code{vary_across} parameter names if applicable.} \item{eval_results}{A list of tibbles containing results from the \code{evaluate} method, which evaluates each \code{Evaluator} in the \code{Experiment}. Length of list is equivalent to the number of @@ -275,6 +295,7 @@ Fit \code{Methods} in the \code{Experiment} across all use_cached = FALSE, return_all_cached_reps = FALSE, save = FALSE, + record_time = FALSE, checkpoint_n_reps = 0, verbose = 1, ... @@ -313,7 +334,10 @@ returns fit results for the requested \code{n_reps} plus any additional cached replicates from the (\code{DGP}, \code{Method}) combinations in the \code{Experiment}.} -\item{\code{save}}{If \code{TRUE}, save outputs to disk.} +\item{\code{save}}{Logical. If \code{TRUE}, save outputs to disk.} + +\item{\code{record_time}}{Logical. If \code{TRUE}, record the amount of time taken to +fit each \code{Method} per replicate.} \item{\code{checkpoint_n_reps}}{The number of experiment replicates to compute before saving results to disk. If 0 (the default), no checkpoints are @@ -334,7 +358,8 @@ functions. See \code{\link[future.apply:future_lapply]{future.apply::future_lapp \subsection{Returns}{ A tibble containing the results from fitting all \code{Methods} across all \code{DGPs} for \code{n_reps} repetitions. In addition to -results columns, has columns named '.rep', '.dgp_name', '.method_name', and the +results columns, has columns named '.rep', '.dgp_name', '.method_name', +'.time_taken' (if \code{record_time = TRUE}), and the \code{vary_across} parameter names if applicable. } } @@ -349,6 +374,7 @@ Evaluate the performance of method(s) across all fit_results, use_cached = FALSE, save = FALSE, + record_time = FALSE, verbose = 1, ... 
)}\if{html}{\out{}} @@ -363,7 +389,10 @@ Evaluate the performance of method(s) across all results. If cached results cannot be found, continue as if \code{use_cached} was \code{FALSE}.} -\item{\code{save}}{If \code{TRUE}, save outputs to disk.} +\item{\code{save}}{Logical. If \code{TRUE}, save outputs to disk.} + +\item{\code{record_time}}{Logical. If \code{TRUE}, record the amount of time taken to +evaluate each \code{Evaluator}.} \item{\code{verbose}}{Level of verbosity. Default is 1, which prints out messages after major checkpoints in the experiment. If 2, prints additional @@ -393,6 +422,7 @@ visualization results. eval_results = NULL, use_cached = FALSE, save = FALSE, + record_time = FALSE, verbose = 1, ... )}\if{html}{\out{}} @@ -410,7 +440,10 @@ visualization results. results. If cached results cannot be found, continue as if \code{use_cached} was \code{FALSE}.} -\item{\code{save}}{If \code{TRUE}, save outputs to disk.} +\item{\code{save}}{Logical. If \code{TRUE}, save outputs to disk.} + +\item{\code{record_time}}{Logical. If \code{TRUE}, record the amount of time taken to +visualize each \code{Visualizer}.} \item{\code{verbose}}{Level of verbosity. Default is 1, which prints out messages after major checkpoints in the experiment. If 2, prints additional diff --git a/man/compute_dgp.Rd b/man/compute_dgp.Rd index 4d1144b..412cccc 100644 --- a/man/compute_dgp.Rd +++ b/man/compute_dgp.Rd @@ -4,7 +4,7 @@ \alias{compute_dgp} \title{Distribute simulation computation by DGPs.} \usage{ -compute_dgp(n_reps, future.globals, future.packages, future.seed, ...) +compute_dgp(reps, future.globals, future.packages, future.seed, ...) } \description{ Distribute simulation computation by DGPs. 
diff --git a/man/compute_dgp_method.Rd b/man/compute_dgp_method.Rd index 0264644..fe37374 100644 --- a/man/compute_dgp_method.Rd +++ b/man/compute_dgp_method.Rd @@ -4,7 +4,7 @@ \alias{compute_dgp_method} \title{Doubly nested distributed simulation computation nested by DGPs and Methods.} \usage{ -compute_dgp_method(n_reps, future.globals, future.packages, future.seed, ...) +compute_dgp_method(reps, future.globals, future.packages, future.seed, ...) } \description{ Doubly nested distributed simulation computation nested by DGPs and Methods. diff --git a/man/compute_dgp_method_reps.Rd b/man/compute_dgp_method_reps.Rd index ca24967..e15a33a 100644 --- a/man/compute_dgp_method_reps.Rd +++ b/man/compute_dgp_method_reps.Rd @@ -6,7 +6,7 @@ reps.} \usage{ compute_dgp_method_reps( - n_reps, + reps, future.globals, future.packages, future.seed, diff --git a/man/compute_dgp_rep.Rd b/man/compute_dgp_rep.Rd index bd1c5b4..80b29e2 100644 --- a/man/compute_dgp_rep.Rd +++ b/man/compute_dgp_rep.Rd @@ -4,7 +4,7 @@ \alias{compute_dgp_rep} \title{Doubly nested distributed simulation computation nested by DGPs and reps.} \usage{ -compute_dgp_rep(n_reps, future.globals, future.packages, future.seed, ...) +compute_dgp_rep(reps, future.globals, future.packages, future.seed, ...) } \description{ Doubly nested distributed simulation computation nested by DGPs and reps. diff --git a/man/compute_method.Rd b/man/compute_method.Rd index 8fbbb55..2ac759f 100644 --- a/man/compute_method.Rd +++ b/man/compute_method.Rd @@ -4,7 +4,7 @@ \alias{compute_method} \title{Distribute simulation computation by Methods.} \usage{ -compute_method(n_reps, future.globals, future.packages, future.seed, ...) +compute_method(reps, future.globals, future.packages, future.seed, ...) } \description{ Distribute simulation computation by Methods. 
diff --git a/man/compute_method_rep.Rd b/man/compute_method_rep.Rd index 980b952..2c76a34 100644 --- a/man/compute_method_rep.Rd +++ b/man/compute_method_rep.Rd @@ -4,7 +4,7 @@ \alias{compute_method_rep} \title{Doubly nested distributed simulation computation nested by Methods and reps.} \usage{ -compute_method_rep(n_reps, future.globals, future.packages, future.seed, ...) +compute_method_rep(reps, future.globals, future.packages, future.seed, ...) } \description{ Doubly nested distributed simulation computation nested by Methods and reps. diff --git a/man/compute_rep.Rd b/man/compute_rep.Rd index 252855f..7063332 100644 --- a/man/compute_rep.Rd +++ b/man/compute_rep.Rd @@ -4,7 +4,7 @@ \alias{compute_rep} \title{Distribute simulation computation by replicates.} \usage{ -compute_rep(n_reps, future.globals, future.packages, future.seed, ...) +compute_rep(reps, future.globals, future.packages, future.seed, ...) } \description{ Distribute simulation computation by replicates. diff --git a/man/simplify_tibble.Rd b/man/simplify_tibble.Rd index e361a69..2af2058 100644 --- a/man/simplify_tibble.Rd +++ b/man/simplify_tibble.Rd @@ -4,11 +4,14 @@ \alias{simplify_tibble} \title{Simplify tibble.} \usage{ -simplify_tibble(tbl, empty_as_na = TRUE) +simplify_tibble(tbl, cols = NULL, empty_as_na = TRUE) } \arguments{ \item{tbl}{\code{tibble::tibble} to simplify.} +\item{cols}{Character vector of column names to simplify. If NULL (default), +all columns are eligible to be simplified.} + \item{empty_as_na}{If TRUE (default), 0-length values will be treated as NA.} } \value{ @@ -18,4 +21,3 @@ A tibble that has been "simplified". Simplify or unlist list columns in tibble if each element in the list is a scalar value. 
} -\keyword{internal} diff --git a/tests/testthat/setup-custom-expectations.R b/tests/testthat/setup-custom-expectations.R index e583ba9..d3d1808 100644 --- a/tests/testthat/setup-custom-expectations.R +++ b/tests/testthat/setup-custom-expectations.R @@ -198,7 +198,8 @@ expect_partial_results_and_errors <- function(err) { c(".rep", ".dgp_name", ".method_name") %in% names(err$partial_results) )) - expect_named(err$errors, c(".dgp", + expect_named(err$errors, c(".rep", + ".dgp", ".dgp_name", ".dgp_params", ".method", diff --git a/tests/testthat/test-caching.R b/tests/testthat/test-caching.R index 870c772..e3bb907 100644 --- a/tests/testthat/test-caching.R +++ b/tests/testthat/test-caching.R @@ -34,6 +34,13 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { add_method(method1, name = "Method1") |> add_evaluator(fit_results_eval, name = "Evaluator1") |> add_visualizer(fit_plot, name = "Visualizer1") + exp <- create_experiment( + name = "test-cache-save-per-rep", save_in_bulk = FALSE + ) |> + add_dgp(dgp1, name = "DGP1") |> + add_method(method1, name = "Method1") |> + add_evaluator(fit_results_eval, name = "Evaluator1") |> + add_visualizer(fit_plot, name = "Visualizer1") # remove cache if (dir.exists(file.path("results", "test-cache"))) { @@ -42,53 +49,101 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { file.remove(fname) } } + if (dir.exists(file.path("results", "test-cache-save-per-rep"))) { + for (fname in list.files(file.path("results", "test-cache-save-per-rep"), + recursive = TRUE, full.names = TRUE)) { + file.remove(fname) + } + } - # basic cache usage verbose <- 0 - results0 <- experiment$run(n_reps = 10, use_cached = TRUE, save = FALSE, + n_reps <- 2 + + # basic cache usage + results0 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = FALSE, verbose = verbose) - results1 <- experiment$run(n_reps = 10, save = TRUE, verbose = verbose) + results1 <- experiment$run(n_reps = n_reps, save = TRUE, 
verbose = verbose) expect_false(identical(results0$fit_results, results1$fit_results)) - results2 <- experiment$run(n_reps = 10, use_cached = TRUE, verbose = verbose) + results2 <- experiment$run(n_reps = n_reps, use_cached = TRUE, verbose = verbose) expect_equal(results1, results2) + res0 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = FALSE, + verbose = verbose) + res1 <- exp$run(n_reps = n_reps, save = TRUE, verbose = verbose) + expect_false(identical(res0$fit_results, res1$fit_results)) + res2 <- exp$run(n_reps = n_reps, use_cached = TRUE, verbose = verbose) + expect_equal(res1, res2) + # caching when adding objects experiment |> add_dgp(dgp2, "DGP2") - results3 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results3 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) - expect_equal(nrow(results3$fit_results), 20) + expect_equal(nrow(results3$fit_results), 2 * n_reps) expect_equal(results2$fit_results, results3$fit_results |> dplyr::filter(.dgp_name == "DGP1")) experiment |> add_method(method2, "Method2") - results4 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results4 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) - expect_equal(nrow(results4$fit_results), 40) + expect_equal(nrow(results4$fit_results), 2 * 2 * n_reps) experiment |> add_evaluator(vary_params_eval, "Eval2") - results5 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results5 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) expect_equal(results4$fit_results, results5$fit_results) expect_equal(results4$eval_results$Evaluator1, results5$eval_results$Evaluator1) experiment |> add_visualizer(eval_plot, "Plot2") - results6 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results6 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) expect_equal(results4$fit_results, 
results6$fit_results) expect_equal(results5$eval_results, results6$eval_results) expect_equal(results5$viz_results$Visualizer1, results6$viz_results$Visualizer1) - results7 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results7 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) expect_equal(results6, results7) + exp |> add_dgp(dgp2, "DGP2") + res3 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(nrow(res3$fit_results), 2 * n_reps) + expect_equal(res2$fit_results, + res3$fit_results |> dplyr::filter(.dgp_name == "DGP1")) + exp |> add_method(method2, "Method2") + res4 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(nrow(res4$fit_results), 2 * 2 * n_reps) + exp |> add_evaluator(vary_params_eval, "Eval2") + res5 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(res4$fit_results, res5$fit_results) + expect_equal(res4$eval_results$Evaluator1, + res5$eval_results$Evaluator1) + exp |> add_visualizer(eval_plot, "Plot2") + res6 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(res4$fit_results, res6$fit_results) + expect_equal(res5$eval_results, res6$eval_results) + expect_equal(res5$viz_results$Visualizer1, + res6$viz_results$Visualizer1) + res7 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(res6, res7) + # caching when update objects does not change original object experiment |> update_dgp(dgp2, "DGP2") - results8 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results8 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) + expect_equal(results8, results7) + + exp |> update_dgp(dgp2, "DGP2") + res8 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(res8, res7) # caching when updating objects 
that actually change experiment |> update_dgp(dgp1, "DGP2") fit_cols <- colnames(results7$fit_results) - results8 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results8 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) results8$fit_results <- results8$fit_results |> dplyr::select({{fit_cols}}) expect_equal(nrow(results7$fit_results), nrow(results8$fit_results)) @@ -98,82 +153,195 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { expect_false(identical(results8$eval_results, results7$eval_results)) expect_false(identical(results8$viz_results, results7$viz_results)) experiment |> update_method(method1, "Method2") - results9 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results9 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) expect_equal(nrow(results7$fit_results), nrow(results9$fit_results)) expect_false(identical(results8$eval_results, results7$eval_results)) expect_false(identical(results8$viz_results, results7$viz_results)) experiment |> update_evaluator(fit_results_eval, "Eval2") - results10 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results10 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) expect_equal(results10$fit_results, results9$fit_results) expect_equal(length(results10$eval_results), 2) experiment |> update_visualizer(fit_plot, "Plot2") - results11 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results11 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) expect_equal(results11$fit_results, results10$fit_results) expect_equal(results11$eval_results, results10$eval_results) expect_equal(length(results11$viz_results), 2) + exp |> update_dgp(dgp1, "DGP2") + fit_cols <- colnames(res7$fit_results) + res8 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + res8$fit_results <- 
res8$fit_results |> dplyr::select({{fit_cols}}) + expect_equal(nrow(res7$fit_results), nrow(res8$fit_results)) + expect_false(identical(res7$fit_results, res8$fit_results)) + expect_equal(res8$fit_results |> dplyr::filter(.dgp_name == "DGP1"), + res7$fit_results |> dplyr::filter(.dgp_name == "DGP1")) + expect_false(identical(res8$eval_results, res7$eval_results)) + expect_false(identical(res8$viz_results, res7$viz_results)) + exp |> update_method(method1, "Method2") + res9 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(nrow(res7$fit_results), nrow(res9$fit_results)) + expect_false(identical(res8$eval_results, res7$eval_results)) + expect_false(identical(res8$viz_results, res7$viz_results)) + exp |> update_evaluator(fit_results_eval, "Eval2") + res10 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(res10$fit_results, res9$fit_results) + expect_equal(length(res10$eval_results), 2) + exp |> update_visualizer(fit_plot, "Plot2") + res11 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(res11$fit_results, res10$fit_results) + expect_equal(res11$eval_results, res10$eval_results) + expect_equal(length(res11$viz_results), 2) + # caching when removing objects experiment |> remove_dgp("DGP2") - results12 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, - verbose = verbose) + results12 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) expect_equal(results12$fit_results, results11$fit_results |> dplyr::filter(.dgp_name == "DGP1")) expect_false(identical(results12$eval_results, results11$eval_results)) expect_false(identical(results12$viz_results, results11$viz_results)) experiment |> remove_method("Method2") - results13 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, - verbose = verbose) + results13 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + 
verbose = verbose) expect_equal(results13$fit_results, results12$fit_results |> dplyr::filter(.method_name == "Method1")) expect_false(identical(results13$eval_results, results12$eval_results)) expect_false(identical(results13$viz_results, results12$viz_results)) experiment |> remove_evaluator("Eval2") - results14 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, - verbose = verbose) + results14 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) expect_equal(results14$fit_results, results13$fit_results) expect_equal(names(results14$eval_results), "Evaluator1") expect_equal(results14$eval_results, results13$eval_results[1]) experiment |> remove_visualizer("Plot2") - results15 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, - verbose = verbose) + results15 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) expect_equal(results15$fit_results, results14$fit_results) expect_equal(results15$eval_results, results14$eval_results) expect_equal(names(results15$viz_results), "Visualizer1") expect_equal(results15$viz_results, results14$viz_results[1]) + exp |> remove_dgp("DGP2") + res12 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(res12$fit_results, + res11$fit_results |> dplyr::filter(.dgp_name == "DGP1")) + expect_false(identical(res12$eval_results, res11$eval_results)) + expect_false(identical(res12$viz_results, res11$viz_results)) + exp |> remove_method("Method2") + res13 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(res13$fit_results, + res12$fit_results |> dplyr::filter(.method_name == "Method1")) + expect_false(identical(res13$eval_results, res12$eval_results)) + expect_false(identical(res13$viz_results, res12$viz_results)) + exp |> remove_evaluator("Eval2") + res14 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + 
expect_equal(res14$fit_results, res13$fit_results) + expect_equal(names(res14$eval_results), "Evaluator1") + expect_equal(res14$eval_results, res13$eval_results[1]) + exp |> remove_visualizer("Plot2") + res15 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(res15$fit_results, res14$fit_results) + expect_equal(res15$eval_results, res14$eval_results) + expect_equal(names(res15$viz_results), "Visualizer1") + expect_equal(res15$viz_results, res14$viz_results[1]) + # caching when vary across experiment |> add_vary_across(.dgp = "DGP1", x = c(0, 1)) - results1 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results1 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) - expect_equal(nrow(results1$fit_results), 10 * 2) + expect_equal(nrow(results1$fit_results), 2 * n_reps) experiment |> add_vary_across(.method = "Method1", y = c(0, 1)) - results2 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results2 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) - expect_equal(nrow(results2$fit_results), 10 * 2 * 2) + expect_equal(nrow(results2$fit_results), 2 * 2 * n_reps) experiment |> remove_vary_across(method = "Method1") - results3 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results3 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) - expect_equal(nrow(results3$fit_results), 10 * 2) + expect_equal(nrow(results3$fit_results), 2 * n_reps) expect_true(identical(results1$fit_results, results3$fit_results)) experiment |> update_vary_across(.dgp = "DGP1", x = c(0, 2)) - results4 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results4 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) expect_equal(results3$fit_results |> dplyr::filter(x == 0), results4$fit_results |> dplyr::filter(x == 0)) experiment |> update_vary_across(.dgp = 
"DGP1", x = list(0, 2, 4)) - results5 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results5 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) expect_equal(results4$fit_results |> dplyr::filter(x %in% c(0, 2)), results5$fit_results |> dplyr::filter(x %in% c(0, 2))) experiment |> add_vary_across(.method = "Method1", y = list("a", "b")) - results6 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results6 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(nrow(results6$fit_results), 3 * 2 * n_reps) + experiment |> update_vary_across(.method = "Method1", y = list("a", 1)) + results6b <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal( + results6$fit_results |> + dplyr::filter(y %in% c("a")), + results6b$fit_results |> + dplyr::mutate( + y = purrr::map_chr(y, as.character) + ) |> + dplyr::filter(y %in% c("a")) + ) + expect_equal(nrow(results6b$fit_results), 3 * 2 * n_reps) + + exp |> add_vary_across(.dgp = "DGP1", x = c(0, 1)) + res1 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) - expect_equal(nrow(results6$fit_results), 10 * 3 * 2) + expect_equal(nrow(res1$fit_results), 2 * n_reps) + exp |> add_vary_across(.method = "Method1", y = c(0, 1)) + res2 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(nrow(res2$fit_results), 2 * 2 * n_reps) + exp |> remove_vary_across(method = "Method1") + res3 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(nrow(res3$fit_results), 2 * n_reps) + expect_true(identical(res1$fit_results, res3$fit_results)) + exp |> update_vary_across(.dgp = "DGP1", x = c(0, 2)) + res4 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(res3$fit_results |> dplyr::filter(x == 0), + res4$fit_results |> dplyr::filter(x == 0)) 
+ exp |> update_vary_across(.dgp = "DGP1", x = list(0, 2, 4)) + res5 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(res4$fit_results |> dplyr::filter(x %in% c(0, 2)), + res5$fit_results |> dplyr::filter(x %in% c(0, 2))) + exp |> add_vary_across(.method = "Method1", y = list("a", "b")) + res6 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(nrow(res6$fit_results), 3 * 2 * n_reps) + exp |> update_vary_across(.method = "Method1", y = list("a", 1)) + res6b <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal( + res6$fit_results |> + dplyr::filter(y %in% c("a")), + res6b$fit_results |> + dplyr::mutate( + y = purrr::map_chr(y, as.character) + ) |> + dplyr::filter(y %in% c("a")) + ) + expect_equal(nrow(res6b$fit_results), 3 * 2 * n_reps) # check caching when n changes + results6b <- experiment$run(n_reps = 10, use_cached = FALSE, save = TRUE, + verbose = verbose) results7 <- experiment$run(n_reps = 4, use_cached = TRUE, save = TRUE, verbose = verbose) extra_reps_fpath <- file.path( @@ -181,25 +349,40 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { "fit_results_extra_cached_reps.rds" ) extra_fit_results7 <- readRDS(extra_reps_fpath) - expect_equal(nrow(results7$fit_results),4 * 3 * 2) + expect_equal(nrow(results7$fit_results), 4 * 3 * 2) expect_equal(nrow(extra_fit_results7), 6 * 3 * 2) expect_equal(results7$fit_results, - results6$fit_results |> dplyr::filter(as.numeric(.rep) <= 4)) - expect_equal(extra_fit_results7, - results6$fit_results |> dplyr::filter(as.numeric(.rep) > 4)) + results6b$fit_results |> dplyr::filter(as.numeric(.rep) <= 4)) + expect_equal(simplify_tibble(extra_fit_results7), + results6b$fit_results |> dplyr::filter(as.numeric(.rep) > 4)) results8 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, verbose = verbose) - fit_results8 <- readRDS( - file.path("results", 
"test-cache", "DGP1-Method1", "Varying x-y", - "fit_results.rds") - ) + fit_results8 <- experiment$get_cached_results("fit") expect_equal(results8$fit_results, fit_results8) expect_equal(nrow(results8$fit_results), 10 * 3 * 2) expect_false(file.exists(extra_reps_fpath)) expect_true(identical(results7$fit_results, results8$fit_results |> dplyr::filter(as.numeric(.rep) <= 4))) - expect_true(identical(results6$fit_results, results8$fit_results)) + expect_true(identical(results6b$fit_results, results8$fit_results)) + + res6b <- exp$run(n_reps = 10, use_cached = FALSE, save = TRUE, + verbose = verbose) + extra_rep_fpath <- file.path( + "results", "test-cache-save-per-rep", "DGP1-Method1", "Varying x-y", + "fit_results", + "fit_result6.rds" + ) + extra_fit_res7a <- readRDS(extra_rep_fpath) + res7 <- exp$run(n_reps = 4, use_cached = TRUE, save = TRUE, + verbose = verbose) + extra_fit_res7b <- readRDS(extra_rep_fpath) + expect_equal(extra_fit_res7a, extra_fit_res7b) + expect_equal(nrow(res7$fit_results), 4 * 3 * 2) + res8 <- exp$run(n_reps = 10, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(nrow(res8$fit_results), 10 * 3 * 2) + expect_true(identical(res6b$fit_results, res8$fit_results)) # check caching when n changes and experiment changes experiment |> add_dgp(dgp2, "DGP2") @@ -208,10 +391,21 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { expect_true(all(results9$fit_results$.rep %in% as.character(1:5))) expect_equal(results9$fit_results |> dplyr::filter(.dgp_name != "DGP2"), results8$fit_results |> dplyr::filter(as.numeric(.rep) <= 5)) - expect_equal(readRDS(extra_reps_fpath), + expect_equal(simplify_tibble(readRDS(extra_reps_fpath)), results8$fit_results |> dplyr::filter(as.numeric(.rep) > 5)) expect_equal(nrow(results9$fit_results), 5 * 3 * 2 + 5 * 2) + exp |> add_dgp(dgp2, "DGP2") + extra_fit_res10a <- readRDS(extra_rep_fpath) + res9 <- exp$run(n_reps = 5, use_cached = TRUE, save = TRUE, + verbose = verbose) + 
extra_fit_res10b <- readRDS(extra_rep_fpath) + expect_true(all(res9$fit_results$.rep %in% as.character(1:5))) + expect_equal(res9$fit_results |> dplyr::filter(.dgp_name != "DGP2"), + res8$fit_results |> dplyr::filter(as.numeric(.rep) <= 5)) + expect_equal(extra_fit_res10a, extra_fit_res10b) + expect_equal(nrow(res9$fit_results), 5 * 3 * 2 + 5 * 2) + # check return_all_cached_reps works properly results10 <- experiment$run(n_reps = 5, use_cached = FALSE, return_all_cached_reps = TRUE, verbose = verbose) @@ -223,56 +417,80 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { expect_equal(results11$viz_results, results9$viz_results) experiment |> remove_dgp("DGP2") + res10 <- exp$run(n_reps = 5, use_cached = FALSE, + return_all_cached_reps = TRUE, verbose = verbose) + expect_equal(nrow(res9$fit_results), nrow(res10$fit_results)) + res11 <- exp$run(n_reps = 5, use_cached = TRUE, + return_all_cached_reps = TRUE, verbose = verbose) + expect_equal(nrow(res11$fit_results), 10 * 3 * 2 + 5 * 2) + expect_equal(res11$eval_results, res9$eval_results) + expect_equal(res11$viz_results, res9$viz_results) + exp |> remove_dgp("DGP2") + # check when add multiple new objects to experiment experiment |> add_dgp(dgp2, "DGP3") - - parallel_strategies <- list( - "reps", "dgps", "methods", c("reps", "dgps"), c("reps", "methods"), - c("dgps", "methods"), c("reps", "dgps", "methods") - ) - experiment |> update_dgp(dgp2, "DGP3") |> update_vary_across(.method = method1, y = c("a", "b")) - results9 <- experiment$run( - n_reps = 10, use_cached = TRUE, save = TRUE, verbose = verbose + n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose ) + expect_equal(nrow(results9$fit_results), 4 * 2 * n_reps) - expect_equal(nrow(results9$fit_results), 10 * 4 * 2) + exp |> add_dgp(dgp2, "DGP3") + exp |> + update_dgp(dgp2, "DGP3") |> + update_vary_across(.method = method1, y = c("a", "b")) + res9 <- exp$run( + n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = 
verbose + ) + expect_equal(nrow(res9$fit_results), 4 * 2 * n_reps) # check clear cache - results10 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results10 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) - expect_equal(results9, results10) - experiment |> clear_cache() - - results11 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, + results11 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, verbose = verbose) - expect_false(identical(results11$fit_results, results10$fit_results)) + res10 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_equal(res9, res10) + exp |> clear_cache() + res11 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = TRUE, + verbose = verbose) + expect_false(identical(res11$fit_results, res10$fit_results)) + # check caching works when not saving - results12 <- experiment$run(n_reps = 4, use_cached = TRUE, save = FALSE, + results12 <- experiment$run(n_reps = 1, use_cached = TRUE, save = FALSE, verbose = verbose) expect_true(identical(results11$fit_results |> - dplyr::filter(as.numeric(.rep) <= 4), + dplyr::filter(as.numeric(.rep) <= 1), results12$fit_results)) - results13 <- experiment$run(n_reps = 10, use_cached = TRUE, save = FALSE, + results13 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = FALSE, verbose = verbose) expect_true(identical(results13, results11)) + res12 <- exp$run(n_reps = 1, use_cached = TRUE, save = FALSE, + verbose = verbose) + expect_true(identical(res11$fit_results |> + dplyr::filter(as.numeric(.rep) <= 1), + res12$fit_results)) + res13 <- exp$run(n_reps = n_reps, use_cached = TRUE, save = FALSE, + verbose = verbose) + expect_true(identical(res13, res11)) + # check running fit, evaluate, and visualize separately - fit_results <- experiment$fit(n_reps = 10, use_cached = TRUE, save = TRUE, + fit_results <- experiment$fit(n_reps = 4, use_cached = TRUE, save = TRUE, verbose = verbose) 
eval_results <- experiment$evaluate(fit_results, use_cached = TRUE, save = TRUE, verbose = verbose) viz_results <- experiment$visualize(fit_results, eval_results, use_cached = TRUE, save = TRUE, verbose = verbose) - fit_results <- experiment$fit(n_reps = 4, use_cached = TRUE, save = FALSE, + fit_results <- experiment$fit(n_reps = 2, use_cached = TRUE, save = FALSE, verbose = verbose) eval_results <- experiment$evaluate(fit_results, use_cached = TRUE, save = FALSE, verbose = verbose) @@ -280,8 +498,23 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { use_cached = TRUE, save = FALSE, verbose = verbose) + fit_res <- exp$fit(n_reps = 4, use_cached = TRUE, save = TRUE, + verbose = verbose) + eval_res <- exp$evaluate(fit_res, use_cached = TRUE, + save = TRUE, verbose = verbose) + viz_res <- exp$visualize(fit_res, eval_res, + use_cached = TRUE, save = TRUE, + verbose = verbose) + fit_res <- exp$fit(n_reps = 2, use_cached = TRUE, save = FALSE, + verbose = verbose) + eval_res <- exp$evaluate(fit_res, use_cached = TRUE, + save = FALSE, verbose = verbose) + viz_res <- exp$visualize(fit_res, eval_res, + use_cached = TRUE, save = FALSE, + verbose = verbose) + # check with non-standard combos of save = T and F are used - fit_results2 <- experiment$fit(n_reps = 12, use_cached = TRUE, save = FALSE, + fit_results2 <- experiment$fit(n_reps = 6, use_cached = TRUE, save = FALSE, verbose = verbose) experiment |> add_evaluator(vary_params_eval, "Evaluator2") @@ -296,9 +529,28 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { file.path("results", "test-cache", "DGP1-Method1", "Varying x-y", "experiment_cached_params.rds") ) - expect_equal(cached_params$fit$fit$.n_reps, rep(10, 8)) - expect_equal(cached_params$evaluate$fit$.n_reps, rep(12, 8)) - expect_equal(cached_params$visualize$fit$.n_reps, rep(10, 8)) + expect_equal(cached_params$fit$fit$.n_reps, rep(4, 8)) + expect_equal(cached_params$evaluate$fit$.n_reps, rep(6, 8)) + 
expect_equal(cached_params$visualize$fit$.n_reps, rep(4, 8)) + + fit_res2 <- exp$fit(n_reps = 6, use_cached = TRUE, save = FALSE, + verbose = verbose) + exp |> + add_evaluator(vary_params_eval, "Evaluator2") + eval_res2 <- exp$evaluate(fit_res2, use_cached = TRUE, + save = TRUE, verbose = verbose) + expect_false(identical(eval_res2$Evaluator1, eval_res$Evaluator1)) + viz_res2 <- exp$visualize(fit_res2, eval_res2, + use_cached = TRUE, save = FALSE, + verbose = verbose) + expect_false(identical(viz_res, viz_res2)) + cached_params <- readRDS( + file.path("results", "test-cache-save-per-rep", "DGP1-Method1", "Varying x-y", + "experiment_cached_params.rds") + ) + expect_equal(cached_params$fit$fit$.n_reps, rep(4, 8)) + expect_equal(cached_params$evaluate$fit$.n_reps, rep(6, 8)) + expect_equal(cached_params$visualize$fit$.n_reps, rep(4, 8)) # check if caching works for functions my_mean <- function(x) mean(x) @@ -334,21 +586,21 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { } # basic cache usage - results0 <- experiment$run(n_reps = 10, use_cached = TRUE, save = FALSE, + results0 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = FALSE, verbose = verbose) - results1 <- experiment$run(n_reps = 10, save = TRUE, verbose = verbose) + results1 <- experiment$run(n_reps = n_reps, save = TRUE, verbose = verbose) expect_false(identical(results0$fit_results, results1$fit_results)) - results2 <- experiment$run(n_reps = 10, use_cached = TRUE, verbose = verbose) + results2 <- experiment$run(n_reps = n_reps, use_cached = TRUE, verbose = verbose) expect_equal(results1, results2) # try caching with function in vary across experiment |> add_vary_across(.dgp = "DGP1", f = list(my_mean, my_median)) - results0 <- experiment$run(n_reps = 10, use_cached = TRUE, save = FALSE, + results0 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = FALSE, verbose = verbose) - results1 <- experiment$run(n_reps = 10, save = TRUE, verbose = verbose) + 
results1 <- experiment$run(n_reps = n_reps, save = TRUE, verbose = verbose) expect_false(identical(results0$fit_results, results1$fit_results)) - results2 <- experiment$run(n_reps = 10, use_cached = TRUE, verbose = verbose) + results2 <- experiment$run(n_reps = n_reps, use_cached = TRUE, verbose = verbose) # to ignore function source bytecode results1$fit_results$f <- clean_fun(results1$fit_results$f) results2$fit_results$f <- clean_fun(results2$fit_results$f) @@ -360,17 +612,20 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { experiment |> update_evaluator(new_eval, "Evaluator1") |> update_visualizer(new_plot, "Visualizer1") - results0 <- experiment$run(n_reps = 10, use_cached = TRUE, save = FALSE, + results0 <- experiment$run(n_reps = n_reps, use_cached = TRUE, save = FALSE, verbose = verbose) results0$fit_results$f <- clean_fun(results0$fit_results$f) - results1 <- experiment$run(n_reps = 10, save = TRUE, verbose = verbose) + results1 <- experiment$run(n_reps = n_reps, save = TRUE, verbose = verbose) results1$fit_results$f <- clean_fun(results1$fit_results$f) expect_false(identical(results0$fit_results, results1$fit_results)) - results2 <- experiment$run(n_reps = 10, use_cached = TRUE, verbose = verbose) + results2 <- experiment$run(n_reps = n_reps, use_cached = TRUE, verbose = verbose) results2$fit_results$f <- purrr::map(results2$fit_results$f, deparse) expect_equal(results1, results2) # check caching with function and different n_reps + results2 <- experiment$run(n_reps = 10, use_cached = FALSE, save = TRUE, + verbose = verbose) + results2$fit_results$f <- purrr::map(results2$fit_results$f, deparse) results3 <- experiment$run(n_reps = 4, use_cached = TRUE, save = TRUE, verbose = verbose) results3$fit_results$f <- clean_fun(results3$fit_results$f) @@ -384,7 +639,7 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { expect_equal(nrow(extra_fit_results3), 6 * 2) expect_equal(results3$fit_results, 
results2$fit_results |> dplyr::filter(as.numeric(.rep) <= 4)) - expect_equal(extra_fit_results3, + expect_equal(simplify_tibble(extra_fit_results3), results2$fit_results |> dplyr::filter(as.numeric(.rep) > 4)) results4 <- experiment$run(n_reps = 10, use_cached = TRUE, save = TRUE, @@ -395,7 +650,7 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { "fit_results.rds") ) fit_results4$f <- clean_fun(fit_results4$f) - expect_equal(results4$fit_results, fit_results4) + expect_equal(results4$fit_results, simplify_tibble(fit_results4)) expect_equal(nrow(results4$fit_results), 10 * 2) expect_false(file.exists(extra_reps_fpath)) expect_true(identical(results3$fit_results, diff --git a/tests/testthat/test-experiment.R b/tests/testthat/test-experiment.R index 6b0c29c..7d978ff 100644 --- a/tests/testthat/test-experiment.R +++ b/tests/testthat/test-experiment.R @@ -282,6 +282,16 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { expect_equal(results$fit_results, experiment$fit(n_reps = 3, verbose = 0)) expect_equal(results, run_experiment(experiment, n_reps = 3, verbose = 0)) + + # check that record_time works + results <- experiment$run(n_reps = 2, record_time = TRUE, verbose = 0) + expect_true(".time_taken" %in% colnames(results$fit_results)) + expect_false(is.null(attr(results$eval_results[[1]], ".time_taken"))) + expect_false(is.null(attr(results$viz_results[[1]], ".time_taken"))) + results <- experiment$run(n_reps = 2, record_time = FALSE, verbose = 0) + expect_false(".time_taken" %in% colnames(results$fit_results)) + expect_true(is.null(attr(results$eval_results[[1]], ".time_taken"))) + expect_true(is.null(attr(results$viz_results[[1]], ".time_taken"))) }) test_that("Generate data from Experiment works properly", { @@ -394,6 +404,11 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { expect_equal(nrow(fit_results), 12) expect_snapshot_output(fit_results) + # check that record_time works + results 
<- experiment$fit(n_reps = 2, record_time = FALSE, verbose = 0) + expect_false(".time_taken" %in% colnames(results)) + results <- experiment$fit(n_reps = 2, record_time = TRUE, verbose = 0) + expect_true(".time_taken" %in% colnames(results)) }) test_that("Evaluating experiment works properly", { @@ -453,6 +468,12 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { .method_name = "Method", result1 = 2), `Vary Params` = tibble::tibble()) ) + + # check that record_time works + results <- experiment$evaluate(fit_results, record_time = FALSE, verbose = 0) + expect_true(is.null(attr(results[[1]], ".time_taken"))) + results <- experiment$evaluate(fit_results, record_time = TRUE, verbose = 0) + expect_false(is.null(attr(results[[1]], ".time_taken"))) }) test_that("Plotting experiment works properly", { @@ -510,6 +531,12 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { viz_results, list(`Fit Results` = fit_results, `Vary Params` = eval_results) ) + + # check that record_time works + results <- experiment$visualize(fit_results, eval_results, record_time = FALSE, verbose = 0) + expect_true(is.null(attr(results[[1]], ".time_taken"))) + results <- experiment$visualize(fit_results, eval_results, record_time = TRUE, verbose = 0) + expect_false(is.null(attr(results[[1]], ".time_taken"))) }) test_that("Add/update/remove vary across works properly", { @@ -678,6 +705,23 @@ withr::with_tempdir(pattern = "simChef-test-checkpointing-temp", code = { `Vary Params` = tibble::tibble(value = "x")) ) + x <- list(1, 3:5) + exp <- create_experiment( + name = "test-vary-across-dgp-save-per-rep", + save_in_bulk = FALSE + ) |> + add_dgp(dgp, name = "DGP") |> + add_method(method, name = "Method") |> + add_evaluator(fit_results_eval, name = "Fit Results") |> + add_evaluator(vary_params_eval, name = "Vary Params") |> + add_vary_across(.dgp = "DGP", x = x) + fit_results <- fit_experiment(exp, save = FALSE, verbose = 0) + expect_equal( + fit_results, 
+ tibble::tibble(.rep = "1", .dgp_name = "DGP", .method_name = "Method", + x = x, x_idx = purrr::map_dbl(x, ~.x[1])) + ) + # test list-type method vary across case idx <- list(1:2, 3:5, 7:10) experiment <- experiment |>