diff --git a/DESCRIPTION b/DESCRIPTION index ce17a355e8..a557c1274b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -68,6 +68,7 @@ Authors@R: c( person(c("Kevin", "K."), "Smith", role = "ctb"), person("Kirill", "Mueller", role = "ctb"), person("Kohske", "Takahashi", role = "ctb"), + person("Leonardo", "Gama", role = "ctb"), person("Lorenz", "Walthert", role = "ctb"), person("Lucas", "Gallindo", role = "ctb"), person("Marius", "Hofert", role = "ctb"), diff --git a/NEWS.md b/NEWS.md index 889b2e7ad5..24c4bc6a7d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,6 +24,8 @@ - Added an argument `exact` to `pandoc_to()` and `pandoc_from()` to decide whether to use/return the exact Pandoc output/input format name. If not (default), Pandoc extensions will be removed from the format name, e.g., `latex-smart` will be treated as `latex`. +- For `python` code chunks, objects can be cached using the Python package **dill**. This currently requires the patch in **reticulate** https://github.com/rstudio/reticulate/pull/1210, and should be considered experimental until the patch is accepted and a new version of **reticulate** is released (thanks, @leogama, #2170). + ## BUG FIXES - Plot created outside of `knit()` could sneak into `knit_child()` results (thanks, @niklaswillrich, #2166). 
diff --git a/R/block.R b/R/block.R index 994c82a874..898a1d5435 100644 --- a/R/block.R +++ b/R/block.R @@ -93,14 +93,14 @@ call_block = function(block) { } hash = paste(valid_path(params$cache.path, label), digest(content), sep = '_') params$hash = hash - if (cache$exists(hash, params$cache.lazy) && - isFALSE(params$cache.rebuild) && - params$engine != 'Rcpp') { + xfun::dir_create(dirname(hash)) + if (cache_exists(params) && isFALSE(params$cache.rebuild) && params$engine != 'Rcpp') { if (opts_knit$get('verbose')) message(' loading cache from ', hash) - cache$load(hash, lazy = params$cache.lazy) - cache_engine(params) + cache$load(hash, options = params) if (!params$include) return('') if (params$cache == 3) return(cache$output(hash)) + } else { + purge_cache(params) # purge any invalid cache files } if (params$engine == 'R') cache$library(params$cache.path, save = FALSE) # load packages @@ -156,7 +156,7 @@ block_exec = function(options) { output = paste(c(res.before, output, res.after), collapse = '') output = knit_hooks$get('chunk')(output, options) if (options$cache) { - cache.exists = cache$exists(options$hash, options$cache.lazy) + cache.exists = cache_exists(options) if (options$cache.rebuild || !cache.exists) block_cache(options, output, switch( options$engine, 'stan' = options$output.var, 'sql' = options$output.var, character(0) @@ -246,7 +246,7 @@ eng_r = function(options) { # guess plot file type if it is NULL if (keep != 'none') options$fig.ext = dev2ext(options) - cache.exists = cache$exists(options$hash, options$cache.lazy) + cache.exists = cache_exists(options) evaluate = knit_hooks$get('evaluate') # return code with class 'source' if not eval chunks res = if (is_blank(code)) list() else if (isFALSE(ev)) { @@ -350,16 +350,33 @@ block_cache = function(options, output, objects) { hash = options$hash outname = cache_output_name(hash) assign(outname, output, envir = knit_global()) - purge_cache(options) cache$library(options$cache.path, save = TRUE) 
cache$save(objects, outname, hash, lazy = options$cache.lazy) + cache_action(options, 'save', options) +} + +# test if cache exists: first R cache must exist, then if a custom cache engine +# exists, use the engine to check its cache exists +cache_exists = function(options) { + cache$exists(options$hash, options$cache.lazy) && + cache_action(options, 'exists', options) } purge_cache = function(options) { # purge my old cache and cache of chunks dependent on me - cache$purge(paste0(valid_path( - options$cache.path, c(options$label, dep_list$get(options$label)) - ), '_????????????????????????????????')) + prefix = valid_path(options$cache.path, c(options$label, dep_list$get(options$label))) + glob_path = paste0(prefix, '_', paste(rep('?', 32), collapse = '')) # length of the MD5 hash + cache$purge(glob_path) + cache_action(options, 'purge', glob_path) +} + +cache_action = function(options, method, ...) { + res = if (method == 'exists') TRUE + if (length(eng <- cache_engines$get(options$engine))) { + obj = eng(options) + if (is.function(action <- obj[[method]])) res = action(...) 
+ } + res } cache_globals = function(option, code) { diff --git a/R/cache.R b/R/cache.R index 0664c6277b..5ff5706e17 100644 --- a/R/cache.R +++ b/R/cache.R @@ -4,14 +4,8 @@ ## but it is using .rdb and .rdx as 'hard cache' (instead of cache in memory) new_cache = function() { - cache_path = function(hash) { - d = dirname(hash) - if (!file.exists(d)) dir.create(d, showWarnings = FALSE, recursive = TRUE) - file.path(d, basename(hash)) - } - - cache_purge = function(hash) { - for (h in hash) unlink(paste(cache_path(h), c('rdb', 'rdx', 'RData'), sep = '.')) + cache_purge = function(path) { + for (p in path) unlink(paste(p, c('rdb', 'rdx', 'RData'), sep = '.')) } cache_save = function(keys, outname, hash, lazy = TRUE) { @@ -20,18 +14,17 @@ new_cache = function() { out0 = outname on.exit(rm(list = out0, envir = knit_global()), add = TRUE) # keys are new variables created; outname is the text output of a chunk - path = cache_path(hash) # add random seed to cache if exists if (exists('.Random.seed', envir = globalenv(), inherits = FALSE)) { copy_env(globalenv(), knit_global(), '.Random.seed') outname = c('.Random.seed', outname) } if (!lazy) outname = c(keys, outname) - save(list = outname, file = paste(path, 'RData', sep = '.'), envir = knit_global()) + save(list = outname, file = paste(hash, 'RData', sep = '.'), envir = knit_global()) if (!lazy) return() # everything has been saved; no need to make lazy db # random seed is always load()ed keys = setdiff(keys, '.Random.seed') - getFromNamespace('makeLazyLoadDB', 'tools')(knit_global(), path, variables = keys) + getFromNamespace('makeLazyLoadDB', 'tools')(knit_global(), hash, variables = keys) } save_objects = function(objs, label, path) { @@ -53,13 +46,12 @@ new_cache = function() { save_objects(globals, label, valid_path(path, '__globals')) } - cache_load = function(hash, lazy = TRUE) { - path = cache_path(hash) - if (!is_abs_path(path)) path = file.path(getwd(), path) - if (lazy) lazyLoad(path, envir = knit_global()) + 
cache_load = function(hash, lazy = options$cache.lazy, options = list()) { + db = if (is_abs_path(hash)) hash else file.path(getwd(), hash) # lazy-load promises are forced later, so the db path must be absolute + if (is.null(lazy) || lazy) lazyLoad(db, envir = knit_global()) # load output from last run if exists - if (file.exists(path2 <- paste(path, 'RData', sep = '.'))) { - load(path2, envir = knit_global()) + if (file.exists(path <- paste(hash, 'RData', sep = '.'))) { + load(path, envir = knit_global()) if (exists('.Random.seed', envir = knit_global(), inherits = FALSE)) copy_env(knit_global(), globalenv(), '.Random.seed') name = cache_meta_name(hash) @@ -70,6 +62,7 @@ new_cache = function() { rm(list = name, envir = knit_global()) } } + cache_action(options, 'load', options) } cache_library = function(path, save = TRUE) { @@ -89,7 +82,7 @@ new_cache = function() { cache_exists = function(hash, lazy = TRUE) { is.character(hash) && all(file.exists(paste( - cache_path(hash), if (lazy) c('rdb', 'rdx') else 'RData', sep = '.' + hash, c('RData', 'rdb', 'rdx')[if (lazy) 1:3 else 1], sep = '.' 
))) } diff --git a/R/engine.R b/R/engine.R index 5bfc0b0e9f..2196ec3327 100644 --- a/R/engine.R +++ b/R/engine.R @@ -278,11 +278,11 @@ eng_python = function(options) { cache_eng_python = function(options) { if (isFALSE(options$python.reticulate)) return() - # TODO: change this hack to reticulate::cache_eng_python(options) after - # https://github.com/rstudio/reticulate/pull/167 is merged and released + # TODO: change this hack to reticulate::cache_eng_python after + # https://github.com/rstudio/reticulate/pull/1210 is merged and released if (!'cache_eng_python' %in% ls(asNamespace('reticulate'))) return() - fun = getFromNamespace('cache_eng_python', 'reticulate') - fun(options) + eng = getFromNamespace('cache_eng_python', 'reticulate') + if (eng$available(options)) eng } ## Java @@ -936,12 +936,6 @@ get_engine = function(name) { } } -cache_engine = function(options) { - cache_fun = cache_engines$get(options$engine) - if (!is.function(cache_fun)) return() - cache_fun(options) -} - # possible values for engines (for auto-completion in RStudio) opts_chunk_attr$engine = as.list(sort(c('R', names(knit_engines$get())))) opts_chunk_attr[c('engine.path', 'engine.opts')] = list('character', 'character') diff --git a/R/plot.R b/R/plot.R index 5a6a7d7e40..8e364120e1 100644 --- a/R/plot.R +++ b/R/plot.R @@ -146,7 +146,7 @@ save_plot = function(plot, name, dev, width, height, ext, dpi, options) { path = paste(name, ext, sep = '.') # when cache=2 and plot file exists, just return the filename - if (options$cache == 2 && cache$exists(options$hash, options$cache.lazy)) { + if (options$cache == 2 && cache_exists(options)) { if (in_base_dir(!file.exists(path))) { purge_cache(options) stop('cannot find ', path, '; the cache has been purged; please re-compile') diff --git a/tests/testit/test-cache.R b/tests/testit/test-cache.R index bf0da9f42d..5941c64e44 100644 --- a/tests/testit/test-cache.R +++ b/tests/testit/test-cache.R @@ -44,3 +44,34 @@ assert('dep_prev() sets dependencies on 
previous chunks', { }) dep_list$restore() knit_code$restore() + +mock_cache = (function() { + noop_false = function(...) FALSE + noop_true = function(...) TRUE + list( + available = noop_true, exists = noop_false, load = noop_false, + save = noop_false, purge = noop_false + ) # may return anything +})() +knit_engines$set(mock = function(...) "\n\nmock result\n\n") +cache_engines$set(mock = function(...) mock_cache) +knit_engine_cache = function() { + in_dir(tempdir(), { + txt = c( + '```{mock test, cache=TRUE, cache.path=""}', + 'mock code', + '```' + ) + knit(text = txt, quiet = TRUE) + R_cache_file = list.files(pattern = "RData$") + t1 = file.mtime(R_cache_file) + knit(text = txt, quiet = TRUE) + t2 = file.mtime(R_cache_file) + t1 != t2 # missing "mock" cache should invalidate R cache + }) +} +assert("missing external engine's cache invalidates R cache", { + (knit_engine_cache()) +}) +knit_engines$delete('mock') +cache_engines$delete('mock')