Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes to the cache API required by the new reticulate cache implementation #2170

Open
wants to merge 23 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
c9e142c
Change API of external engines' cache to be similar to the R cache
leogama Sep 11, 2022
ed0ead6
Also check for .Rdata cache file (with results) in the lazy cache case
leogama Sep 12, 2022
706ab38
Create cache directory only before first cache check
leogama Sep 12, 2022
82b7930
Fix: load package (and cache) of external engine only if necessary
leogama Sep 12, 2022
8336713
Pass full options to engine_cache$exists(), $load() and $save()
leogama Sep 12, 2022
19def13
Pass full options to engine_caches$get(); fix variable name
leogama Sep 13, 2022
1070a1b
Only return python engine cache if available, or the R cache will alw…
leogama Sep 14, 2022
ae43d23
Fix: purge invalid cache files after the first cache check
leogama Sep 16, 2022
2a1ff59
Test: cache invalidation due to missing cache file
leogama Sep 16, 2022
0b7eb4e
showWarnings = FALSE is unnecessary (xfun::dir_create() won't create …
yihui Sep 27, 2022
8d6d9a8
let cache$load() handle custom cache engines
yihui Sep 27, 2022
5ca33cd
cosmetic
yihui Sep 27, 2022
212bd26
let block_cache() handle custom cache engines, too
yihui Sep 27, 2022
95b8f80
cache_engines$get() always try to get the engine from a name, instead…
yihui Sep 27, 2022
3307e76
use paste() instead of stringr::str_dup()
yihui Sep 27, 2022
b5e64bf
the path could be a vector of length > 1
yihui Sep 27, 2022
e093389
cosmetic
yihui Sep 27, 2022
5330ffc
clean up after testing
yihui Sep 27, 2022
7b3fb59
add news and ctb
yihui Sep 27, 2022
eeaafcb
Merge commit '6bfffe9c1ae8c84f0f2ae07cd3c4b00876757587'
yihui Sep 27, 2022
5316f3d
Merged origin/master into leogama-cache-api
yihui Sep 27, 2022
7a6c852
apply cache_engines$get() on `options` to return a list (of methods)
yihui Sep 27, 2022
ad99f51
Merge branch 'master' into cache-api
leogama Dec 13, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 36 additions & 12 deletions R/block.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,20 @@ call_block = function(block) {
}
hash = paste(valid_path(params$cache.path, label), digest(content), sep = '_')
params$hash = hash
if (cache$exists(hash, params$cache.lazy) &&
xfun::dir_create(dirname(hash), showWarnings = FALSE)
if (cache_exists(params) &&
isFALSE(params$cache.rebuild) &&
params$engine != 'Rcpp') {
if (opts_knit$get('verbose')) message(' loading cache from ', hash)
cache$load(hash, lazy = params$cache.lazy)
cache_engine(params)
if (params$engine != 'R' &&
!is.null(engine_cache <- cache_engines$get(params))) {
engine_cache$load(params)
}
if (!params$include) return('')
if (params$cache == 3) return(cache$output(hash))
} else {
purge_cache(params) # purge any invalid cache files
}
if (params$engine == 'R')
cache$library(params$cache.path, save = FALSE) # load packages
Expand Down Expand Up @@ -156,11 +162,16 @@ block_exec = function(options) {
output = paste(c(res.before, output, res.after), collapse = '')
output = knit_hooks$get('chunk')(output, options)
if (options$cache) {
cache.exists = cache$exists(options$hash, options$cache.lazy)
if (options$cache.rebuild || !cache.exists) block_cache(options, output, switch(
options$engine,
'stan' = options$output.var, 'sql' = options$output.var, character(0)
))
cache.exists = cache_exists(options)
if (options$cache.rebuild || !cache.exists) {
block_cache(options, output, switch(
options$engine,
'stan' = options$output.var, 'sql' = options$output.var, character(0)
))
if (!is.null(engine_cache <- cache_engines$get(options))) {
engine_cache$save(options)
}
}
}
if (options$include) output else ''
}
Expand Down Expand Up @@ -242,7 +253,7 @@ eng_r = function(options) {
# guess plot file type if it is NULL
if (keep != 'none') options$fig.ext = dev2ext(options)

cache.exists = cache$exists(options$hash, options$cache.lazy)
cache.exists = cache_exists(options)
evaluate = knit_hooks$get('evaluate')
# return code with class 'source' if not eval chunks
res = if (is_blank(code)) list() else if (isFALSE(ev)) {
Expand Down Expand Up @@ -346,16 +357,29 @@ block_cache = function(options, output, objects) {
hash = options$hash
outname = cache_output_name(hash)
assign(outname, output, envir = knit_global())
purge_cache(options)
cache$library(options$cache.path, save = TRUE)
cache$save(objects, outname, hash, lazy = options$cache.lazy)
}

# Test whether the cache of a chunk is usable, given its chunk options.
#
# The R cache files (looked up via options$hash and options$cache.lazy) are
# always checked. For a non-R engine that has a cache engine available, the
# cache of that engine must exist, too; otherwise only the R cache counts.
cache_exists = function(options) {
  has_r_cache = cache$exists(options$hash, options$cache.lazy)
  # the external cache engine is only looked up for non-R engines
  use_engine_cache = options$engine != 'R' &&
    !is.null(ext_cache <- cache_engines$get(options))
  if (!use_engine_cache) return(has_r_cache)
  has_r_cache && ext_cache$exists(options)
}

purge_cache = function(options) {
# purge my old cache and cache of chunks dependent on me
cache$purge(paste0(valid_path(
options$cache.path, c(options$label, dep_list$get(options$label))
), '_????????????????????????????????'))
glob_prefix = valid_path(options$cache.path, c(options$label, dep_list$get(options$label)))
glob_path = paste0(glob_prefix, '_', stringr::str_dup('?', 32)) # length of the MD5 hash
cache$purge(glob_path)
if (options$engine != 'R' &&
!is.null(engine_cache <- cache_engines$get(options))) {
engine_cache$purge(glob_path)
}
}

cache_globals = function(option, code) {
Expand Down
26 changes: 9 additions & 17 deletions R/cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,8 @@
## but it is using .rdb and .rdx as 'hard cache' (instead of cache in memory)
new_cache = function() {

cache_path = function(hash) {
d = dirname(hash)
if (!file.exists(d)) dir.create(d, showWarnings = FALSE, recursive = TRUE)
file.path(d, basename(hash))
}

cache_purge = function(hash) {
for (h in hash) unlink(paste(cache_path(h), c('rdb', 'rdx', 'RData'), sep = '.'))
cache_purge = function(glob_path) {
unlink(paste(glob_path, c('rdb', 'rdx', 'RData'), sep = '.'))
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that glob_path could be a vector of length > 1, so we still need to purge them one by one via a loop.

}

cache_save = function(keys, outname, hash, lazy = TRUE) {
Expand All @@ -20,18 +14,17 @@ new_cache = function() {
out0 = outname
on.exit(rm(list = out0, envir = knit_global()), add = TRUE)
# keys are new variables created; outname is the text output of a chunk
path = cache_path(hash)
# add random seed to cache if exists
if (exists('.Random.seed', envir = globalenv(), inherits = FALSE)) {
copy_env(globalenv(), knit_global(), '.Random.seed')
outname = c('.Random.seed', outname)
}
if (!lazy) outname = c(keys, outname)
save(list = outname, file = paste(path, 'RData', sep = '.'), envir = knit_global())
save(list = outname, file = paste(hash, 'RData', sep = '.'), envir = knit_global())
if (!lazy) return() # everything has been saved; no need to make lazy db
# random seed is always load()ed
keys = setdiff(keys, '.Random.seed')
getFromNamespace('makeLazyLoadDB', 'tools')(knit_global(), path, variables = keys)
getFromNamespace('makeLazyLoadDB', 'tools')(knit_global(), hash, variables = keys)
}

save_objects = function(objs, label, path) {
Expand All @@ -54,12 +47,11 @@ new_cache = function() {
}

cache_load = function(hash, lazy = TRUE) {
path = cache_path(hash)
if (!is_abs_path(path)) path = file.path(getwd(), path)
if (lazy) lazyLoad(path, envir = knit_global())
if (!is_abs_path(hash)) path = file.path(getwd(), hash)
if (lazy) lazyLoad(hash, envir = knit_global())
# load output from last run if exists
if (file.exists(path2 <- paste(path, 'RData', sep = '.'))) {
load(path2, envir = knit_global())
if (file.exists(path <- paste(hash, 'RData', sep = '.'))) {
load(path, envir = knit_global())
if (exists('.Random.seed', envir = knit_global(), inherits = FALSE))
copy_env(knit_global(), globalenv(), '.Random.seed')
name = cache_meta_name(hash)
Expand Down Expand Up @@ -89,7 +81,7 @@ new_cache = function() {
cache_exists = function(hash, lazy = TRUE) {
is.character(hash) &&
all(file.exists(paste(
cache_path(hash), if (lazy) c('rdb', 'rdx') else 'RData', sep = '.'
hash, c('RData', 'rdb', 'rdx')[if (lazy) 1:3 else 1], sep = '.'
)))
}

Expand Down
32 changes: 20 additions & 12 deletions R/engine.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,18 @@ knit_engines = new_defaults()
#' @export
cache_engines = new_defaults()

# Wrap the original getter so that cache_engines$get() can also be called with
# a list of chunk options: in that case the cache importer registered for
# options$engine is looked up and applied to the options.
# NOTE: these assignments don't change the closures namespace.
cache_engines$.get = cache_engines$get
# - get(): without a first positional argument, everything in ... is passed to
#   the original getter unchanged
# - get(name, ...): a non-list first argument (e.g., an engine name) is passed
#   to the original getter as well, so name-based lookups do not fail
# - get(options, ...): returns the result of calling the importer registered
#   for options$engine on `options`, or NULL if no function is registered
cache_engines$get = function(options, ...) {
  if (missing(options)) return(cache_engines$.get(...))
  # `$` is invalid for atomic vectors, so an engine name must never reach
  # options$engine below
  if (!is.list(options)) return(cache_engines$.get(options, ...))
  cache_importer = cache_engines$.get(options$engine, ...)
  # an entry that is not a function cannot be called; treat it as missing
  if (is.function(cache_importer)) cache_importer(options) else NULL
}

#' An output wrapper for language engine output
#'
#' If you have designed a language engine, you may call this function in the end
Expand Down Expand Up @@ -277,12 +289,14 @@ eng_python = function(options) {
}

cache_eng_python = function(options) {
if (isFALSE(options$python.reticulate)) return()
# TODO: change this hack to reticulate::cache_eng_python(options) after
# https://github.com/rstudio/reticulate/pull/167 is merged and released
if (!'cache_eng_python' %in% ls(asNamespace('reticulate'))) return()
fun = getFromNamespace('cache_eng_python', 'reticulate')
fun(options)
# TODO: change this hack to reticulate::cache_eng_python after
# https://github.com/rstudio/reticulate/pull/1210 is merged and released
if (!isFALSE(options$python.reticulate) &&
'cache_eng_python' %in% ls(asNamespace('reticulate'))) {
engine_cache = getFromNamespace('cache_eng_python', 'reticulate')
if (engine_cache$available(options)) return(engine_cache)
}
NULL
}

## Java
Expand Down Expand Up @@ -936,12 +950,6 @@ get_engine = function(name) {
}
}

cache_engine = function(options) {
cache_fun = cache_engines$get(options$engine)
if (!is.function(cache_fun)) return()
cache_fun(options)
}

# possible values for engines (for auto-completion in RStudio)
opts_chunk_attr$engine = as.list(sort(c('R', names(knit_engines$get()))))
opts_chunk_attr[c('engine.path', 'engine.opts')] = list('character', 'character')
2 changes: 1 addition & 1 deletion R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ save_plot = function(plot, name, dev, width, height, ext, dpi, options) {

path = paste(name, ext, sep = '.')
# when cache=2 and plot file exists, just return the filename
if (options$cache == 2 && cache$exists(options$hash, options$cache.lazy)) {
if (options$cache == 2 && cache_exists(options)) {
if (in_base_dir(!file.exists(path))) {
purge_cache(options)
stop('cannot find ', path, '; the cache has been purged; please re-compile')
Expand Down
25 changes: 25 additions & 0 deletions tests/testit/test-cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,28 @@ assert('dep_prev() sets dependencies on previous chunks', {
})
dep_list$restore()
knit_code$restore()

# A mock cache engine that is always available but whose cache never exists,
# so the R cache of a chunk using the 'mock' engine can never be valid.
mock_cache = (function() {
  noop_false = function(...) FALSE
  noop_true = function(...) TRUE
  list(available = noop_true, exists = noop_false,
       load = noop_false, save = noop_false, purge = noop_false) # may return anything
})()
knit_engines$set(mock = function(...) "\n\nmock result\n\n")
cache_engines$set(mock = function(...) mock_cache)

# Knit the same cached 'mock' chunk twice and return TRUE if the R cache file
# was written again by the second run.
knit_engine_cache = function() {
  # use a private directory, so that cache files left in tempdir() by other
  # tests cannot be matched by list.files(), and remove it when done
  d = tempfile('knitr-engine-cache-')
  dir.create(d)
  on.exit(unlink(d, recursive = TRUE), add = TRUE)
  in_dir(d, {
    txt = c(
      '```{mock test, cache=TRUE, cache.path=""}',
      'mock code',
      '```'
    )
    knit(text = txt, quiet = TRUE)
    R_cache_file = list.files(pattern = "RData$")
    if (length(R_cache_file) != 1) FALSE else {
      # backdate the file, so that a rewrite is detected even on file systems
      # with a coarse timestamp resolution (both runs may finish within it)
      Sys.setFileTime(R_cache_file, file.mtime(R_cache_file) - 60)
      t1 = file.mtime(R_cache_file)
      knit(text = txt, quiet = TRUE)
      t2 = file.mtime(R_cache_file)
      # missing "mock" cache should invalidate R cache
      !is.na(t2) && t2 > t1
    }
  })
}
assert("missing external engine's cache invalidates R cache", knit_engine_cache())