From aaeeb3e12c2ec8291fbb571337adbb9953474775 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 16 Aug 2024 08:32:07 -0700 Subject: [PATCH] Improving caching logic (#514) * Prune cache before checking if a specific key exists * Don't check existence then read separately --- NEWS.md | 1 + R/req-cache.R | 74 ++++++++++++++++-------------- tests/testthat/_snaps/req-cache.md | 6 ++- tests/testthat/test-req-cache.R | 34 +++++++------- 4 files changed, 62 insertions(+), 53 deletions(-) diff --git a/NEWS.md b/NEWS.md index 40985569..04002dcb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # httr2 (development version) +* `req_cache()` now prunes cache _before_ checking if a given key exists, eliminating the occassional error about reading from an invalid RDS file. It also no longer tests for existence then later reads the cache, avoiding potential race conditions. * `jwt_encode_hmac()` now calls correct underlying function `jose::jwt_encode_hmac()` and has correct default size parameter value' (@denskh, #508). * `req_perform_parallel()` now respects error handling in `req_error()` * New function `req_perform_promise()` allows creating a `promises::promise` for a request that runs in the background (#501, @gergness). diff --git a/R/req-cache.R b/R/req-cache.R index 39e73f17..904b14ee 100644 --- a/R/req-cache.R +++ b/R/req-cache.R @@ -87,35 +87,32 @@ cache_debug <- function(req) { # Cache management -------------------------------------------------------- -cache_exists <- function(req) { - if (!req_policy_exists(req, "cache_path")) { - return(FALSE) +cache_active <- function(req) { + req_policy_exists(req, "cache_path") +} + +cache_get <- function(req) { + # This check should be redudant but we keep it in for safety + if (!cache_active(req)) { + return(req) } path <- req_cache_path(req) if (!file.exists(path)) { - return(FALSE) + return(NULL) } tryCatch( { - readRDS(path) - TRUE + rds <- readRDS(path) + # Update file time if read successfully + Sys.setFileTime(path, Sys.time()) + rds }, - error = function(e) { - FALSE - } + error = function(e) NULL ) } -# Callers responsibility to check that cache exists -cache_get <- function(req) { - path <- req_cache_path(req) - - touch(path) - readRDS(path) -} - cache_set <- function(req, resp) { if (is_path(resp$body)) { body_path <- req_cache_path(req, ".body") @@ -178,20 +175,24 @@ cache_prune_files <- function(info, to_remove, why, debug = TRUE) { # Can return request or response cache_pre_fetch <- function(req) { - if (!cache_exists(req)) { + if (!cache_active(req)) { return(req) } debug <- cache_debug(req) cache_prune_if_needed(req, debug = debug) - info <- resp_cache_info(cache_get(req)) + cached_resp <- cache_get(req) + if (is.null(cached_resp)) { + return(req) + } if (debug) cli::cli_text("Found url in cache {.val {hash(req$url)}}") + info <- resp_cache_info(cached_resp) if (!is.na(info$expires) && info$expires >= Sys.time()) { signal("", "httr2_cache_cached") - if (debug) cli::cli_text("Cached value is fresh; retrieving response from cache") - cache_get(req) + if (debug) cli::cli_text("Cached value is fresh; using response from cache") + cached_resp } else { if (debug) cli::cli_text("Cached value is stale; checking for updates") req_headers(req, @@ -201,29 +202,30 @@ cache_pre_fetch <- function(req) { } } +# Always returns response cache_post_fetch <- function(req, resp, path = NULL) { - if (!req_policy_exists(req, "cache_path")) { + if (!cache_active(req)) { return(resp) } + debug <- cache_debug(req) + cached_resp <- cache_get(req) if (is_error(resp)) { - if (cache_use_on_error(req) && cache_exists(req)) { + if (cache_use_on_error(req) && !is.null(cached_resp)) { if (debug) cli::cli_text("Request errored; retrieving response from cache") - cache_get(req) + cached_resp } else { resp } - } else if (resp_status(resp) == 304 && cache_exists(req)) { + } else if (resp_status(resp) == 304 && !is.null(cached_resp)) { signal("", "httr2_cache_not_modified") if (debug) cli::cli_text("Cached value still ok; retrieving body from cache") # Replace body with cached result - resp$body <- cache_body(req, path) - + resp$body <- cache_body(cached_resp, path) # Combine headers - resp$headers <- cache_headers(req, resp) - + resp$headers <- cache_headers(cached_resp, resp) resp } else if (resp_is_cacheable(resp)) { signal("", "httr2_cache_save") @@ -235,8 +237,10 @@ cache_post_fetch <- function(req, resp, path = NULL) { } } -cache_body <- function(req, path = NULL) { - body <- cache_get(req)$body +cache_body <- function(cached_resp, path = NULL) { + check_response(cached_resp) + + body <- cached_resp$body if (is.null(path)) { return(body) @@ -250,10 +254,10 @@ cache_body <- function(req, path = NULL) { new_path(path) } -cache_headers <- function(req, resp) { - # https://www.rfc-editor.org/rfc/rfc7232#section-4.1 - cached_headers <- cache_get(req)$headers - as_headers(modify_list(cached_headers, !!!resp$headers)) +# https://www.rfc-editor.org/rfc/rfc7232#section-4.1 +cache_headers <- function(cached_resp, resp) { + check_response(cached_resp) + as_headers(modify_list(cached_resp$headers, !!!resp$headers)) } # Caching headers --------------------------------------------------------- diff --git a/tests/testthat/_snaps/req-cache.md b/tests/testthat/_snaps/req-cache.md index 85360027..23fc83d3 100644 --- a/tests/testthat/_snaps/req-cache.md +++ b/tests/testthat/_snaps/req-cache.md @@ -3,15 +3,17 @@ Code # Immutable invisible(cache_pre_fetch(req)) + Message + Pruning cache + Code invisible(cache_post_fetch(req, resp)) Message Saving response to cache "f3805db63ff822b4743f247cfdde10a3" Code invisible(cache_pre_fetch(req)) Message - Pruning cache Found url in cache "f3805db63ff822b4743f247cfdde10a3" - Cached value is fresh; retrieving response from cache + Cached value is fresh; using response from cache --- diff --git a/tests/testthat/test-req-cache.R b/tests/testthat/test-req-cache.R index c9ee9a12..63bfaaa3 100644 --- a/tests/testthat/test-req-cache.R +++ b/tests/testthat/test-req-cache.R @@ -1,7 +1,5 @@ test_that("nothing happens if cache not enabled", { req <- request("http://example.com") - - expect_false(cache_exists(req)) expect_equal(cache_pre_fetch(req), req) resp <- response() @@ -60,11 +58,11 @@ test_that("304 retains headers but gets cached body", { test_that("automatically adds to cache", { req <- request("http://example.com") %>% req_cache(tempfile()) - expect_false(cache_exists(req)) + expect_true(is.null(cache_get(req))) resp <- response(200, headers = 'Etag: "abc"', body = charToRaw("OK")) cached <- cache_post_fetch(req, resp) - expect_true(cache_exists(req)) + expect_false(is.null(cache_get(req))) expect_equal(cache_get(req), resp) }) @@ -116,23 +114,25 @@ test_that("can get and set from cache", { ) ) - expect_false(cache_exists(req)) + expect_true(is.null(cache_get(req))) cache_set(req, resp) - expect_true(cache_exists(req)) - expect_equal(cache_get(req), resp) + expect_false(is.null(cache_get(req))) + + resp_from_cache <- cache_get(req) + expect_equal(resp_from_cache, resp) # Uses new headers if available, otherwise cached headers - out_headers <- cache_headers(req, cached_resp) + out_headers <- cache_headers(resp_from_cache, cached_resp) expect_equal(out_headers$`content-type`, "application/json") expect_equal(out_headers$Etag, "DEF") expect_equal(out_headers$other, "new") # If path is null can leave resp as is - expect_equal(cache_body(req, NULL), resp$body) - expect_equal(resp_body_json(cache_get(req)), list(a = 1L)) + expect_equal(cache_body(resp_from_cache, NULL), resp$body) + expect_equal(resp_body_json(resp_from_cache), list(a = 1L)) # If path is set, need to save to path path <- tempfile() - body <- cache_body(req, path) + body <- cache_body(resp_from_cache, path) expect_equal(body, new_path(path)) expect_equal(readLines(path, warn = FALSE), rawToChar(resp$body)) }) @@ -147,15 +147,17 @@ test_that("handles responses with files", { # File should be copied in cache directory, and response body updated body_path <- req_cache_path(req, ".body") expect_equal(readLines(body_path), "Hi there") - expect_equal(cache_get(req)$body, new_path(body_path)) + + resp_from_cache <- cache_get(req) + expect_equal(resp_from_cache$body, new_path(body_path)) # If path is null, just leave body as is, since req_body() already # papers over the differences - expect_equal(cache_body(req, NULL), new_path(body_path)) + expect_equal(cache_body(resp_from_cache, NULL), new_path(body_path)) # If path is not null, copy to desired location, and update body path2 <- tempfile() - body <- cache_body(req, path2) + body <- cache_body(resp_from_cache, path2) expect_equal(readLines(body), "Hi there") expect_equal(body, new_path(path2)) }) @@ -165,10 +167,10 @@ test_that("corrupt files are ignored", { req <- request("http://example.com") %>% req_cache(cache_dir) writeLines(letters, req_cache_path(req)) - expect_false(cache_exists(req)) + expect_true(is.null(cache_get(req))) saveRDS(1:10, req_cache_path(req)) - expect_true(cache_exists(req)) + expect_false(is.null(cache_get(req))) }) # pruning -----------------------------------------------------------------