Skip to content

Commit

Permalink
Improving caching logic (#514)
Browse files Browse the repository at this point in the history
* Prune cache before checking if a specific key exists
* Don't check existence then read separately
  • Loading branch information
hadley authored Aug 16, 2024
1 parent f3992d5 commit aaeeb3e
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 53 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# httr2 (development version)

* `req_cache()` now prunes cache _before_ checking if a given key exists, eliminating the occassional error about reading from an invalid RDS file. It also no longer tests for existence then later reads the cache, avoiding potential race conditions.
* `jwt_encode_hmac()` now calls correct underlying function `jose::jwt_encode_hmac()` and has correct default size parameter value' (@denskh, #508).
* `req_perform_parallel()` now respects error handling in `req_error()`
* New function `req_perform_promise()` allows creating a `promises::promise` for a request that runs in the background (#501, @gergness).
Expand Down
74 changes: 39 additions & 35 deletions R/req-cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,35 +87,32 @@ cache_debug <- function(req) {

# Cache management --------------------------------------------------------

cache_exists <- function(req) {
if (!req_policy_exists(req, "cache_path")) {
return(FALSE)
cache_active <- function(req) {
req_policy_exists(req, "cache_path")
}

cache_get <- function(req) {
# This check should be redudant but we keep it in for safety
if (!cache_active(req)) {
return(req)
}

path <- req_cache_path(req)
if (!file.exists(path)) {
return(FALSE)
return(NULL)
}

tryCatch(
{
readRDS(path)
TRUE
rds <- readRDS(path)
# Update file time if read successfully
Sys.setFileTime(path, Sys.time())
rds
},
error = function(e) {
FALSE
}
error = function(e) NULL
)
}

# Callers responsibility to check that cache exists
cache_get <- function(req) {
path <- req_cache_path(req)

touch(path)
readRDS(path)
}

cache_set <- function(req, resp) {
if (is_path(resp$body)) {
body_path <- req_cache_path(req, ".body")
Expand Down Expand Up @@ -178,20 +175,24 @@ cache_prune_files <- function(info, to_remove, why, debug = TRUE) {

# Can return request or response
cache_pre_fetch <- function(req) {
if (!cache_exists(req)) {
if (!cache_active(req)) {
return(req)
}

debug <- cache_debug(req)
cache_prune_if_needed(req, debug = debug)

info <- resp_cache_info(cache_get(req))
cached_resp <- cache_get(req)
if (is.null(cached_resp)) {
return(req)
}
if (debug) cli::cli_text("Found url in cache {.val {hash(req$url)}}")

info <- resp_cache_info(cached_resp)
if (!is.na(info$expires) && info$expires >= Sys.time()) {
signal("", "httr2_cache_cached")
if (debug) cli::cli_text("Cached value is fresh; retrieving response from cache")
cache_get(req)
if (debug) cli::cli_text("Cached value is fresh; using response from cache")
cached_resp
} else {
if (debug) cli::cli_text("Cached value is stale; checking for updates")
req_headers(req,
Expand All @@ -201,29 +202,30 @@ cache_pre_fetch <- function(req) {
}
}

# Always returns response
cache_post_fetch <- function(req, resp, path = NULL) {
if (!req_policy_exists(req, "cache_path")) {
if (!cache_active(req)) {
return(resp)
}

debug <- cache_debug(req)
cached_resp <- cache_get(req)

if (is_error(resp)) {
if (cache_use_on_error(req) && cache_exists(req)) {
if (cache_use_on_error(req) && !is.null(cached_resp)) {
if (debug) cli::cli_text("Request errored; retrieving response from cache")
cache_get(req)
cached_resp
} else {
resp
}
} else if (resp_status(resp) == 304 && cache_exists(req)) {
} else if (resp_status(resp) == 304 && !is.null(cached_resp)) {
signal("", "httr2_cache_not_modified")
if (debug) cli::cli_text("Cached value still ok; retrieving body from cache")

# Replace body with cached result
resp$body <- cache_body(req, path)

resp$body <- cache_body(cached_resp, path)
# Combine headers
resp$headers <- cache_headers(req, resp)

resp$headers <- cache_headers(cached_resp, resp)
resp
} else if (resp_is_cacheable(resp)) {
signal("", "httr2_cache_save")
Expand All @@ -235,8 +237,10 @@ cache_post_fetch <- function(req, resp, path = NULL) {
}
}

cache_body <- function(req, path = NULL) {
body <- cache_get(req)$body
cache_body <- function(cached_resp, path = NULL) {
check_response(cached_resp)

body <- cached_resp$body

if (is.null(path)) {
return(body)
Expand All @@ -250,10 +254,10 @@ cache_body <- function(req, path = NULL) {
new_path(path)
}

cache_headers <- function(req, resp) {
# https://www.rfc-editor.org/rfc/rfc7232#section-4.1
cached_headers <- cache_get(req)$headers
as_headers(modify_list(cached_headers, !!!resp$headers))
# https://www.rfc-editor.org/rfc/rfc7232#section-4.1
cache_headers <- function(cached_resp, resp) {
check_response(cached_resp)
as_headers(modify_list(cached_resp$headers, !!!resp$headers))
}

# Caching headers ---------------------------------------------------------
Expand Down
6 changes: 4 additions & 2 deletions tests/testthat/_snaps/req-cache.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
Code
# Immutable
invisible(cache_pre_fetch(req))
Message
Pruning cache
Code
invisible(cache_post_fetch(req, resp))
Message
Saving response to cache "f3805db63ff822b4743f247cfdde10a3"
Code
invisible(cache_pre_fetch(req))
Message
Pruning cache
Found url in cache "f3805db63ff822b4743f247cfdde10a3"
Cached value is fresh; retrieving response from cache
Cached value is fresh; using response from cache

---

Expand Down
34 changes: 18 additions & 16 deletions tests/testthat/test-req-cache.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
test_that("nothing happens if cache not enabled", {
req <- request("http://example.com")

expect_false(cache_exists(req))
expect_equal(cache_pre_fetch(req), req)

resp <- response()
Expand Down Expand Up @@ -60,11 +58,11 @@ test_that("304 retains headers but gets cached body", {

test_that("automatically adds to cache", {
req <- request("http://example.com") %>% req_cache(tempfile())
expect_false(cache_exists(req))
expect_true(is.null(cache_get(req)))

resp <- response(200, headers = 'Etag: "abc"', body = charToRaw("OK"))
cached <- cache_post_fetch(req, resp)
expect_true(cache_exists(req))
expect_false(is.null(cache_get(req)))
expect_equal(cache_get(req), resp)
})

Expand Down Expand Up @@ -116,23 +114,25 @@ test_that("can get and set from cache", {
)
)

expect_false(cache_exists(req))
expect_true(is.null(cache_get(req)))
cache_set(req, resp)
expect_true(cache_exists(req))
expect_equal(cache_get(req), resp)
expect_false(is.null(cache_get(req)))

resp_from_cache <- cache_get(req)
expect_equal(resp_from_cache, resp)

# Uses new headers if available, otherwise cached headers
out_headers <- cache_headers(req, cached_resp)
out_headers <- cache_headers(resp_from_cache, cached_resp)
expect_equal(out_headers$`content-type`, "application/json")
expect_equal(out_headers$Etag, "DEF")
expect_equal(out_headers$other, "new")

# If path is null can leave resp as is
expect_equal(cache_body(req, NULL), resp$body)
expect_equal(resp_body_json(cache_get(req)), list(a = 1L))
expect_equal(cache_body(resp_from_cache, NULL), resp$body)
expect_equal(resp_body_json(resp_from_cache), list(a = 1L))
# If path is set, need to save to path
path <- tempfile()
body <- cache_body(req, path)
body <- cache_body(resp_from_cache, path)
expect_equal(body, new_path(path))
expect_equal(readLines(path, warn = FALSE), rawToChar(resp$body))
})
Expand All @@ -147,15 +147,17 @@ test_that("handles responses with files", {
# File should be copied in cache directory, and response body updated
body_path <- req_cache_path(req, ".body")
expect_equal(readLines(body_path), "Hi there")
expect_equal(cache_get(req)$body, new_path(body_path))

resp_from_cache <- cache_get(req)
expect_equal(resp_from_cache$body, new_path(body_path))

# If path is null, just leave body as is, since req_body() already
# papers over the differences
expect_equal(cache_body(req, NULL), new_path(body_path))
expect_equal(cache_body(resp_from_cache, NULL), new_path(body_path))

# If path is not null, copy to desired location, and update body
path2 <- tempfile()
body <- cache_body(req, path2)
body <- cache_body(resp_from_cache, path2)
expect_equal(readLines(body), "Hi there")
expect_equal(body, new_path(path2))
})
Expand All @@ -165,10 +167,10 @@ test_that("corrupt files are ignored", {
req <- request("http://example.com") %>% req_cache(cache_dir)

writeLines(letters, req_cache_path(req))
expect_false(cache_exists(req))
expect_true(is.null(cache_get(req)))

saveRDS(1:10, req_cache_path(req))
expect_true(cache_exists(req))
expect_false(is.null(cache_get(req)))
})

# pruning -----------------------------------------------------------------
Expand Down

0 comments on commit aaeeb3e

Please sign in to comment.