Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: X-Ipfs-Roots for smarter HTTP caches #8720

Merged
merged 3 commits into from
Mar 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,13 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
w.Header().Set("X-IPFS-Path", urlPath)
w.Header().Set("Etag", responseEtag)

if rootCids, err := i.buildIpfsRootsHeader(urlPath, r); err == nil {
w.Header().Set("X-Ipfs-Roots", rootCids)
} else { // this should never happen, as we resolved the urlPath already
webError(w, "error while resolving X-Ipfs-Roots", err, http.StatusInternalServerError)
return
}

// set these headers _after_ the error, for we may just not have it
// and don't want the client to cache a 500 response...
// and only if it's /ipfs!
Expand Down Expand Up @@ -375,6 +382,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
internalWebError(w, files.ErrNotReader)
return
}
// static index.html → no need to generate dynamic dir-index-html
// replace mutable DirIndex Etag with immutable dir CID
w.Header().Set("Etag", `"`+resolvedPath.Cid().String()+`"`)

// write to request
i.serveFile(w, r, "index.html", modtime, f)
Expand Down Expand Up @@ -758,6 +768,50 @@ func (i *gatewayHandler) addUserHeaders(w http.ResponseWriter) {
}
}

// buildIpfsRootsHeader builds the value of the X-Ipfs-Roots header: an ordered,
// comma-separated list of logical root CIDs used for efficient HTTP cache
// invalidation.
//
// Each CID represents one segment of contentPath and resolves to either a
// directory or the root block of a file. The main purpose of the header is to
// allow HTTP caches to make smarter decisions around cache invalidation
// (e.g. keep a specific subdirectory/file if it did not change).
//
// A good example is Wikipedia, which is HAMT-sharded, but we only care about
// logical roots that represent each segment of the human-readable content
// path. Given
//
//	contentPath = /ipns/en.wikipedia-on-ipfs.org/wiki/Block_of_Wikipedia_in_Turkey
//
// the list is generated by doing the equivalent of `ipfs resolve -r` on each
// sub path:
//
//	/ipns/en.wikipedia-on-ipfs.org → bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze
//	/ipns/en.wikipedia-on-ipfs.org/wiki/ → bafybeihn2f7lhumh4grizksi2fl233cyszqadkn424ptjajfenykpsaiw4
//	/ipns/en.wikipedia-on-ipfs.org/wiki/Block_of_Wikipedia_in_Turkey → bafkreibn6euazfvoghepcm4efzqx5l3hieof2frhp254hio5y7n3hv5rma
//
// and the resulting header is:
//
//	X-Ipfs-Roots: bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze,bafybeihn2f7lhumh4grizksi2fl233cyszqadkn424ptjajfenykpsaiw4,bafkreibn6euazfvoghepcm4efzqx5l3hieof2frhp254hio5y7n3hv5rma
//
// Note that while the first root will change every time any article is
// changed, the last root (responsible for a specific article) may not change
// at all.
//
// contentPath is sliced at fixed offsets, so it is expected to start with a
// five-byte namespace ("/ipfs" or "/ipns") followed by a separator; a path
// shorter than six bytes panics. The first resolution error is returned as-is
// together with an empty string.
func (i *gatewayHandler) buildIpfsRootsHeader(contentPath string, r *http.Request) (string, error) {
	// Everything after the namespace prefix, one entry per path segment.
	segments := strings.Split(contentPath[6:], "/")
	// Growing sub path, seeded with the namespace: /ipfs or /ipns.
	subPath := contentPath[:5]

	var cids []string
	for _, segment := range segments {
		// Empty segments come from doubled or trailing slashes and are not roots.
		if segment != "" {
			subPath += "/" + segment
			resolved, err := i.api.ResolvePath(r.Context(), ipath.New(subPath))
			if err != nil {
				return "", err
			}
			cids = append(cids, resolved.Cid().String())
		}
	}

	// Comma-separated list, following the convention from rfc2616#sec4.2.
	return strings.Join(cids, ","), nil
}

func webError(w http.ResponseWriter, message string, err error, defaultCode int) {
if _, ok := err.(resolver.ErrNoLink); ok {
webErrorWithCode(w, message, err, http.StatusNotFound)
Expand Down
150 changes: 150 additions & 0 deletions test/sharness/t0116-gateway-cache.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#!/usr/bin/env bash

test_description="Test HTTP Gateway Cache Control Support"

. lib/test-lib.sh

test_init_ipfs
test_launch_ipfs_daemon_without_network

# Cache control support is based on logical roots (each path segment == one logical root).
# To maximize the test surface, we want to test:
# - /ipfs/ content path
# - /ipns/ content path
# - at least 3 levels
# - separate tests for a directory listing and a file
# - have an implicit index.html for good measure
# /ipns/root1/root2/root3/ (/ipns/root1/root2/root3/index.html)

# Note: we cover important edge cases here:
# ROOT3_CID - dir listing (dir-index-html response)
# ROOT4_CID - index.html returned as a root response (dir/), instead of generated dir-index-html
# FILE_CID - index.html returned directly, as a file

# Build a three-level directory tree with an index.html leaf, add it to IPFS
# wrapped in a parent directory (-w), and record the CID of every logical root.
# Every step is chained with && so that a failing command fails the test
# instead of leaving later assertions to run against unset variables.
test_expect_success "Add the test directory" '
  mkdir -p root2/root3/root4 &&
  echo "hello" > root2/root3/root4/index.html &&
  ROOT1_CID=$(ipfs add -Qrw --cid-version 1 root2) &&
  ROOT2_CID=$(ipfs resolve -r /ipfs/$ROOT1_CID/root2 | cut -d "/" -f3) &&
  ROOT3_CID=$(ipfs resolve -r /ipfs/$ROOT1_CID/root2/root3 | cut -d "/" -f3) &&
  ROOT4_CID=$(ipfs resolve -r /ipfs/$ROOT1_CID/root2/root3/root4 | cut -d "/" -f3) &&
  FILE_CID=$(ipfs resolve -r /ipfs/$ROOT1_CID/root2/root3/root4/index.html | cut -d "/" -f3)
'

# Generate a dedicated IPNS key, publish ROOT1_CID under it, and verify that
# the name resolves back to /ipfs/$ROOT1_CID. The key generation step is part
# of the && chain so a failure there is reported by this test.
test_expect_success "Prepare IPNS unixfs content path for testing" '
  TEST_IPNS_ID=$(ipfs key gen --ipns-base=base36 --type=ed25519 cache_test_key | head -n1 | tr -d "\n") &&
  ipfs name publish --key cache_test_key --allow-offline -Q "/ipfs/$ROOT1_CID" > name_publish_out &&
  test_check_peerid "${TEST_IPNS_ID}" &&
  ipfs name resolve "${TEST_IPNS_ID}" > output &&
  printf "/ipfs/%s\n" "$ROOT1_CID" > expected &&
  test_cmp expected output
'

# Each request below discards the response body and saves curl's verbose
# stderr trace to a curl_*_output file. The header assertions later in this
# script grep those files for response header lines (prefixed with "< ").
# The trace is also printed with cat so it shows up in the test log.

# GET /ipfs/
test_expect_success "GET for /ipfs/ unixfs dir listing succeeds" '
curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_ipfs_dir_listing_output &&
cat curl_ipfs_dir_listing_output
'
test_expect_success "GET for /ipfs/ unixfs dir with index.html succeeds" '
curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/" >/dev/null 2>curl_ipfs_dir_index.html_output &&
cat curl_ipfs_dir_index.html_output
'
test_expect_success "GET for /ipfs/ unixfs file succeeds" '
curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_ipfs_file_output &&
cat curl_ipfs_file_output
'
# GET /ipns/
# Same three requests, addressed through the IPNS name published earlier.
test_expect_success "GET for /ipns/ unixfs dir listing succeeds" '
curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/" >/dev/null 2>curl_ipns_dir_listing_output &&
cat curl_ipns_dir_listing_output
'
test_expect_success "GET for /ipns/ unixfs dir with index.html succeeds" '
curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/root4/" >/dev/null 2>curl_ipns_dir_index.html_output &&
cat curl_ipns_dir_index.html_output
'
test_expect_success "GET for /ipns/ unixfs file succeeds" '
curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/root4/index.html" >/dev/null 2>curl_ipns_file_output &&
cat curl_ipns_file_output
'

# X-Ipfs-Path
# The header must echo the content path that was requested, for both the
# /ipfs/ and the /ipns/ namespace.

## dir generated listing
test_expect_success "GET /ipfs/ dir listing response has original content path in X-Ipfs-Path" '
grep "< X-Ipfs-Path: /ipfs/$ROOT1_CID/root2/root3" curl_ipfs_dir_listing_output
'
test_expect_success "GET /ipns/ dir listing response has original content path in X-Ipfs-Path" '
grep "< X-Ipfs-Path: /ipns/$TEST_IPNS_ID/root2/root3" curl_ipns_dir_listing_output
'

## dir static index.html
test_expect_success "GET /ipfs/ dir index.html response has original content path in X-Ipfs-Path" '
grep "< X-Ipfs-Path: /ipfs/$ROOT1_CID/root2/root3/root4/" curl_ipfs_dir_index.html_output
'
test_expect_success "GET /ipns/ dir index.html response has original content path in X-Ipfs-Path" '
grep "< X-Ipfs-Path: /ipns/$TEST_IPNS_ID/root2/root3/root4/" curl_ipns_dir_index.html_output
'

## file
test_expect_success "GET /ipfs/ file response has original content path in X-Ipfs-Path" '
grep "< X-Ipfs-Path: /ipfs/$ROOT1_CID/root2/root3/root4/index.html" curl_ipfs_file_output
'
test_expect_success "GET /ipns/ file response has original content path in X-Ipfs-Path" '
grep "< X-Ipfs-Path: /ipns/$TEST_IPNS_ID/root2/root3/root4/index.html" curl_ipns_file_output
'

# X-Ipfs-Roots
# The header must list one CID per path segment, in path order, separated by
# commas. The /ipns/ responses are expected to carry the same CIDs as the
# /ipfs/ ones, because the IPNS name was published pointing at ROOT1_CID.

## dir generated listing
test_expect_success "GET /ipfs/ dir listing response has logical CID roots in X-Ipfs-Roots" '
grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID}" curl_ipfs_dir_listing_output
'
test_expect_success "GET /ipns/ dir listing response has logical CID roots in X-Ipfs-Roots" '
grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID}" curl_ipns_dir_listing_output
'

## dir static index.html
test_expect_success "GET /ipfs/ dir index.html response has logical CID roots in X-Ipfs-Roots" '
grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID},${ROOT4_CID}" curl_ipfs_dir_index.html_output
'
test_expect_success "GET /ipns/ dir index.html response has logical CID roots in X-Ipfs-Roots" '
grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID},${ROOT4_CID}" curl_ipns_dir_index.html_output
'

## file
test_expect_success "GET /ipfs/ file response has logical CID roots in X-Ipfs-Roots" '
grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID},${ROOT4_CID},${FILE_CID}" curl_ipfs_file_output
'
test_expect_success "GET /ipns/ file response has logical CID roots in X-Ipfs-Roots" '
grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID},${ROOT4_CID},${FILE_CID}" curl_ipns_file_output
'

# Etag
# Three response kinds are checked: a generated directory listing, a directory
# served through its static index.html, and a file requested directly.

## dir generated listing
test_expect_success "GET /ipfs/ dir response has special Etag for generated dir listing" '
grep -E "< Etag: \"DirIndex-.+_CID-${ROOT3_CID}\"" curl_ipfs_dir_listing_output
'
test_expect_success "GET /ipns/ dir response has special Etag for generated dir listing" '
grep -E "< Etag: \"DirIndex-.+_CID-${ROOT3_CID}\"" curl_ipns_dir_listing_output
'

## dir static index.html should use the CID of the directory (not the DirIndex Etag) for improved HTTP caching
test_expect_success "GET /ipfs/ dir index.html response has dir CID as Etag" '
grep "< Etag: \"${ROOT4_CID}\"" curl_ipfs_dir_index.html_output
'
test_expect_success "GET /ipns/ dir index.html response has dir CID as Etag" '
grep "< Etag: \"${ROOT4_CID}\"" curl_ipns_dir_index.html_output
'

## file
test_expect_success "GET /ipfs/ response has CID as Etag for a file" '
grep "< Etag: \"${FILE_CID}\"" curl_ipfs_file_output
'
test_expect_success "GET /ipns/ response has CID as Etag for a file" '
grep "< Etag: \"${FILE_CID}\"" curl_ipns_file_output
'

test_kill_ipfs_daemon

test_done
File renamed without changes.