From 9713912dd579b64646b948982504e660ff35b342 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Fri, 15 Apr 2022 17:04:04 +0200 Subject: [PATCH] feat(gw): improved If-None-Match support Improves the way we handle If-None-Match header: - Support for more than one Etag passed in If-None-Match - Match both strong and weak Etags to maximize caching across various HTTP clients and libraries (some send weak Etags by default) - Support for wildcard '*' - Tests for If-None-Match behavior --- core/corehttp/gateway_handler.go | 83 +++++++++++++++++++-- core/corehttp/gateway_handler_unixfs_dir.go | 12 +-- core/corehttp/gateway_test.go | 25 +++++++ test/sharness/t0116-gateway-cache.sh | 49 ++++++++++++ 4 files changed, 155 insertions(+), 14 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index 0c34cc9b175..03f2ec4f98c 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -7,6 +7,7 @@ import ( "io" "mime" "net/http" + "net/textproto" "net/url" "os" gopath "path" @@ -409,11 +410,18 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat)) trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String())) - // Finish early if client already has matching Etag - ifNoneMatch := r.Header.Get("If-None-Match") - if ifNoneMatch == getEtag(r, resolvedPath.Cid()) || ifNoneMatch == getDirListingEtag(resolvedPath.Cid()) { - w.WriteHeader(http.StatusNotModified) - return + // Detect when If-None-Match HTTP header allows returning HTTP 304 Not Modified + if inm := r.Header.Get("If-None-Match"); inm != "" { + pathCid := resolvedPath.Cid() + // need to check against both File and Dir Etag variants + // because this inexpensive check happens before we do any I/O + cidEtag := getEtag(r, pathCid) + dirEtag := getDirListingEtag(pathCid) + if etagMatch(inm, cidEtag, dirEtag) { + // Finish early if client already has a matching Etag + w.WriteHeader(http.StatusNotModified) + return + } } // Update the global metric of the time it takes to read the final root block of the requested resource @@ -818,6 +826,71 @@ func getFilename(contentPath ipath.Path) string { return gopath.Base(s) } +// etagMatch evaluates if we can respond with HTTP 304 Not Modified +// It supports multiple weak and strong etags passed in If-None-Matc stringh +// including the wildcard one. +func etagMatch(ifNoneMatchHeader string, cidEtag string, dirEtag string) bool { + buf := ifNoneMatchHeader + for { + buf = textproto.TrimString(buf) + if len(buf) == 0 { + break + } + if buf[0] == ',' { + buf = buf[1:] + continue + } + // If-None-Match: * should match against any etag + if buf[0] == '*' { + return true + } + etag, remain := scanETag(buf) + if etag == "" { + break + } + // Check for match both strong and weak etags + if etagWeakMatch(etag, cidEtag) || etagWeakMatch(etag, dirEtag) { + return true + } + buf = remain + } + return false +} + +// scanETag determines if a syntactically valid ETag is present at s. If so, +// the ETag and remaining text after consuming ETag is returned. Otherwise, +// it returns "", "". +// (This is the same logic as one executed inside of http.ServeContent) +func scanETag(s string) (etag string, remain string) { + s = textproto.TrimString(s) + start := 0 + if strings.HasPrefix(s, "W/") { + start = 2 + } + if len(s[start:]) < 2 || s[start] != '"' { + return "", "" + } + // ETag is either W/"text" or "text". + // See RFC 7232 2.3. + for i := start + 1; i < len(s); i++ { + c := s[i] + switch { + // Character values allowed in ETags. + case c == 0x21 || c >= 0x23 && c <= 0x7E || c >= 0x80: + case c == '"': + return s[:i+1], s[i+1:] + default: + return "", "" + } + } + return "", "" +} + +// etagWeakMatch reports whether a and b match using weak ETag comparison. +func etagWeakMatch(a, b string) bool { + return strings.TrimPrefix(a, "W/") == strings.TrimPrefix(b, "W/") +} + // generate Etag value based on HTTP request and CID func getEtag(r *http.Request, cid cid.Cid) string { prefix := `"` diff --git a/core/corehttp/gateway_handler_unixfs_dir.go b/core/corehttp/gateway_handler_unixfs_dir.go index f462e52f8f6..d2cce586801 100644 --- a/core/corehttp/gateway_handler_unixfs_dir.go +++ b/core/corehttp/gateway_handler_unixfs_dir.go @@ -93,15 +93,9 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit // type instead of relying on autodetection (which may fail). w.Header().Set("Content-Type", "text/html") - // Generated dir index requires custom Etag (it may change between go-ipfs versions) - if assets.AssetHash != "" { - dirEtag := getDirListingEtag(resolvedPath.Cid()) - w.Header().Set("Etag", dirEtag) - if r.Header.Get("If-None-Match") == dirEtag { - w.WriteHeader(http.StatusNotModified) - return - } - } + // Generated dir index requires custom Etag (output may change between go-ipfs versions) + dirEtag := getDirListingEtag(resolvedPath.Cid()) + w.Header().Set("Etag", dirEtag) if r.Method == http.MethodHead { logger.Debug("return as request's HTTP method is HEAD") diff --git a/core/corehttp/gateway_test.go b/core/corehttp/gateway_test.go index 2cba931ddc4..303e4a1ac11 100644 --- a/core/corehttp/gateway_test.go +++ b/core/corehttp/gateway_test.go @@ -656,3 +656,28 @@ func TestVersion(t *testing.T) { t.Fatalf("response doesn't contain protocol version:\n%s", s) } } + +func TestEtagMatch(t *testing.T) { + for _, test := range []struct { + header string // value in If-None-Match HTTP header + cidEtag string + dirEtag string + expected bool // expected result of etagMatch(header, cidEtag, dirEtag) + }{ + {"", `"etag"`, "", false}, // no If-None-Match + {"", "", `"etag"`, false}, // no If-None-Match + {`"etag"`, `"etag"`, "", true}, // file etag match + {`W/"etag"`, `"etag"`, "", true}, // file etag match + {`"foo", W/"bar", W/"etag"`, `"etag"`, "", true}, // file etag match (array) + {`"foo",W/"bar",W/"etag"`, `"etag"`, "", true}, // file etag match (compact array) + {`"etag"`, "", `W/"etag"`, true}, // dir etag match + {`"etag"`, "", `W/"etag"`, true}, // dir etag match + {`W/"etag"`, "", `W/"etag"`, true}, // dir etag match + {`*`, `"etag"`, "", true}, // wildcard etag match + } { + result := etagMatch(test.header, test.cidEtag, test.dirEtag) + if result != test.expected { + t.Fatalf("unexpected result of etagMatch(%q, %q, %q), got %t, expected %t", test.header, test.cidEtag, test.dirEtag, result, test.expected) + } + } +} diff --git a/test/sharness/t0116-gateway-cache.sh b/test/sharness/t0116-gateway-cache.sh index 3e9e2af48b8..e5471088e2d 100755 --- a/test/sharness/t0116-gateway-cache.sh +++ b/test/sharness/t0116-gateway-cache.sh @@ -145,6 +145,55 @@ test_expect_success "Prepare IPNS unixfs content path for testing" ' grep "< Etag: \"${FILE_CID}\"" curl_ipns_file_output ' +# If-None-Match (return 304 Not Modified when client sends matching Etag they already have) + + test_expect_success "GET for /ipfs/ file with matching Etag in If-None-Match returns 304 Not Modified" ' + curl -svX GET -H "If-None-Match: \"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output && + test_should_contain "304 Not Modified" curl_output + ' + + test_expect_success "GET for /ipfs/ dir with index.html file with matching Etag in If-None-Match returns 304 Not Modified" ' + curl -svX GET -H "If-None-Match: \"$ROOT4_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/" >/dev/null 2>curl_output && + test_should_contain "304 Not Modified" curl_output + ' + + test_expect_success "GET for /ipfs/ file with matching third Etag in If-None-Match returns 304 Not Modified" ' + curl -svX GET -H "If-None-Match: \"fakeEtag1\", \"fakeEtag2\", \"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output && + test_should_contain "304 Not Modified" curl_output + ' + + test_expect_success "GET for /ipfs/ file with matching weak Etag in If-None-Match returns 304 Not Modified" ' + curl -svX GET -H "If-None-Match: W/\"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output && + test_should_contain "304 Not Modified" curl_output + ' + + test_expect_success "GET for /ipfs/ file with wildcard Etag in If-None-Match returns 304 Not Modified" ' + curl -svX GET -H "If-None-Match: *" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output && + test_should_contain "304 Not Modified" curl_output + ' + + test_expect_success "GET for /ipns/ file with matching Etag in If-None-Match returns 304 Not Modified" ' + curl -svX GET -H "If-None-Match: \"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/root4/index.html" >/dev/null 2>curl_output && + test_should_contain "304 Not Modified" curl_output + ' + + test_expect_success "GET for /ipfs/ dir listing with matching weak Etag in If-None-Match returns 304 Not Modified" ' + curl -svX GET -H "If-None-Match: W/\"$ROOT3_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_output && + test_should_contain "304 Not Modified" curl_output + ' + + # DirIndex etag is based on xxhash(./assets/dir-index-html), so we need to fetch it dynamically + test_expect_success "GET for /ipfs/ dir listing with matching strong Etag in If-None-Match returns 304 Not Modified" ' + curl -Is "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/"| grep -i Etag | cut -f2- -d: | tr -d "[:space:]\"" > dir_index_etag && + curl -svX GET -H "If-None-Match: \"$(/dev/null 2>curl_output && + test_should_contain "304 Not Modified" curl_output + ' + test_expect_success "GET for /ipfs/ dir listing with matching weak Etag in If-None-Match returns 304 Not Modified" ' + curl -Is "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/"| grep -i Etag | cut -f2- -d: | tr -d "[:space:]\"" > dir_index_etag && + curl -svX GET -H "If-None-Match: W/\"$(/dev/null 2>curl_output && + test_should_contain "304 Not Modified" curl_output + ' + test_kill_ipfs_daemon test_done