Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(gw): Etag and If-None-Match #8891

Merged
merged 1 commit into from
Apr 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 78 additions & 5 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"io"
"mime"
"net/http"
"net/textproto"
"net/url"
"os"
gopath "path"
Expand Down Expand Up @@ -409,11 +410,18 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResponseFormat", responseFormat))
trace.SpanFromContext(r.Context()).SetAttributes(attribute.String("ResolvedPath", resolvedPath.String()))

// Finish early if client already has matching Etag
ifNoneMatch := r.Header.Get("If-None-Match")
if ifNoneMatch == getEtag(r, resolvedPath.Cid()) || ifNoneMatch == getDirListingEtag(resolvedPath.Cid()) {
w.WriteHeader(http.StatusNotModified)
return
// Detect when If-None-Match HTTP header allows returning HTTP 304 Not Modified
if inm := r.Header.Get("If-None-Match"); inm != "" {
pathCid := resolvedPath.Cid()
// need to check against both File and Dir Etag variants
// because this inexpensive check happens before we do any I/O
cidEtag := getEtag(r, pathCid)
dirEtag := getDirListingEtag(pathCid)
if etagMatch(inm, cidEtag, dirEtag) {
// Finish early if client already has a matching Etag
w.WriteHeader(http.StatusNotModified)
return
}
}

// Update the global metric of the time it takes to read the final root block of the requested resource
Expand Down Expand Up @@ -818,6 +826,71 @@ func getFilename(contentPath ipath.Path) string {
return gopath.Base(s)
}

// etagMatch evaluates if we can respond with HTTP 304 Not Modified
// It supports multiple weak and strong etags passed in If-None-Matc stringh
// including the wildcard one.
func etagMatch(ifNoneMatchHeader string, cidEtag string, dirEtag string) bool {
buf := ifNoneMatchHeader
for {
buf = textproto.TrimString(buf)
if len(buf) == 0 {
break
}
if buf[0] == ',' {
buf = buf[1:]
continue
}
// If-None-Match: * should match against any etag
if buf[0] == '*' {
return true
}
etag, remain := scanETag(buf)
if etag == "" {
break
}
// Check for match both strong and weak etags
if etagWeakMatch(etag, cidEtag) || etagWeakMatch(etag, dirEtag) {
return true
}
buf = remain
}
return false
}

// scanETag determines if a syntactically valid ETag is present at s. If so,
// the ETag and remaining text after consuming ETag is returned. Otherwise,
// it returns "", "".
// (This is the same logic as one executed inside of http.ServeContent)
func scanETag(s string) (etag string, remain string) {
s = textproto.TrimString(s)
start := 0
if strings.HasPrefix(s, "W/") {
start = 2
}
if len(s[start:]) < 2 || s[start] != '"' {
return "", ""
}
// ETag is either W/"text" or "text".
// See RFC 7232 2.3.
for i := start + 1; i < len(s); i++ {
c := s[i]
switch {
// Character values allowed in ETags.
case c == 0x21 || c >= 0x23 && c <= 0x7E || c >= 0x80:
case c == '"':
return s[:i+1], s[i+1:]
default:
return "", ""
}
}
return "", ""
}

// etagWeakMatch reports whether a and b match using weak ETag comparison.
func etagWeakMatch(a, b string) bool {
return strings.TrimPrefix(a, "W/") == strings.TrimPrefix(b, "W/")
}

// generate Etag value based on HTTP request and CID
func getEtag(r *http.Request, cid cid.Cid) string {
prefix := `"`
Expand Down
12 changes: 3 additions & 9 deletions core/corehttp/gateway_handler_unixfs_dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,15 +93,9 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
// type instead of relying on autodetection (which may fail).
w.Header().Set("Content-Type", "text/html")

// Generated dir index requires custom Etag (it may change between go-ipfs versions)
if assets.AssetHash != "" {
dirEtag := getDirListingEtag(resolvedPath.Cid())
w.Header().Set("Etag", dirEtag)
if r.Header.Get("If-None-Match") == dirEtag {
w.WriteHeader(http.StatusNotModified)
return
}
}
// Generated dir index requires custom Etag (output may change between go-ipfs versions)
dirEtag := getDirListingEtag(resolvedPath.Cid())
w.Header().Set("Etag", dirEtag)

if r.Method == http.MethodHead {
logger.Debug("return as request's HTTP method is HEAD")
Expand Down
25 changes: 25 additions & 0 deletions core/corehttp/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -656,3 +656,28 @@ func TestVersion(t *testing.T) {
t.Fatalf("response doesn't contain protocol version:\n%s", s)
}
}

func TestEtagMatch(t *testing.T) {
for _, test := range []struct {
header string // value in If-None-Match HTTP header
cidEtag string
dirEtag string
expected bool // expected result of etagMatch(header, cidEtag, dirEtag)
}{
{"", `"etag"`, "", false}, // no If-None-Match
{"", "", `"etag"`, false}, // no If-None-Match
{`"etag"`, `"etag"`, "", true}, // file etag match
{`W/"etag"`, `"etag"`, "", true}, // file etag match
{`"foo", W/"bar", W/"etag"`, `"etag"`, "", true}, // file etag match (array)
{`"foo",W/"bar",W/"etag"`, `"etag"`, "", true}, // file etag match (compact array)
{`"etag"`, "", `W/"etag"`, true}, // dir etag match
{`"etag"`, "", `W/"etag"`, true}, // dir etag match
{`W/"etag"`, "", `W/"etag"`, true}, // dir etag match
{`*`, `"etag"`, "", true}, // wildcard etag match
} {
result := etagMatch(test.header, test.cidEtag, test.dirEtag)
if result != test.expected {
t.Fatalf("unexpected result of etagMatch(%q, %q, %q), got %t, expected %t", test.header, test.cidEtag, test.dirEtag, result, test.expected)
}
}
}
49 changes: 49 additions & 0 deletions test/sharness/t0116-gateway-cache.sh
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,55 @@ test_expect_success "Prepare IPNS unixfs content path for testing" '
grep "< Etag: \"${FILE_CID}\"" curl_ipns_file_output
'

# If-None-Match (return 304 Not Modified when client sends matching Etag they already have)

test_expect_success "GET for /ipfs/ file with matching Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: \"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_expect_success "GET for /ipfs/ dir with index.html file with matching Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: \"$ROOT4_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_expect_success "GET for /ipfs/ file with matching third Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: \"fakeEtag1\", \"fakeEtag2\", \"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_expect_success "GET for /ipfs/ file with matching weak Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: W/\"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_expect_success "GET for /ipfs/ file with wildcard Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: *" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_expect_success "GET for /ipns/ file with matching Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: \"$FILE_CID\"" "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/root4/index.html" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_expect_success "GET for /ipfs/ dir listing with matching weak Etag in If-None-Match returns 304 Not Modified" '
curl -svX GET -H "If-None-Match: W/\"$ROOT3_CID\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

# DirIndex etag is based on xxhash(./assets/dir-index-html), so we need to fetch it dynamically
test_expect_success "GET for /ipfs/ dir listing with matching strong Etag in If-None-Match returns 304 Not Modified" '
curl -Is "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/"| grep -i Etag | cut -f2- -d: | tr -d "[:space:]\"" > dir_index_etag &&
curl -svX GET -H "If-None-Match: \"$(<dir_index_etag)\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'
test_expect_success "GET for /ipfs/ dir listing with matching weak Etag in If-None-Match returns 304 Not Modified" '
curl -Is "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/"| grep -i Etag | cut -f2- -d: | tr -d "[:space:]\"" > dir_index_etag &&
curl -svX GET -H "If-None-Match: W/\"$(<dir_index_etag)\"" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_output &&
test_should_contain "304 Not Modified" curl_output
'

test_kill_ipfs_daemon

test_done