From 7647b01d5005cde76fa208465382a08767fcc5b4 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Wed, 31 May 2023 14:58:03 +0200 Subject: [PATCH] feat: add etag based on params for car - removes duplicate TAR format ETag check - getEtag now adds W/ for TAR and CAR - adds check for dag index etags --- gateway/handler.go | 59 ++++++++---- gateway/handler_block.go | 2 +- gateway/handler_car.go | 131 +++++++++++++++++---------- gateway/handler_codec.go | 4 +- gateway/handler_ipns_record.go | 8 ++ gateway/handler_tar.go | 16 +--- gateway/handler_unixfs__redirects.go | 2 +- gateway/handler_unixfs_file.go | 2 +- 8 files changed, 136 insertions(+), 88 deletions(-) diff --git a/gateway/handler.go b/gateway/handler.go index 049b4b2a5..75a4dbca0 100644 --- a/gateway/handler.go +++ b/gateway/handler.go @@ -281,7 +281,7 @@ func (i *handler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) { // Detect when If-None-Match HTTP header allows returning HTTP 304 Not Modified // TODO: Handle If-None-Match for CAR files once what goes in the ETag is resolved - ifNoneMatchResolvedPath, ok := i.handleIfNoneMatch(w, r, responseFormat, contentPath, immutableContentPath, logger) + ifNoneMatchResolvedPath, ok := i.handleIfNoneMatch(w, r, responseFormat, contentPath, immutableContentPath) if !ok { return } @@ -413,9 +413,12 @@ func panicHandler(w http.ResponseWriter) { } } -func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid) (modtime time.Time) { - // Set Etag to based on CID (override whatever was set before) - w.Header().Set("Etag", getEtag(r, fileCid)) +func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, cid cid.Cid, responseFormat string) (modtime time.Time) { + // Best effort attempt to set an Etag based on the CID and response format. + // Setting an ETag is handled separately for CARs and IPNS records. 
+ if etag := getEtag(r, cid, responseFormat); etag != "" { + w.Header().Set("Etag", etag) + } // Set Cache-Control and Last-Modified based on contentPath properties if contentPath.Mutable() { @@ -519,9 +522,9 @@ func getFilename(contentPath ipath.Path) string { } // etagMatch evaluates if we can respond with HTTP 304 Not Modified -// It supports multiple weak and strong etags passed in If-None-Matc stringh +// It supports multiple weak and strong etags passed in If-None-Match string // including the wildcard one. -func etagMatch(ifNoneMatchHeader string, cidEtag string, dirEtag string) bool { +func etagMatch(ifNoneMatchHeader string, etagsToCheck ...string) bool { buf := ifNoneMatchHeader for { buf = textproto.TrimString(buf) @@ -541,9 +544,12 @@ func etagMatch(ifNoneMatchHeader string, cidEtag string, dirEtag string) bool { break } // Check for match both strong and weak etags - if etagWeakMatch(etag, cidEtag) || etagWeakMatch(etag, dirEtag) { - return true + for _, etagToCheck := range etagsToCheck { + if etagWeakMatch(etag, etagToCheck) { + return true + } } + buf = remain } return false @@ -583,19 +589,31 @@ func etagWeakMatch(a, b string) bool { return strings.TrimPrefix(a, "W/") == strings.TrimPrefix(b, "W/") } -// generate Etag value based on HTTP request and CID -func getEtag(r *http.Request, cid cid.Cid) string { +// getEtag generates an ETag value based on an HTTP Request, a CID and a response +// format. This function DOES NOT generate ETags for CARs or IPNS Records. +func getEtag(r *http.Request, cid cid.Cid, responseFormat string) string { prefix := `"` suffix := `"` - responseFormat, _, err := customResponseFormat(r) - if err == nil && responseFormat != "" { + + switch responseFormat { + case "": + // Do nothing. + case "application/vnd.ipld.car", "application/vnd.ipfs.ipns-record": + // CARs and IPNS Record ETags are handled differently, in their respective handler. 
+ return "" + case "application/x-tar": + // Weak Etag W/ for formats that we can't guarantee byte-for-byte identical + // responses, but still want to benefit from HTTP Caching. + prefix = "W/" + prefix + fallthrough + default: // application/vnd.ipld.foo → foo // application/x-bar → x-bar shortFormat := responseFormat[strings.LastIndexAny(responseFormat, "/.")+1:] // Etag: "cid.shortFmt" (gives us nice compression together with Content-Disposition in block (raw) and car responses) suffix = `.` + shortFormat + suffix } - // TODO: include selector suffix when https://github.com/ipfs/kubo/issues/8769 lands + return prefix + cid.String() + suffix } @@ -664,9 +682,9 @@ func debugStr(path string) string { return q } -func (i *handler) handleIfNoneMatch(w http.ResponseWriter, r *http.Request, responseFormat string, contentPath ipath.Path, imPath ImmutablePath, logger *zap.SugaredLogger) (ipath.Resolved, bool) { +func (i *handler) handleIfNoneMatch(w http.ResponseWriter, r *http.Request, responseFormat string, contentPath ipath.Path, imPath ImmutablePath) (ipath.Resolved, bool) { // Detect when If-None-Match HTTP header allows returning HTTP 304 Not Modified - if inm := r.Header.Get("If-None-Match"); inm != "" { + if ifNoneMatch := r.Header.Get("If-None-Match"); ifNoneMatch != "" { pathMetadata, err := i.api.ResolvePath(r.Context(), imPath) if err != nil { // Note: webError will replace http.StatusInternalServerError with a more appropriate error (e.g. StatusNotFound, StatusRequestTimeout, StatusServiceUnavailable, etc.) if necessary @@ -677,11 +695,14 @@ func (i *handler) handleIfNoneMatch(w http.ResponseWriter, r *http.Request, resp resolvedPath := pathMetadata.LastSegment pathCid := resolvedPath.Cid() - // need to check against both File and Dir Etag variants - // because this inexpensive check happens before we do any I/O - cidEtag := getEtag(r, pathCid) + + // Checks against the file, dir listing, and dag index Etags. 
+ // This is an inexpensive check, and it happens before we do any I/O. + cidEtag := getEtag(r, pathCid, responseFormat) dirEtag := getDirListingEtag(pathCid) - if etagMatch(inm, cidEtag, dirEtag) { + dagEtag := getDagIndexEtag(pathCid) + + if etagMatch(ifNoneMatch, cidEtag, dirEtag, dagEtag) { // Finish early if client already has a matching Etag w.WriteHeader(http.StatusNotModified) return nil, false diff --git a/gateway/handler_block.go b/gateway/handler_block.go index b21926a02..cb9bc9065 100644 --- a/gateway/handler_block.go +++ b/gateway/handler_block.go @@ -38,7 +38,7 @@ func (i *handler) serveRawBlock(ctx context.Context, w http.ResponseWriter, r *h setContentDispositionHeader(w, name, "attachment") // Set remaining headers - modtime := addCacheControlHeaders(w, r, contentPath, blockCid) + modtime := addCacheControlHeaders(w, r, contentPath, blockCid, "application/vnd.ipld.raw") w.Header().Set("Content-Type", "application/vnd.ipld.raw") w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) diff --git a/gateway/handler_car.go b/gateway/handler_car.go index 8125253d3..78761b5a6 100644 --- a/gateway/handler_car.go +++ b/gateway/handler_car.go @@ -9,6 +9,7 @@ import ( "strings" "time" + "github.com/cespare/xxhash/v2" ipath "github.com/ipfs/boxo/coreiface/path" "github.com/ipfs/go-cid" @@ -39,47 +40,13 @@ func (i *handler) serveCAR(ctx context.Context, w http.ResponseWriter, r *http.R return false } - queryParams := r.URL.Query() - rangeStr, hasRange := queryParams.Get(carRangeBytesKey), queryParams.Has(carRangeBytesKey) - scopeStr, hasScope := queryParams.Get(carTerminalElementTypeKey), queryParams.Has(carTerminalElementTypeKey) - - params := CarParams{} - if hasRange { - rng, err := rangeStrToByteRange(rangeStr) - if err != nil { - err = fmt.Errorf("invalid entity-bytes: %w", err) - i.webError(w, r, err, http.StatusBadRequest) - return false - } - params.Range = &rng - } - - if hasScope { - switch s := DagScope(scopeStr); 
s { - case dagScopeEntity, dagScopeAll, dagScopeBlock: - params.Scope = s - default: - err := fmt.Errorf("unsupported dag-scope %s", scopeStr) - i.webError(w, r, err, http.StatusBadRequest) - return false - } - } else { - params.Scope = dagScopeAll - } - - carFile, err := i.api.GetCAR(ctx, imPath, params) - if !i.handleRequestErrors(w, r, contentPath, err) { + params, err := getCarParams(r) + if err != nil { + i.webError(w, r, err, http.StatusBadRequest) return false } - defer carFile.Close() - imPathStr := imPath.String() - if !strings.HasPrefix(imPathStr, "/ipfs/") { - i.webError(w, r, fmt.Errorf("path does not have /ipfs/ prefix"), http.StatusInternalServerError) - return false - } - firstSegment, _, _ := strings.Cut(imPathStr[6:], "/") - rootCid, err := cid.Decode(firstSegment) + rootCid, err := getCarRootCid(imPath) if err != nil { i.webError(w, r, err, http.StatusInternalServerError) return false @@ -97,23 +64,25 @@ func (i *handler) serveCAR(ctx context.Context, w http.ResponseWriter, r *http.R setContentDispositionHeader(w, name, "attachment") // Set Cache-Control (same logic as for a regular files) - addCacheControlHeaders(w, r, contentPath, rootCid) - - // TODO: What Etag to use here, the full path? - // Weak Etag W/ because we can't guarantee byte-for-byte identical - // responses, but still want to benefit from HTTP Caching. Two CAR - // responses for the same CID and selector will be logically equivalent, - // but when CAR is streamed, then in theory, blocks may arrive from - // datastore in non-deterministic order. - etag := `W/` + getEtag(r, rootCid) + addCacheControlHeaders(w, r, contentPath, rootCid, "application/vnd.ipld.car") + + // Generate the CAR Etag. + etag := getCarEtag(r, imPath, params, rootCid) w.Header().Set("Etag", etag) - // Finish early if Etag match - if r.Header.Get("If-None-Match") == etag { + // Terminate early if Etag matches. 
We cannot rely on handleIfNoneMatch since + // it does not contain the parameters information we retrieve here. + if etagMatch(r.Header.Get("If-None-Match"), etag) { w.WriteHeader(http.StatusNotModified) return false } + carFile, err := i.api.GetCAR(ctx, imPath, params) + if !i.handleRequestErrors(w, r, contentPath, err) { + return false + } + defer carFile.Close() + // Make it clear we don't support range-requests over a car stream // Partial downloads and resumes should be handled using requests for // sub-DAGs and IPLD selectors: https://github.com/ipfs/go-ipfs/issues/8769 @@ -142,6 +111,36 @@ func (i *handler) serveCAR(ctx context.Context, w http.ResponseWriter, r *http.R return true } +func getCarParams(r *http.Request) (CarParams, error) { + queryParams := r.URL.Query() + rangeStr, hasRange := queryParams.Get(carRangeBytesKey), queryParams.Has(carRangeBytesKey) + scopeStr, hasScope := queryParams.Get(carTerminalElementTypeKey), queryParams.Has(carTerminalElementTypeKey) + + params := CarParams{} + if hasRange { + rng, err := rangeStrToByteRange(rangeStr) + if err != nil { + err = fmt.Errorf("invalid entity-bytes: %w", err) + return CarParams{}, err + } + params.Range = &rng + } + + if hasScope { + switch s := DagScope(scopeStr); s { + case dagScopeEntity, dagScopeAll, dagScopeBlock: + params.Scope = s + default: + err := fmt.Errorf("unsupported dag-scope %s", scopeStr) + return CarParams{}, err + } + } else { + params.Scope = dagScopeAll + } + + return params, nil +} + func rangeStrToByteRange(rangeStr string) (DagEntityByteRange, error) { rangeElems := strings.Split(rangeStr, ":") if len(rangeElems) != 2 { @@ -177,3 +176,37 @@ func rangeStrToByteRange(rangeStr string) (DagEntityByteRange, error) { To: &to, }, nil } + +func getCarRootCid(imPath ImmutablePath) (cid.Cid, error) { + imPathStr := imPath.String() + if !strings.HasPrefix(imPathStr, "/ipfs/") { + return cid.Undef, fmt.Errorf("path does not have /ipfs/ prefix") + } + + firstSegment, _, _ := 
strings.Cut(imPathStr[6:], "/") + rootCid, err := cid.Decode(firstSegment) + if err != nil { + return cid.Undef, err + } + + return rootCid, err +} + +func getCarEtag(r *http.Request, imPath ImmutablePath, params CarParams, rootCid cid.Cid) string { + data := imPath.String() + if params.Scope != dagScopeAll { + data += "." + string(params.Scope) + } + + if params.Range != nil { + if params.Range.From != 0 || params.Range.To != nil { + data += "." + strconv.FormatInt(params.Range.From, 10) + if params.Range.To != nil { + data += "." + strconv.FormatInt(*params.Range.To, 10) + } + } + } + + suffix := strconv.FormatUint(xxhash.Sum64([]byte(data)), 32) + return `W/"` + rootCid.String() + ".car." + suffix + `"` +} diff --git a/gateway/handler_codec.go b/gateway/handler_codec.go index 6c2a47ec9..82a6c04fe 100644 --- a/gateway/handler_codec.go +++ b/gateway/handler_codec.go @@ -106,8 +106,8 @@ func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *htt responseContentType = cidContentType } - // Set HTTP headers (for caching etc) - modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid()) + // Set HTTP headers (for caching, etc). Etag will be replaced if handled by serveCodecHTML. + modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid(), responseContentType) name := setCodecContentDisposition(w, r, resolvedPath, responseContentType) w.Header().Set("Content-Type", responseContentType) w.Header().Set("X-Content-Type-Options", "nosniff") diff --git a/gateway/handler_ipns_record.go b/gateway/handler_ipns_record.go index 6d42836b3..18bddcdf9 100644 --- a/gateway/handler_ipns_record.go +++ b/gateway/handler_ipns_record.go @@ -61,6 +61,14 @@ func (i *handler) serveIpnsRecord(ctx context.Context, w http.ResponseWriter, r // TODO: use addCacheControlHeaders once #1818 is fixed. recordEtag := strconv.FormatUint(xxhash.Sum64(rawRecord), 32) w.Header().Set("Etag", recordEtag) + + // Terminate early if Etag matches. 
We cannot rely on handleIfNoneMatch since + // we use the raw record to generate the etag value. + if etagMatch(r.Header.Get("If-None-Match"), recordEtag) { + w.WriteHeader(http.StatusNotModified) + return false + } + if record.Ttl != nil { seconds := int(time.Duration(*record.Ttl).Seconds()) w.Header().Set("Cache-Control", fmt.Sprintf("public, max-age=%d", seconds)) diff --git a/gateway/handler_tar.go b/gateway/handler_tar.go index 27ab8fa40..f0406be7d 100644 --- a/gateway/handler_tar.go +++ b/gateway/handler_tar.go @@ -36,21 +36,7 @@ func (i *handler) serveTAR(ctx context.Context, w http.ResponseWriter, r *http.R rootCid := pathMetadata.LastSegment.Cid() // Set Cache-Control and read optional Last-Modified time - modtime := addCacheControlHeaders(w, r, contentPath, rootCid) - - // Weak Etag W/ because we can't guarantee byte-for-byte identical - // responses, but still want to benefit from HTTP Caching. Two TAR - // responses for the same CID will be logically equivalent, - // but when TAR is streamed, then in theory, files and directories - // may arrive in different order (depends on TAR lib and filesystem/inodes). 
- etag := `W/` + getEtag(r, rootCid) - w.Header().Set("Etag", etag) - - // Finish early if Etag match - if r.Header.Get("If-None-Match") == etag { - w.WriteHeader(http.StatusNotModified) - return false - } + modtime := addCacheControlHeaders(w, r, contentPath, rootCid, "application/x-tar") // Set Content-Disposition var name string diff --git a/gateway/handler_unixfs__redirects.go b/gateway/handler_unixfs__redirects.go index de5d0105f..6a487d5d6 100644 --- a/gateway/handler_unixfs__redirects.go +++ b/gateway/handler_unixfs__redirects.go @@ -203,7 +203,7 @@ func (i *handler) serve4xx(w http.ResponseWriter, r *http.Request, content4xxPat log.Debugf("using _redirects: custom %d file at %q", status, content4xxPath) w.Header().Set("Content-Type", "text/html") w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) - addCacheControlHeaders(w, r, content4xxPath, content4xxCid) + addCacheControlHeaders(w, r, content4xxPath, content4xxCid, "") w.WriteHeader(status) _, err = io.CopyN(w, content4xxFile, size) return err diff --git a/gateway/handler_unixfs_file.go b/gateway/handler_unixfs_file.go index 296bef450..8b247e7fa 100644 --- a/gateway/handler_unixfs_file.go +++ b/gateway/handler_unixfs_file.go @@ -24,7 +24,7 @@ func (i *handler) serveFile(ctx context.Context, w http.ResponseWriter, r *http. defer span.End() // Set Cache-Control and read optional Last-Modified time - modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid()) + modtime := addCacheControlHeaders(w, r, contentPath, resolvedPath.Cid(), "") // Set Content-Disposition name := addContentDispositionHeader(w, r, contentPath)