From 8eb1210d1c41e2677050f38e117f310b82458cfa Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 2 Mar 2022 19:50:45 +0100 Subject: [PATCH 01/17] refactor(gw): move file logic to serveFile func one must imagine Sisyphus happy --- core/corehttp/gateway_handler.go | 111 +++++++++++++++++++------------ 1 file changed, 70 insertions(+), 41 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index d6e45ba927a..14abc9bcf21 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -18,7 +18,7 @@ import ( humanize "github.com/dustin/go-humanize" "github.com/gabriel-vasile/mimetype" - "github.com/ipfs/go-cid" + cid "github.com/ipfs/go-cid" files "github.com/ipfs/go-ipfs-files" assets "github.com/ipfs/go-ipfs/assets" dag "github.com/ipfs/go-merkledag" @@ -32,8 +32,9 @@ import ( ) const ( - ipfsPathPrefix = "/ipfs/" - ipnsPathPrefix = "/ipns/" + ipfsPathPrefix = "/ipfs/" + ipnsPathPrefix = "/ipns/" + immutableCacheControl = "public, max-age=29030400, immutable" ) var onlyAscii = regexp.MustCompile("[[:^ascii:]]") @@ -312,8 +313,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request _, ok := dr.(files.Directory) if ok && assets.BindataVersionHash != "" { + // generated dir listing response responseEtag = `"DirIndex-` + assets.BindataVersionHash + `_CID-` + resolvedPath.Cid().String() + `"` } else { + // regular file response responseEtag = `"` + resolvedPath.Cid().String() + `"` } @@ -324,7 +327,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } i.addUserHeaders(w) // ok, _now_ write user's headers. 
- w.Header().Set("X-IPFS-Path", urlPath) + w.Header().Set("X-Ipfs-Path", urlPath) w.Header().Set("Etag", responseEtag) if rootCids, err := i.buildIpfsRootsHeader(urlPath, r); err == nil { @@ -334,37 +337,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } - // set these headers _after_ the error, for we may just not have it - // and don't want the client to cache a 500 response... - // and only if it's /ipfs! - // TODO: break this out when we split /ipfs /ipns routes. - modtime := time.Now() - if f, ok := dr.(files.File); ok { - if strings.HasPrefix(urlPath, ipfsPathPrefix) { - w.Header().Set("Cache-Control", "public, max-age=29030400, immutable") - - // set modtime to a really long time ago, since files are immutable and should stay cached - modtime = time.Unix(1, 0) - } - - urlFilename := r.URL.Query().Get("filename") - var name string - if urlFilename != "" { - disposition := "inline" - if r.URL.Query().Get("download") == "true" { - disposition = "attachment" - } - utf8Name := url.PathEscape(urlFilename) - asciiName := url.PathEscape(onlyAscii.ReplaceAllLiteralString(urlFilename, "_")) - w.Header().Set("Content-Disposition", fmt.Sprintf("%s; filename=\"%s\"; filename*=UTF-8''%s", disposition, asciiName, utf8Name)) - name = urlFilename - } else { - name = getFilename(urlPath) - } - - logger.Debugw("serving file", "name", name) - i.serveFile(w, r, name, modtime, f) + logger.Debugw("serving file", "path", parsedPath) + i.serveFile(w, r, f, resolvedPath.Cid(), parsedPath) return } dir, ok := dr.(files.Directory) @@ -398,13 +373,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request internalWebError(w, files.ErrNotReader) return } - // static index.html → no need to generate dynamic dir-index-html - // replace mutable DirIndex Etag with immutable dir CID - w.Header().Set("Etag", `"`+resolvedPath.Cid().String()+`"`) logger.Debugw("serving index.html file", "path", idxPath) // write to request - 
i.serveFile(w, r, "index.html", modtime, f) + i.serveFile(w, r, f, resolvedPath.Cid(), idxPath) return case resolver.ErrNoLink: logger.Debugw("no index.html; noop", "path", idxPath) @@ -527,18 +499,70 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } } -func (i *gatewayHandler) serveFile(w http.ResponseWriter, req *http.Request, name string, modtime time.Time, file files.File) { +// serveFile returns data behind a file along with HTTP headers based on +// the file itself, its CID and the contentPath used for accessing it. +func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, file files.File, fileCid cid.Cid, contentPath ipath.Path) { + var modtime time.Time + name := getFilename(contentPath) + + // Set Etag to file's CID (override whatever we set before) + w.Header().Set("Etag", `"`+fileCid.String()+`"`) + + // Set Cache-Control and Last-Modified + if contentPath.Mutable() { + // mutable namespaces such as /ipns/ can't be cached forever + + // TODO: set Cache-Control based on TTL of IPNS/DNSLink: https://github.com/ipfs/go-ipfs/issues/1818#issuecomment-1015849462 + // TODO: set Last-Modified based on unixfs 1.5 (if present) + + /* For now we set Last-Modified to Now() to leverage caching heuristics built into modern browsers: + * https://github.com/ipfs/go-ipfs/pull/8074#pullrequestreview-645196768 + * but we should not set it to fake values and use Cache-Control based on TTL instead */ + modtime = time.Now() + + } else { + // immutable! CACHE ALL THE THINGS, FOREVER! + w.Header().Set("Cache-Control", immutableCacheControl) + + // Set modtime to 'zero time' to disable Last-Modified header (superseded by Cache-Control) + modtime = time.Unix(0, 0) + // TODO: support unixfs 1.5 and set it if modification metadata is present in unixfs: https://github.com/ipfs/go-ipfs/issues/6920 + } + + /* Set Content-Disposition if needed. + * This logic enables: + * - creation of HTML links that trigger "Save As.." 
dialog instead of being rendered by the browser + * - overriding the filename used when saving subrresource assets on HTML page + * - provide default filename for HTTP clients when downloading direct /ipfs/CID without any subpath + */ + // URL param ?filename=cat.jpg triggers Content-Disposition: [..] filename + urlFilename := r.URL.Query().Get("filename") + if urlFilename != "" { + disposition := "inline" + // URL param ?download=true triggers Content-Disposition: [..] attachment + if r.URL.Query().Get("download") == "true" { + disposition = "attachment" + } + utf8Name := url.PathEscape(urlFilename) + asciiName := url.PathEscape(onlyAscii.ReplaceAllLiteralString(urlFilename, "_")) + w.Header().Set("Content-Disposition", fmt.Sprintf("%s; filename=\"%s\"; filename*=UTF-8''%s", disposition, asciiName, utf8Name)) + name = urlFilename + } + + // Prepare size value for Content-Length HTTP header (set inside of http.ServeContent) size, err := file.Size() if err != nil { http.Error(w, "cannot serve files with unknown sizes", http.StatusBadGateway) return } + // Lazy seeker enables efficient range-requests and HTTP HEAD responses content := &lazySeeker{ size: size, reader: file, } + // Calculate deterministic value for Content-Type HTTP header var ctype string if _, isSymlink := file.(*files.Symlink); isSymlink { // We should be smarter about resolving symlinks but this is the @@ -570,10 +594,14 @@ func (i *gatewayHandler) serveFile(w http.ResponseWriter, req *http.Request, nam ctype = "text/html" } } + // Setting explicit Content-Type to avoid mime-type sniffing on the client + // (unifies behavior across gateways and web browsers) w.Header().Set("Content-Type", ctype) + // special fixup around redirects w = &statusResponseWriter{w} - http.ServeContent(w, req, name, modtime, content) + + http.ServeContent(w, r, name, modtime, content) } func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, parsedPath ipath.Path) bool { @@ -863,7 +891,8 
@@ func internalWebError(w http.ResponseWriter, err error) { webErrorWithCode(w, "internalWebError", err, http.StatusInternalServerError) } -func getFilename(s string) string { +func getFilename(contentPath ipath.Path) string { + s := contentPath.String() if (strings.HasPrefix(s, ipfsPathPrefix) || strings.HasPrefix(s, ipnsPathPrefix)) && strings.Count(gopath.Clean(s), "/") <= 2 { // Don't want to treat ipfs.io in /ipns/ipfs.io as a filename. return "" From d2f563e04f3e4fc3476a9daa3b5615d532e2f87c Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Thu, 3 Mar 2022 00:44:38 +0100 Subject: [PATCH 02/17] feat: serveRawBlock implements ?format=block This is mvp which reuses http header logic from serveFile, plus custom content-disposition to ensure browsers dont render garbage --- core/corehttp/gateway_handler.go | 215 ++++++++++++++++++++----------- 1 file changed, 143 insertions(+), 72 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index 14abc9bcf21..a56986cc95b 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -1,10 +1,12 @@ package corehttp import ( + "bytes" "context" "fmt" "html/template" "io" + "io/ioutil" "mime" "net/http" "net/url" @@ -38,6 +40,7 @@ const ( ) var onlyAscii = regexp.MustCompile("[[:^ascii:]]") +var noModtime = time.Unix(0, 0) // disables Last-Modified header if passed as modtime // HTML-based redirect for errors which can be recovered from, but we want // to provide hint to people that they should fix things on their end. 
@@ -65,6 +68,7 @@ type gatewayHandler struct { config GatewayConfig api coreiface.CoreAPI + // TODO: add metrics for non-unixfs responses (block, car) unixfsGetMetric *prometheus.SummaryVec } @@ -297,38 +301,32 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } - dr, err := i.api.Unixfs().Get(r.Context(), resolvedPath) - if err != nil { - webError(w, "ipfs cat "+escapedURLPath, err, http.StatusNotFound) + // Finish early if client already has matching Etag + // (suffix match to cover both direct CID and DirIndex cases) + cidEtagSuffix := resolvedPath.Cid().String() + `"` + if strings.HasSuffix(r.Header.Get("If-None-Match"), cidEtagSuffix) { + w.WriteHeader(http.StatusNotModified) return } - i.unixfsGetMetric.WithLabelValues(parsedPath.Namespace()).Observe(time.Since(begin).Seconds()) - - defer dr.Close() - - var responseEtag string - - // we need to figure out whether this is a directory before doing most of the heavy lifting below - _, ok := dr.(files.Directory) - - if ok && assets.BindataVersionHash != "" { - // generated dir listing response - responseEtag = `"DirIndex-` + assets.BindataVersionHash + `_CID-` + resolvedPath.Cid().String() + `"` - } else { - // regular file response - responseEtag = `"` + resolvedPath.Cid().String() + `"` - } - - // Check etag sent back to us - if r.Header.Get("If-None-Match") == responseEtag || r.Header.Get("If-None-Match") == `W/`+responseEtag { - w.WriteHeader(http.StatusNotModified) - return + // Support custom response format via explicit override in URL + if responseFormat := r.URL.Query().Get("format"); responseFormat != "" { + switch responseFormat { + case "block": + logger.Debugw("serving raw block", "path", parsedPath) + i.serveRawBlock(w, r, resolvedPath.Cid(), parsedPath) + return + // TODO: case "car" + default: + err := fmt.Errorf("requested unsupported response format") + webError(w, "failed to parse request format", err, http.StatusBadRequest) + return + } } + // HTTP 
Headers i.addUserHeaders(w) // ok, _now_ write user's headers. w.Header().Set("X-Ipfs-Path", urlPath) - w.Header().Set("Etag", responseEtag) if rootCids, err := i.buildIpfsRootsHeader(urlPath, r); err == nil { w.Header().Set("X-Ipfs-Roots", rootCids) @@ -337,17 +335,31 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } + // Handling Unixfs + dr, err := i.api.Unixfs().Get(r.Context(), resolvedPath) + if err != nil { + webError(w, "ipfs cat "+escapedURLPath, err, http.StatusNotFound) + return + } + // TODO: do we want to reuse unixfsGetMetric for block/car, or should we have separate ones? + i.unixfsGetMetric.WithLabelValues(parsedPath.Namespace()).Observe(time.Since(begin).Seconds()) + defer dr.Close() + + // Handling Unixfs file if f, ok := dr.(files.File); ok { logger.Debugw("serving file", "path", parsedPath) - i.serveFile(w, r, f, resolvedPath.Cid(), parsedPath) + i.serveFile(w, r, parsedPath, resolvedPath.Cid(), f) return } + + // Handling Unixfs directory dir, ok := dr.(files.Directory) if !ok { internalWebError(w, fmt.Errorf("unsupported file type")) return } + // Check if directory has index.html, if so, serveFile idxPath := ipath.Join(resolvedPath, "index.html") idx, err := i.api.Unixfs().Get(r.Context(), idxPath) switch err.(type) { @@ -376,7 +388,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request logger.Debugw("serving index.html file", "path", idxPath) // write to request - i.serveFile(w, r, f, resolvedPath.Cid(), idxPath) + i.serveFile(w, r, idxPath, resolvedPath.Cid(), f) return case resolver.ErrNoLink: logger.Debugw("no index.html; noop", "path", idxPath) @@ -398,6 +410,17 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request // A HTML directory index will be presented, be sure to set the correct // type instead of relying on autodetection (which may fail). 
w.Header().Set("Content-Type", "text/html") + + // Generated dir index requires custom Etag (it may change between go-ipfs versions) + if assets.BindataVersionHash != "" { + dirEtag := `"DirIndex-` + assets.BindataVersionHash + `_CID-` + resolvedPath.Cid().String() + `"` + w.Header().Set("Etag", dirEtag) + if r.Header.Get("If-None-Match") == dirEtag { + w.WriteHeader(http.StatusNotModified) + return + } + } + if r.Method == http.MethodHead { logger.Debug("return as request's HTTP method is HEAD") return @@ -501,53 +524,13 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request // serveFile returns data behind a file along with HTTP headers based on // the file itself, its CID and the contentPath used for accessing it. -func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, file files.File, fileCid cid.Cid, contentPath ipath.Path) { - var modtime time.Time - name := getFilename(contentPath) - - // Set Etag to file's CID (override whatever we set before) - w.Header().Set("Etag", `"`+fileCid.String()+`"`) - - // Set Cache-Control and Last-Modified - if contentPath.Mutable() { - // mutable namespaces such as /ipns/ can't be cached forever - - // TODO: set Cache-Control based on TTL of IPNS/DNSLink: https://github.com/ipfs/go-ipfs/issues/1818#issuecomment-1015849462 - // TODO: set Last-Modified based on unixfs 1.5 (if present) - - /* For now we set Last-Modified to Now() to leverage caching heuristics built into modern browsers: - * https://github.com/ipfs/go-ipfs/pull/8074#pullrequestreview-645196768 - * but we should not set it to fake values and use Cache-Control based on TTL instead */ - modtime = time.Now() +func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid, file files.File) { - } else { - // immutable! CACHE ALL THE THINGS, FOREVER! 
- w.Header().Set("Cache-Control", immutableCacheControl) + // Set Cache-Control and read optional Last-Modified time + modtime := addCacheControlHeaders(w, r, contentPath, fileCid) - // Set modtime to 'zero time' to disable Last-Modified header (superseded by Cache-Control) - modtime = time.Unix(0, 0) - // TODO: support unixfs 1.5 and set it if modification metadata is present in unixfs: https://github.com/ipfs/go-ipfs/issues/6920 - } - - /* Set Content-Disposition if needed. - * This logic enables: - * - creation of HTML links that trigger "Save As.." dialog instead of being rendered by the browser - * - overriding the filename used when saving subrresource assets on HTML page - * - provide default filename for HTTP clients when downloading direct /ipfs/CID without any subpath - */ - // URL param ?filename=cat.jpg triggers Content-Disposition: [..] filename - urlFilename := r.URL.Query().Get("filename") - if urlFilename != "" { - disposition := "inline" - // URL param ?download=true triggers Content-Disposition: [..] 
attachment - if r.URL.Query().Get("download") == "true" { - disposition = "attachment" - } - utf8Name := url.PathEscape(urlFilename) - asciiName := url.PathEscape(onlyAscii.ReplaceAllLiteralString(urlFilename, "_")) - w.Header().Set("Content-Disposition", fmt.Sprintf("%s; filename=\"%s\"; filename*=UTF-8''%s", disposition, asciiName, utf8Name)) - name = urlFilename - } + // Set Content-Disposition + name := addContentDispositionHeader(w, r, contentPath) // Prepare size value for Content-Length HTTP header (set inside of http.ServeContent) size, err := file.Size() @@ -563,6 +546,7 @@ func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, file } // Calculate deterministic value for Content-Type HTTP header + // (we prefer to do it here, rather than using implicit sniffing in http.ServeContent) var ctype string if _, isSymlink := file.(*files.Symlink); isSymlink { // We should be smarter about resolving symlinks but this is the @@ -604,6 +588,32 @@ func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, file http.ServeContent(w, r, name, modtime, content) } +func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, blockCid cid.Cid, contentPath ipath.Path) { + blockReader, err := i.api.Block().Get(r.Context(), contentPath) + if err != nil { + webError(w, "failed to get block", err, http.StatusInternalServerError) + return + } + block, err := ioutil.ReadAll(blockReader) + if err != nil { + webError(w, "failed to read block", err, http.StatusInternalServerError) + return + } + content := bytes.NewReader(block) + + // Set Content-Disposition + name := blockCid.String() + ".ipfs.block" + setContentDispositionHeader(w, name, "attachment") + + // Set remaining headers + modtime := addCacheControlHeaders(w, r, contentPath, blockCid) + w.Header().Set("Content-Type", "application/octet-stream") + w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) + + // Done: http.ServeContent 
will take care of Content-Length and range requests + http.ServeContent(w, r, name, modtime, content) +} + func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, parsedPath ipath.Path) bool { resolved404Path, ctype, err := i.searchUpTreeFor404(r, parsedPath) if err != nil { @@ -823,6 +833,67 @@ func (i *gatewayHandler) addUserHeaders(w http.ResponseWriter) { } } +func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid) (modtime time.Time) { + // Set Etag to file's CID (override whatever was set before) + w.Header().Set("Etag", `"`+fileCid.String()+`"`) + + // Set Cache-Control and Last-Modified based on contentPath properties + if contentPath.Mutable() { + // mutable namespaces such as /ipns/ can't be cached forever + + /* For now we set Last-Modified to Now() to leverage caching heuristics built into modern browsers: + * https://github.com/ipfs/go-ipfs/pull/8074#pullrequestreview-645196768 + * but we should not set it to fake values and use Cache-Control based on TTL instead */ + modtime = time.Now() + + // TODO: set Cache-Control based on TTL of IPNS/DNSLink: https://github.com/ipfs/go-ipfs/issues/1818#issuecomment-1015849462 + // TODO: set Last-Modified if modification metadata is present in unixfs 1.5: https://github.com/ipfs/go-ipfs/issues/6920 + + } else { + // immutable! CACHE ALL THE THINGS, FOREVER! 
wolololol + w.Header().Set("Cache-Control", immutableCacheControl) + + // Set modtime to 'zero time' to disable Last-Modified header (superseded by Cache-Control) + modtime = noModtime + + // TODO: set Last-Modified if modification metadata is present in unixfs 1.5: https://github.com/ipfs/go-ipfs/issues/6920 + } + + return modtime +} + +// Set Content-Disposition if filename URL query param is present, return preferred filename +func addContentDispositionHeader(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) string { + /* This logic enables: + * - creation of HTML links that trigger "Save As.." dialog instead of being rendered by the browser + * - overriding the filename used when saving subresource assets on HTML page + * - providing a default filename for HTTP clients when downloading direct /ipfs/CID without any subpath + */ + + // URL param ?filename=cat.jpg triggers Content-Disposition: [..] filename + // which impacts default name used in "Save As.." dialog + name := getFilename(contentPath) + urlFilename := r.URL.Query().Get("filename") + if urlFilename != "" { + disposition := "inline" + // URL param ?download=true triggers Content-Disposition: [..] attachment + // which skips rendering and forces "Save As.." dialog in browsers + if r.URL.Query().Get("download") == "true" { + disposition = "attachment" + } + setContentDispositionHeader(w, urlFilename, disposition) + name = urlFilename + } + return name +} + +// Set Content-Disposition to arbitrary filename and disposition +func setContentDispositionHeader(w http.ResponseWriter, filename string, disposition string) { + utf8Name := url.PathEscape(filename) + asciiName := url.PathEscape(onlyAscii.ReplaceAllLiteralString(filename, "_")) + w.Header().Set("Content-Disposition", fmt.Sprintf("%s; filename=\"%s\"; filename*=UTF-8''%s", disposition, asciiName, utf8Name)) +} + // Set X-Ipfs-Roots with logical CID array for efficient HTTP cache invalidation. 
func (i *gatewayHandler) buildIpfsRootsHeader(contentPath string, r *http.Request) (string, error) { /* From e213164abfc9a11835ff740d9b23de8269a4761e Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Thu, 3 Mar 2022 02:14:13 +0100 Subject: [PATCH 03/17] feat: serveCar implements ?format=car This is PoC implementation that returns CAR as a chunked stream. It does not set cache-control nor it has content-length. TBD if we want/can have these things. --- core/corehttp/gateway_handler.go | 56 ++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index a56986cc95b..f46b6379e6c 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -20,6 +20,7 @@ import ( humanize "github.com/dustin/go-humanize" "github.com/gabriel-vasile/mimetype" + blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" files "github.com/ipfs/go-ipfs-files" assets "github.com/ipfs/go-ipfs/assets" @@ -29,6 +30,8 @@ import ( "github.com/ipfs/go-path/resolver" coreiface "github.com/ipfs/interface-go-ipfs-core" ipath "github.com/ipfs/interface-go-ipfs-core/path" + gocar "github.com/ipld/go-car" + selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" routing "github.com/libp2p/go-libp2p-core/routing" prometheus "github.com/prometheus/client_golang/prometheus" ) @@ -316,9 +319,12 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request logger.Debugw("serving raw block", "path", parsedPath) i.serveRawBlock(w, r, resolvedPath.Cid(), parsedPath) return - // TODO: case "car" + case "car": + logger.Debugw("serving car", "path", parsedPath) + i.serveCar(w, r, resolvedPath.Cid(), parsedPath) + return default: - err := fmt.Errorf("requested unsupported response format") + err := fmt.Errorf("unsupported format %q", responseFormat) webError(w, "failed to parse request format", err, http.StatusBadRequest) return } @@ 
-614,6 +620,52 @@ func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, b http.ServeContent(w, r, name, modtime, content) } +func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path) { + ctx := r.Context() + + // Set Content-Disposition + name := rootCid.String() + ".ipfs.car" + setContentDispositionHeader(w, name, "attachment") + + // Set remaining headers + /* TODO modtime := addCacheControlHeaders(w, r, contentPath, rootCid) + - how does cache-control look like, given car can fail mid-stream? + - we don't want clients to cache partial/interrupted CAR + - we may document that client should verify that all blocks were dowloaded, + or we may leverage content-length to hint something went wrong + */ + + /* TODO: content-length (so user agents show % of remaining download) + - introduce max-car-size limit in go-ipfs-config and pre-compute CAR first, and then get size and use lazySeeker? + - are we able to provide length for Unixfs DAGs? (CumulativeSize+CARv0 header+envelopes) + */ + + w.Header().Set("Content-Type", "application/octet-stream") + w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) + + // Same go-car settings as dag.export command + store := dagStore{dag: i.api.Dag(), ctx: ctx} + dag := gocar.Dag{Root: rootCid, Selector: selectorparse.CommonSelector_ExploreAllRecursively} + car := gocar.NewSelectiveCar(ctx, store, []gocar.Dag{dag}, gocar.TraverseLinksOnlyOnce()) + + w.Header().Set("Transfer-Encoding", "chunked") + w.WriteHeader(http.StatusOK) + + if err := car.Write(w); err != nil { + // TODO: can we do any error handling here? 
+ } +} + +type dagStore struct { + dag coreiface.APIDagService + ctx context.Context +} + +func (ds dagStore) Get(c cid.Cid) (blocks.Block, error) { + obj, err := ds.dag.Get(ds.ctx, c) + return obj, err +} + func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, parsedPath ipath.Path) bool { resolved404Path, ctype, err := i.searchUpTreeFor404(r, parsedPath) if err != nil { From 9fbfb0b3931117cfcf055e6f694f3500f6d1dd62 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 8 Mar 2022 02:16:10 +0100 Subject: [PATCH 04/17] feat(gw): ?format= or Accept HTTP header - extracted file-like content type responses to separate .go files - Accept HTTP header with support for application/vnd.ipld.* types (TBD, we did not register them yet, so for illustration purpose only) --- core/corehttp/gateway_handler.go | 206 ++++++------------------- core/corehttp/gateway_handler_block.go | 37 +++++ core/corehttp/gateway_handler_car.go | 59 +++++++ core/corehttp/gateway_handler_file.go | 81 ++++++++++ 4 files changed, 220 insertions(+), 163 deletions(-) create mode 100644 core/corehttp/gateway_handler_block.go create mode 100644 core/corehttp/gateway_handler_car.go create mode 100644 core/corehttp/gateway_handler_file.go diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index f46b6379e6c..0973e42cf9e 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -1,13 +1,10 @@ package corehttp import ( - "bytes" "context" "fmt" "html/template" "io" - "io/ioutil" - "mime" "net/http" "net/url" "os" @@ -19,8 +16,6 @@ import ( "time" humanize "github.com/dustin/go-humanize" - "github.com/gabriel-vasile/mimetype" - blocks "github.com/ipfs/go-block-format" cid "github.com/ipfs/go-cid" files "github.com/ipfs/go-ipfs-files" assets "github.com/ipfs/go-ipfs/assets" @@ -30,8 +25,6 @@ import ( "github.com/ipfs/go-path/resolver" coreiface "github.com/ipfs/interface-go-ipfs-core" ipath 
"github.com/ipfs/interface-go-ipfs-core/path" - gocar "github.com/ipld/go-car" - selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" routing "github.com/libp2p/go-libp2p-core/routing" prometheus "github.com/prometheus/client_golang/prometheus" ) @@ -312,24 +305,6 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } - // Support custom response format via explicit override in URL - if responseFormat := r.URL.Query().Get("format"); responseFormat != "" { - switch responseFormat { - case "block": - logger.Debugw("serving raw block", "path", parsedPath) - i.serveRawBlock(w, r, resolvedPath.Cid(), parsedPath) - return - case "car": - logger.Debugw("serving car", "path", parsedPath) - i.serveCar(w, r, resolvedPath.Cid(), parsedPath) - return - default: - err := fmt.Errorf("unsupported format %q", responseFormat) - webError(w, "failed to parse request format", err, http.StatusBadRequest) - return - } - } - // HTTP Headers i.addUserHeaders(w) // ok, _now_ write user's headers. 
w.Header().Set("X-Ipfs-Path", urlPath) @@ -341,6 +316,32 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } + // Support custom response formats passed via ?format or Accept HTTP header + if contentType := getExplicitContentType(r); contentType != "" { + switch contentType { + case "application/vnd.ipld.raw": + logger.Debugw("serving raw block", "path", parsedPath) + i.serveRawBlock(w, r, resolvedPath.Cid(), parsedPath) + return + case "application/vnd.ipld.car": + logger.Debugw("serving car stream", "path", parsedPath) + i.serveCar(w, r, resolvedPath.Cid(), parsedPath) + return + case "application/vnd.ipld.car; version=1": + logger.Debugw("serving car stream", "path", parsedPath) + i.serveCar(w, r, resolvedPath.Cid(), parsedPath) + return + case "application/vnd.ipld.car; version=2": // no CARv2 in go-ipfs atm + err := fmt.Errorf("unsupported CARv2 format, try again with CARv1") + webError(w, "failed respond with requested content type", err, http.StatusBadRequest) + return + default: + err := fmt.Errorf("unsupported format %q", contentType) + webError(w, "failed respond with requested content type", err, http.StatusBadRequest) + return + } + } + // Handling Unixfs dr, err := i.api.Unixfs().Get(r.Context(), resolvedPath) if err != nil { @@ -528,144 +529,6 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } } -// serveFile returns data behind a file along with HTTP headers based on -// the file itself, its CID and the contentPath used for accessing it. 
-func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid, file files.File) { - - // Set Cache-Control and read optional Last-Modified time - modtime := addCacheControlHeaders(w, r, contentPath, fileCid) - - // Set Content-Disposition - name := addContentDispositionHeader(w, r, contentPath) - - // Prepare size value for Content-Length HTTP header (set inside of http.ServeContent) - size, err := file.Size() - if err != nil { - http.Error(w, "cannot serve files with unknown sizes", http.StatusBadGateway) - return - } - - // Lazy seeker enables efficient range-requests and HTTP HEAD responses - content := &lazySeeker{ - size: size, - reader: file, - } - - // Calculate deterministic value for Content-Type HTTP header - // (we prefer to do it here, rather than using implicit sniffing in http.ServeContent) - var ctype string - if _, isSymlink := file.(*files.Symlink); isSymlink { - // We should be smarter about resolving symlinks but this is the - // "most correct" we can be without doing that. - ctype = "inode/symlink" - } else { - ctype = mime.TypeByExtension(gopath.Ext(name)) - if ctype == "" { - // uses https://github.com/gabriel-vasile/mimetype library to determine the content type. - // Fixes https://github.com/ipfs/go-ipfs/issues/7252 - mimeType, err := mimetype.DetectReader(content) - if err != nil { - http.Error(w, fmt.Sprintf("cannot detect content-type: %s", err.Error()), http.StatusInternalServerError) - return - } - - ctype = mimeType.String() - _, err = content.Seek(0, io.SeekStart) - if err != nil { - http.Error(w, "seeker can't seek", http.StatusInternalServerError) - return - } - } - // Strip the encoding from the HTML Content-Type header and let the - // browser figure it out. 
- // - // Fixes https://github.com/ipfs/go-ipfs/issues/2203 - if strings.HasPrefix(ctype, "text/html;") { - ctype = "text/html" - } - } - // Setting explicit Content-Type to avoid mime-type sniffing on the client - // (unifies behavior across gateways and web browsers) - w.Header().Set("Content-Type", ctype) - - // special fixup around redirects - w = &statusResponseWriter{w} - - http.ServeContent(w, r, name, modtime, content) -} - -func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, blockCid cid.Cid, contentPath ipath.Path) { - blockReader, err := i.api.Block().Get(r.Context(), contentPath) - if err != nil { - webError(w, "failed to get block", err, http.StatusInternalServerError) - return - } - block, err := ioutil.ReadAll(blockReader) - if err != nil { - webError(w, "failed to read block", err, http.StatusInternalServerError) - return - } - content := bytes.NewReader(block) - - // Set Content-Disposition - name := blockCid.String() + ".ipfs.block" - setContentDispositionHeader(w, name, "attachment") - - // Set remaining headers - modtime := addCacheControlHeaders(w, r, contentPath, blockCid) - w.Header().Set("Content-Type", "application/octet-stream") - w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) - - // Done: http.ServeContent will take care of Content-Length and range requests - http.ServeContent(w, r, name, modtime, content) -} - -func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path) { - ctx := r.Context() - - // Set Content-Disposition - name := rootCid.String() + ".ipfs.car" - setContentDispositionHeader(w, name, "attachment") - - // Set remaining headers - /* TODO modtime := addCacheControlHeaders(w, r, contentPath, rootCid) - - how does cache-control look like, given car can fail mid-stream? 
- - we don't want clients to cache partial/interrupted CAR - - we may document that client should verify that all blocks were dowloaded, - or we may leverage content-length to hint something went wrong - */ - - /* TODO: content-length (so user agents show % of remaining download) - - introduce max-car-size limit in go-ipfs-config and pre-compute CAR first, and then get size and use lazySeeker? - - are we able to provide length for Unixfs DAGs? (CumulativeSize+CARv0 header+envelopes) - */ - - w.Header().Set("Content-Type", "application/octet-stream") - w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) - - // Same go-car settings as dag.export command - store := dagStore{dag: i.api.Dag(), ctx: ctx} - dag := gocar.Dag{Root: rootCid, Selector: selectorparse.CommonSelector_ExploreAllRecursively} - car := gocar.NewSelectiveCar(ctx, store, []gocar.Dag{dag}, gocar.TraverseLinksOnlyOnce()) - - w.Header().Set("Transfer-Encoding", "chunked") - w.WriteHeader(http.StatusOK) - - if err := car.Write(w); err != nil { - // TODO: can we do any error handling here? 
- } -} - -type dagStore struct { - dag coreiface.APIDagService - ctx context.Context -} - -func (ds dagStore) Get(c cid.Cid) (blocks.Block, error) { - obj, err := ds.dag.Get(ds.ctx, c) - return obj, err -} - func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, parsedPath ipath.Path) bool { resolved404Path, ctype, err := i.searchUpTreeFor404(r, parsedPath) if err != nil { @@ -1023,6 +886,23 @@ func getFilename(contentPath ipath.Path) string { return gopath.Base(s) } +// return explicit response format if specified in request as query parameter or via Accept HTTP header +func getExplicitContentType(r *http.Request) string { + if formatParam := r.URL.Query().Get("format"); formatParam != "" { + // translate query param to a content type + switch formatParam { + case "raw": + return "application/vnd.ipld.raw" + case "car": + return "application/vnd.ipld.car" + } + } + if accept := r.Header.Get("Accept"); strings.HasPrefix(accept, "application/vnd.") { + return accept + } + return "" +} + func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, parsedPath ipath.Path) (ipath.Resolved, string, error) { filename404, ctype, err := preferred404Filename(r.Header.Values("Accept")) if err != nil { diff --git a/core/corehttp/gateway_handler_block.go b/core/corehttp/gateway_handler_block.go new file mode 100644 index 00000000000..9264e6875a4 --- /dev/null +++ b/core/corehttp/gateway_handler_block.go @@ -0,0 +1,37 @@ +package corehttp + +import ( + "bytes" + "io/ioutil" + "net/http" + + cid "github.com/ipfs/go-cid" + ipath "github.com/ipfs/interface-go-ipfs-core/path" +) + +// serveRawBlock returns bytes behind a raw block +func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, blockCid cid.Cid, contentPath ipath.Path) { + blockReader, err := i.api.Block().Get(r.Context(), contentPath) + if err != nil { + webError(w, "failed to get block", err, http.StatusInternalServerError) + return + } + block, err := 
ioutil.ReadAll(blockReader) + if err != nil { + webError(w, "failed to read block", err, http.StatusInternalServerError) + return + } + content := bytes.NewReader(block) + + // Set Content-Disposition + name := blockCid.String() + ".raw" + setContentDispositionHeader(w, name, "attachment") + + // Set remaining headers + modtime := addCacheControlHeaders(w, r, contentPath, blockCid) + w.Header().Set("Content-Type", "application/vnd.ipld.raw") + w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) + + // Done: http.ServeContent will take care of Content-Length and range requests + http.ServeContent(w, r, name, modtime, content) +} diff --git a/core/corehttp/gateway_handler_car.go b/core/corehttp/gateway_handler_car.go new file mode 100644 index 00000000000..5702e86b01a --- /dev/null +++ b/core/corehttp/gateway_handler_car.go @@ -0,0 +1,59 @@ +package corehttp + +import ( + "context" + "net/http" + + blocks "github.com/ipfs/go-block-format" + cid "github.com/ipfs/go-cid" + coreiface "github.com/ipfs/interface-go-ipfs-core" + ipath "github.com/ipfs/interface-go-ipfs-core/path" + gocar "github.com/ipld/go-car" + selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" +) + +// serveCar returns a CAR stream for specific DAG+selector +func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path) { + ctx := r.Context() + + // Set Content-Disposition + name := rootCid.String() + ".car" + setContentDispositionHeader(w, name, "attachment") + + // Set remaining headers + /* TODO modtime := addCacheControlHeaders(w, r, contentPath, rootCid) + - how does cache-control look like, given car can fail mid-stream? 
+ - we don't want clients to cache partial/interrupted CAR + - we may document that client should verify that all blocks were dowloaded, + or we may leverage content-length to hint something went wrong + */ + + /* TODO: content-length (so user agents show % of remaining download) + - introduce max-car-size limit in go-ipfs-config and pre-compute CAR first, and then get size and use lazySeeker? + - are we able to provide length for Unixfs DAGs? (CumulativeSize+CARv0 header+envelopes) + */ + + w.Header().Set("Content-Type", "application/vnd.ipld.car; version=1") + w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) + + // Same go-car settings as dag.export command + store := dagStore{dag: i.api.Dag(), ctx: ctx} + dag := gocar.Dag{Root: rootCid, Selector: selectorparse.CommonSelector_ExploreAllRecursively} + car := gocar.NewSelectiveCar(ctx, store, []gocar.Dag{dag}, gocar.TraverseLinksOnlyOnce()) + + w.WriteHeader(http.StatusOK) + + if err := car.Write(w); err != nil { + // TODO: can we do any error handling here? + } +} + +type dagStore struct { + dag coreiface.APIDagService + ctx context.Context +} + +func (ds dagStore) Get(c cid.Cid) (blocks.Block, error) { + obj, err := ds.dag.Get(ds.ctx, c) + return obj, err +} diff --git a/core/corehttp/gateway_handler_file.go b/core/corehttp/gateway_handler_file.go new file mode 100644 index 00000000000..4ca85565f0c --- /dev/null +++ b/core/corehttp/gateway_handler_file.go @@ -0,0 +1,81 @@ +package corehttp + +import ( + "fmt" + "io" + "mime" + "net/http" + gopath "path" + "strings" + + "github.com/gabriel-vasile/mimetype" + cid "github.com/ipfs/go-cid" + files "github.com/ipfs/go-ipfs-files" + ipath "github.com/ipfs/interface-go-ipfs-core/path" +) + +// serveFile returns data behind a file along with HTTP headers based on +// the file itself, its CID and the contentPath used for accessing it. 
+func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid, file files.File) { + + // Set Cache-Control and read optional Last-Modified time + modtime := addCacheControlHeaders(w, r, contentPath, fileCid) + + // Set Content-Disposition + name := addContentDispositionHeader(w, r, contentPath) + + // Prepare size value for Content-Length HTTP header (set inside of http.ServeContent) + size, err := file.Size() + if err != nil { + http.Error(w, "cannot serve files with unknown sizes", http.StatusBadGateway) + return + } + + // Lazy seeker enables efficient range-requests and HTTP HEAD responses + content := &lazySeeker{ + size: size, + reader: file, + } + + // Calculate deterministic value for Content-Type HTTP header + // (we prefer to do it here, rather than using implicit sniffing in http.ServeContent) + var ctype string + if _, isSymlink := file.(*files.Symlink); isSymlink { + // We should be smarter about resolving symlinks but this is the + // "most correct" we can be without doing that. + ctype = "inode/symlink" + } else { + ctype = mime.TypeByExtension(gopath.Ext(name)) + if ctype == "" { + // uses https://github.com/gabriel-vasile/mimetype library to determine the content type. + // Fixes https://github.com/ipfs/go-ipfs/issues/7252 + mimeType, err := mimetype.DetectReader(content) + if err != nil { + http.Error(w, fmt.Sprintf("cannot detect content-type: %s", err.Error()), http.StatusInternalServerError) + return + } + + ctype = mimeType.String() + _, err = content.Seek(0, io.SeekStart) + if err != nil { + http.Error(w, "seeker can't seek", http.StatusInternalServerError) + return + } + } + // Strip the encoding from the HTML Content-Type header and let the + // browser figure it out. 
+ // + // Fixes https://github.com/ipfs/go-ipfs/issues/2203 + if strings.HasPrefix(ctype, "text/html;") { + ctype = "text/html" + } + } + // Setting explicit Content-Type to avoid mime-type sniffing on the client + // (unifies behavior across gateways and web browsers) + w.Header().Set("Content-Type", ctype) + + // special fixup around redirects + w = &statusResponseWriter{w} + + http.ServeContent(w, r, name, modtime, content) +} From ee7b0ae41de235b0ac6acf0318db64c3f3f08275 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 8 Mar 2022 16:19:31 +0100 Subject: [PATCH 05/17] refactor: unixfs_get_latency_seconds Include block and car in unixfs_get_latency_seconds for now, so we keep basic visibility into gateway behavior until better metrics are added by https://github.com/ipfs/go-ipfs/issues/8441 --- core/corehttp/gateway_handler.go | 13 ++++++++++--- core/corehttp/gateway_handler_block.go | 4 ++-- core/corehttp/gateway_handler_car.go | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index 0973e42cf9e..febc4d01af4 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -64,7 +64,6 @@ type gatewayHandler struct { config GatewayConfig api coreiface.CoreAPI - // TODO: add metrics for non-unixfs responses (block, car) unixfsGetMetric *prometheus.SummaryVec } @@ -90,6 +89,7 @@ func (sw *statusResponseWriter) WriteHeader(code int) { func newGatewayHandler(c GatewayConfig, api coreiface.CoreAPI) *gatewayHandler { unixfsGetMetric := prometheus.NewSummaryVec( + // TODO: deprecate and switch to content type agnostic metrics: https://github.com/ipfs/go-ipfs/issues/8441 prometheus.SummaryOpts{ Namespace: "ipfs", Subsystem: "http", @@ -305,6 +305,15 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } + // Update the global metric of the time it takes to read the final root block of the requested resource + // NOTE: for 
legacy reasons this happens before we go into content-type specific code paths + _, err = i.api.Block().Get(r.Context(), resolvedPath) + if err != nil { + webError(w, "ipfs block get "+resolvedPath.Cid().String(), err, http.StatusInternalServerError) + return + } + i.unixfsGetMetric.WithLabelValues(parsedPath.Namespace()).Observe(time.Since(begin).Seconds()) + // HTTP Headers i.addUserHeaders(w) // ok, _now_ write user's headers. w.Header().Set("X-Ipfs-Path", urlPath) @@ -348,8 +357,6 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request webError(w, "ipfs cat "+escapedURLPath, err, http.StatusNotFound) return } - // TODO: do we want to reuse unixfsGetMetric for block/car, or should we have separate ones? - i.unixfsGetMetric.WithLabelValues(parsedPath.Namespace()).Observe(time.Since(begin).Seconds()) defer dr.Close() // Handling Unixfs file diff --git a/core/corehttp/gateway_handler_block.go b/core/corehttp/gateway_handler_block.go index 9264e6875a4..d7a3a8c4e8b 100644 --- a/core/corehttp/gateway_handler_block.go +++ b/core/corehttp/gateway_handler_block.go @@ -13,12 +13,12 @@ import ( func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, blockCid cid.Cid, contentPath ipath.Path) { blockReader, err := i.api.Block().Get(r.Context(), contentPath) if err != nil { - webError(w, "failed to get block", err, http.StatusInternalServerError) + webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError) return } block, err := ioutil.ReadAll(blockReader) if err != nil { - webError(w, "failed to read block", err, http.StatusInternalServerError) + webError(w, "ipfs block get "+blockCid.String(), err, http.StatusInternalServerError) return } content := bytes.NewReader(block) diff --git a/core/corehttp/gateway_handler_car.go b/core/corehttp/gateway_handler_car.go index 5702e86b01a..30a90c5c801 100644 --- a/core/corehttp/gateway_handler_car.go +++ b/core/corehttp/gateway_handler_car.go @@ -45,6 +45,7 
@@ func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCi if err := car.Write(w); err != nil { // TODO: can we do any error handling here? + // TODO: idea: add best-effort proxy reader which will set http.StatusOK only if the first block is yielded correctly } } From aed0bf5a844e18b20b7fe5f93a9638ac4fc4bb51 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 9 Mar 2022 15:48:03 +0100 Subject: [PATCH 06/17] test: t0117-gateway-block.sh --- core/corehttp/gateway_handler.go | 46 ++++++++---------- test/sharness/t0117-gateway-block.sh | 71 ++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 25 deletions(-) create mode 100755 test/sharness/t0117-gateway-block.sh diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index febc4d01af4..854d33bd576 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -326,29 +326,19 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } // Support custom response formats passed via ?format or Accept HTTP header - if contentType := getExplicitContentType(r); contentType != "" { - switch contentType { - case "application/vnd.ipld.raw": - logger.Debugw("serving raw block", "path", parsedPath) - i.serveRawBlock(w, r, resolvedPath.Cid(), parsedPath) - return - case "application/vnd.ipld.car": - logger.Debugw("serving car stream", "path", parsedPath) - i.serveCar(w, r, resolvedPath.Cid(), parsedPath) - return - case "application/vnd.ipld.car; version=1": - logger.Debugw("serving car stream", "path", parsedPath) - i.serveCar(w, r, resolvedPath.Cid(), parsedPath) - return - case "application/vnd.ipld.car; version=2": // no CARv2 in go-ipfs atm - err := fmt.Errorf("unsupported CARv2 format, try again with CARv1") - webError(w, "failed respond with requested content type", err, http.StatusBadRequest) - return - default: - err := fmt.Errorf("unsupported format %q", contentType) - webError(w, "failed respond with 
requested content type", err, http.StatusBadRequest) - return - } + switch contentType := getExplicitContentType(r); contentType { + case "application/vnd.ipld.raw": + logger.Debugw("serving raw block", "path", parsedPath) + i.serveRawBlock(w, r, resolvedPath.Cid(), parsedPath) + return + case "application/vnd.ipld.car", "application/vnd.ipld.car; version=1": + logger.Debugw("serving car stream", "path", parsedPath) + i.serveCar(w, r, resolvedPath.Cid(), parsedPath) + return + default: + err := fmt.Errorf("unsupported format %q", contentType) + webError(w, "failed respond with requested content type", err, http.StatusBadRequest) + return } // Handling Unixfs @@ -904,8 +894,14 @@ func getExplicitContentType(r *http.Request) string { return "application/vnd.ipld.car" } } - if accept := r.Header.Get("Accept"); strings.HasPrefix(accept, "application/vnd.") { - return accept + // Browsers and other user agents will send Accept header with generic types like: + // Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8 + // We only care about explicit, vendor-specific content-types. + for _, accept := range r.Header.Values("Accept") { + // respond to the very first ipld content type + if strings.HasPrefix(accept, "application/vnd.ipld") { + return accept + } } return "" } diff --git a/test/sharness/t0117-gateway-block.sh b/test/sharness/t0117-gateway-block.sh new file mode 100755 index 00000000000..b04d4ac1762 --- /dev/null +++ b/test/sharness/t0117-gateway-block.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash + +test_description="Test HTTP Gateway Raw Block (application/vnd.ipld.raw) Support" + +. 
lib/test-lib.sh + +test_init_ipfs +test_launch_ipfs_daemon + +test_expect_success "Create text fixtures" ' + mkdir -p dir && + echo "hello" > dir/ascii.txt && + ROOT_DIR_CID=$(ipfs add -Qrw --cid-version 1 dir) + FILE_CID=$(ipfs resolve -r /ipfs/$ROOT_DIR_CID/dir/ascii.txt | cut -d "/" -f3) +' + +# GET unixfs dir root block and compare it with `ipfs block get` output + + test_expect_success "GET with format=raw param returns a raw block" ' + ipfs block get "/ipfs/$ROOT_DIR_CID/dir" > expected && + curl -sX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT_DIR_CID/dir?format=raw" -o curl_ipfs_dir_block_param_output && + test_cmp expected curl_ipfs_dir_block_param_output + ' + + test_expect_success "GET for application/vnd.ipld.raw returns a raw block" ' + ipfs block get "/ipfs/$ROOT_DIR_CID/dir" > expected_block && + curl -sX GET -H "Accept: application/vnd.ipld.raw" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT_DIR_CID/dir" -o curl_ipfs_dir_block_accept_output && + test_cmp expected_block curl_ipfs_dir_block_accept_output + ' + +# Make sure expected HTTP headers are returned with the block bytes + + test_expect_success "GET response for application/vnd.ipld.raw has expected Content-Type" ' + curl -svX GET -H "Accept: application/vnd.ipld.raw" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT_DIR_CID/dir/ascii.txt" >/dev/null 2>curl_output && + cat curl_output && + grep "< Content-Type: application/vnd.ipld.raw" curl_output + ' + + test_expect_success "GET response for application/vnd.ipld.raw includes Content-Length" ' + BYTES=$(ipfs block get $FILE_CID | wc --bytes) + grep "< Content-Length: $BYTES" curl_output + ' + + test_expect_success "GET response for application/vnd.ipld.raw includes Content-Disposition" ' + grep "< Content-Disposition: attachment\; filename=\"${FILE_CID}.raw\"" curl_output + ' + + test_expect_success "GET response for application/vnd.ipld.raw includes nosniff hint" ' + grep "< X-Content-Type-Options: nosniff" curl_output + ' + +# Cache control HTTP headers +# 
(basic checks, detailed behavior is tested in t0116-gateway-cache.sh) + + test_expect_success "GET response for application/vnd.ipld.raw includes Etag" ' + grep "< Etag: \"${FILE_CID}\"" curl_output + ' + + test_expect_success "GET response for application/vnd.ipld.raw includes X-Ipfs-Path and X-Ipfs-Roots" ' + grep "< X-Ipfs-Path" curl_output && + grep "< X-Ipfs-Roots" curl_output + ' + + test_expect_success "GET response for application/vnd.ipld.raw includes Cache-Control" ' + grep "< X-Ipfs-Path" curl_output && + grep "< X-Ipfs-Roots" curl_output + ' + +test_kill_ipfs_daemon + +test_done From 43dc5bfd1f17d030889244d2b86c1cfe9ea53544 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 9 Mar 2022 16:52:46 +0100 Subject: [PATCH 07/17] test: t0118-gateway-car.sh --- core/corehttp/gateway_handler.go | 3 + core/corehttp/gateway_handler_car.go | 31 +++++---- test/sharness/lib/test-lib.sh | 13 ++++ test/sharness/t0117-gateway-block.sh | 6 +- test/sharness/t0118-gateway-car.sh | 97 ++++++++++++++++++++++++++++ 5 files changed, 131 insertions(+), 19 deletions(-) create mode 100755 test/sharness/t0118-gateway-car.sh diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index 854d33bd576..ffa814f880b 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -327,6 +327,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request // Support custom response formats passed via ?format or Accept HTTP header switch contentType := getExplicitContentType(r); contentType { + case "": + // nothing we should special-case, skip + break case "application/vnd.ipld.raw": logger.Debugw("serving raw block", "path", parsedPath) i.serveRawBlock(w, r, resolvedPath.Cid(), parsedPath) diff --git a/core/corehttp/gateway_handler_car.go b/core/corehttp/gateway_handler_car.go index 30a90c5c801..812b413c340 100644 --- a/core/corehttp/gateway_handler_car.go +++ b/core/corehttp/gateway_handler_car.go @@ -20,32 +20,31 
@@ func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCi name := rootCid.String() + ".car" setContentDispositionHeader(w, name, "attachment") - // Set remaining headers - /* TODO modtime := addCacheControlHeaders(w, r, contentPath, rootCid) - - how does cache-control look like, given car can fail mid-stream? - - we don't want clients to cache partial/interrupted CAR - - we may document that client should verify that all blocks were dowloaded, - or we may leverage content-length to hint something went wrong - */ - - /* TODO: content-length (so user agents show % of remaining download) - - introduce max-car-size limit in go-ipfs-config and pre-compute CAR first, and then get size and use lazySeeker? - - are we able to provide length for Unixfs DAGs? (CumulativeSize+CARv0 header+envelopes) - */ + // Weak Etag W/ because we can't guarantee byte-for-byte identical responses + // (CAR is streamed, blocks arrive from datastore in non-deterministic order) + w.Header().Set("Etag", `W/"`+rootCid.String()+`.car"`) + + // Explicit Cache-Control to ensure fresh stream on retry. + // CAR stream could be interrupted, and client should be able to resume and get full response, not the truncated one + w.Header().Set("Cache-Control", "no-cache, no-transform") w.Header().Set("Content-Type", "application/vnd.ipld.car; version=1") w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) // Same go-car settings as dag.export command store := dagStore{dag: i.api.Dag(), ctx: ctx} + + // TODO: support selectors passed as request param: https://github.com/ipfs/go-ipfs/issues/8769 dag := gocar.Dag{Root: rootCid, Selector: selectorparse.CommonSelector_ExploreAllRecursively} car := gocar.NewSelectiveCar(ctx, store, []gocar.Dag{dag}, gocar.TraverseLinksOnlyOnce()) - w.WriteHeader(http.StatusOK) - if err := car.Write(w); err != nil { - // TODO: can we do any error handling here? 
- // TODO: idea: add best-effort proxy reader which will set http.StatusOK only if the first block is yielded correctly + // We return error as a trailer, however it is not something browsers can access + // (https://github.com/mdn/browser-compat-data/issues/14703) + // Due to this, we suggest client always verify that + // the received CAR stream response is matching requested DAG selector + w.Header().Set("X-Stream-Error", err.Error()) + return } } diff --git a/test/sharness/lib/test-lib.sh b/test/sharness/lib/test-lib.sh index a68c5d9737b..38f12a0250c 100644 --- a/test/sharness/lib/test-lib.sh +++ b/test/sharness/lib/test-lib.sh @@ -520,3 +520,16 @@ findprovs_expect() { test_cmp findprovsOut expected ' } + +purge_blockstore() { + ipfs pin ls --quiet --type=recursive | ipfs pin rm &>/dev/null + ipfs repo gc --silent &>/dev/null + + test_expect_success "pinlist empty" ' + [[ -z "$( ipfs pin ls )" ]] + ' + test_expect_success "nothing left to gc" ' + [[ -z "$( ipfs repo gc )" ]] + ' +} + diff --git a/test/sharness/t0117-gateway-block.sh b/test/sharness/t0117-gateway-block.sh index b04d4ac1762..3ce3f6a6f0a 100755 --- a/test/sharness/t0117-gateway-block.sh +++ b/test/sharness/t0117-gateway-block.sh @@ -5,12 +5,12 @@ test_description="Test HTTP Gateway Raw Block (application/vnd.ipld.raw) Support . 
lib/test-lib.sh test_init_ipfs -test_launch_ipfs_daemon +test_launch_ipfs_daemon_without_network test_expect_success "Create text fixtures" ' mkdir -p dir && - echo "hello" > dir/ascii.txt && - ROOT_DIR_CID=$(ipfs add -Qrw --cid-version 1 dir) + echo "hello application/vnd.ipld.raw" > dir/ascii.txt && + ROOT_DIR_CID=$(ipfs add -Qrw --cid-version 1 dir) && FILE_CID=$(ipfs resolve -r /ipfs/$ROOT_DIR_CID/dir/ascii.txt | cut -d "/" -f3) ' diff --git a/test/sharness/t0118-gateway-car.sh b/test/sharness/t0118-gateway-car.sh new file mode 100755 index 00000000000..fab9d37073c --- /dev/null +++ b/test/sharness/t0118-gateway-car.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash + +test_description="Test HTTP Gateway CAR (application/vnd.ipld.car) Support" + +. lib/test-lib.sh + +test_init_ipfs +test_launch_ipfs_daemon_without_network + +# CAR stream is not deterministic, as blocks can arrive in random order, +# but if we have a small file that fits into a single block, and export its CID +# we will get a CAR that is a deterministic array of bytes. 
+ +test_expect_success "Create a deterministic CAR for testing" ' + mkdir -p subdir && + echo "hello application/vnd.ipld.car" > subdir/ascii.txt && + ROOT_DIR_CID=$(ipfs add -Qrw --cid-version 1 subdir) && + FILE_CID=$(ipfs resolve -r /ipfs/$ROOT_DIR_CID/subdir/ascii.txt | cut -d "/" -f3) && + ipfs dag export $ROOT_DIR_CID > test-dag.car && + ipfs dag export $FILE_CID > deterministic.car && + purge_blockstore +' + +# GET unixfs file as CAR +# (by using a single file we ensure deterministic result that can be compared byte-for-byte) + + test_expect_success "GET with format=car param returns a CARv1 stream" ' + ipfs dag import test-dag.car && + curl -sX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT_DIR_CID/subdir/ascii.txt?format=car" -o gateway-param.car && + test_cmp deterministic.car gateway-param.car + ' + + test_expect_success "GET for application/vnd.ipld.car returns a CARv1 stream" ' + ipfs dag import test-dag.car && + curl -sX GET -H "Accept: application/vnd.ipld.car" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT_DIR_CID/subdir/ascii.txt" -o gateway-header.car && + test_cmp deterministic.car gateway-header.car + ' + + # explicit version=1 + test_expect_success "GET for application/vnd.ipld.raw version=1 returns a CARv1 stream" ' + ipfs dag import test-dag.car && + curl -sX GET -H "Accept: application/vnd.ipld.car; version=1" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT_DIR_CID/subdir/ascii.txt" -o gateway-header-v1.car && + test_cmp deterministic.car gateway-header-v1.car + ' + +# GET unixfs directory as a CAR with DAG and some selector + + # TODO: this is basic test for "full" selector, we will add support for custom ones in https://github.com/ipfs/go-ipfs/issues/8769 + test_expect_success "GET for application/vnd.ipld.car with unixfs dir returns a CARv1 stream with full DAG" ' + ipfs dag import test-dag.car && + curl -sX GET -H "Accept: application/vnd.ipld.car" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT_DIR_CID" -o gateway-dir.car && + purge_blockstore && + ipfs dag 
import gateway-dir.car && + ipfs dag stat --offline $ROOT_DIR_CID + ' + +# Make sure expected HTTP headers are returned with the block bytes + + test_expect_success "GET response for application/vnd.ipld.car has expected Content-Type" ' + ipfs dag import test-dag.car && + curl -svX GET -H "Accept: application/vnd.ipld.car" "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT_DIR_CID/subdir/ascii.txt" >/dev/null 2>curl_output && + cat curl_output && + grep "< Content-Type: application/vnd.ipld.car; version=1" curl_output + ' + + # CAR is streamed, gateway may not have the entire thing, unable to calculate total size + test_expect_success "GET response for application/vnd.ipld.car includes no Content-Length" ' + grep -qv "< Content-Length:" curl_output + ' + + test_expect_success "GET response for application/vnd.ipld.car includes Content-Disposition" ' + grep "< Content-Disposition: attachment\; filename=\"${FILE_CID}.car\"" curl_output + ' + + test_expect_success "GET response for application/vnd.ipld.car includes nosniff hint" ' + grep "< X-Content-Type-Options: nosniff" curl_output + ' + +# Cache control HTTP headers + + test_expect_success "GET response for application/vnd.ipld.car includes a weak Etag" ' + grep "< Etag: W/\"${FILE_CID}.car\"" curl_output + ' + + # (basic checks, detailed behavior for some fields is tested in t0116-gateway-cache.sh) + test_expect_success "GET response for application/vnd.ipld.car includes X-Ipfs-Path and X-Ipfs-Roots" ' + grep "< X-Ipfs-Path" curl_output && + grep "< X-Ipfs-Roots" curl_output + ' + + test_expect_success "GET response for application/vnd.ipld.raw includes expected Cache-Control" ' + grep "< Cache-Control: no-cache, no-transform" curl_output + ' + +test_kill_ipfs_daemon + +test_done From c5b5f34b445f28ac9d3132a495b187d90205b7be Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 9 Mar 2022 17:55:33 +0100 Subject: [PATCH 08/17] fix: use .bin for raw block content-disposition .raw may be handled by something, depending on OS, 
and .bin seems to be universally "binary file" across all systems: https://en.wikipedia.org/wiki/List_of_filename_extensions_(A%E2%80%93E) --- core/corehttp/gateway_handler_block.go | 2 +- test/sharness/t0117-gateway-block.sh | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/core/corehttp/gateway_handler_block.go b/core/corehttp/gateway_handler_block.go index d7a3a8c4e8b..ebeda1f67d1 100644 --- a/core/corehttp/gateway_handler_block.go +++ b/core/corehttp/gateway_handler_block.go @@ -24,7 +24,7 @@ func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, b content := bytes.NewReader(block) // Set Content-Disposition - name := blockCid.String() + ".raw" + name := blockCid.String() + ".bin" setContentDispositionHeader(w, name, "attachment") // Set remaining headers diff --git a/test/sharness/t0117-gateway-block.sh b/test/sharness/t0117-gateway-block.sh index 3ce3f6a6f0a..3ebe67ab568 100755 --- a/test/sharness/t0117-gateway-block.sh +++ b/test/sharness/t0117-gateway-block.sh @@ -42,7 +42,7 @@ test_expect_success "Create text fixtures" ' ' test_expect_success "GET response for application/vnd.ipld.raw includes Content-Disposition" ' - grep "< Content-Disposition: attachment\; filename=\"${FILE_CID}.raw\"" curl_output + grep "< Content-Disposition: attachment\; filename=\"${FILE_CID}.bin\"" curl_output ' test_expect_success "GET response for application/vnd.ipld.raw includes nosniff hint" ' @@ -62,8 +62,7 @@ test_expect_success "Create text fixtures" ' ' test_expect_success "GET response for application/vnd.ipld.raw includes Cache-Control" ' - grep "< X-Ipfs-Path" curl_output && - grep "< X-Ipfs-Roots" curl_output + grep "< Cache-Control" curl_output ' test_kill_ipfs_daemon From b45198b87e022fc49a2087dc194e160a5ede5e9c Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 9 Mar 2022 18:03:07 +0100 Subject: [PATCH 09/17] docs: docs/gateway.md --- docs/gateway.md | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28
insertions(+), 9 deletions(-) diff --git a/docs/gateway.md b/docs/gateway.md index 7e85c58ec83..fcfbed36595 100644 --- a/docs/gateway.md +++ b/docs/gateway.md @@ -65,17 +65,36 @@ images, audio, video, PDF) and trigger immediate "save as" dialog by appending > https://ipfs.io/ipfs/QmfM2r8seH2GiRaC4esTjeraXEachRt8ZsSeGaWTPLyMoG?filename=hello_world.txt&download=true -## MIME-Types +## Response Format -TODO +An explicit response format can be requested using `?format=raw|car|..` URL parameter, +or by sending `Accept: application/vnd.ipld.{format}` HTTP header with one of supported content types. -## Read-Only API +## Content-Types -For convenience, the gateway exposes a read-only API. This read-only API exposes -a read-only, "safe" subset of the normal API. +### `application/vnd.ipld.raw` -For example, you use this to download a block: +Returns a byte array for a single `raw` block. -``` -> curl https://ipfs.io/api/v0/block/get/bafkreifjjcie6lypi6ny7amxnfftagclbuxndqonfipmb64f2km2devei4 -``` +Sending such requests for `/ipfs/{cid}` allows for efficient fetch of blocks with data +encoded in custom format, without the need for deserialization and traversal on the gateway. + +This is equivalent of `ipfs block get`. + +### `application/vnd.ipld.car` + +Returns a [CAR](https://ipld.io/specs/transport/car/) stream for specific DAG and selector. + +Right now only 'full DAG' implicit selector is implemented. +Support for user-provided IPLD selectors is tracked in https://github.com/ipfs/go-ipfs/issues/8769. + +This is a rough equivalent of `ipfs dag export`. + +## Deprecated Subset of RPC API + +For legacy reasons, the gateway port exposes a small subset of RPC API under `/api/v0/`. +While this read-only API exposes a read-only, "safe" subset of the normal API, +it is deprecated and should not be used for greenfield projects. + +Where possible, leverage `/ipfs/` and `/ipns/` endpoints. +along with `application/vnd.ipld.*` Content-Types instead. 
From 17d00ce8cfff23d58ff7c647e204db26777a2681 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 9 Mar 2022 18:26:48 +0100 Subject: [PATCH 10/17] test: t0118-gateway-car/carv1-basic.car This test uses official CARv1 fixture from https://ipld.io/specs/transport/car/fixture/carv1-basic/ The CAR has two dag-cbor roots, and we use one of them, which represents a nice DAG with both dag-cbor, dag-pb and raw blocks --- test/sharness/t0118-gateway-car.sh | 30 +++- test/sharness/t0118-gateway-car/README.md | 10 ++ .../t0118-gateway-car/carv1-basic.car | Bin 0 -> 715 bytes .../t0118-gateway-car/carv1-basic.json | 159 ++++++++++++++++++ 4 files changed, 190 insertions(+), 9 deletions(-) create mode 100644 test/sharness/t0118-gateway-car/README.md create mode 100644 test/sharness/t0118-gateway-car/carv1-basic.car create mode 100644 test/sharness/t0118-gateway-car/carv1-basic.json diff --git a/test/sharness/t0118-gateway-car.sh b/test/sharness/t0118-gateway-car.sh index fab9d37073c..850f7bc0f50 100755 --- a/test/sharness/t0118-gateway-car.sh +++ b/test/sharness/t0118-gateway-car.sh @@ -11,15 +11,27 @@ test_launch_ipfs_daemon_without_network # but if we have a small file that fits into a single block, and export its CID # we will get a CAR that is a deterministic array of bytes. 
-test_expect_success "Create a deterministic CAR for testing" ' - mkdir -p subdir && - echo "hello application/vnd.ipld.car" > subdir/ascii.txt && - ROOT_DIR_CID=$(ipfs add -Qrw --cid-version 1 subdir) && - FILE_CID=$(ipfs resolve -r /ipfs/$ROOT_DIR_CID/subdir/ascii.txt | cut -d "/" -f3) && - ipfs dag export $ROOT_DIR_CID > test-dag.car && - ipfs dag export $FILE_CID > deterministic.car && - purge_blockstore -' + test_expect_success "Create a deterministic CAR for testing" ' + mkdir -p subdir && + echo "hello application/vnd.ipld.car" > subdir/ascii.txt && + ROOT_DIR_CID=$(ipfs add -Qrw --cid-version 1 subdir) && + FILE_CID=$(ipfs resolve -r /ipfs/$ROOT_DIR_CID/subdir/ascii.txt | cut -d "/" -f3) && + ipfs dag export $ROOT_DIR_CID > test-dag.car && + ipfs dag export $FILE_CID > deterministic.car && + purge_blockstore + ' + +# GET a reference DAG with dag-cbor+dag-pb+raw blocks as CAR + + # This test uses official CARv1 fixture from https://ipld.io/specs/transport/car/fixture/carv1-basic/ + test_expect_success "GET for application/vnd.ipld.car with dag-cbor root returns a CARv1 stream with full DAG" ' + ipfs dag import ../t0118-gateway-car/carv1-basic.car && + DAG_CBOR_CID=bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm && + curl -sX GET -H "Accept: application/vnd.ipld.car" "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_CBOR_CID" -o gateway-dag-cbor.car && + purge_blockstore && + ipfs dag import gateway-dag-cbor.car && + ipfs dag stat --offline $DAG_CBOR_CID + ' # GET unixfs file as CAR # (by using a single file we ensure deterministic result that can be compared byte-for-byte) diff --git a/test/sharness/t0118-gateway-car/README.md b/test/sharness/t0118-gateway-car/README.md new file mode 100644 index 00000000000..2efccc18544 --- /dev/null +++ b/test/sharness/t0118-gateway-car/README.md @@ -0,0 +1,10 @@ +# Dataset description/sources + +- carv1-basic.car + - raw CARv1 + - Source: https://ipld.io/specs/transport/car/fixture/carv1-basic/carv1-basic.car + +- 
carv1-basic.json + - description of the contents and layout of the raw CAR, encoded in DAG-JSON + - Source: https://ipld.io/specs/transport/car/fixture/carv1-basic/carv1-basic.json + diff --git a/test/sharness/t0118-gateway-car/carv1-basic.car b/test/sharness/t0118-gateway-car/carv1-basic.car new file mode 100644 index 0000000000000000000000000000000000000000..48c67a3d8dc77ccff652289efb2b41edd4867d44 GIT binary patch literal 715 zcmYdZlv@T^XBM16^mDnYKs~9PRn;i&guKF^#9|+JO5wK+*E)0UP@kKZfZ(W zPG&(fBVpA-dR!`up+XAVeqSs7*{8Kv>B?Kpzb6a>{@wA2tebRKs!GK|)@4yipb$$^ zYGRQDi;zB-l8{2>%BFiZGufxw`Wo51ljYy|w%_%~d!}|Co}CXSx$O<+5@IV(P0r6t zk(kP;L5%6iK+phmEkSd2A+Bva6D`izRIm8 zgfZBugp5uCf Date: Tue, 15 Mar 2022 14:03:52 +0100 Subject: [PATCH 11/17] fix: cancel context if car write fails https://github.com/ipfs/go-ipfs/pull/8758#discussion_r824656552 --- core/corehttp/gateway_handler_car.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/corehttp/gateway_handler_car.go b/core/corehttp/gateway_handler_car.go index 812b413c340..8add346217f 100644 --- a/core/corehttp/gateway_handler_car.go +++ b/core/corehttp/gateway_handler_car.go @@ -14,7 +14,8 @@ import ( // serveCar returns a CAR stream for specific DAG+selector func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCid cid.Cid, contentPath ipath.Path) { - ctx := r.Context() + ctx, cancel := context.WithCancel(r.Context()) + defer cancel() // Set Content-Disposition name := rootCid.String() + ".car" From 84f2b059d216cc196dbca374c31d362460fd83ec Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Mar 2022 15:14:28 +0100 Subject: [PATCH 12/17] feat: Accept-Ranges: none for car responses https://github.com/ipfs/go-ipfs/pull/8758#discussion_r824649922 --- core/corehttp/gateway_handler_car.go | 5 +++++ test/sharness/t0118-gateway-car.sh | 11 +++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/core/corehttp/gateway_handler_car.go 
b/core/corehttp/gateway_handler_car.go index 8add346217f..897314dc0da 100644 --- a/core/corehttp/gateway_handler_car.go +++ b/core/corehttp/gateway_handler_car.go @@ -25,6 +25,11 @@ func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCi // (CAR is streamed, blocks arrive from datastore in non-deterministic order) w.Header().Set("Etag", `W/"`+rootCid.String()+`.car"`) + // Make it clear we don't support range-requests over a car stream + // Partial downloads and resumes should be handled using + // IPLD selectors: https://github.com/ipfs/go-ipfs/issues/8769 + w.Header().Set("Accept-Ranges", "none") + // Explicit Cache-Control to ensure fresh stream on retry. // CAR stream could be interrupted, and client should be able to resume and get full response, not the truncated one w.Header().Set("Cache-Control", "no-cache, no-transform") diff --git a/test/sharness/t0118-gateway-car.sh b/test/sharness/t0118-gateway-car.sh index 850f7bc0f50..9cdb5aec522 100755 --- a/test/sharness/t0118-gateway-car.sh +++ b/test/sharness/t0118-gateway-car.sh @@ -66,7 +66,7 @@ test_launch_ipfs_daemon_without_network ipfs dag stat --offline $ROOT_DIR_CID ' -# Make sure expected HTTP headers are returned with the block bytes +# Make sure expected HTTP headers are returned with the CAR bytes test_expect_success "GET response for application/vnd.ipld.car has expected Content-Type" ' ipfs dag import test-dag.car && @@ -88,6 +88,13 @@ test_launch_ipfs_daemon_without_network grep "< X-Content-Type-Options: nosniff" curl_output ' + # CAR is streamed, gateway may not have the entire thing, unable to support range-requests + # Partial downloads and resumes should be handled using + # IPLD selectors: https://github.com/ipfs/go-ipfs/issues/8769 + test_expect_success "GET response for application/vnd.ipld.car includes Accept-Ranges header" ' + grep "< Accept-Ranges: none" curl_output + ' + # Cache control HTTP headers test_expect_success "GET response for application/vnd.ipld.car 
includes a weak Etag" ' @@ -100,7 +107,7 @@ test_launch_ipfs_daemon_without_network grep "< X-Ipfs-Roots" curl_output ' - test_expect_success "GET response for application/vnd.ipld.raw includes expected Cache-Control" ' + test_expect_success "GET response for application/vnd.ipld.car includes expected Cache-Control" ' grep "< Cache-Control: no-cache, no-transform" curl_output ' From d8491154a83e7a0ae4a2e25a7e3b7559fe2f33f6 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Mar 2022 16:43:29 +0100 Subject: [PATCH 13/17] refactor: gateway_handler_unixfs.go - Moved UnixFS response handling to gateway_handler_unixfs*.go files. - Removed support for X-Ipfs-Gateway-Prefix (Closes #7702) --- core/corehttp/gateway_handler.go | 271 +++--------------- core/corehttp/gateway_handler_unixfs.go | 37 +++ core/corehttp/gateway_handler_unixfs_dir.go | 197 +++++++++++++ ...file.go => gateway_handler_unixfs_file.go} | 0 4 files changed, 267 insertions(+), 238 deletions(-) create mode 100644 core/corehttp/gateway_handler_unixfs.go create mode 100644 core/corehttp/gateway_handler_unixfs_dir.go rename core/corehttp/{gateway_handler_file.go => gateway_handler_unixfs_file.go} (100%) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index ffa814f880b..1db0bdbb8f8 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -3,6 +3,7 @@ package corehttp import ( "context" "fmt" + "html" "html/template" "io" "net/http" @@ -15,10 +16,8 @@ import ( "strings" "time" - humanize "github.com/dustin/go-humanize" cid "github.com/ipfs/go-cid" files "github.com/ipfs/go-ipfs-files" - assets "github.com/ipfs/go-ipfs/assets" dag "github.com/ipfs/go-merkledag" mfs "github.com/ipfs/go-mfs" path "github.com/ipfs/go-path" @@ -197,38 +196,17 @@ func (i *gatewayHandler) optionsHandler(w http.ResponseWriter, r *http.Request) func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) { begin := time.Now() - urlPath := 
r.URL.Path - escapedURLPath := r.URL.EscapedPath() logger := log.With("from", r.RequestURI) logger.Debug("http request received") - // If the gateway is behind a reverse proxy and mounted at a sub-path, - // the prefix header can be set to signal this sub-path. - // It will be prepended to links in directory listings and the index.html redirect. - // TODO: this feature is deprecated and will be removed (https://github.com/ipfs/go-ipfs/issues/7702) - prefix := "" - if prfx := r.Header.Get("X-Ipfs-Gateway-Prefix"); len(prfx) > 0 { - for _, p := range i.config.PathPrefixes { - if prfx == p || strings.HasPrefix(prfx, p+"/") { - prefix = prfx - break - } - } - logger.Debugw("sub-path (deprecrated)", "prefix", prefix) - } - - // HostnameOption might have constructed an IPNS/IPFS path using the Host header. - // In this case, we need the original path for constructing redirects - // and links that match the requested URL. - // For example, http://example.net would become /ipns/example.net, and - // the redirects and links would end up as http://example.net/ipns/example.net - requestURI, err := url.ParseRequestURI(r.RequestURI) - if err != nil { - webError(w, "failed to parse request path", err, http.StatusInternalServerError) + // X-Ipfs-Gateway-Prefix was removed (https://github.com/ipfs/go-ipfs/issues/7702) + // TODO: remove this after go-ipfs 0.13 ships + if prfx := r.Header.Get("X-Ipfs-Gateway-Prefix"); prfx != "" { + err := fmt.Errorf("X-Ipfs-Gateway-Prefix support was removed: https://github.com/ipfs/go-ipfs/issues/7702") + webError(w, "unsupported HTTP header", err, http.StatusBadRequest) return } - originalUrlPath := prefix + requestURI.Path // ?uri query param support for requests produced by web browsers // via navigator.registerProtocolHandler Web API @@ -249,7 +227,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request path = path + "?" 
+ u.RawQuery } - redirectURL := gopath.Join("/", prefix, u.Scheme, u.Host, path) + redirectURL := gopath.Join("/", u.Scheme, u.Host, path) logger.Debugw("uri param, redirect", "to", redirectURL, "status", http.StatusMovedPermanently) http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) return @@ -267,9 +245,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } } - parsedPath := ipath.New(urlPath) - if pathErr := parsedPath.IsValid(); pathErr != nil { - if prefix == "" && fixupSuperfluousNamespace(w, urlPath, r.URL.RawQuery) { + contentPath := ipath.New(r.URL.Path) + if pathErr := contentPath.IsValid(); pathErr != nil { + if fixupSuperfluousNamespace(w, r.URL.Path, r.URL.RawQuery) { // the error was due to redundant namespace, which we were able to fix // by returning error/redirect page, nothing left to do here logger.Debugw("redundant namespace; noop") @@ -281,19 +259,19 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } // Resolve path to the final DAG node for the ETag - resolvedPath, err := i.api.ResolvePath(r.Context(), parsedPath) + resolvedPath, err := i.api.ResolvePath(r.Context(), contentPath) switch err { case nil: case coreiface.ErrOffline: - webError(w, "ipfs resolve -r "+escapedURLPath, err, http.StatusServiceUnavailable) + webError(w, "ipfs resolve -r "+html.EscapeString(contentPath.String()), err, http.StatusServiceUnavailable) return default: - if i.servePretty404IfPresent(w, r, parsedPath) { + if i.servePretty404IfPresent(w, r, contentPath) { logger.Debugw("serve pretty 404 if present") return } - webError(w, "ipfs resolve -r "+escapedURLPath, err, http.StatusNotFound) + webError(w, "ipfs resolve -r "+html.EscapeString(contentPath.String()), err, http.StatusNotFound) return } @@ -312,225 +290,42 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request webError(w, "ipfs block get "+resolvedPath.Cid().String(), err, http.StatusInternalServerError) 
return } - i.unixfsGetMetric.WithLabelValues(parsedPath.Namespace()).Observe(time.Since(begin).Seconds()) + i.unixfsGetMetric.WithLabelValues(contentPath.Namespace()).Observe(time.Since(begin).Seconds()) // HTTP Headers i.addUserHeaders(w) // ok, _now_ write user's headers. - w.Header().Set("X-Ipfs-Path", urlPath) + w.Header().Set("X-Ipfs-Path", contentPath.String()) - if rootCids, err := i.buildIpfsRootsHeader(urlPath, r); err == nil { + if rootCids, err := i.buildIpfsRootsHeader(contentPath.String(), r); err == nil { w.Header().Set("X-Ipfs-Roots", rootCids) - } else { // this should never happen, as we resolved the urlPath already + } else { // this should never happen, as we resolved the contentPath already webError(w, "error while resolving X-Ipfs-Roots", err, http.StatusInternalServerError) return } // Support custom response formats passed via ?format or Accept HTTP header switch contentType := getExplicitContentType(r); contentType { - case "": - // nothing we should special-case, skip - break + case "": // The default, implicit response format is UnixFS + logger.Debugw("serving unixfs", "path", contentPath) + i.serveUnixFs(w, r, resolvedPath, contentPath, logger) + return case "application/vnd.ipld.raw": - logger.Debugw("serving raw block", "path", parsedPath) - i.serveRawBlock(w, r, resolvedPath.Cid(), parsedPath) + logger.Debugw("serving raw block", "path", contentPath) + i.serveRawBlock(w, r, resolvedPath.Cid(), contentPath) return case "application/vnd.ipld.car", "application/vnd.ipld.car; version=1": - logger.Debugw("serving car stream", "path", parsedPath) - i.serveCar(w, r, resolvedPath.Cid(), parsedPath) + logger.Debugw("serving car stream", "path", contentPath) + i.serveCar(w, r, resolvedPath.Cid(), contentPath) return default: err := fmt.Errorf("unsupported format %q", contentType) webError(w, "failed respond with requested content type", err, http.StatusBadRequest) return } - - // Handling Unixfs - dr, err := i.api.Unixfs().Get(r.Context(), 
resolvedPath) - if err != nil { - webError(w, "ipfs cat "+escapedURLPath, err, http.StatusNotFound) - return - } - defer dr.Close() - - // Handling Unixfs file - if f, ok := dr.(files.File); ok { - logger.Debugw("serving file", "path", parsedPath) - i.serveFile(w, r, parsedPath, resolvedPath.Cid(), f) - return - } - - // Handling Unixfs directory - dir, ok := dr.(files.Directory) - if !ok { - internalWebError(w, fmt.Errorf("unsupported file type")) - return - } - - // Check if directory has index.html, if so, serveFile - idxPath := ipath.Join(resolvedPath, "index.html") - idx, err := i.api.Unixfs().Get(r.Context(), idxPath) - switch err.(type) { - case nil: - dirwithoutslash := urlPath[len(urlPath)-1] != '/' - goget := r.URL.Query().Get("go-get") == "1" - if dirwithoutslash && !goget { - // See comment above where originalUrlPath is declared. - suffix := "/" - if r.URL.RawQuery != "" { - // preserve query parameters - suffix = suffix + "?" + r.URL.RawQuery - } - - redirectURL := originalUrlPath + suffix - logger.Debugw("serving index.html file", "to", redirectURL, "status", http.StatusFound, "path", idxPath) - http.Redirect(w, r, redirectURL, http.StatusFound) - return - } - - f, ok := idx.(files.File) - if !ok { - internalWebError(w, files.ErrNotReader) - return - } - - logger.Debugw("serving index.html file", "path", idxPath) - // write to request - i.serveFile(w, r, idxPath, resolvedPath.Cid(), f) - return - case resolver.ErrNoLink: - logger.Debugw("no index.html; noop", "path", idxPath) - default: - internalWebError(w, err) - return - } - - // See statusResponseWriter.WriteHeader - // and https://github.com/ipfs/go-ipfs/issues/7164 - // Note: this needs to occur before listingTemplate.Execute otherwise we get - // superfluous response.WriteHeader call from prometheus/client_golang - if w.Header().Get("Location") != "" { - logger.Debugw("location moved permanently", "status", http.StatusMovedPermanently) - w.WriteHeader(http.StatusMovedPermanently) - return - } 
- - // A HTML directory index will be presented, be sure to set the correct - // type instead of relying on autodetection (which may fail). - w.Header().Set("Content-Type", "text/html") - - // Generated dir index requires custom Etag (it may change between go-ipfs versions) - if assets.BindataVersionHash != "" { - dirEtag := `"DirIndex-` + assets.BindataVersionHash + `_CID-` + resolvedPath.Cid().String() + `"` - w.Header().Set("Etag", dirEtag) - if r.Header.Get("If-None-Match") == dirEtag { - w.WriteHeader(http.StatusNotModified) - return - } - } - - if r.Method == http.MethodHead { - logger.Debug("return as request's HTTP method is HEAD") - return - } - - // storage for directory listing - var dirListing []directoryItem - dirit := dir.Entries() - for dirit.Next() { - size := "?" - if s, err := dirit.Node().Size(); err == nil { - // Size may not be defined/supported. Continue anyways. - size = humanize.Bytes(uint64(s)) - } - - resolved, err := i.api.ResolvePath(r.Context(), ipath.Join(resolvedPath, dirit.Name())) - if err != nil { - internalWebError(w, err) - return - } - hash := resolved.Cid().String() - - // See comment above where originalUrlPath is declared. - di := directoryItem{ - Size: size, - Name: dirit.Name(), - Path: gopath.Join(originalUrlPath, dirit.Name()), - Hash: hash, - ShortHash: shortHash(hash), - } - dirListing = append(dirListing, di) - } - if dirit.Err() != nil { - internalWebError(w, dirit.Err()) - return - } - - // construct the correct back link - // https://github.com/ipfs/go-ipfs/issues/1365 - var backLink string = originalUrlPath - - // don't go further up than /ipfs/$hash/ - pathSplit := path.SplitList(urlPath) - switch { - // keep backlink - case len(pathSplit) == 3: // url: /ipfs/$hash - - // keep backlink - case len(pathSplit) == 4 && pathSplit[3] == "": // url: /ipfs/$hash/ - - // add the correct link depending on whether the path ends with a slash - default: - if strings.HasSuffix(backLink, "/") { - backLink += "./.." 
- } else { - backLink += "/.." - } - } - - size := "?" - if s, err := dir.Size(); err == nil { - // Size may not be defined/supported. Continue anyways. - size = humanize.Bytes(uint64(s)) - } - - hash := resolvedPath.Cid().String() - - // Gateway root URL to be used when linking to other rootIDs. - // This will be blank unless subdomain or DNSLink resolution is being used - // for this request. - var gwURL string - - // Get gateway hostname and build gateway URL. - if h, ok := r.Context().Value("gw-hostname").(string); ok { - gwURL = "//" + h - } else { - gwURL = "" - } - - dnslink := hasDNSLinkOrigin(gwURL, urlPath) - - // See comment above where originalUrlPath is declared. - tplData := listingTemplateData{ - GatewayURL: gwURL, - DNSLink: dnslink, - Listing: dirListing, - Size: size, - Path: urlPath, - Breadcrumbs: breadcrumbs(urlPath, dnslink), - BackLink: backLink, - Hash: hash, - } - - logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash, "duration", time.Since(begin)) - - if err := listingTemplate.Execute(w, tplData); err != nil { - internalWebError(w, err) - return - } } -func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, parsedPath ipath.Path) bool { - resolved404Path, ctype, err := i.searchUpTreeFor404(r, parsedPath) +func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http.Request, contentPath ipath.Path) bool { + resolved404Path, ctype, err := i.searchUpTreeFor404(r, contentPath) if err != nil { return false } @@ -551,7 +346,7 @@ func (i *gatewayHandler) servePretty404IfPresent(w http.ResponseWriter, r *http. 
return false } - log.Debugw("using pretty 404 file", "path", parsedPath) + log.Debugw("using pretty 404 file", "path", contentPath) w.Header().Set("Content-Type", ctype) w.Header().Set("Content-Length", strconv.FormatInt(size, 10)) w.WriteHeader(http.StatusNotFound) @@ -762,7 +557,7 @@ func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath modtime = time.Now() // TODO: set Cache-Control based on TTL of IPNS/DNSLink: https://github.com/ipfs/go-ipfs/issues/1818#issuecomment-1015849462 - // TODO: set Last-Modified if modification metadata is present in unixfs 1.5: https://github.com/ipfs/go-ipfs/issues/6920 + // TODO: set Last-Modified based on /ipns/ publishing timestamp? } else { // immutable! CACHE ALL THE THINGS, FOREVER! wolololol @@ -771,7 +566,7 @@ func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath // Set modtime to 'zero time' to disable Last-Modified header (superseded by Cache-Control) modtime = noModtime - // TODO: set Last-Modified if modification metadata is present in unixfs 1.5: https://github.com/ipfs/go-ipfs/issues/6920 + // TODO: set Last-Modified - TBD - /ipfs/ modification metadata is present in unixfs 1.5 https://github.com/ipfs/go-ipfs/issues/6920? } return modtime @@ -909,13 +704,13 @@ func getExplicitContentType(r *http.Request) string { return "" } -func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, parsedPath ipath.Path) (ipath.Resolved, string, error) { +func (i *gatewayHandler) searchUpTreeFor404(r *http.Request, contentPath ipath.Path) (ipath.Resolved, string, error) { filename404, ctype, err := preferred404Filename(r.Header.Values("Accept")) if err != nil { return nil, "", err } - pathComponents := strings.Split(parsedPath.String(), "/") + pathComponents := strings.Split(contentPath.String(), "/") for idx := len(pathComponents); idx >= 3; idx-- { pretty404 := gopath.Join(append(pathComponents[0:idx], filename404)...) 
diff --git a/core/corehttp/gateway_handler_unixfs.go b/core/corehttp/gateway_handler_unixfs.go new file mode 100644 index 00000000000..6f476b2afe3 --- /dev/null +++ b/core/corehttp/gateway_handler_unixfs.go @@ -0,0 +1,37 @@ +package corehttp + +import ( + "fmt" + "html" + "net/http" + + files "github.com/ipfs/go-ipfs-files" + ipath "github.com/ipfs/interface-go-ipfs-core/path" + "go.uber.org/zap" +) + +func (i *gatewayHandler) serveUnixFs(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, logger *zap.SugaredLogger) { + // Handling UnixFS + dr, err := i.api.Unixfs().Get(r.Context(), resolvedPath) + if err != nil { + webError(w, "ipfs cat "+html.EscapeString(contentPath.String()), err, http.StatusNotFound) + return + } + defer dr.Close() + + // Handling Unixfs file + if f, ok := dr.(files.File); ok { + logger.Debugw("serving unixfs file", "path", contentPath) + i.serveFile(w, r, contentPath, resolvedPath.Cid(), f) + return + } + + // Handling Unixfs directory + dir, ok := dr.(files.Directory) + if !ok { + internalWebError(w, fmt.Errorf("unsupported UnixFs type")) + return + } + logger.Debugw("serving unixfs directory", "path", contentPath) + i.serveDirectory(w, r, resolvedPath, contentPath, dir, logger) +} diff --git a/core/corehttp/gateway_handler_unixfs_dir.go b/core/corehttp/gateway_handler_unixfs_dir.go new file mode 100644 index 00000000000..8e7e131ddf8 --- /dev/null +++ b/core/corehttp/gateway_handler_unixfs_dir.go @@ -0,0 +1,197 @@ +package corehttp + +import ( + "net/http" + "net/url" + gopath "path" + "strings" + + "github.com/dustin/go-humanize" + files "github.com/ipfs/go-ipfs-files" + "github.com/ipfs/go-ipfs/assets" + path "github.com/ipfs/go-path" + "github.com/ipfs/go-path/resolver" + ipath "github.com/ipfs/interface-go-ipfs-core/path" + "go.uber.org/zap" +) + +// serveDirectory returns the best representation of UnixFS directory +// +// It will return index.html if present, or generate directory listing 
otherwise. +func (i *gatewayHandler) serveDirectory(w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, dir files.Directory, logger *zap.SugaredLogger) { + + // HostnameOption might have constructed an IPNS/IPFS path using the Host header. + // In this case, we need the original path for constructing redirects + // and links that match the requested URL. + // For example, http://example.net would become /ipns/example.net, and + // the redirects and links would end up as http://example.net/ipns/example.net + requestURI, err := url.ParseRequestURI(r.RequestURI) + if err != nil { + webError(w, "failed to parse request path", err, http.StatusInternalServerError) + return + } + originalUrlPath := requestURI.Path + + // Check if directory has index.html, if so, serveFile + idxPath := ipath.Join(resolvedPath, "index.html") + idx, err := i.api.Unixfs().Get(r.Context(), idxPath) + switch err.(type) { + case nil: + cpath := contentPath.String() + dirwithoutslash := cpath[len(cpath)-1] != '/' + goget := r.URL.Query().Get("go-get") == "1" + if dirwithoutslash && !goget { + // See comment above where originalUrlPath is declared. + suffix := "/" + if r.URL.RawQuery != "" { + // preserve query parameters + suffix = suffix + "?" 
+ r.URL.RawQuery + } + + redirectURL := originalUrlPath + suffix + logger.Debugw("serving index.html file", "to", redirectURL, "status", http.StatusFound, "path", idxPath) + http.Redirect(w, r, redirectURL, http.StatusFound) + return + } + + f, ok := idx.(files.File) + if !ok { + internalWebError(w, files.ErrNotReader) + return + } + + logger.Debugw("serving index.html file", "path", idxPath) + // write to request + i.serveFile(w, r, idxPath, resolvedPath.Cid(), f) + return + case resolver.ErrNoLink: + logger.Debugw("no index.html; noop", "path", idxPath) + default: + internalWebError(w, err) + return + } + + // See statusResponseWriter.WriteHeader + // and https://github.com/ipfs/go-ipfs/issues/7164 + // Note: this needs to occur before listingTemplate.Execute otherwise we get + // superfluous response.WriteHeader call from prometheus/client_golang + if w.Header().Get("Location") != "" { + logger.Debugw("location moved permanently", "status", http.StatusMovedPermanently) + w.WriteHeader(http.StatusMovedPermanently) + return + } + + // A HTML directory index will be presented, be sure to set the correct + // type instead of relying on autodetection (which may fail). + w.Header().Set("Content-Type", "text/html") + + // Generated dir index requires custom Etag (it may change between go-ipfs versions) + if assets.BindataVersionHash != "" { + dirEtag := `"DirIndex-` + assets.BindataVersionHash + `_CID-` + resolvedPath.Cid().String() + `"` + w.Header().Set("Etag", dirEtag) + if r.Header.Get("If-None-Match") == dirEtag { + w.WriteHeader(http.StatusNotModified) + return + } + } + + if r.Method == http.MethodHead { + logger.Debug("return as request's HTTP method is HEAD") + return + } + + // storage for directory listing + var dirListing []directoryItem + dirit := dir.Entries() + for dirit.Next() { + size := "?" + if s, err := dirit.Node().Size(); err == nil { + // Size may not be defined/supported. Continue anyways. 
+ size = humanize.Bytes(uint64(s)) + } + + resolved, err := i.api.ResolvePath(r.Context(), ipath.Join(resolvedPath, dirit.Name())) + if err != nil { + internalWebError(w, err) + return + } + hash := resolved.Cid().String() + + // See comment above where originalUrlPath is declared. + di := directoryItem{ + Size: size, + Name: dirit.Name(), + Path: gopath.Join(originalUrlPath, dirit.Name()), + Hash: hash, + ShortHash: shortHash(hash), + } + dirListing = append(dirListing, di) + } + if dirit.Err() != nil { + internalWebError(w, dirit.Err()) + return + } + + // construct the correct back link + // https://github.com/ipfs/go-ipfs/issues/1365 + var backLink string = originalUrlPath + + // don't go further up than /ipfs/$hash/ + pathSplit := path.SplitList(contentPath.String()) + switch { + // keep backlink + case len(pathSplit) == 3: // url: /ipfs/$hash + + // keep backlink + case len(pathSplit) == 4 && pathSplit[3] == "": // url: /ipfs/$hash/ + + // add the correct link depending on whether the path ends with a slash + default: + if strings.HasSuffix(backLink, "/") { + backLink += "./.." + } else { + backLink += "/.." + } + } + + size := "?" + if s, err := dir.Size(); err == nil { + // Size may not be defined/supported. Continue anyways. + size = humanize.Bytes(uint64(s)) + } + + hash := resolvedPath.Cid().String() + + // Gateway root URL to be used when linking to other rootIDs. + // This will be blank unless subdomain or DNSLink resolution is being used + // for this request. + var gwURL string + + // Get gateway hostname and build gateway URL. + if h, ok := r.Context().Value("gw-hostname").(string); ok { + gwURL = "//" + h + } else { + gwURL = "" + } + + dnslink := hasDNSLinkOrigin(gwURL, contentPath.String()) + + // See comment above where originalUrlPath is declared. 
+ tplData := listingTemplateData{ + GatewayURL: gwURL, + DNSLink: dnslink, + Listing: dirListing, + Size: size, + Path: contentPath.String(), + Breadcrumbs: breadcrumbs(contentPath.String(), dnslink), + BackLink: backLink, + Hash: hash, + } + + logger.Debugw("request processed", "tplDataDNSLink", dnslink, "tplDataSize", size, "tplDataBackLink", backLink, "tplDataHash", hash) + + if err := listingTemplate.Execute(w, tplData); err != nil { + internalWebError(w, err) + return + } +} diff --git a/core/corehttp/gateway_handler_file.go b/core/corehttp/gateway_handler_unixfs_file.go similarity index 100% rename from core/corehttp/gateway_handler_file.go rename to core/corehttp/gateway_handler_unixfs_file.go From 26c122efc063284bb67799e7ec3f78077ef73c87 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Mar 2022 21:56:45 +0100 Subject: [PATCH 14/17] fix: exact match Etag https://github.com/ipfs/go-ipfs/pull/8758#discussion_r824551165 --- core/corehttp/gateway_handler.go | 38 ++++++++++++++------ core/corehttp/gateway_handler_block.go | 3 +- core/corehttp/gateway_handler_car.go | 10 ++++-- core/corehttp/gateway_handler_unixfs_file.go | 2 ++ test/sharness/t0117-gateway-block.sh | 2 +- 5 files changed, 40 insertions(+), 15 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index 1db0bdbb8f8..d1583d63173 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -275,10 +275,11 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return } + // Detect when explicit Accept header or ?format parameter are present + responseFormat := customResponseFormat(r) + // Finish early if client already has matching Etag - // (suffix match to cover both direct CID and DirIndex cases) - cidEtagSuffix := resolvedPath.Cid().String() + `"` - if strings.HasSuffix(r.Header.Get("If-None-Match"), cidEtagSuffix) { + if r.Header.Get("If-None-Match") == getEtag(r, resolvedPath.Cid()) { 
w.WriteHeader(http.StatusNotModified) return } @@ -304,8 +305,8 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request } // Support custom response formats passed via ?format or Accept HTTP header - switch contentType := getExplicitContentType(r); contentType { - case "": // The default, implicit response format is UnixFS + switch responseFormat { + case "": // The implicit response format is UnixFS logger.Debugw("serving unixfs", "path", contentPath) i.serveUnixFs(w, r, resolvedPath, contentPath, logger) return @@ -317,8 +318,8 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request logger.Debugw("serving car stream", "path", contentPath) i.serveCar(w, r, resolvedPath.Cid(), contentPath) return - default: - err := fmt.Errorf("unsupported format %q", contentType) + default: // catch-all for unsuported application/vnd.* + err := fmt.Errorf("unsupported format %q", responseFormat) webError(w, "failed respond with requested content type", err, http.StatusBadRequest) return } @@ -544,8 +545,8 @@ func (i *gatewayHandler) addUserHeaders(w http.ResponseWriter) { } func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath ipath.Path, fileCid cid.Cid) (modtime time.Time) { - // Set Etag to file's CID (override whatever was set before) - w.Header().Set("Etag", `"`+fileCid.String()+`"`) + // Set Etag to based on CID (override whatever was set before) + w.Header().Set("Etag", getEtag(r, fileCid)) // Set Cache-Control and Last-Modified based on contentPath properties if contentPath.Mutable() { @@ -566,7 +567,7 @@ func addCacheControlHeaders(w http.ResponseWriter, r *http.Request, contentPath // Set modtime to 'zero time' to disable Last-Modified header (superseded by Cache-Control) modtime = noModtime - // TODO: set Last-Modified - TBD - /ipfs/ modification metadata is present in unixfs 1.5 https://github.com/ipfs/go-ipfs/issues/6920? + // TODO: set Last-Modified? 
- TBD - /ipfs/ modification metadata is present in unixfs 1.5 https://github.com/ipfs/go-ipfs/issues/6920? } return modtime @@ -681,8 +682,23 @@ func getFilename(contentPath ipath.Path) string { return gopath.Base(s) } +// generate Etag value based on HTTP request and CID +func getEtag(r *http.Request, cid cid.Cid) string { + prefix := `"` + suffix := `"` + responseFormat := customResponseFormat(r) + if responseFormat != "" { + // application/vnd.ipld.foo → foo + f := responseFormat[strings.LastIndex(responseFormat, ".")+1:] + // Etag: "cid.foo" (gives us nice compression together with Content-Disposition in block (raw) and car responses) + suffix = `.` + f + suffix + } + // TODO: include selector suffix when https://github.com/ipfs/go-ipfs/issues/8769 lands + return prefix + cid.String() + suffix +} + // return explicit response format if specified in request as query parameter or via Accept HTTP header -func getExplicitContentType(r *http.Request) string { +func customResponseFormat(r *http.Request) string { if formatParam := r.URL.Query().Get("format"); formatParam != "" { // translate query param to a content type switch formatParam { diff --git a/core/corehttp/gateway_handler_block.go b/core/corehttp/gateway_handler_block.go index ebeda1f67d1..3b93851d214 100644 --- a/core/corehttp/gateway_handler_block.go +++ b/core/corehttp/gateway_handler_block.go @@ -32,6 +32,7 @@ func (i *gatewayHandler) serveRawBlock(w http.ResponseWriter, r *http.Request, b w.Header().Set("Content-Type", "application/vnd.ipld.raw") w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^) - // Done: http.ServeContent will take care of Content-Length and range requests + // Done: http.ServeContent will take care of + // If-None-Match+Etag, Content-Length and range requests http.ServeContent(w, r, name, modtime, content) } diff --git a/core/corehttp/gateway_handler_car.go b/core/corehttp/gateway_handler_car.go index 897314dc0da..35df4a69ad0 100644 --- 
a/core/corehttp/gateway_handler_car.go +++ b/core/corehttp/gateway_handler_car.go @@ -22,8 +22,14 @@ func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCi setContentDispositionHeader(w, name, "attachment") // Weak Etag W/ because we can't guarantee byte-for-byte identical responses - // (CAR is streamed, blocks arrive from datastore in non-deterministic order) - w.Header().Set("Etag", `W/"`+rootCid.String()+`.car"`) + // (CAR is streamed, and in theory, blocks may arrive from datastore in non-deterministic order) + w.Header().Set("Etag", `W/`+getEtag(r, rootCid)) + + // Finish early if Etag match + if r.Header.Get("If-None-Match") == w.Header().Get("Etag") { + w.WriteHeader(http.StatusNotModified) + return + } // Make it clear we don't support range-requests over a car stream // Partial downloads and resumes should be handled using diff --git a/core/corehttp/gateway_handler_unixfs_file.go b/core/corehttp/gateway_handler_unixfs_file.go index 4ca85565f0c..19e6d6795e5 100644 --- a/core/corehttp/gateway_handler_unixfs_file.go +++ b/core/corehttp/gateway_handler_unixfs_file.go @@ -77,5 +77,7 @@ func (i *gatewayHandler) serveFile(w http.ResponseWriter, r *http.Request, conte // special fixup around redirects w = &statusResponseWriter{w} + // Done: http.ServeContent will take care of + // If-None-Match+Etag, Content-Length and range requests http.ServeContent(w, r, name, modtime, content) } diff --git a/test/sharness/t0117-gateway-block.sh b/test/sharness/t0117-gateway-block.sh index 3ebe67ab568..c9e3a4713c8 100755 --- a/test/sharness/t0117-gateway-block.sh +++ b/test/sharness/t0117-gateway-block.sh @@ -53,7 +53,7 @@ test_expect_success "Create text fixtures" ' # (basic checks, detailed behavior is tested in t0116-gateway-cache.sh) test_expect_success "GET response for application/vnd.ipld.raw includes Etag" ' - grep "< Etag: \"${FILE_CID}\"" curl_output + grep "< Etag: \"${FILE_CID}.raw\"" curl_output ' test_expect_success "GET response for 
application/vnd.ipld.raw includes X-Ipfs-Path and X-Ipfs-Roots" ' From eb95d2b693ad3ccf028d3995b9b109307dae9f62 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 15 Mar 2022 23:29:05 +0100 Subject: [PATCH 15/17] refactor: prefix cleanup and readable paths - removed dead code after X-Ipfs-Gateway-Prefix is gone (https://github.com/ipfs/go-ipfs/issues/7702) - escaped special characters in content paths returned with http.Error making them both safer and easier to reason about (e.g. when invisible whitespace unicode is copied) --- core/corehttp/gateway_handler.go | 17 ++++-- core/corehttp/gateway_test.go | 90 ++------------------------------ 2 files changed, 16 insertions(+), 91 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index d1583d63173..45356271d20 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -3,7 +3,6 @@ package corehttp import ( "context" "fmt" - "html" "html/template" "io" "net/http" @@ -263,15 +262,16 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request switch err { case nil: case coreiface.ErrOffline: - webError(w, "ipfs resolve -r "+html.EscapeString(contentPath.String()), err, http.StatusServiceUnavailable) + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusServiceUnavailable) return default: + // if Accept is text/html, see if ipfs-404.html is present if i.servePretty404IfPresent(w, r, contentPath) { logger.Debugw("serve pretty 404 if present") return } - webError(w, "ipfs resolve -r "+html.EscapeString(contentPath.String()), err, http.StatusNotFound) + webError(w, "ipfs resolve -r "+debugStr(contentPath.String()), err, http.StatusNotFound) return } @@ -664,7 +664,7 @@ func webError(w http.ResponseWriter, message string, err error, defaultCode int) func webErrorWithCode(w http.ResponseWriter, message string, err error, code int) { http.Error(w, fmt.Sprintf("%s: %s", message, err), code) if code >= 
500 { - log.Warnf("server error: %s: %s", err) + log.Warnf("server error: %s: %s", message, err) } } @@ -762,6 +762,15 @@ func preferred404Filename(acceptHeaders []string) (string, string, error) { return "", "", fmt.Errorf("there is no 404 file for the requested content types") } +// returns unquoted path with all special characters revealed as \u codes +func debugStr(path string) string { + q := fmt.Sprintf("%+q", path) + if len(q) >= 3 { + q = q[1 : len(q)-1] + } + return q +} + // Attempt to fix redundant /ipfs/ namespace as long as resulting // 'intended' path is valid. This is in case gremlins were tickled // wrong way and user ended up at /ipfs/ipfs/{cid} or /ipfs/ipns/{id} diff --git a/core/corehttp/gateway_test.go b/core/corehttp/gateway_test.go index 8cccde0e22a..40bede82289 100644 --- a/core/corehttp/gateway_test.go +++ b/core/corehttp/gateway_test.go @@ -126,12 +126,6 @@ func newTestServerAndNode(t *testing.T, ns mockNamesys) (*httptest.Server, iface t.Fatal(err) } - cfg, err := n.Repo.Config() - if err != nil { - t.Fatal(err) - } - cfg.Gateway.PathPrefixes = []string{"/good-prefix"} - // need this variable here since we need to construct handler with // listener, and server with handler. yay cycles. 
dh := &delegatedHandler{} @@ -242,7 +236,7 @@ func TestGatewayGet(t *testing.T) { {"127.0.0.1:8080", "/" + k.Cid().String(), http.StatusNotFound, "404 page not found\n"}, {"127.0.0.1:8080", k.String(), http.StatusOK, "fnord"}, {"127.0.0.1:8080", "/ipns/nxdomain.example.com", http.StatusNotFound, "ipfs resolve -r /ipns/nxdomain.example.com: " + namesys.ErrResolveFailed.Error() + "\n"}, - {"127.0.0.1:8080", "/ipns/%0D%0A%0D%0Ahello", http.StatusNotFound, "ipfs resolve -r /ipns/%0D%0A%0D%0Ahello: " + namesys.ErrResolveFailed.Error() + "\n"}, + {"127.0.0.1:8080", "/ipns/%0D%0A%0D%0Ahello", http.StatusNotFound, "ipfs resolve -r /ipns/\\r\\n\\r\\nhello: " + namesys.ErrResolveFailed.Error() + "\n"}, {"127.0.0.1:8080", "/ipns/example.com", http.StatusOK, "fnord"}, {"example.com", "/", http.StatusOK, "fnord"}, @@ -403,7 +397,6 @@ func TestIPNSHostnameRedirect(t *testing.T) { t.Fatal(err) } req.Host = "example.net" - req.Header.Set("X-Ipfs-Gateway-Prefix", "/good-prefix") res, err = doWithoutRedirect(req) if err != nil { @@ -417,8 +410,8 @@ func TestIPNSHostnameRedirect(t *testing.T) { hdr = res.Header["Location"] if len(hdr) < 1 { t.Errorf("location header not present") - } else if hdr[0] != "/good-prefix/foo/" { - t.Errorf("location header is %v, expected /good-prefix/foo/", hdr[0]) + } else if hdr[0] != "/foo/" { + t.Errorf("location header is %v, expected /foo/", hdr[0]) } // make sure /version isn't exposed @@ -427,7 +420,6 @@ func TestIPNSHostnameRedirect(t *testing.T) { t.Fatal(err) } req.Host = "example.net" - req.Header.Set("X-Ipfs-Gateway-Prefix", "/good-prefix") res, err = doWithoutRedirect(req) if err != nil { @@ -583,82 +575,6 @@ func TestIPNSHostnameBacklinks(t *testing.T) { if !strings.Contains(s, k3.Cid().String()) { t.Fatalf("expected hash in directory listing") } - - // make request to directory listing with prefix - req, err = http.NewRequest(http.MethodGet, ts.URL, nil) - if err != nil { - t.Fatal(err) - } - req.Host = "example.net" - 
req.Header.Set("X-Ipfs-Gateway-Prefix", "/good-prefix") - - res, err = doWithoutRedirect(req) - if err != nil { - t.Fatal(err) - } - - // expect correct backlinks with prefix - body, err = ioutil.ReadAll(res.Body) - if err != nil { - t.Fatalf("error reading response: %s", err) - } - s = string(body) - t.Logf("body: %s\n", string(body)) - - if !matchPathOrBreadcrumbs(s, "/ipns/example.net") { - t.Fatalf("expected a path in directory listing") - } - if !strings.Contains(s, "") { - t.Fatalf("expected backlink in directory listing") - } - if !strings.Contains(s, "") { - t.Fatalf("expected file in directory listing") - } - if !strings.Contains(s, k.Cid().String()) { - t.Fatalf("expected hash in directory listing") - } - - // make request to directory listing with illegal prefix - req, err = http.NewRequest(http.MethodGet, ts.URL, nil) - if err != nil { - t.Fatal(err) - } - req.Host = "example.net" - req.Header.Set("X-Ipfs-Gateway-Prefix", "/bad-prefix") - - // make request to directory listing with evil prefix - req, err = http.NewRequest(http.MethodGet, ts.URL, nil) - if err != nil { - t.Fatal(err) - } - req.Host = "example.net" - req.Header.Set("X-Ipfs-Gateway-Prefix", "//good-prefix/foo") - - res, err = doWithoutRedirect(req) - if err != nil { - t.Fatal(err) - } - - // expect correct backlinks without illegal prefix - body, err = ioutil.ReadAll(res.Body) - if err != nil { - t.Fatalf("error reading response: %s", err) - } - s = string(body) - t.Logf("body: %s\n", string(body)) - - if !matchPathOrBreadcrumbs(s, "/") { - t.Fatalf("expected a path in directory listing") - } - if !strings.Contains(s, "") { - t.Fatalf("expected backlink in directory listing") - } - if !strings.Contains(s, "") { - t.Fatalf("expected file in directory listing") - } - if !strings.Contains(s, k.Cid().String()) { - t.Fatalf("expected hash in directory listing") - } } func TestCacheControlImmutable(t *testing.T) { From aae5c28646b66585ad569d0589484e06fd33e1a8 Mon Sep 17 00:00:00 2001 From: 
Marcin Rataj Date: Wed, 16 Mar 2022 00:03:34 +0100 Subject: [PATCH 16/17] refactor: cleanup --- core/corehttp/gateway_handler_car.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/corehttp/gateway_handler_car.go b/core/corehttp/gateway_handler_car.go index 35df4a69ad0..dfd602d5632 100644 --- a/core/corehttp/gateway_handler_car.go +++ b/core/corehttp/gateway_handler_car.go @@ -23,10 +23,11 @@ func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCi // Weak Etag W/ because we can't guarantee byte-for-byte identical responses // (CAR is streamed, and in theory, blocks may arrive from datastore in non-deterministic order) - w.Header().Set("Etag", `W/`+getEtag(r, rootCid)) + etag := getEtag(r, rootCid) + w.Header().Set("Etag", `W/`+etag) // Finish early if Etag match - if r.Header.Get("If-None-Match") == w.Header().Get("Etag") { + if r.Header.Get("If-None-Match") == etag { w.WriteHeader(http.StatusNotModified) return } From 5b43672a8733525ad9fab9be03d14d983f3f9914 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 16 Mar 2022 01:16:55 +0100 Subject: [PATCH 17/17] fix: car response etag matching https://github.com/ipfs/go-ipfs/pull/8758#discussion_r827503815 lidel needs some sleep --- core/corehttp/gateway_handler_car.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/corehttp/gateway_handler_car.go b/core/corehttp/gateway_handler_car.go index dfd602d5632..43ce99eef53 100644 --- a/core/corehttp/gateway_handler_car.go +++ b/core/corehttp/gateway_handler_car.go @@ -23,8 +23,8 @@ func (i *gatewayHandler) serveCar(w http.ResponseWriter, r *http.Request, rootCi // Weak Etag W/ because we can't guarantee byte-for-byte identical responses // (CAR is streamed, and in theory, blocks may arrive from datastore in non-deterministic order) - etag := getEtag(r, rootCid) - w.Header().Set("Etag", `W/`+etag) + etag := `W/` + getEtag(r, rootCid) + w.Header().Set("Etag", etag) // Finish early if Etag match 
if r.Header.Get("If-None-Match") == etag {