From 17512b0f1e563f7934072a0314ccbf7cc3263fa9 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 19 Jan 2023 14:46:56 +0100 Subject: [PATCH 1/8] fix(gateway): do not convert unixfs/raw into dag-* unless explicit --- core/corehttp/gateway_handler.go | 4 +- core/corehttp/gateway_handler_codec.go | 13 ++++++- test/sharness/t0123-gateway-json-cbor.sh | 47 +++++++----------------- 3 files changed, 27 insertions(+), 37 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index c20f112d76a..b76099ea86f 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -422,7 +422,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request switch resolvedPath.Cid().Prefix().Codec { case uint64(mc.Json), uint64(mc.DagJson), uint64(mc.Cbor), uint64(mc.DagCbor): logger.Debugw("serving codec", "path", contentPath) - i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat) + i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, logger, responseFormat) default: logger.Debugw("serving unixfs", "path", contentPath) i.serveUnixFS(r.Context(), w, r, resolvedPath, contentPath, begin, logger) @@ -444,7 +444,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request case "application/json", "application/vnd.ipld.dag-json", "application/cbor", "application/vnd.ipld.dag-cbor": logger.Debugw("serving codec", "path", contentPath) - i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat) + i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, logger, responseFormat) return default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) diff --git a/core/corehttp/gateway_handler_codec.go b/core/corehttp/gateway_handler_codec.go index 95a151c7943..4d51c6e723c 100644 --- a/core/corehttp/gateway_handler_codec.go +++ 
b/core/corehttp/gateway_handler_codec.go @@ -21,6 +21,7 @@ import ( mc "github.com/multiformats/go-multicodec" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" ) // codecToContentType maps the supported IPLD codecs to the HTTP Content @@ -52,13 +53,23 @@ var contentTypeToExtension = map[string]string{ "application/vnd.ipld.dag-cbor": ".cbor", } -func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, requestedContentType string) { +func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger, requestedContentType string) { ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", requestedContentType))) defer span.End() cidCodec := resolvedPath.Cid().Prefix().Codec responseContentType := requestedContentType + // Do not convert UnixFS and Raw blocks into DAG-* unless we explicitly + // requested it. If it is not requested as DAG-*, we defer the execution + // to the i.serveUnixFS handler. + if cidCodec == uint64(mc.Raw) || cidCodec == uint64(mc.DagPb) { + if !strings.Contains(responseContentType, "dag-") { + i.serveUnixFS(ctx, w, r, resolvedPath, contentPath, begin, logger) + return + } + } + // If the resolved path still has some remainder, return error for now. 
// TODO: handle this when we have IPLD Patch (https://ipld.io/specs/patch/) via HTTP PUT // TODO: (depends on https://github.com/ipfs/kubo/issues/4801 and https://github.com/ipfs/kubo/issues/4782) diff --git a/test/sharness/t0123-gateway-json-cbor.sh b/test/sharness/t0123-gateway-json-cbor.sh index f4ebca19d2c..5cd020831de 100755 --- a/test/sharness/t0123-gateway-json-cbor.sh +++ b/test/sharness/t0123-gateway-json-cbor.sh @@ -43,24 +43,22 @@ test_dag_pb_headers () { test_should_not_contain "Content-Type: application/$format" curl_output ' - test_expect_success "GET UnixFS as $name with format=$format has expected Content-Type" ' + test_expect_success "GET UnixFS as $name with 'Accept: foo, application/vnd.ipld.dag-$format,bar' has expected Content-Type" ' + curl -sD - -H "Accept: foo, application/vnd.ipld.dag-$format,text/plain" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 && + test_should_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output + ' + + test_expect_success "GET UnixFS with format=$format returns raw (no conversion)" ' curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=$format" > curl_output 2>&1 && - test_should_contain "Content-Disposition: ${disposition}\; filename=\"${FILE_CID}.${format}\"" curl_output && - test_should_contain "Content-Type: application/$format" curl_output && + test_should_not_contain "Content-Type: application/$format" curl_output && test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output ' - test_expect_success "GET UnixFS as $name with 'Accept: application/$format' has expected Content-Type" ' + test_expect_success "GET UnixFS with 'Accept: application/$format' returns raw (no conversion)" ' curl -sD - -H "Accept: application/$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 && - test_should_contain "Content-Disposition: ${disposition}\; filename=\"${FILE_CID}.${format}\"" curl_output && - test_should_contain "Content-Type: 
application/$format" curl_output && + test_should_not_contain "Content-Type: application/$format" curl_output && test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output ' - - test_expect_success "GET UnixFS as $name with 'Accept: foo, application/$format,bar' has expected Content-Type" ' - curl -sD - -H "Accept: foo, application/$format,text/plain" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 && - test_should_contain "Content-Type: application/$format" curl_output - ' } test_dag_pb_headers "DAG-JSON" "json" "inline" @@ -81,12 +79,6 @@ test_dag_pb () { ipfs dag get --output-codec dag-$format $DIR_CID > ipfs_dag_get_output 2>&1 && test_cmp ipfs_dag_get_output curl_output ' - - test_expect_success "GET UnixFS as $name with format=dag-$format and format=$format produce same output" ' - curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=dag-$format" > curl_output_1 2>&1 && - curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=$format" > curl_output_2 2>&1 && - test_cmp curl_output_1 curl_output_2 - ' } test_dag_pb "DAG-JSON" "json" @@ -204,12 +196,6 @@ test_expect_success "GET DAG-CBOR traverses multiple links" ' test_cmp expected actual ' -# test_expect_success "GET DAG-PB has expected output" ' -# curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DAG_PB_CID?format=dag-json" > curl_output 2>&1 && -# jq --sort-keys . 
curl_output > actual && -# test_cmp ../t0123-gateway-json-cbor/dag-pb.json actual -# ' - ## NATIVE TESTS: ## DAG- regression tests for core behaviors when native DAG-(CBOR|JSON) is requested @@ -302,18 +288,11 @@ test_native_dag () { test_should_contain "Content-Type: application/vnd.ipld.dag-$format" output && test_should_contain "Content-Length: " output ' - test_expect_success "HEAD $name with an explicit JSON format returns HTTP 200" ' - curl -I "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=json" -o output && - test_should_contain "HTTP/1.1 200 OK" output && - test_should_contain "Etag: \"$CID.json\"" output && - test_should_contain "Content-Type: application/json" output && - test_should_contain "Content-Length: " output - ' - test_expect_success "HEAD dag-pb with ?format=$format returns HTTP 200" ' - curl -I "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=$format" -o output && + test_expect_success "HEAD $name with an explicit DAG-JSON format returns HTTP 200" ' + curl -I "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=dag-json" -o output && test_should_contain "HTTP/1.1 200 OK" output && - test_should_contain "Etag: \"$FILE_CID.$format\"" output && - test_should_contain "Content-Type: application/$format" output && + test_should_contain "Etag: \"$CID.dag-json\"" output && + test_should_contain "Content-Type: application/vnd.ipld.dag-json" output && test_should_contain "Content-Length: " output ' test_expect_success "HEAD $name with only-if-cached for missing block returns HTTP 412 Precondition Failed" ' From 1aad10f28e91c57bc13fbf850b5d4472bbe65c63 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 19 Jan 2023 18:41:44 +0100 Subject: [PATCH 2/8] fix(gateway): keep only dag-json|dag-cbor handling --- core/corehttp/gateway_handler.go | 5 +-- core/corehttp/gateway_handler_codec.go | 56 +++++------------------- test/sharness/t0123-gateway-json-cbor.sh | 45 ------------------- 3 files changed, 12 insertions(+), 94 deletions(-) diff --git 
a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index b76099ea86f..822125dc759 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -420,7 +420,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request switch responseFormat { case "": switch resolvedPath.Cid().Prefix().Codec { - case uint64(mc.Json), uint64(mc.DagJson), uint64(mc.Cbor), uint64(mc.DagCbor): + case uint64(mc.DagJson), uint64(mc.DagCbor): logger.Debugw("serving codec", "path", contentPath) i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, logger, responseFormat) default: @@ -441,8 +441,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request logger.Debugw("serving tar file", "path", contentPath) i.serveTAR(r.Context(), w, r, resolvedPath, contentPath, begin, logger) return - case "application/json", "application/vnd.ipld.dag-json", - "application/cbor", "application/vnd.ipld.dag-cbor": + case "application/vnd.ipld.dag-json", "application/vnd.ipld.dag-cbor": logger.Debugw("serving codec", "path", contentPath) i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, logger, responseFormat) return diff --git a/core/corehttp/gateway_handler_codec.go b/core/corehttp/gateway_handler_codec.go index 4d51c6e723c..9f7be8f79ef 100644 --- a/core/corehttp/gateway_handler_codec.go +++ b/core/corehttp/gateway_handler_codec.go @@ -26,30 +26,24 @@ import ( // codecToContentType maps the supported IPLD codecs to the HTTP Content // Type they should have. 
-var codecToContentType = map[uint64]string{ - uint64(mc.Json): "application/json", - uint64(mc.Cbor): "application/cbor", - uint64(mc.DagJson): "application/vnd.ipld.dag-json", - uint64(mc.DagCbor): "application/vnd.ipld.dag-cbor", +var codecToContentType = map[mc.Code]string{ + mc.DagJson: "application/vnd.ipld.dag-json", + mc.DagCbor: "application/vnd.ipld.dag-cbor", } // contentTypeToCodecs maps the HTTP Content Type to the respective // possible codecs. If the original data is in one of those codecs, // we stream the raw bytes. Otherwise, we encode in the last codec // of the list. -var contentTypeToCodecs = map[string][]uint64{ - "application/json": {uint64(mc.Json), uint64(mc.DagJson)}, - "application/vnd.ipld.dag-json": {uint64(mc.DagJson)}, - "application/cbor": {uint64(mc.Cbor), uint64(mc.DagCbor)}, - "application/vnd.ipld.dag-cbor": {uint64(mc.DagCbor)}, +var contentTypeToCodecs = map[string]mc.Code{ + "application/vnd.ipld.dag-json": mc.DagJson, + "application/vnd.ipld.dag-cbor": mc.DagCbor, } // contentTypeToExtension maps the HTTP Content Type to the respective file // extension, used in Content-Disposition header when downloading the file. var contentTypeToExtension = map[string]string{ - "application/json": ".json", "application/vnd.ipld.dag-json": ".json", - "application/cbor": ".cbor", "application/vnd.ipld.dag-cbor": ".cbor", } @@ -57,19 +51,9 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", requestedContentType))) defer span.End() - cidCodec := resolvedPath.Cid().Prefix().Codec + cidCodec := mc.Code(resolvedPath.Cid().Prefix().Codec) responseContentType := requestedContentType - // Do not convert UnixFS and Raw blocks into DAG-* unless we explicitly - // requested it. 
If it is not requested as DAG-*, we defer the execution - // to the i.serveUnixFS handler. - if cidCodec == uint64(mc.Raw) || cidCodec == uint64(mc.DagPb) { - if !strings.Contains(responseContentType, "dag-") { - i.serveUnixFS(ctx, w, r, resolvedPath, contentPath, begin, logger) - return - } - } - // If the resolved path still has some remainder, return error for now. // TODO: handle this when we have IPLD Patch (https://ipld.io/specs/patch/) via HTTP PUT // TODO: (depends on https://github.com/ipfs/kubo/issues/4801 and https://github.com/ipfs/kubo/issues/4782) @@ -101,11 +85,10 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, // No content type is specified by the user (via Accept, or format=). However, // we support this format. Let's handle it. if requestedContentType == "" { - isDAG := cidCodec == uint64(mc.DagJson) || cidCodec == uint64(mc.DagCbor) acceptsHTML := strings.Contains(r.Header.Get("Accept"), "text/html") download := r.URL.Query().Get("download") == "true" - if isDAG && acceptsHTML && !download { + if acceptsHTML && !download { i.serveCodecHTML(ctx, w, r, resolvedPath, contentPath) } else { i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime) @@ -116,7 +99,7 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, // Otherwise, the user has requested a specific content type. Let's first get // the codecs that can be used with this content type. - codecs, ok := contentTypeToCodecs[requestedContentType] + toCodec, ok := contentTypeToCodecs[requestedContentType] if !ok { // This is never supposed to happen unless function is called with wrong parameters. 
err := fmt.Errorf("unsupported content type: %s", requestedContentType) @@ -124,28 +107,9 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, return } - // If we need to convert, use the last codec (strict dag- variant) - toCodec := codecs[len(codecs)-1] - // If the requested content type has "dag-", ALWAYS go through the encoding // process in order to validate the content. - if strings.Contains(requestedContentType, "dag-") { - i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime) - return - } - - // Otherwise, check if the data is encoded with the requested content type. - // If so, we can directly stream the raw data. serveRawBlock cannot be directly - // used here as it sets different headers. - for _, codec := range codecs { - if resolvedPath.Cid().Prefix().Codec == codec { - i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime) - return - } - } - - // Finally, if nothing of the above is true, we have to actually convert the codec. 
- i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime) + i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, uint64(toCodec), modtime) } func (i *gatewayHandler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path) { diff --git a/test/sharness/t0123-gateway-json-cbor.sh b/test/sharness/t0123-gateway-json-cbor.sh index 5cd020831de..5b3929ca5fe 100755 --- a/test/sharness/t0123-gateway-json-cbor.sh +++ b/test/sharness/t0123-gateway-json-cbor.sh @@ -91,34 +91,6 @@ test_cmp_dag_get () { format=$2 disposition=$3 - test_expect_success "GET $name without Accept or format= has expected Content-Type" ' - CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) && - curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > curl_output 2>&1 && - test_should_contain "Content-Disposition: ${disposition}\; filename=\"${CID}.${format}\"" curl_output && - test_should_contain "Content-Type: application/$format" curl_output - ' - - test_expect_success "GET $name without Accept or format= produces correct output" ' - CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) && - curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > curl_output 2>&1 && - ipfs dag get --output-codec $format $CID > ipfs_dag_get_output 2>&1 && - test_cmp ipfs_dag_get_output curl_output - ' - - test_expect_success "GET $name with format=$format produces expected Content-Type" ' - CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) && - curl -sD- "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=$format" > curl_output 2>&1 && - test_should_contain "Content-Disposition: ${disposition}\; filename=\"${CID}.${format}\"" curl_output && - test_should_contain "Content-Type: application/$format" curl_output - ' - - test_expect_success "GET $name with format=$format produces correct output" ' - CID=$(echo "{ 
\"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) && - curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=$format" > curl_output 2>&1 && - ipfs dag get --output-codec $format $CID > ipfs_dag_get_output 2>&1 && - test_cmp ipfs_dag_get_output curl_output - ' - test_expect_success "GET $name with format=dag-$format produces expected Content-Type" ' CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) && curl -sD- "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=dag-$format" > curl_output 2>&1 && @@ -138,23 +110,6 @@ test_cmp_dag_get "JSON" "json" "inline" test_cmp_dag_get "CBOR" "cbor" "attachment" -## Lossless conversion between JSON and CBOR - -test_expect_success "GET JSON as CBOR produces DAG-CBOR output" ' - CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec json) && - curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=cbor" > curl_output 2>&1 && - ipfs dag get --output-codec dag-cbor $CID > ipfs_dag_get_output 2>&1 && - test_cmp ipfs_dag_get_output curl_output -' - -test_expect_success "GET CBOR as JSON produces DAG-JSON output" ' - CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec cbor) && - curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=json" > curl_output 2>&1 && - ipfs dag get --output-codec dag-json $CID > ipfs_dag_get_output 2>&1 && - test_cmp ipfs_dag_get_output curl_output -' - - ## Pathing, traversal DAG_CBOR_TRAVERSAL_CID="bafyreibs4utpgbn7uqegmd2goqz4bkyflre2ek2iwv743fhvylwi4zeeim" From 297e910efb5f4b9c48fed8ae7328d6e9e79db5a6 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 19 Jan 2023 18:43:55 +0100 Subject: [PATCH 3/8] refactor: rename variable and update comment --- core/corehttp/gateway_handler_codec.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/core/corehttp/gateway_handler_codec.go b/core/corehttp/gateway_handler_codec.go index 9f7be8f79ef..cf3047e2415 100644 --- 
a/core/corehttp/gateway_handler_codec.go +++ b/core/corehttp/gateway_handler_codec.go @@ -31,11 +31,8 @@ var codecToContentType = map[mc.Code]string{ mc.DagCbor: "application/vnd.ipld.dag-cbor", } -// contentTypeToCodecs maps the HTTP Content Type to the respective -// possible codecs. If the original data is in one of those codecs, -// we stream the raw bytes. Otherwise, we encode in the last codec -// of the list. -var contentTypeToCodecs = map[string]mc.Code{ +// contentTypeToCodec maps the HTTP Content Type to the respective codec. +var contentTypeToCodec = map[string]mc.Code{ "application/vnd.ipld.dag-json": mc.DagJson, "application/vnd.ipld.dag-cbor": mc.DagCbor, } @@ -99,7 +96,7 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, // Otherwise, the user has requested a specific content type. Let's first get // the codecs that can be used with this content type. - toCodec, ok := contentTypeToCodecs[requestedContentType] + toCodec, ok := contentTypeToCodec[requestedContentType] if !ok { // This is never supposed to happen unless function is called with wrong parameters. 
err := fmt.Errorf("unsupported content type: %s", requestedContentType) From efccafcb4dd0c190b253a4849ef7e0c58cea87b1 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 19 Jan 2023 19:24:16 +0100 Subject: [PATCH 4/8] fix: re-add --- core/corehttp/gateway_handler.go | 12 ++++-------- core/corehttp/gateway_handler_codec.go | 25 +++++++++++++++++------- test/sharness/t0123-gateway-json-cbor.sh | 14 +++++++++++++ 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index 822125dc759..fc82d9f15c3 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -419,10 +419,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request // Support custom response formats passed via ?format or Accept HTTP header switch responseFormat { case "": - switch resolvedPath.Cid().Prefix().Codec { - case uint64(mc.DagJson), uint64(mc.DagCbor): + switch mc.Code(resolvedPath.Cid().Prefix().Codec) { + case mc.Json, mc.DagJson, mc.Cbor, mc.DagCbor: logger.Debugw("serving codec", "path", contentPath) - i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, logger, responseFormat) + i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat) default: logger.Debugw("serving unixfs", "path", contentPath) i.serveUnixFS(r.Context(), w, r, resolvedPath, contentPath, begin, logger) @@ -443,7 +443,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return case "application/vnd.ipld.dag-json", "application/vnd.ipld.dag-cbor": logger.Debugw("serving codec", "path", contentPath) - i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, logger, responseFormat) + i.serveCodec(r.Context(), w, r, resolvedPath, contentPath, begin, responseFormat) return default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) @@ -879,12 +879,8 @@ func 
customResponseFormat(r *http.Request) (mediaType string, params map[string] return "application/x-tar", nil, nil case "dag-json": return "application/vnd.ipld.dag-json", nil, nil - case "json": - return "application/json", nil, nil case "dag-cbor": return "application/vnd.ipld.dag-cbor", nil, nil - case "cbor": - return "application/cbor", nil, nil } } // Browsers and other user agents will send Accept header with generic types like: diff --git a/core/corehttp/gateway_handler_codec.go b/core/corehttp/gateway_handler_codec.go index cf3047e2415..80f90c96585 100644 --- a/core/corehttp/gateway_handler_codec.go +++ b/core/corehttp/gateway_handler_codec.go @@ -21,17 +21,19 @@ import ( mc "github.com/multiformats/go-multicodec" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" - "go.uber.org/zap" ) // codecToContentType maps the supported IPLD codecs to the HTTP Content // Type they should have. var codecToContentType = map[mc.Code]string{ + mc.Json: "application/json", + mc.Cbor: "application/cbor", mc.DagJson: "application/vnd.ipld.dag-json", mc.DagCbor: "application/vnd.ipld.dag-cbor", } -// contentTypeToCodec maps the HTTP Content Type to the respective codec. +// contentTypeToCodec maps the HTTP Content Type to the respective codec. We +// only add here the codecs that we want to convert-to-from. var contentTypeToCodec = map[string]mc.Code{ "application/vnd.ipld.dag-json": mc.DagJson, "application/vnd.ipld.dag-cbor": mc.DagCbor, @@ -40,11 +42,13 @@ var contentTypeToCodec = map[string]mc.Code{ // contentTypeToExtension maps the HTTP Content Type to the respective file // extension, used in Content-Disposition header when downloading the file. 
var contentTypeToExtension = map[string]string{ + "application/json": ".json", "application/vnd.ipld.dag-json": ".json", + "application/cbor": ".cbor", "application/vnd.ipld.dag-cbor": ".cbor", } -func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger, requestedContentType string) { +func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, requestedContentType string) { ctx, span := tracing.Span(ctx, "Gateway", "ServeCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", requestedContentType))) defer span.End() @@ -82,10 +86,11 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, // No content type is specified by the user (via Accept, or format=). However, // we support this format. Let's handle it. if requestedContentType == "" { + isDAG := cidCodec == mc.DagJson || cidCodec == mc.DagCbor acceptsHTML := strings.Contains(r.Header.Get("Accept"), "text/html") download := r.URL.Query().Get("download") == "true" - if acceptsHTML && !download { + if isDAG && acceptsHTML && !download { i.serveCodecHTML(ctx, w, r, resolvedPath, contentPath) } else { i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime) @@ -106,7 +111,13 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, // If the requested content type has "dag-", ALWAYS go through the encoding // process in order to validate the content. - i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, uint64(toCodec), modtime) + if strings.Contains(requestedContentType, "dag-") { + i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime) + return + } + + // Otherwise, it's just JSON or CBOR. Serve it as-is. 
+ i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime) } func (i *gatewayHandler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path) { @@ -156,7 +167,7 @@ func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWrite _, _, _ = ServeContent(w, r, name, modtime, content) } -func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec uint64, modtime time.Time) { +func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec mc.Code, modtime time.Time) { obj, err := i.api.Dag().Get(ctx, resolvedPath.Cid()) if err != nil { webError(w, "ipfs dag get "+html.EscapeString(resolvedPath.String()), err, http.StatusInternalServerError) @@ -171,7 +182,7 @@ func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.Respons } finalNode := universal.(ipld.Node) - encoder, err := multicodec.LookupEncoder(toCodec) + encoder, err := multicodec.LookupEncoder(uint64(toCodec)) if err != nil { webError(w, err.Error(), err, http.StatusInternalServerError) return diff --git a/test/sharness/t0123-gateway-json-cbor.sh b/test/sharness/t0123-gateway-json-cbor.sh index 5b3929ca5fe..563855ae758 100755 --- a/test/sharness/t0123-gateway-json-cbor.sh +++ b/test/sharness/t0123-gateway-json-cbor.sh @@ -91,6 +91,20 @@ test_cmp_dag_get () { format=$2 disposition=$3 + test_expect_success "GET $name without Accept or format= has expected Content-Type" ' + CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) && + curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > curl_output 2>&1 && + test_should_contain "Content-Disposition: ${disposition}\; filename=\"${CID}.${format}\"" curl_output && + test_should_contain "Content-Type: 
application/$format" curl_output + ' + + test_expect_success "GET $name without Accept or format= produces correct output" ' + CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) && + curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > curl_output 2>&1 && + ipfs dag get --output-codec $format $CID > ipfs_dag_get_output 2>&1 && + test_cmp ipfs_dag_get_output curl_output + ' + test_expect_success "GET $name with format=dag-$format produces expected Content-Type" ' CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) && curl -sD- "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=dag-$format" > curl_output 2>&1 && From bbecb9306b312206f507252bba69accaf50052d4 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 19 Jan 2023 19:27:27 +0100 Subject: [PATCH 5/8] refactor: simplify code --- core/corehttp/gateway_handler_codec.go | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/core/corehttp/gateway_handler_codec.go b/core/corehttp/gateway_handler_codec.go index 80f90c96585..e6dd2dd0a9a 100644 --- a/core/corehttp/gateway_handler_codec.go +++ b/core/corehttp/gateway_handler_codec.go @@ -93,14 +93,16 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, if isDAG && acceptsHTML && !download { i.serveCodecHTML(ctx, w, r, resolvedPath, contentPath) } else { + // This covers CIDs with codec 'json' and 'cbor' as those do not have + // an explicit requested content type. i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime) } return } - // Otherwise, the user has requested a specific content type. Let's first get - // the codecs that can be used with this content type. + // Otherwise, the user has requested a specific content type (a DAG-* variant). + // Let's first get the codecs that can be used with this content type. 
toCodec, ok := contentTypeToCodec[requestedContentType] if !ok { // This is never supposed to happen unless function is called with wrong parameters. @@ -109,15 +111,8 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, return } - // If the requested content type has "dag-", ALWAYS go through the encoding - // process in order to validate the content. - if strings.Contains(requestedContentType, "dag-") { - i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime) - return - } - - // Otherwise, it's just JSON or CBOR. Serve it as-is. - i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime) + // This handles DAG-* conversions and validations. + i.serveCodecConverted(ctx, w, r, resolvedPath, contentPath, toCodec, modtime) } func (i *gatewayHandler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path) { From c3ebe80d585af0091b506ffd3103de3f5b28f770 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Fri, 20 Jan 2023 03:33:56 +0100 Subject: [PATCH 6/8] fix: allow requesting dag-json as application/json - adds bunch of additional tests including JSON file on UnixFS - fix: dag-json codec (0x0129) can be returned as plain json - fix: json codec (0x0200) can be returned as plain json - same for cbor variants --- core/corehttp/gateway_handler.go | 10 +- core/corehttp/gateway_handler_codec.go | 17 +++ test/sharness/t0123-gateway-json-cbor.sh | 172 +++++++++++++---------- 3 files changed, 124 insertions(+), 75 deletions(-) diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index fc82d9f15c3..1c6797e685d 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -418,7 +418,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request // Support custom response formats passed via ?format or Accept HTTP header switch responseFormat { - case "", 
"application/json", "application/cbor": switch mc.Code(resolvedPath.Cid().Prefix().Codec) { case mc.Json, mc.DagJson, mc.Cbor, mc.DagCbor: logger.Debugw("serving codec", "path", contentPath) @@ -447,7 +447,7 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request return default: // catch-all for unsuported application/vnd.* err := fmt.Errorf("unsupported format %q", responseFormat) - webError(w, "failed respond with requested content type", err, http.StatusBadRequest) + webError(w, "failed to respond with requested content type", err, http.StatusBadRequest) return } } @@ -877,6 +877,10 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string] return "application/vnd.ipld.car", nil, nil case "tar": return "application/x-tar", nil, nil + case "json": + return "application/json", nil, nil + case "cbor": + return "application/cbor", nil, nil case "dag-json": return "application/vnd.ipld.dag-json", nil, nil case "dag-cbor": @@ -903,6 +907,8 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string] } } } + // If none of special-cased content types is found, return empty string + // to indicate default, implicit UnixFS response should be prepared return "", nil, nil } diff --git a/core/corehttp/gateway_handler_codec.go b/core/corehttp/gateway_handler_codec.go index e6dd2dd0a9a..417836f356a 100644 --- a/core/corehttp/gateway_handler_codec.go +++ b/core/corehttp/gateway_handler_codec.go @@ -32,6 +32,13 @@ var codecToContentType = map[mc.Code]string{ mc.DagCbor: "application/vnd.ipld.dag-cbor", } +// contentTypeToRaw maps the HTTP Content Type to the respective codec that +// allows raw response without any conversion. +var contentTypeToRaw = map[string]mc.Code{ + "application/json": mc.DagJson, + "application/cbor": mc.DagCbor, +} + // contentTypeToCodec maps the HTTP Content Type to the respective codec. We // only add here the codecs that we want to convert-to-from. 
var contentTypeToCodec = map[string]mc.Code{ @@ -101,6 +108,14 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, return } + // If DAG-JSON or DAG-CBOR was requested using corresponding plain content type + // return raw block as-is, without conversion + skipCodec, ok := contentTypeToRaw[requestedContentType] + if ok && skipCodec == cidCodec { + i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime) + return + } + // Otherwise, the user has requested a specific content type (a DAG-* variant). // Let's first get the codecs that can be used with this content type. toCodec, ok := contentTypeToCodec[requestedContentType] @@ -143,6 +158,7 @@ func (i *gatewayHandler) serveCodecHTML(ctx context.Context, w http.ResponseWrit } } +// serveCodecRaw returns the raw block without any conversion func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, name string, modtime time.Time) { blockCid := resolvedPath.Cid() blockReader, err := i.api.Block().Get(ctx, resolvedPath) @@ -162,6 +178,7 @@ func (i *gatewayHandler) serveCodecRaw(ctx context.Context, w http.ResponseWrite _, _, _ = ServeContent(w, r, name, modtime, content) } +// serveCodecConverted returns payload converted to codec specified in toCodec func (i *gatewayHandler) serveCodecConverted(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, toCodec mc.Code, modtime time.Time) { obj, err := i.api.Dag().Get(ctx, resolvedPath.Cid()) if err != nil { diff --git a/test/sharness/t0123-gateway-json-cbor.sh b/test/sharness/t0123-gateway-json-cbor.sh index 563855ae758..143e158bce0 100755 --- a/test/sharness/t0123-gateway-json-cbor.sh +++ b/test/sharness/t0123-gateway-json-cbor.sh @@ -12,119 +12,130 @@ test_expect_success "Add the test directory" ' mkdir -p rootDir/ipns && mkdir -p rootDir/api && mkdir -p rootDir/ą/ę && + echo "{ 
\"test\": \"i am a plain json file\" }" > rootDir/ą/ę/t.json && echo "I am a txt file on path with utf8" > rootDir/ą/ę/file-źł.txt && echo "I am a txt file in confusing /api dir" > rootDir/api/file.txt && echo "I am a txt file in confusing /ipfs dir" > rootDir/ipfs/file.txt && echo "I am a txt file in confusing /ipns dir" > rootDir/ipns/file.txt && DIR_CID=$(ipfs add -Qr --cid-version 1 rootDir) && + FILE_JSON_CID=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/t.json | jq -r .Hash) && FILE_CID=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Hash) && FILE_SIZE=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Size) echo "$FILE_CID / $FILE_SIZE" ' +## Quick regression check for JSON stored on UnixFS: +## it has nothing to do with DAG-JSON and JSON codecs, +## but a lot of JSON data is stored on UnixFS and is requested with or without various hints +## and we want to avoid surprises like https://github.com/protocol/bifrost-infra/issues/2290 +test_expect_success "GET UnixFS file with JSON bytes is returned with application/json Content-Type" ' + curl -sD headers "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_JSON_CID" > curl_output 2>&1 && + curl -sD headers_accept -H "Accept: application/json" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_JSON_CID" > curl_output_accept 2>&1 && + ipfs cat $FILE_JSON_CID > ipfs_cat_output 2>&1 && + test_should_contain "Content-Type: application/json" headers && + test_should_contain "Content-Type: application/json" headers_accept && + test_cmp ipfs_cat_output curl_output && + test_cmp curl_output curl_output_accept +' + + ## Reading UnixFS (data encoded with dag-pb codec) as DAG-CBOR and DAG-JSON +## (returns representation defined in https://ipld.io/specs/codecs/dag-pb/spec/#logical-format) -test_dag_pb_headers () { +test_dag_pb_conversion () { name=$1 format=$2 disposition=$3 - test_expect_success "GET UnixFS as $name with format=dag-$format has expected Content-Type" ' - curl -sD - 
"http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=dag-$format" > curl_output 2>&1 && - test_should_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output && - test_should_contain "Content-Disposition: ${disposition}\; filename=\"${FILE_CID}.${format}\"" curl_output && - test_should_not_contain "Content-Type: application/$format" curl_output + test_expect_success "GET UnixFS file as $name with format=dag-$format converts to the expected Content-Type" ' + curl -sD headers "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=dag-$format" > curl_output 2>&1 && + ipfs dag get --output-codec dag-$format $FILE_CID > ipfs_dag_get_output 2>&1 && + test_cmp ipfs_dag_get_output curl_output && + test_should_contain "Content-Type: application/vnd.ipld.dag-$format" headers && + test_should_contain "Content-Disposition: ${disposition}\; filename=\"${FILE_CID}.${format}\"" headers && + test_should_not_contain "Content-Type: application/$format" headers + ' + + test_expect_success "GET UnixFS directory as $name with format=dag-$format converts to the expected Content-Type" ' + curl -sD headers "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=dag-$format" > curl_output 2>&1 && + ipfs dag get --output-codec dag-$format $DIR_CID > ipfs_dag_get_output 2>&1 && + test_cmp ipfs_dag_get_output curl_output && + test_should_contain "Content-Type: application/vnd.ipld.dag-$format" headers && + test_should_contain "Content-Disposition: ${disposition}\; filename=\"${DIR_CID}.${format}\"" headers && + test_should_not_contain "Content-Type: application/$format" headers ' - test_expect_success "GET UnixFS as $name with 'Accept: application/vnd.ipld.dag-$format' has expected Content-Type" ' + test_expect_success "GET UnixFS as $name with 'Accept: application/vnd.ipld.dag-$format' converts to the expected Content-Type" ' curl -sD - -H "Accept: application/vnd.ipld.dag-$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 && test_should_contain "Content-Disposition: 
${disposition}\; filename=\"${FILE_CID}.${format}\"" curl_output && test_should_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output && test_should_not_contain "Content-Type: application/$format" curl_output ' - test_expect_success "GET UnixFS as $name with 'Accept: foo, application/vnd.ipld.dag-$format,bar' has expected Content-Type" ' + test_expect_success "GET UnixFS as $name with 'Accept: foo, application/vnd.ipld.dag-$format,bar' converts to the expected Content-Type" ' curl -sD - -H "Accept: foo, application/vnd.ipld.dag-$format,text/plain" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 && test_should_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output ' - test_expect_success "GET UnixFS with format=$format returns raw (no conversion)" ' - curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=$format" > curl_output 2>&1 && - test_should_not_contain "Content-Type: application/$format" curl_output && - test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output - ' - - test_expect_success "GET UnixFS with 'Accept: application/$format' returns raw (no conversion)" ' - curl -sD - -H "Accept: application/$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 && - test_should_not_contain "Content-Type: application/$format" curl_output && - test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output - ' -} - -test_dag_pb_headers "DAG-JSON" "json" "inline" -test_dag_pb_headers "DAG-CBOR" "cbor" "attachment" - -test_dag_pb () { - name=$1 - format=$2 - - test_expect_success "GET UnixFS as $name has expected output for file" ' - curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=dag-$format" > curl_output 2>&1 && - ipfs dag get --output-codec dag-$format $FILE_CID > ipfs_dag_get_output 2>&1 && - test_cmp ipfs_dag_get_output curl_output + test_expect_success "GET UnixFS with format=$format (not dag-$format) is no-op (no conversion)" ' + 
curl -sD headers "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=$format" > curl_output 2>&1 && + ipfs cat $FILE_CID > cat_output && + test_cmp cat_output curl_output && + test_should_contain "Content-Type: text/plain" headers && + test_should_not_contain "Content-Type: application/$format" headers && + test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" headers ' - test_expect_success "GET UnixFS as $name has expected output for directory" ' - curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=dag-$format" > curl_output 2>&1 && - ipfs dag get --output-codec dag-$format $DIR_CID > ipfs_dag_get_output 2>&1 && - test_cmp ipfs_dag_get_output curl_output + test_expect_success "GET UnixFS with 'Accept: application/$format' (not dag-$format) is no-op (no conversion)" ' + curl -sD headers -H "Accept: application/$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output 2>&1 && + ipfs cat $FILE_CID > cat_output && + test_cmp cat_output curl_output && + test_should_contain "Content-Type: text/plain" headers && + test_should_not_contain "Content-Type: application/$format" headers && + test_should_not_contain "Content-Type: application/vnd.ipld.dag-$format" headers ' } -test_dag_pb "DAG-JSON" "json" -test_dag_pb "DAG-CBOR" "cbor" +test_dag_pb_conversion "DAG-JSON" "json" "inline" +test_dag_pb_conversion "DAG-CBOR" "cbor" "attachment" -## Content-Type response based on Accept header and ?format= parameter -test_cmp_dag_get () { +# Requesting CID with plain json (0x0200) and cbor (0x51) codecs +# (note these are not UnixFS, not DAG-* variants, just raw block identified by a CID with a special codec) +test_plain_codec () { name=$1 format=$2 disposition=$3 - test_expect_success "GET $name without Accept or format= has expected Content-Type" ' - CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) && - curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > curl_output 2>&1 && - test_should_contain 
"Content-Disposition: ${disposition}\; filename=\"${CID}.${format}\"" curl_output && - test_should_contain "Content-Type: application/$format" curl_output - ' - - test_expect_success "GET $name without Accept or format= produces correct output" ' - CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) && - curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > curl_output 2>&1 && - ipfs dag get --output-codec $format $CID > ipfs_dag_get_output 2>&1 && - test_cmp ipfs_dag_get_output curl_output - ' - - test_expect_success "GET $name with format=dag-$format produces expected Content-Type" ' - CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) && - curl -sD- "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=dag-$format" > curl_output 2>&1 && - test_should_contain "Content-Disposition: ${disposition}\; filename=\"${CID}.${format}\"" curl_output && - test_should_contain "Content-Type: application/vnd.ipld.dag-$format" curl_output + # no explicit format, just codec in CID + test_expect_success "GET $name without Accept or format= has expected $format Content-Type and body as-is" ' + CID=$(echo "{ \"test\": \"plain json\" }" | ipfs dag put --input-codec json --store-codec $format) && + curl -sD headers "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > curl_output 2>&1 && + ipfs block get $CID > ipfs_block_output 2>&1 && + test_cmp ipfs_block_output curl_output && + test_should_contain "Content-Disposition: ${disposition}\; filename=\"${CID}.${format}\"" headers && + test_should_contain "Content-Type: application/$format" headers ' - test_expect_success "GET $name with format=dag-$format produces correct output" ' - CID=$(echo "{ \"test\": \"json\" }" | ipfs dag put --input-codec json --store-codec $format) && - curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=dag-$format" > curl_output 2>&1 && + # explicit dag-* format passed, attempt to parse as dag* variant + ## Note: this works only for simple JSON 
that can be upgraded to DAG-JSON. + test_expect_success "GET $name with format=dag-$format interprets $format as dag-* variant and produces expected Content-Type and body" ' + CID=$(echo "{ \"test\": \"plain-json-that-can-also-be-dag-json\" }" | ipfs dag put --input-codec json --store-codec $format) && + curl -sD headers "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=dag-$format" > curl_output_param 2>&1 && ipfs dag get --output-codec dag-$format $CID > ipfs_dag_get_output 2>&1 && - test_cmp ipfs_dag_get_output curl_output + test_cmp ipfs_dag_get_output curl_output_param && + test_should_contain "Content-Disposition: ${disposition}\; filename=\"${CID}.${format}\"" headers && + test_should_contain "Content-Type: application/vnd.ipld.dag-$format" headers && + curl -s -H "Accept: application/vnd.ipld.dag-$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > curl_output_accept 2>&1 && + test_cmp curl_output_param curl_output_accept ' -} -test_cmp_dag_get "JSON" "json" "inline" -test_cmp_dag_get "CBOR" "cbor" "attachment" +} +test_plain_codec "plain JSON codec" "json" "inline" +test_plain_codec "plain CBOR codec" "cbor" "attachment" -## Pathing, traversal +## Pathing, traversal over DAG-JSON and DAG-CBOR DAG_CBOR_TRAVERSAL_CID="bafyreibs4utpgbn7uqegmd2goqz4bkyflre2ek2iwv743fhvylwi4zeeim" DAG_JSON_TRAVERSAL_CID="baguqeeram5ujjqrwheyaty3w5gdsmoz6vittchvhk723jjqxk7hakxkd47xq" @@ -165,11 +176,9 @@ test_expect_success "GET DAG-CBOR traverses multiple links" ' test_cmp expected actual ' - -## NATIVE TESTS: +## NATIVE TESTS for DAG-JSON (0x0129) and DAG-CBOR (0x71): ## DAG- regression tests for core behaviors when native DAG-(CBOR|JSON) is requested - test_native_dag () { name=$1 format=$2 @@ -192,10 +201,10 @@ test_native_dag () { test_cmp expected curl_ipfs_dag_param_output ' - test_expect_success "GET $name from /ipfs with format=$format returns the same payload as format=dag-$format" ' + test_expect_success "GET $name from /ipfs for application/$format returns the same payload 
as format=dag-$format" ' curl -sX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=dag-$format" -o expected && - curl -sX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=dag-$format" -o curl_ipfs_dag_param_output && - test_cmp expected curl_ipfs_dag_param_output + curl -sX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=$format" -o plain_output && + test_cmp expected plain_output ' test_expect_success "GET $name from /ipfs with application/vnd.ipld.dag-$format returns the same payload as the raw block" ' @@ -204,6 +213,23 @@ test_native_dag () { test_cmp expected_block curl_ipfs_dag_block_accept_output ' + # Make sure DAG-* can be requested as plain JSON or CBOR and response has plain Content-Type for interop purposes + + test_expect_success "GET $name with format=$format returns same payload as format=dag-$format but with plain Content-Type" ' + curl -s "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=dag-$format" -o expected && + curl -sD plain_headers "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=$format" -o plain_output && + test_should_contain "Content-Type: application/$format" plain_headers && + test_cmp expected plain_output + ' + + test_expect_success "GET $name with Accept: application/$format returns same payload as application/vnd.ipld.dag-$format but with plain Content-Type" ' + curl -s -H "Accept: application/vnd.ipld.dag-$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > expected && + curl -sD plain_headers -H "Accept: application/$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > plain_output && + test_should_contain "Content-Type: application/$format" plain_headers && + test_cmp expected plain_output + ' + + # Make sure expected HTTP headers are returned with the dag- block test_expect_success "GET response for application/vnd.ipld.dag-$format has expected Content-Type" ' From fb616814939e63d200f9c0c15166e688996a25a5 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Fri, 20 Jan 2023 10:30:08 +0100 Subject: [PATCH 7/8] fix: using ?format|Accept 
with CID w/ codec works --- core/corehttp/gateway_handler_codec.go | 18 +++++++++++------- test/sharness/t0123-gateway-json-cbor.sh | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/core/corehttp/gateway_handler_codec.go b/core/corehttp/gateway_handler_codec.go index 417836f356a..93e9593b7b3 100644 --- a/core/corehttp/gateway_handler_codec.go +++ b/core/corehttp/gateway_handler_codec.go @@ -34,9 +34,9 @@ var codecToContentType = map[mc.Code]string{ // contentTypeToRaw maps the HTTP Content Type to the respective codec that // allows raw response without any conversion. -var contentTypeToRaw = map[string]mc.Code{ - "application/json": mc.DagJson, - "application/cbor": mc.DagCbor, +var contentTypeToRaw = map[string][]mc.Code{ + "application/json": {mc.Json, mc.DagJson}, + "application/cbor": {mc.Cbor, mc.DagCbor}, } // contentTypeToCodec maps the HTTP Content Type to the respective codec. We @@ -110,10 +110,14 @@ func (i *gatewayHandler) serveCodec(ctx context.Context, w http.ResponseWriter, // If DAG-JSON or DAG-CBOR was requested using corresponding plain content type // return raw block as-is, without conversion - skipCodec, ok := contentTypeToRaw[requestedContentType] - if ok && skipCodec == cidCodec { - i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime) - return + skipCodecs, ok := contentTypeToRaw[requestedContentType] + if ok { + for _, skipCodec := range skipCodecs { + if skipCodec == cidCodec { + i.serveCodecRaw(ctx, w, r, resolvedPath, contentPath, name, modtime) + return + } + } } // Otherwise, the user has requested a specific content type (a DAG-* variant). 
diff --git a/test/sharness/t0123-gateway-json-cbor.sh b/test/sharness/t0123-gateway-json-cbor.sh index 143e158bce0..704d075f940 100755 --- a/test/sharness/t0123-gateway-json-cbor.sh +++ b/test/sharness/t0123-gateway-json-cbor.sh @@ -117,6 +117,26 @@ test_plain_codec () { test_should_contain "Content-Type: application/$format" headers ' + # explicit format still gives correct output, just codec in CID + test_expect_success "GET $name with ?format= has expected $format Content-Type and body as-is" ' + CID=$(echo "{ \"test\": \"plain json\" }" | ipfs dag put --input-codec json --store-codec $format) && + curl -sD headers "http://127.0.0.1:$GWAY_PORT/ipfs/$CID?format=$format" > curl_output 2>&1 && + ipfs block get $CID > ipfs_block_output 2>&1 && + test_cmp ipfs_block_output curl_output && + test_should_contain "Content-Disposition: ${disposition}\; filename=\"${CID}.${format}\"" headers && + test_should_contain "Content-Type: application/$format" headers + ' + + # explicit format still gives correct output, just codec in CID + test_expect_success "GET $name with Accept has expected $format Content-Type and body as-is" ' + CID=$(echo "{ \"test\": \"plain json\" }" | ipfs dag put --input-codec json --store-codec $format) && + curl -sD headers -H "Accept: application/$format" "http://127.0.0.1:$GWAY_PORT/ipfs/$CID" > curl_output 2>&1 && + ipfs block get $CID > ipfs_block_output 2>&1 && + test_cmp ipfs_block_output curl_output && + test_should_contain "Content-Disposition: ${disposition}\; filename=\"${CID}.${format}\"" headers && + test_should_contain "Content-Type: application/$format" headers + ' + # explicit dag-* format passed, attempt to parse as dag* variant ## Note: this works only for simple JSON that can be upgraded to DAG-JSON. 
test_expect_success "GET $name with format=dag-$format interprets $format as dag-* variant and produces expected Content-Type and body" ' From 8dadb9a24c57386eaf2b50f667d8eba5a9c73518 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Sat, 21 Jan 2023 03:49:24 +0100 Subject: [PATCH 8/8] docs(changelog): cbor and json on gateway --- docs/changelogs/v0.18.md | 79 ++++++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 11 deletions(-) diff --git a/docs/changelogs/v0.18.md b/docs/changelogs/v0.18.md index cca07ecfbd0..d10f56d7704 100644 --- a/docs/changelogs/v0.18.md +++ b/docs/changelogs/v0.18.md @@ -64,19 +64,74 @@ Learn more in the [`Reprovider` config](https://github.com/ipfs/go-ipfs/blob/mas ##### (DAG-)JSON and (DAG-)CBOR response formats -Implemented [IPIP-328](https://github.com/ipfs/specs/pull/328) which adds support -for DAG-JSON and DAG-CBOR, as well as their non-DAG variants, to the gateway. Now, -CIDs that encode JSON, CBOR, DAG-JSON and DAG-CBOR objects can be retrieved, and -traversed thanks to the [special meaning of CBOR Tag 42](https://github.com/ipld/cid-cbor/). +The IPFS project has reserved the corresponding media types at IANA: +- [`application/vnd.ipld.dag-json`](https://www.iana.org/assignments/media-types/application/vnd.ipld.dag-json) +- [`application/vnd.ipld.dag-cbor`](https://www.iana.org/assignments/media-types/application/vnd.ipld.dag-cbor) -HTTP clients can request JSON, CBOR, DAG-JSON, and DAG-CBOR responses by either -passing the query parameter `?format` or setting the `Accept` HTTP header to the -following values: +This release implements them as part of [IPIP-328](https://github.com/ipfs/specs/pull/328) +and adds Gateway support for CIDs with `json` (0x0200), `cbor` (0x51), +[`dag-json`](https://ipld.io/specs/codecs/dag-json/) (0x0129) +and [`dag-cbor`](https://ipld.io/specs/codecs/dag-cbor/spec/) (0x71) codecs. 
-- JSON: `?format=json`, or `Accept: application/json` -- CBOR: `?format=cbor`, or `Accept: application/cbor` -- DAG-JSON: `?format=dag-json`, or `Accept: application/vnd.ipld.dag-json` -- DAG-JSON: `?format=dag-cbor`, or `Accept: application/vnd.ipld.dag-cbor` +To specify the response `Content-Type` explicitly, the HTTP client can override +the codec present in the CID by using the `format` parameter +or setting the `Accept` HTTP header: + +- Plain JSON: `?format=json` or `Accept: application/json` +- Plain CBOR: `?format=cbor` or `Accept: application/cbor` +- DAG-JSON: `?format=dag-json` or `Accept: application/vnd.ipld.dag-json` +- DAG-CBOR: `?format=dag-cbor` or `Accept: application/vnd.ipld.dag-cbor` + +In addition, when DAG-JSON or DAG-CBOR is requested with the `Accept` header +set to `text/html`, the Gateway will return a basic HTML page with download +options, improving the user experience in web browsers. + +###### Example 1: DAG-CBOR and DAG-JSON Conversion on Gateway + +The Gateway supports conversion between DAG-CBOR and DAG-JSON for efficient +end-to-end data structure management: author in CBOR or JSON, store as binary +CBOR and retrieve as JSON via HTTP: + +```console +$ echo '{"test": "json"}' | ipfs dag put # implicit --input-codec dag-json --store-codec dag-cbor +bafyreico7mjtqtqhvawro3yud5uqn6sc33nzqb7b5j2d7pdmzer5nab4t4 + +$ ipfs block get bafyreico7mjtqtqhvawro3yud5uqn6sc33nzqb7b5j2d7pdmzer5nab4t4 | xxd +00000000: a164 7465 7374 646a 736f 6e .dtestdjson + +$ ipfs dag get bafyreico7mjtqtqhvawro3yud5uqn6sc33nzqb7b5j2d7pdmzer5nab4t4 # implicit --output-codec dag-json +{"test":"json"} + +$ curl "http://127.0.0.1:8080/ipfs/bafyreico7mjtqtqhvawro3yud5uqn6sc33nzqb7b5j2d7pdmzer5nab4t4?format=dag-json" +{"test":"json"} +``` + +###### Example 2: Traversing CBOR DAGs + +Placing a CID in [CBOR Tag 42](https://github.com/ipld/cid-cbor/) enables the +creation of arbitrary DAGs. 
The equivalent DAG-JSON notation for linking +to different blocks is represented by `{ "/": "cid" }`. + +The Gateway supports traversing these links, enabling access to data +referenced by structures other than regular UnixFS directories: + +```console +$ echo '{"test.jpg": {"/": "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi"}}' | ipfs dag put +bafyreihspwy3zlkzgphmec5d3xb5g5njrqwotd46lyubnelbzktnmsxkq4 # dag-cbor document linking to unixfs file + +$ ipfs resolve /ipfs/bafyreihspwy3zlkzgphmec5d3xb5g5njrqwotd46lyubnelbzktnmsxkq4/test.jpg +/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi + +$ ipfs dag stat bafyreihspwy3zlkzgphmec5d3xb5g5njrqwotd46lyubnelbzktnmsxkq4 +Size: 119827, NumBlocks: 2 + +$ curl "http://127.0.0.1:8080/ipfs/bafyreihspwy3zlkzgphmec5d3xb5g5njrqwotd46lyubnelbzktnmsxkq4/test.jpg" > test.jpg +``` + +###### Example 3: UnixFS directory listing as JSON + +Finally, Gateway now supports the same [logical format projection](https://ipld.io/specs/codecs/dag-pb/spec/#logical-format) from +DAG-PB to DAG-JSON as the `ipfs dag get` command, enabling the retrieval of directory listings as JSON instead of HTML: ```console $ export DIR_CID=bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq @@ -112,6 +167,8 @@ $ curl "http://127.0.0.1:8080/ipfs/$DIR_CID?format=dag-json" | jq } ] } +$ ipfs dag get $DIR_CID +{"Data":{"/":{"bytes":"CAE"}},"Links":[{"Hash":{"/":"Qmc3zqKcwzbbvw3MQm3hXdg8BQoFjGdZiGdAfXAyAGGdLi"},"Name":"1 - Barrel - Part 1 - alt.txt","Tsize":21},{"Hash":{"/":"QmdMxMx29KVYhHnaCc1icWYxQqXwUNCae6t1wS2NqruiHd"},"Name":"1 - Barrel - Part 1 - transcript.txt","Tsize":195},{"Hash":{"/":"QmawceGscqN4o8Y8Fv26UUmB454kn2bnkXV5tEQYc4jBd6"},"Name":"1 - Barrel - Part 1.png","Tsize":24862}]} ``` ##### 🐎 Fast directory listings with DAG sizes