From e7faa5a7a4943c6e708a6f0ee11a73cb690926bd Mon Sep 17 00:00:00 2001
From: Philip Conrad
Date: Thu, 1 Aug 2024 15:22:51 -0400
Subject: [PATCH] util+server: Fix bug around chunked request handling. (#6906)

This commit fixes a request handling bug introduced in #6868, which caused
OPA to treat all incoming chunked requests as if they had zero-length
request bodies.

The fix detects cases where the request body size is unknown in the
DecodingLimits handler, and propagates a request context key down to the
`util.ReadMaybeCompressedBody` function, allowing it to correctly select
between the original `io.ReadAll` style for chunked requests and the newer
preallocated-buffer approach for requests of known size.

This change has a small, barely visible performance impact for large
requests (<5% increase in GC pauses for a 1GB request JSON blob), and
minimal, if any, effect on RPS under load.

Fixes: #6904

Signed-off-by: Philip Conrad

(cherry picked from commit ee9ab0bf758553cfe416ed3083e0c86feafc12d3)
---
 CHANGELOG.md                |  6 ++++
 server/handlers/decoding.go | 15 +++++++++-
 server/server_test.go       | 56 +++++++++++++++++++++++++++++++++----
 util/decoding/context.go    | 10 +++++++
 util/read_gzip_body.go      | 25 ++++++++++++-----
 5 files changed, 98 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 497d7b32a9..f14ca27bb5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,12 @@
 All notable changes to this project will be documented in this file. This
 project adheres to [Semantic Versioning](http://semver.org/).
 
+## Unreleased
+
+### Fixes
+
+- util+server: Fix bug around chunked request handling. This fixes a request handling bug introduced in [#6868](https://github.com/open-policy-agent/opa/pull/6868), which caused OPA to treat all incoming chunked requests as if they had zero-length request bodies. ([#6906](https://github.com/open-policy-agent/opa/pull/6906)) authored by @philipaconrad
+
 ## 0.67.0
 
 This release contains a mix of features, a new builtin function (`strings.count`), performance improvements, and bugfixes.
diff --git a/server/handlers/decoding.go b/server/handlers/decoding.go
index 696d111c1c..412d5975ea 100644
--- a/server/handlers/decoding.go
+++ b/server/handlers/decoding.go
@@ -20,12 +20,25 @@ import (
 func DecodingLimitsHandler(handler http.Handler, maxLength, gzipMaxLength int64) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		// Reject too-large requests before doing any further processing.
-		// Note(philipc): This likely does nothing in the case of "chunked"
+		// Note(philipc): This does nothing in the case of "chunked"
 		// requests, since those should report a ContentLength of -1.
 		if r.ContentLength > maxLength {
 			writer.Error(w, http.StatusBadRequest, types.NewErrorV1(types.CodeInvalidParameter, types.MsgDecodingLimitError))
 			return
 		}
+		// For requests where the full size is not known in advance (such as
+		// chunked requests), pass server.decoding.max_length down, using the
+		// request context.
+
+		// Note(philipc): Unknown request body size is signaled to the server
+		// handler by net/http setting the Request.ContentLength field to -1. We
+		// don't check for the `Transfer-Encoding: chunked` header explicitly,
+		// because net/http will strip it out from requests automatically.
+		// Ref: https://pkg.go.dev/net/http#Request
+		if r.ContentLength < 0 {
+			ctx := util_decoding.AddServerDecodingMaxLen(r.Context(), maxLength)
+			r = r.WithContext(ctx)
+		}
 		// Pass server.decoding.gzip.max_length down, using the request context.
 		if strings.Contains(r.Header.Get("Content-Encoding"), "gzip") {
 			ctx := util_decoding.AddServerDecodingGzipMaxLen(r.Context(), gzipMaxLength)
diff --git a/server/server_test.go b/server/server_test.go
index 2ba52ee2d0..257e29130c 100644
--- a/server/server_test.go
+++ b/server/server_test.go
@@ -1713,8 +1713,10 @@ func generateJSONBenchmarkData(k, v int) map[string]interface{} {
 	}
 
 	return map[string]interface{}{
-		"keys":   keys,
-		"values": values,
+		"input": map[string]interface{}{
+			"keys":   keys,
+			"values": values,
+		},
 	}
 }
@@ -1832,10 +1834,12 @@ func TestDataPostV1CompressedDecodingLimits(t *testing.T) {
 	tests := []struct {
 		note                  string
 		wantGzip              bool
+		wantChunkedEncoding   bool
 		payload               []byte
 		forceContentLen       int64  // Size to manually set the Content-Length header to.
 		forcePayloadSizeField uint32 // Size to manually set the payload field for the gzip blob.
 		expRespHTTPStatus     int
+		expWarningMsg         string
 		expErrorMsg           string
 		maxLen                int64
 		gzipMaxLen            int64
@@ -1844,30 +1848,44 @@
 		{
 			note:              "empty message",
 			payload:           []byte{},
 			expRespHTTPStatus: 200,
+			expWarningMsg:     "'input' key missing from the request",
 		},
 		{
 			note:              "empty message, gzip",
 			wantGzip:          true,
 			payload:           mustGZIPPayload([]byte{}),
 			expRespHTTPStatus: 200,
+			expWarningMsg:     "'input' key missing from the request",
 		},
 		{
 			note:              "empty message, malicious Content-Length",
 			payload:           []byte{},
 			forceContentLen:   2048, // Server should ignore this header entirely.
-			expRespHTTPStatus: 200,
+			expRespHTTPStatus: 400,
+			expErrorMsg:       "request body too large",
 		},
 		{
 			note:              "empty message, gzip, malicious Content-Length",
 			wantGzip:          true,
 			payload:           mustGZIPPayload([]byte{}),
 			forceContentLen:   2048, // Server should ignore this header entirely.
-			expRespHTTPStatus: 200,
+			expRespHTTPStatus: 400,
+			expErrorMsg:       "request body too large",
 		},
 		{
 			note:                  "basic - malicious size field, expect reject on gzip payload length",
 			wantGzip:              true,
-			payload:               mustGZIPPayload([]byte(`{"user": "alice"}`)),
+			payload:               mustGZIPPayload([]byte(`{"input": {"user": "alice"}}`)),
+			expRespHTTPStatus:     400,
+			forcePayloadSizeField: 134217728, // 128 MB
+			expErrorMsg:           "gzip payload too large",
+			gzipMaxLen:            1024,
+		},
+		{
+			note:                  "basic - malicious size field, expect reject on gzip payload length, chunked encoding",
+			wantGzip:              true,
+			wantChunkedEncoding:   true,
+			payload:               mustGZIPPayload([]byte(`{"input": {"user": "alice"}}`)),
 			expRespHTTPStatus:     400,
 			forcePayloadSizeField: 134217728, // 128 MB
 			expErrorMsg:           "gzip payload too large",
@@ -1886,6 +1904,13 @@
 			maxLen:            512,
 			expErrorMsg:       "request body too large",
 		},
+		{
+			note:                "basic, large payload, chunked encoding (no Content-Length to reject on)",
+			wantChunkedEncoding: true,
+			payload:             util.MustMarshalJSON(generateJSONBenchmarkData(100, 100)),
+			expRespHTTPStatus:   200,
+			maxLen:              134217728,
+		},
 		{
 			note:              "basic, gzip, large payload",
 			wantGzip:          true,
@@ -1957,13 +1982,19 @@ allow if {
 			if test.wantGzip {
 				req.Header.Set("Content-Encoding", "gzip")
 			}
+			if test.wantChunkedEncoding {
+				req.ContentLength = -1
+				req.TransferEncoding = []string{"chunked"}
+				req.Header.Set("Transfer-Encoding", "chunked")
+			}
 			if test.forceContentLen > 0 {
+				req.ContentLength = test.forceContentLen
 				req.Header.Set("Content-Length", strconv.FormatInt(test.forceContentLen, 10))
 			}
 			f.reset()
 			f.server.Handler.ServeHTTP(f.recorder, req)
 			if f.recorder.Code != test.expRespHTTPStatus {
-				t.Fatalf("Unexpected HTTP status code, (exp,got): %d, %d", test.expRespHTTPStatus, f.recorder.Code)
+				t.Fatalf("Unexpected HTTP status code, (exp,got): %d, %d, response body: %s", test.expRespHTTPStatus, f.recorder.Code, f.recorder.Body.Bytes())
 			}
 			if test.expErrorMsg != "" {
 				var serverErr types.ErrorV1
@@ -1973,6 +2004,19 @@ allow if {
 				if !strings.Contains(serverErr.Message, test.expErrorMsg) {
 					t.Fatalf("Expected error message to have message '%s', got message: '%s'", test.expErrorMsg, serverErr.Message)
 				}
+			} else {
+				var resp types.DataResponseV1
+				if err := json.Unmarshal(f.recorder.Body.Bytes(), &resp); err != nil {
+					t.Fatalf("Could not deserialize response: %s, message was: %s", err.Error(), f.recorder.Body.Bytes())
+				}
+				if test.expWarningMsg != "" {
+					if !strings.Contains(resp.Warning.Message, test.expWarningMsg) {
+						t.Fatalf("Expected warning message to have message '%s', got message: '%s'", test.expWarningMsg, resp.Warning.Message)
+					}
+				} else if resp.Warning != nil {
+					// Error on unexpected warnings. Something is wrong.
+					t.Fatalf("Unexpected warning: code: %s, message: %s", resp.Warning.Code, resp.Warning.Message)
+				}
 			}
 		})
 	}
diff --git a/util/decoding/context.go b/util/decoding/context.go
index cba5e40ed5..a817680f17 100644
--- a/util/decoding/context.go
+++ b/util/decoding/context.go
@@ -11,10 +11,20 @@ const (
 	reqCtxKeyGzipMaxLen = requestContextKey("server-decoding-plugin-context-gzip-max-length")
 )
 
+func AddServerDecodingMaxLen(ctx context.Context, maxLen int64) context.Context {
+	return context.WithValue(ctx, reqCtxKeyMaxLen, maxLen)
+}
+
 func AddServerDecodingGzipMaxLen(ctx context.Context, maxLen int64) context.Context {
 	return context.WithValue(ctx, reqCtxKeyGzipMaxLen, maxLen)
 }
 
+// Used for enforcing max body content limits when dealing with chunked requests.
+func GetServerDecodingMaxLen(ctx context.Context) (int64, bool) {
+	maxLength, ok := ctx.Value(reqCtxKeyMaxLen).(int64)
+	return maxLength, ok
+}
+
 func GetServerDecodingGzipMaxLen(ctx context.Context) (int64, bool) {
 	gzipMaxLength, ok := ctx.Value(reqCtxKeyGzipMaxLen).(int64)
 	return gzipMaxLength, ok
diff --git a/util/read_gzip_body.go b/util/read_gzip_body.go
index c6a1098a44..217638b363 100644
--- a/util/read_gzip_body.go
+++ b/util/read_gzip_body.go
@@ -27,13 +27,24 @@ var gzipReaderPool = sync.Pool{
 // payload size, but not an unbounded amount of memory, as was potentially
 // possible before.
 func ReadMaybeCompressedBody(r *http.Request) ([]byte, error) {
-	if r.ContentLength <= 0 {
-		return []byte{}, nil
-	}
-	// Read content from the request body into a buffer of known size.
-	content := bytes.NewBuffer(make([]byte, 0, r.ContentLength))
-	if _, err := io.CopyN(content, r.Body, r.ContentLength); err != nil {
-		return content.Bytes(), err
+	var content *bytes.Buffer
+	// Note(philipc): If the request body is of unknown length (such as what
+	// happens when 'Transfer-Encoding: chunked' is set), we have to do an
+	// incremental read of the body. In this case, we can't be too clever, we
+	// just do the best we can with whatever is streamed over to us.
+	// Fetch the max request body size limit from the request context.
+	if maxLength, ok := decoding.GetServerDecodingMaxLen(r.Context()); ok {
+		bs, err := io.ReadAll(io.LimitReader(r.Body, maxLength))
+		if err != nil {
+			return bs, err
+		}
+		content = bytes.NewBuffer(bs)
+	} else {
+		// Read content from the request body into a buffer of known size.
+		content = bytes.NewBuffer(make([]byte, 0, r.ContentLength))
+		if _, err := io.CopyN(content, r.Body, r.ContentLength); err != nil {
+			return content.Bytes(), err
+		}
 	}
 	// Decompress gzip content by reading from the buffer.
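
The control flow above spans three files, so here is a condensed,
self-contained sketch of the same pattern. All names in it (ctxKey,
limitMiddleware, readBody) are illustrative stand-ins, not OPA's actual
identifiers: the middleware stores the configured limit in the request
context only when the body length is unknown (ContentLength == -1), and
the body reader branches on that key.

    package main

    import (
    	"bytes"
    	"context"
    	"fmt"
    	"io"
    	"net/http"
    	"net/http/httptest"
    	"strings"
    )

    type ctxKey string

    const keyMaxLen = ctxKey("decoding-max-length")

    // limitMiddleware mirrors DecodingLimitsHandler: requests with a known,
    // too-large size are rejected up front; requests of unknown size
    // (chunked) get the limit stashed in their context instead, so it can
    // be enforced during the read.
    func limitMiddleware(next http.Handler, maxLen int64) http.Handler {
    	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
    		if r.ContentLength > maxLen {
    			http.Error(w, "request body too large", http.StatusBadRequest)
    			return
    		}
    		if r.ContentLength < 0 { // chunked: size unknown up front
    			r = r.WithContext(context.WithValue(r.Context(), keyMaxLen, maxLen))
    		}
    		next.ServeHTTP(w, r)
    	})
    }

    // readBody mirrors the branch in ReadMaybeCompressedBody: a limited,
    // incremental read for chunked bodies (anything past the limit is
    // truncated and fails later, at parse time), and a single preallocated
    // buffer for bodies of known size.
    func readBody(r *http.Request) ([]byte, error) {
    	if maxLen, ok := r.Context().Value(keyMaxLen).(int64); ok {
    		return io.ReadAll(io.LimitReader(r.Body, maxLen))
    	}
    	buf := bytes.NewBuffer(make([]byte, 0, r.ContentLength))
    	if _, err := io.CopyN(buf, r.Body, r.ContentLength); err != nil {
    		return buf.Bytes(), err
    	}
    	return buf.Bytes(), nil
    }

    func main() {
    	h := limitMiddleware(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
    		body, err := readBody(r)
    		if err != nil {
    			http.Error(w, err.Error(), http.StatusBadRequest)
    			return
    		}
    		fmt.Fprintf(w, "read %d bytes", len(body))
    	}), 1024)

    	// Simulate a chunked request the same way the test above does:
    	// ContentLength -1 plus a chunked TransferEncoding.
    	req := httptest.NewRequest(http.MethodPost, "/", strings.NewReader(`{"input": {"user": "alice"}}`))
    	req.ContentLength = -1
    	req.TransferEncoding = []string{"chunked"}
    	rec := httptest.NewRecorder()
    	h.ServeHTTP(rec, req)
    	fmt.Println(rec.Body.String()) // read 28 bytes
    }

Before this patch the unknown-length branch did not exist at all:
ReadMaybeCompressedBody returned an empty slice whenever ContentLength
was <= 0, which is exactly how chunked requests came to be read as
zero-length bodies.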
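
To reproduce the original report (#6904) against a live server, a client
only needs a request body whose length net/http cannot determine. The
snippet below is a hypothetical driver, not part of the patch: the URL
assumes a locally running OPA on the default port, and wrapping the
reader in an anonymous struct hides its concrete type, so the transport
sends the request with Transfer-Encoding: chunked and no Content-Length.

    package main

    import (
    	"fmt"
    	"io"
    	"net/http"
    	"os"
    	"strings"
    )

    func main() {
    	// net/http can only infer the length of *bytes.Buffer, *bytes.Reader,
    	// and *strings.Reader bodies; any other reader type is sent chunked.
    	body := struct{ io.Reader }{strings.NewReader(`{"input": {"user": "alice"}}`)}

    	req, err := http.NewRequest(http.MethodPost, "http://localhost:8181/v1/data", body)
    	if err != nil {
    		fmt.Fprintln(os.Stderr, err)
    		os.Exit(1)
    	}
    	req.Header.Set("Content-Type", "application/json")

    	resp, err := http.DefaultClient.Do(req)
    	if err != nil {
    		fmt.Fprintln(os.Stderr, err)
    		os.Exit(1)
    	}
    	defer resp.Body.Close()

    	out, _ := io.ReadAll(resp.Body)
    	// Before the fix, OPA read this chunked body as empty and returned a
    	// warning that the 'input' key was missing; with the fix it sees the
    	// full input document.
    	fmt.Println(resp.Status, string(out))
    }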