From daeab612b339813ae93f2ce1832ce98f871275bb Mon Sep 17 00:00:00 2001 From: Ashutosh Narkar Date: Thu, 2 Feb 2023 14:19:44 -0800 Subject: [PATCH] topdown: http.send to cache responses based on status code Currently http.send caches all responses. Now if the response had a status code of `500`(Internal Server Error ) for example, it's possible OPA will return a reponse from the cache for the next query. This can have unintened consequences as OPA will keep serving the cached response till it's fresh while at the same time it's possible the server has a proper response available. To avoid such as scenario this change updates the caching behavior to take into account the status code of the HTTP response before inserting a value into the cache. The list of status codes that can be cached is per https://www.rfc-editor.org/rfc/rfc7231#section-6.1. Fixes: #5617 Signed-off-by: Ashutosh Narkar --- docs/content/policy-reference.md | 7 ++- topdown/http.go | 30 ++++++++- topdown/http_test.go | 102 +++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 3 deletions(-) diff --git a/docs/content/policy-reference.md b/docs/content/policy-reference.md index 6dd62b9ae5..7b778b39c9 100644 --- a/docs/content/policy-reference.md +++ b/docs/content/policy-reference.md @@ -902,7 +902,8 @@ instead of halting evaluation, if `http.send` encounters an error, it can return set to `0` and `error` describing the actual error. This can be activated by setting the `raise_error` field in the `request` object to `false`. -If the `cache` field in the `request` object is `true`, `http.send` will return a cached response after it checks its freshness and validity. +If the `cache` field in the `request` object is `true`, `http.send` will return a cached response after it checks its +freshness and validity. `http.send` uses the `Cache-Control` and `Expires` response headers to check the freshness of the cached response. Specifically if the [max-age](https://tools.ietf.org/html/rfc7234#section-5.2.2.8) `Cache-Control` directive is set, `http.send` @@ -919,6 +920,10 @@ conjunction with the `force_cache_duration_seconds` field. If `force_cache` is ` Also, if `force_cache` is `true`, it overrides the `cache` field. +`http.send` only caches responses with the following HTTP status codes: `200`, `203`, `204`, `206`, `300`, `301`, +`404`, `405`, `410`, `414`, and `501`. This is behavior is as per https://www.rfc-editor.org/rfc/rfc7231#section-6.1 and +is enforced when caching responses within a single query or across queries via the `cache` and `force_cache` request fields. + {{< info >}} `http.send` uses the `Date` response header to calculate the current age of the response by comparing it with the current time. This value is used to determine the freshness of the cached response. As per https://tools.ietf.org/html/rfc7231#section-7.1.1.2, diff --git a/topdown/http.go b/topdown/http.go index 11798bc39e..48abbcf7d3 100644 --- a/topdown/http.go +++ b/topdown/http.go @@ -69,8 +69,24 @@ var allowedKeyNames = [...]string{ "caching_mode", } +// ref: https://www.rfc-editor.org/rfc/rfc7231#section-6.1 +var cacheableHTTPStatusCodes = [...]int{ + http.StatusOK, + http.StatusNonAuthoritativeInfo, + http.StatusNoContent, + http.StatusPartialContent, + http.StatusMultipleChoices, + http.StatusMovedPermanently, + http.StatusNotFound, + http.StatusMethodNotAllowed, + http.StatusGone, + http.StatusRequestURITooLong, + http.StatusNotImplemented, +} + var ( allowedKeys = ast.NewSet() + cacheableCodes = ast.NewSet() requiredKeys = ast.NewSet(ast.StringTerm("method"), ast.StringTerm("url")) httpSendLatencyMetricKey = "rego_builtin_" + strings.ReplaceAll(ast.HTTPSend.Name, ".", "_") httpSendInterQueryCacheHits = httpSendLatencyMetricKey + "_interquery_cache_hits" @@ -162,6 +178,7 @@ func getHTTPResponse(bctx BuiltinContext, req ast.Object) (*ast.Term, error) { func init() { createAllowedKeys() + createCacheableHTTPStatusCodes() initDefaults() RegisterBuiltinFunc(ast.HTTPSend.Name, builtinHTTPSend) } @@ -779,7 +796,7 @@ func (c *interQueryCache) checkHTTPSendInterQueryCache() (ast.Value, error) { // insertIntoHTTPSendInterQueryCache inserts given key and value in the inter-query cache func insertIntoHTTPSendInterQueryCache(bctx BuiltinContext, key ast.Value, resp *http.Response, respBody []byte, cacheParams *forceCacheParams) error { - if resp == nil || (!forceCaching(cacheParams) && !canStore(resp.Header)) { + if resp == nil || (!forceCaching(cacheParams) && !canStore(resp.Header)) || !cacheableCodes.Contains(ast.IntNumberTerm(resp.StatusCode)) { return nil } @@ -817,6 +834,12 @@ func createAllowedKeys() { } } +func createCacheableHTTPStatusCodes() { + for _, element := range cacheableHTTPStatusCodes { + cacheableCodes.Add(ast.IntNumberTerm(element)) + } +} + func parseTimeout(timeoutVal ast.Value) (time.Duration, error) { var timeout time.Duration switch t := timeoutVal.(type) { @@ -1321,7 +1344,10 @@ func (c *intraQueryCache) InsertIntoCache(value *http.Response) (ast.Value, erro return nil, handleHTTPSendErr(c.bctx, err) } - insertIntoHTTPSendCache(c.bctx, c.key, result) + if cacheableCodes.Contains(ast.IntNumberTerm(value.StatusCode)) { + insertIntoHTTPSendCache(c.bctx, c.key, result) + } + return result, nil } diff --git a/topdown/http_test.go b/topdown/http_test.go index dc364d5607..0c39660a51 100644 --- a/topdown/http_test.go +++ b/topdown/http_test.go @@ -2626,6 +2626,108 @@ func TestCertSelectionLogic(t *testing.T) { } } +func TestHTTPSendCacheDefaultStatusCodesIntraQueryCache(t *testing.T) { + + // run test server + var requests []*http.Request + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + requests = append(requests, r) + if len(requests)%2 == 0 { + headers := w.Header() + headers["Cache-Control"] = []string{"max-age=290304000, public"} + w.WriteHeader(http.StatusOK) + } else { + w.WriteHeader(http.StatusInternalServerError) + } + })) + + defer ts.Close() + + t.Run("non-cacheable status code: intra-query cache", func(t *testing.T) { + base := fmt.Sprintf(`http.send({"method": "get", "url": %q, "cache": true})`, ts.URL) + query := fmt.Sprintf("%v;%v;%v", base, base, base) + + q := NewQuery(ast.MustParseBody(query)) + + // Execute three http.send calls within a query. + // Since the server returns a http.StatusInternalServerError on the first request, this should NOT be cached as + // http.StatusInternalServerError is not a cacheable status code. The second request should result in OPA reaching + // out to the server again and getting a http.StatusOK response status code. + // The third request should now be served from the cache. + + _, err := q.Run(context.Background()) + if err != nil { + t.Fatal(err) + } + + expectedReqCount := 2 + if len(requests) != expectedReqCount { + t.Fatalf("Expected to get %d requests, got %d", expectedReqCount, len(requests)) + } + }) +} + +func TestHTTPSendCacheDefaultStatusCodesInterQueryCache(t *testing.T) { + + // run test server + var requests []*http.Request + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + requests = append(requests, r) + if len(requests)%2 == 0 { + headers := w.Header() + headers["Cache-Control"] = []string{"max-age=290304000, public"} + w.WriteHeader(http.StatusOK) + } else { + w.WriteHeader(http.StatusInternalServerError) + } + })) + + defer ts.Close() + + t.Run("non-cacheable status code: inter-query cache", func(t *testing.T) { + + // add an inter-query cache + config, _ := iCache.ParseCachingConfig(nil) + interQueryCache := iCache.NewInterQueryCache(config) + + m := metrics.New() + + q := NewQuery(ast.MustParseBody(fmt.Sprintf(`http.send({"method": "get", "url": %q, "cache": true})`, ts.URL))). + WithMetrics(m).WithInterQueryBuiltinCache(interQueryCache) + + // Execute three queries. + // Since the server returns a http.StatusInternalServerError on the first request, this should NOT be cached as + // http.StatusInternalServerError is not a cacheable status code. The second request should result in OPA reaching + // out to the server again and getting a http.StatusOK response status code. + // The third request should now be served from the cache. + + _, err := q.Run(context.Background()) + if err != nil { + t.Fatal(err) + } + + _, err = q.Run(context.Background()) + if err != nil { + t.Fatal(err) + } + + _, err = q.Run(context.Background()) + if err != nil { + t.Fatal(err) + } + + expectedReqCount := 2 + if len(requests) != expectedReqCount { + t.Fatalf("Expected to get %d requests, got %d", expectedReqCount, len(requests)) + } + + // verify http.send inter-query cache hit metric is incremented due to the third request. + if exp, act := uint64(1), m.Counter(httpSendInterQueryCacheHits).Value(); exp != act { + t.Fatalf("expected %d cache hits, got %d", exp, act) + } + }) +} + func TestHTTPSendMetrics(t *testing.T) { // run test server