From 19618c1e66201a6f154ba2a38e58d8e20a99a3c2 Mon Sep 17 00:00:00 2001 From: kisunji Date: Mon, 14 Mar 2022 14:06:42 -0400 Subject: [PATCH 1/3] ca: fix a bug that caused a non blocking leaf cert query after a blocking leaf cert query to block Fixes #12048 Fixes #12319 Regression introduced in #11693 --- .changelog/12820.txt | 3 ++ agent/agent_endpoint_test.go | 97 ++++++++++++++++++++++++++++++++++++ agent/cache/cache.go | 7 +++ 3 files changed, 107 insertions(+) create mode 100644 .changelog/12820.txt diff --git a/.changelog/12820.txt b/.changelog/12820.txt new file mode 100644 index 000000000000..af5533b77de2 --- /dev/null +++ b/.changelog/12820.txt @@ -0,0 +1,3 @@ +```release-note:bug +ca: fix a bug that caused a non blocking leaf cert query after a blocking leaf cert query to block +``` diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index 103243497046..d8b6ef8b3fbe 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -6202,6 +6202,103 @@ func TestAgentConnectCALeafCert_goodNotLocal(t *testing.T) { } } +func TestAgentConnectCALeafCert_nonBlockingQuery_after_blockingQuery_shouldNotBlock(t *testing.T) { + // see: https://github.com/hashicorp/consul/issues/12048 + + runStep := func(t *testing.T, name string, fn func(t *testing.T)) { + t.Helper() + if !t.Run(name, fn) { + t.FailNow() + } + } + + if testing.Short() { + t.Skip("too slow for testing.Short") + } + + t.Parallel() + + a := NewTestAgent(t, "") + testrpc.WaitForTestAgent(t, a.RPC, "dc1") + testrpc.WaitForActiveCARoot(t, a.RPC, "dc1", nil) + + { + // Register a local service + args := &structs.ServiceDefinition{ + ID: "foo", + Name: "test", + Address: "127.0.0.1", + Port: 8000, + Check: structs.CheckType{ + TTL: 15 * time.Second, + }, + } + req, err := http.NewRequest("PUT", "/v1/agent/service/register", jsonReader(args)) + require.NoError(t, err) + resp := httptest.NewRecorder() + a.srv.h.ServeHTTP(resp, req) + if !assert.Equal(t, 200, resp.Code) { + t.Log("Body: ", resp.Body.String()) + } + } + + var ( + serialNumber string + index string + issued structs.IssuedCert + ) + runStep(t, "do initial non-blocking query", func(t *testing.T) { + req, err := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil) + require.NoError(t, err) + resp := httptest.NewRecorder() + a.srv.h.ServeHTTP(resp, req) + + dec := json.NewDecoder(resp.Body) + require.NoError(t, dec.Decode(&issued)) + serialNumber = issued.SerialNumber + + require.Equal(t, "MISS", resp.Header().Get("X-Cache"), + "for the leaf cert cache type these are always MISS") + index = resp.Header().Get("X-Consul-Index") + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go func() { + // launch goroutine for blocking query + resp := httptest.NewRecorder() + req, err := http.NewRequestWithContext(ctx, "GET", "/v1/agent/connect/ca/leaf/test?index="+index, nil) + if err != nil { + t.Errorf("error: %v", err) + return + } + a.srv.h.ServeHTTP(resp, req) + }() + + // We just need to ensure that the above blocking query is in-flight before + // the next step, so do a little sleep. + time.Sleep(50 * time.Millisecond) + + runStep(t, "do a non-blocking query that should not block", func(t *testing.T) { + req, err := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil) + require.NoError(t, err) + resp := httptest.NewRecorder() + a.srv.h.ServeHTTP(resp, req) + + var issued2 structs.IssuedCert + dec := json.NewDecoder(resp.Body) + require.NoError(t, dec.Decode(&issued2)) + + require.Equal(t, "HIT", resp.Header().Get("X-Cache")) + + // If this is actually returning a cached result, the serial number + // should be unchanged. + require.Equal(t, serialNumber, issued2.SerialNumber) + + require.Equal(t, issued, issued2) + }) +} + func TestAgentConnectCALeafCert_Vault_doesNotChurnLeafCertsAtIdle(t *testing.T) { ca.SkipIfVaultNotPresent(t) diff --git a/agent/cache/cache.go b/agent/cache/cache.go index d104cdd3b5af..e012b68683e6 100644 --- a/agent/cache/cache.go +++ b/agent/cache/cache.go @@ -376,6 +376,13 @@ func (c *Cache) getEntryLocked( // Check if re-validate is requested. If so the first time round the // loop is not a hit but subsequent ones should be treated normally. if !tEntry.Opts.Refresh && info.MustRevalidate { + if entry.Fetching { + // There is an active blocking query for this data, which has not + // returned. We can logically deduce that the contents of the cache + // are actually current, and we can simply return this while + // leaving the blocking query alone. + return true, true, entry + } return true, false, entry } From 804f42661421cfc100211e3a5d3e70ba32e3c75b Mon Sep 17 00:00:00 2001 From: "R.B. Boyer" Date: Tue, 19 Apr 2022 16:45:07 -0500 Subject: [PATCH 2/3] use httptest --- agent/agent_endpoint_test.go | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index d8b6ef8b3fbe..9c7249bc29c7 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -6233,8 +6233,7 @@ func TestAgentConnectCALeafCert_nonBlockingQuery_after_blockingQuery_shouldNotBl TTL: 15 * time.Second, }, } - req, err := http.NewRequest("PUT", "/v1/agent/service/register", jsonReader(args)) - require.NoError(t, err) + req := httptest.NewRequest("PUT", "/v1/agent/service/register", jsonReader(args)) resp := httptest.NewRecorder() a.srv.h.ServeHTTP(resp, req) if !assert.Equal(t, 200, resp.Code) { @@ -6248,8 +6247,7 @@ func TestAgentConnectCALeafCert_nonBlockingQuery_after_blockingQuery_shouldNotBl issued structs.IssuedCert ) runStep(t, "do initial non-blocking query", func(t *testing.T) { - req, err := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil) - require.NoError(t, err) + req := httptest.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil) resp := httptest.NewRecorder() a.srv.h.ServeHTTP(resp, req) @@ -6266,12 +6264,8 @@ func TestAgentConnectCALeafCert_nonBlockingQuery_after_blockingQuery_shouldNotBl defer cancel() go func() { // launch goroutine for blocking query + req := httptest.NewRequest("GET", "/v1/agent/connect/ca/leaf/test?index="+index, nil).Clone(ctx) resp := httptest.NewRecorder() - req, err := http.NewRequestWithContext(ctx, "GET", "/v1/agent/connect/ca/leaf/test?index="+index, nil) - if err != nil { - t.Errorf("error: %v", err) - return - } a.srv.h.ServeHTTP(resp, req) }() @@ -6280,8 +6274,7 @@ func TestAgentConnectCALeafCert_nonBlockingQuery_after_blockingQuery_shouldNotBl time.Sleep(50 * time.Millisecond) runStep(t, "do a non-blocking query that should not block", func(t *testing.T) { - req, err := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil) - require.NoError(t, err) + req := httptest.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil) resp := httptest.NewRecorder() a.srv.h.ServeHTTP(resp, req) From b15007bd40bdb0b54f26189d399859e19fab5472 Mon Sep 17 00:00:00 2001 From: "R.B. Boyer" Date: Tue, 19 Apr 2022 16:50:46 -0500 Subject: [PATCH 3/3] add comment --- agent/agent_endpoint_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/agent/agent_endpoint_test.go b/agent/agent_endpoint_test.go index 9c7249bc29c7..fc29333e92d2 100644 --- a/agent/agent_endpoint_test.go +++ b/agent/agent_endpoint_test.go @@ -6273,6 +6273,11 @@ func TestAgentConnectCALeafCert_nonBlockingQuery_after_blockingQuery_shouldNotBl // the next step, so do a little sleep. time.Sleep(50 * time.Millisecond) + // The initial non-blocking query populated the leaf cert cache entry + // implicitly. The agent cache doesn't prune entries very often at all, so + // in between both of these steps the data should still be there, causing + // this to be a HIT that completes in less than 10m (the default inner leaf + // cert blocking query timeout). runStep(t, "do a non-blocking query that should not block", func(t *testing.T) { req := httptest.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil) resp := httptest.NewRecorder()