Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add MustRevalidate flag to connect_ca_leaf cache type; always use on non-blocking queries (#11693) #11714

Merged
merged 1 commit into from
Dec 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/11693.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
ca: fixes a bug that caused non blocking leaf cert queries to return the same cached response regardless of ca rotation or leaf cert expiry
```
12 changes: 11 additions & 1 deletion agent/agent_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -1315,7 +1315,9 @@ func (s *HTTPHandlers) AgentConnectCARoots(resp http.ResponseWriter, req *http.R
}

// AgentConnectCALeafCert returns the certificate bundle for a service
// instance. This supports blocking queries to update the returned bundle.
// instance. This endpoint ignores all "Cache-Control" attributes.
// This supports blocking queries to update the returned bundle.
// Non-blocking queries will always verify that the cache entry is still valid.
func (s *HTTPHandlers) AgentConnectCALeafCert(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
// Get the service name. Note that this is the name of the service,
// not the ID of the service instance.
Expand All @@ -1338,6 +1340,14 @@ func (s *HTTPHandlers) AgentConnectCALeafCert(resp http.ResponseWriter, req *htt
args.MaxQueryTime = qOpts.MaxQueryTime
args.Token = qOpts.Token

// TODO(ffmmmm): maybe set MustRevalidate in ConnectCALeafRequest (as part of CacheInfo())
// We don't want non-blocking queries to return expired leaf certs
// or leaf certs not valid under the current CA. So always revalidate
// the leaf cert on non-blocking queries (ie when MinQueryIndex == 0)
if args.MinQueryIndex == 0 {
args.MustRevalidate = true
}

raw, m, err := s.agent.cache.Get(req.Context(), cachetype.ConnectCALeafName, &args)
if err != nil {
return nil, err
Expand Down
84 changes: 55 additions & 29 deletions agent/agent_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5313,16 +5313,13 @@ func TestAgentConnectCALeafCert_good(t *testing.T) {
obj2, err := a.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
require.Equal(obj, obj2)

// Should cache hit this time and not make request
require.Equal("HIT", resp.Header().Get("X-Cache"))
}

// Set a new CA
ca2 := connect.TestCAConfigSet(t, a, nil)

// Issue a blocking query to ensure that the cert gets updated appropriately
{
// Set a new CA
ca := connect.TestCAConfigSet(t, a, nil)

resp := httptest.NewRecorder()
req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test?index="+index, nil)
obj, err := a.srv.AgentConnectCALeafCert(resp, req)
Expand All @@ -5332,12 +5329,59 @@ func TestAgentConnectCALeafCert_good(t *testing.T) {
require.NotEqual(issued.PrivateKeyPEM, issued2.PrivateKeyPEM)

// Verify that the cert is signed by the new CA
requireLeafValidUnderCA(t, issued2, ca)
requireLeafValidUnderCA(t, issued2, ca2)

// Should not be a cache hit! The data was updated in response to the blocking
// query being made.
require.Equal("MISS", resp.Header().Get("X-Cache"))
}

t.Run("test non-blocking queries update leaf cert", func(t *testing.T) {
resp := httptest.NewRecorder()
obj, err := a.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)

// Get the issued cert
issued, ok := obj.(*structs.IssuedCert)
assert.True(ok)

// Verify that the cert is signed by the CA
requireLeafValidUnderCA(t, issued, ca2)

// Issue a non blocking query to ensure that the cert gets updated appropriately
{
// Set a new CA
ca3 := connect.TestCAConfigSet(t, a, nil)

resp := httptest.NewRecorder()
req, err := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil)
require.NoError(err)
obj, err = a.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
issued2 := obj.(*structs.IssuedCert)
require.NotEqual(issued.CertPEM, issued2.CertPEM)
require.NotEqual(issued.PrivateKeyPEM, issued2.PrivateKeyPEM)

// Verify that the cert is signed by the new CA
requireLeafValidUnderCA(t, issued2, ca3)

// Should not be a cache hit!
require.Equal("MISS", resp.Header().Get("X-Cache"))
}

// Test caching for the leaf cert
{

for fetched := 0; fetched < 4; fetched++ {

// Fetch it again
resp := httptest.NewRecorder()
obj2, err := a.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
require.Equal(obj, obj2)
}
}
})
}

// Test we can request a leaf cert for a service we have permission for
Expand Down Expand Up @@ -5406,9 +5450,6 @@ func TestAgentConnectCALeafCert_goodNotLocal(t *testing.T) {
obj2, err := a.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
require.Equal(obj, obj2)

// Should cache hit this time and not make request
require.Equal("HIT", resp.Header().Get("X-Cache"))
}

// Test Blocking - see https://github.com/hashicorp/consul/issues/4462
Expand Down Expand Up @@ -5456,12 +5497,7 @@ func TestAgentConnectCALeafCert_goodNotLocal(t *testing.T) {
// Verify that the cert is signed by the new CA
requireLeafValidUnderCA(t, issued2, ca)

// Should be a cache hit! The data should've updated in the cache
// in the background so this should've been fetched directly from
// the cache.
if resp.Header().Get("X-Cache") != "HIT" {
r.Fatalf("should be a cache hit")
}
require.NotEqual(issued, issued2)
})
}
}
Expand Down Expand Up @@ -5556,9 +5592,6 @@ func TestAgentConnectCALeafCert_Vault_doesNotChurnLeafCertsAtIdle(t *testing.T)
obj2, err := a.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
require.Equal(obj, obj2)

// Should cache hit this time and not make request
require.Equal("HIT", resp.Header().Get("X-Cache"))
}

// Test that we aren't churning leaves for no reason at idle.
Expand Down Expand Up @@ -5661,7 +5694,8 @@ func TestAgentConnectCALeafCert_secondaryDC_good(t *testing.T) {
}

// List
req, _ := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil)
req, err := http.NewRequest("GET", "/v1/agent/connect/ca/leaf/test", nil)
require.NoError(err)
resp := httptest.NewRecorder()
obj, err := a2.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
Expand All @@ -5686,9 +5720,6 @@ func TestAgentConnectCALeafCert_secondaryDC_good(t *testing.T) {
obj2, err := a2.srv.AgentConnectCALeafCert(resp, req)
require.NoError(err)
require.Equal(obj, obj2)

// Should cache hit this time and not make request
require.Equal("HIT", resp.Header().Get("X-Cache"))
}

// Test that we aren't churning leaves for no reason at idle.
Expand Down Expand Up @@ -5753,12 +5784,7 @@ func TestAgentConnectCALeafCert_secondaryDC_good(t *testing.T) {
// Verify that the cert is signed by the new CA
requireLeafValidUnderCA(t, issued2, dc1_ca2)

// Should be a cache hit! The data should've updated in the cache
// in the background so this should've been fetched directly from
// the cache.
if resp.Header().Get("X-Cache") != "HIT" {
r.Fatalf("should be a cache hit")
}
require.NotEqual(issued, issued2)
})
}

Expand Down
51 changes: 36 additions & 15 deletions agent/cache-types/connect_ca_leaf.go
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,25 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
return c.generateNewLeaf(reqReal, lastResultWithNewState())
}

// If we called Fetch() with MustRevalidate then this call came from a non-blocking query.
// Any prior CA rotations should've already expired the cert.
// All we need to do is check whether the current CA is the one that signed the leaf. If not, generate a new leaf.
// This is not a perfect solution (as a CA rotation update can be missed) but it should take care of instances like
// see https://github.com/hashicorp/consul/issues/10871, https://github.com/hashicorp/consul/issues/9862
// This seems to me like a hack, so maybe we can revisit the caching/ fetching logic in this case
if req.CacheInfo().MustRevalidate {
roots, err := c.rootsFromCache()
if err != nil {
return lastResultWithNewState(), err
}
if activeRootHasKey(roots, state.authorityKeyID) {
return lastResultWithNewState(), nil
}

// if we reach here then the current leaf was not signed by the same CAs, just regen
return c.generateNewLeaf(reqReal, lastResultWithNewState())
}

// We are about to block and wait for a change or timeout.

// Make a chan we can be notified of changes to CA roots on. It must be
Expand All @@ -401,7 +420,7 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
c.fetchStart(rootUpdateCh)
defer c.fetchDone(rootUpdateCh)

// Setup the timeout chan outside the loop so we don't keep bumping the timout
// Setup the timeout chan outside the loop so we don't keep bumping the timeout
// later if we loop around.
timeoutCh := time.After(opts.Timeout)

Expand Down Expand Up @@ -492,7 +511,7 @@ func (c *ConnectCALeaf) rootsFromCache() (*structs.IndexedCARoots, error) {

// generateNewLeaf does the actual work of creating a new private key,
// generating a CSR and getting it signed by the servers. result argument
// represents the last result currently in cache if any along with it's state.
// represents the last result currently in cache if any along with its state.
func (c *ConnectCALeaf) generateNewLeaf(req *ConnectCALeafRequest,
result cache.FetchResult) (cache.FetchResult, error) {

Expand Down Expand Up @@ -644,14 +663,15 @@ func (c *ConnectCALeaf) generateNewLeaf(req *ConnectCALeafRequest,
// since this is only used for cache-related requests and not forwarded
// directly to any Consul servers.
type ConnectCALeafRequest struct {
Token string
Datacenter string
Service string // Service name, not ID
Agent string // Agent name, not ID
DNSSAN []string
IPSAN []net.IP
MinQueryIndex uint64
MaxQueryTime time.Duration
Token string
Datacenter string
Service string // Service name, not ID
Agent string // Agent name, not ID
DNSSAN []string
IPSAN []net.IP
MinQueryIndex uint64
MaxQueryTime time.Duration
MustRevalidate bool

structs.EnterpriseMeta
}
Expand Down Expand Up @@ -681,10 +701,11 @@ func (r *ConnectCALeafRequest) Key() string {

func (r *ConnectCALeafRequest) CacheInfo() cache.RequestInfo {
return cache.RequestInfo{
Token: r.Token,
Key: r.Key(),
Datacenter: r.Datacenter,
MinIndex: r.MinQueryIndex,
Timeout: r.MaxQueryTime,
Token: r.Token,
Key: r.Key(),
Datacenter: r.Datacenter,
MinIndex: r.MinQueryIndex,
Timeout: r.MaxQueryTime,
MustRevalidate: r.MustRevalidate,
}
}