Skip to content

Commit

Permalink
Fix bug in leaf-cert cache type where multiple clients with different…
Browse files Browse the repository at this point in the history
… tokens would share certs and block incorrectly
  • Loading branch information
banks committed Oct 1, 2018
1 parent 1305b25 commit a99b7be
Show file tree
Hide file tree
Showing 2 changed files with 204 additions and 3 deletions.
23 changes: 20 additions & 3 deletions agent/cache-types/connect_ca_leaf.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,15 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
newRootCACh := make(chan error, 1)
go c.waitNewRootCA(reqReal.Datacenter, newRootCACh, opts.Timeout)

// Generate a cache key to lookup/store the cert. We MUST generate a new cert
// per token used to ensure revocation by ACL token is robust.
issuedKey := fmt.Sprintf("%s/%s", reqReal.Service, reqReal.Token)

// Get our prior cert (if we had one) and use that to determine our
// expiration time. If no cert exists, we expire immediately since we
// need to generate.
c.issuedCertsLock.RLock()
lastCert := c.issuedCerts[reqReal.Service]
lastCert := c.issuedCerts[issuedKey]
c.issuedCertsLock.RUnlock()

var leafExpiryCh <-chan time.Time
Expand All @@ -62,6 +66,19 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
if expiryDur := lastCert.ValidBefore.Sub(time.Now()); expiryDur > 0 {
leafExpiryCh = time.After(expiryDur - 1*time.Hour)
// TODO(mitchellh): 1 hour buffer is hardcoded above

// We should not depend on the cache package de-duplicating requests for
// the same service/token (which is all we care about keying our local
// issued cert cache on) since it might later make sense to partition
// clients for other reasons too. So if the request has a 0 MinIndex, and
// the cached cert is still valid, then the client is expecting an
// immediate response and hasn't already seen the cached cert, return it
// now.
if opts.MinIndex == 0 {
result.Value = lastCert
result.Index = lastCert.ModifyIndex
return result, nil
}
}
}

Expand Down Expand Up @@ -149,13 +166,13 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
// check just in case.
c.issuedCertsLock.Lock()
defer c.issuedCertsLock.Unlock()
lastCert = c.issuedCerts[reqReal.Service]
lastCert = c.issuedCerts[issuedKey]
if lastCert == nil || lastCert.ModifyIndex < reply.ModifyIndex {
if c.issuedCerts == nil {
c.issuedCerts = make(map[string]*structs.IssuedCert)
}

c.issuedCerts[reqReal.Service] = &reply
c.issuedCerts[issuedKey] = &reply
lastCert = &reply
}

Expand Down
184 changes: 184 additions & 0 deletions agent/cache-types/connect_ca_leaf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ func TestConnectCALeaf_changingRoots(t *testing.T) {
}

// Second fetch should block with set index
opts.MinIndex = 1
fetchCh = TestFetchCh(t, typ, opts, req)
select {
case result := <-fetchCh:
Expand Down Expand Up @@ -156,6 +157,7 @@ func TestConnectCALeaf_expiringLeaf(t *testing.T) {

// Third fetch should block since the cert is not expiring and
// we also didn't update CA certs.
opts.MinIndex = 2
fetchCh = TestFetchCh(t, typ, opts, req)
select {
case result := <-fetchCh:
Expand All @@ -164,6 +166,188 @@ func TestConnectCALeaf_expiringLeaf(t *testing.T) {
}
}

// Test that once one client (e.g. the proxycfg.Manager) has fetched a cert,
// that subsequent clients get it returned immediately and don't block until it
// expires or their request times out. Note that typically FEtches at this level
// are de-duped by the cache higher up, but if the two clients are using
// different ACL tokens for example (common) that may not be the case, and we
// should wtill deliver correct blocking semantics to both.
//
// Additionally, we want to make sure that clients with different tokens
// generate distinct certs since we might later want to revoke all certs fetched
// with a given token but can't if a client using that token was served a cert
// generated under a different token (say the agent token).
func TestConnectCALeaf_multipleClientsDifferentTokens(t *testing.T) {
t.Parallel()

require := require.New(t)
rpc := TestRPC(t)
defer rpc.AssertExpectations(t)

typ, rootsCh := testCALeafType(t, rpc)
defer close(rootsCh)
rootsCh <- structs.IndexedCARoots{
ActiveRootID: "1",
TrustDomain: "fake-trust-domain.consul",
QueryMeta: structs.QueryMeta{Index: 1},
}

// Instrument ConnectCA.Sign to
var resp *structs.IssuedCert
var idx uint64
rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
Run(func(args mock.Arguments) {
reply := args.Get(2).(*structs.IssuedCert)
reply.CreateIndex = atomic.AddUint64(&idx, 1)
reply.ModifyIndex = reply.CreateIndex
reply.ValidBefore = time.Now().Add(12 * time.Hour)
resp = reply
})

// We'll reuse the fetch options and request
opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Minute}
reqA := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "A-token"}
reqB := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "B-token"}

// First fetch (Client A, MinIndex = 0) should return immediately
fetchCh := TestFetchCh(t, typ, opts, reqA)
var certA *structs.IssuedCert
select {
case <-time.After(100 * time.Millisecond):
t.Fatal("shouldn't block waiting for fetch")
case result := <-fetchCh:
require.Equal(cache.FetchResult{
Value: resp,
Index: 1,
}, result)
certA = result.(cache.FetchResult).Value.(*structs.IssuedCert)
}

// Second fetch (Client B, MinIndex = 0) should return immediately
fetchCh = TestFetchCh(t, typ, opts, reqB)
select {
case <-time.After(100 * time.Millisecond):
t.Fatal("shouldn't block waiting for fetch")
case result := <-fetchCh:
require.Equal(cache.FetchResult{
Value: resp,
Index: 2,
}, result)
// Different tokens should result in different certs. Note that we don't
// actually generate and sign real certs in this test with our mock RPC but
// this is enough to be sure we actually generated a different Private Key
// for each one and aren't just differnt due to index values.
require.NotEqual(certA.PrivateKeyPEM,
result.(cache.FetchResult).Value.(*structs.IssuedCert).PrivateKeyPEM)
}

// Third fetch (Client A, MinIndex = > 0) should block
opts.MinIndex = 2
fetchCh = TestFetchCh(t, typ, opts, reqA)
select {
case result := <-fetchCh:
t.Fatalf("should not return: %#v", result)
case <-time.After(100 * time.Millisecond):
}

// Fourth fetch (Client B, MinIndex = > 0) should block
fetchCh = TestFetchCh(t, typ, opts, reqB)
select {
case result := <-fetchCh:
t.Fatalf("should not return: %#v", result)
case <-time.After(100 * time.Millisecond):
}
}

// Test that once one client (e.g. the proxycfg.Manager) has fetched a cert,
// that subsequent clients get it returned immediately and don't block until it
// expires or their request times out. Note that typically Fetches at this level
// are de-duped by the cache higher up, the test above explicitly tests the case
// where two clients with different tokens request the same cert. However two
// clients sharing a token _may_ share the certificate, but the cachetype should
// not implicitly depend on the cache mechanism de-duping these clients.
//
// Genrally we _shouldn't_ rely on implementation details in the cache package
// about partitioning to behave correctly as that is likely to lead to subtle
// errors later when the implementation there changes, so this test ensures that
// even if the cache for some reason decides to not share an existing cache
// entry with a second client despite using the same token, that we don't block
// it's initial request assuming that it's already recieved the in-memory and
// still valid cert.
func TestConnectCALeaf_multipleClientsSameToken(t *testing.T) {
t.Parallel()

require := require.New(t)
rpc := TestRPC(t)
defer rpc.AssertExpectations(t)

typ, rootsCh := testCALeafType(t, rpc)
defer close(rootsCh)
rootsCh <- structs.IndexedCARoots{
ActiveRootID: "1",
TrustDomain: "fake-trust-domain.consul",
QueryMeta: structs.QueryMeta{Index: 1},
}

// Instrument ConnectCA.Sign to
var resp *structs.IssuedCert
var idx uint64
rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
Run(func(args mock.Arguments) {
reply := args.Get(2).(*structs.IssuedCert)
reply.CreateIndex = atomic.AddUint64(&idx, 1)
reply.ModifyIndex = reply.CreateIndex
reply.ValidBefore = time.Now().Add(12 * time.Hour)
resp = reply
})

// We'll reuse the fetch options and request
opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Minute}
reqA := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "shared-token"}
reqB := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "shared-token"}

// First fetch (Client A, MinIndex = 0) should return immediately
fetchCh := TestFetchCh(t, typ, opts, reqA)
select {
case <-time.After(100 * time.Millisecond):
t.Fatal("shouldn't block waiting for fetch")
case result := <-fetchCh:
require.Equal(cache.FetchResult{
Value: resp,
Index: 1,
}, result)
}

// Second fetch (Client B, MinIndex = 0) should return immediately
fetchCh = TestFetchCh(t, typ, opts, reqB)
select {
case <-time.After(100 * time.Millisecond):
t.Fatal("shouldn't block waiting for fetch")
case result := <-fetchCh:
require.Equal(cache.FetchResult{
Value: resp,
Index: 1, // Same result as last fetch
}, result)
}

// Third fetch (Client A, MinIndex = > 0) should block
opts.MinIndex = 1
fetchCh = TestFetchCh(t, typ, opts, reqA)
select {
case result := <-fetchCh:
t.Fatalf("should not return: %#v", result)
case <-time.After(100 * time.Millisecond):
}

// Fourth fetch (Client B, MinIndex = > 0) should block
fetchCh = TestFetchCh(t, typ, opts, reqB)
select {
case result := <-fetchCh:
t.Fatalf("should not return: %#v", result)
case <-time.After(100 * time.Millisecond):
}
}

// testCALeafType returns a *ConnectCALeaf that is pre-configured to
// use the given RPC implementation for "ConnectCA.Sign" operations.
func testCALeafType(t *testing.T, rpc RPC) (*ConnectCALeaf, chan structs.IndexedCARoots) {
Expand Down

0 comments on commit a99b7be

Please sign in to comment.