diff --git a/agent/cache-types/connect_ca_leaf.go b/agent/cache-types/connect_ca_leaf.go
index 6fc88a847714..b85beb4c2632 100644
--- a/agent/cache-types/connect_ca_leaf.go
+++ b/agent/cache-types/connect_ca_leaf.go
@@ -1,6 +1,7 @@
 package cachetype
 
 import (
+    "crypto/sha256"
     "errors"
     "fmt"
     "sync"
@@ -27,6 +28,19 @@ type ConnectCALeaf struct {
     Cache *cache.Cache // Cache that has CA root certs via ConnectCARoot
 }
 
+// issuedKey returns the issuedCerts cache key for a given service and token.
+// We use a hash rather than concatenating strings to provide resilience
+// against user input containing our separator - both the service name and the
+// token ID can be freely chosen by the user, so either may contain any
+// delimiter we pick. Hashing also avoids leaking the ACL token into a new
+// place in memory where it might accidentally be dumped, logged, etc.
+func issuedKey(service, token string) string {
+    hash := sha256.New()
+    hash.Write([]byte(service))
+    hash.Write([]byte(token))
+    return fmt.Sprintf("%x", hash.Sum(nil))
+}
+
 func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache.FetchResult, error) {
     var result cache.FetchResult
 
@@ -48,11 +62,15 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
     newRootCACh := make(chan error, 1)
     go c.waitNewRootCA(reqReal.Datacenter, newRootCACh, opts.Timeout)
 
+    // Generate a cache key to look up/store the cert. We MUST generate a new
+    // cert per token used so that revocation by ACL token is robust.
+    issuedKey := issuedKey(reqReal.Service, reqReal.Token)
+
     // Get our prior cert (if we had one) and use that to determine our
     // expiration time. If no cert exists, we expire immediately since we
     // need to generate.
     c.issuedCertsLock.RLock()
-    lastCert := c.issuedCerts[reqReal.Service]
+    lastCert := c.issuedCerts[issuedKey]
     c.issuedCertsLock.RUnlock()
 
     var leafExpiryCh <-chan time.Time
@@ -62,6 +80,19 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
         if expiryDur := lastCert.ValidBefore.Sub(time.Now()); expiryDur > 0 {
             leafExpiryCh = time.After(expiryDur - 1*time.Hour)
             // TODO(mitchellh): 1 hour buffer is hardcoded above
+
+            // We should not depend on the cache package de-duplicating requests
+            // for the same service/token (which is all we care about keying our
+            // local issued cert cache on) since it might later make sense to
+            // partition clients for other reasons too. So if the request has a
+            // zero MinIndex and the cached cert is still valid, the client is
+            // expecting an immediate response and hasn't already seen the
+            // cached cert; return it now.
+            if opts.MinIndex == 0 {
+                result.Value = lastCert
+                result.Index = lastCert.ModifyIndex
+                return result, nil
+            }
         }
     }
@@ -149,13 +180,13 @@ func (c *ConnectCALeaf) Fetch(opts cache.FetchOptions, req cache.Request) (cache
     // check just in case.
     c.issuedCertsLock.Lock()
     defer c.issuedCertsLock.Unlock()
-    lastCert = c.issuedCerts[reqReal.Service]
+    lastCert = c.issuedCerts[issuedKey]
     if lastCert == nil || lastCert.ModifyIndex < reply.ModifyIndex {
         if c.issuedCerts == nil {
             c.issuedCerts = make(map[string]*structs.IssuedCert)
         }
-        c.issuedCerts[reqReal.Service] = &reply
+        c.issuedCerts[issuedKey] = &reply
         lastCert = &reply
     }
diff --git a/agent/cache-types/connect_ca_leaf_test.go b/agent/cache-types/connect_ca_leaf_test.go
index d55caf408dce..c52c8ce02009 100644
--- a/agent/cache-types/connect_ca_leaf_test.go
+++ b/agent/cache-types/connect_ca_leaf_test.go
@@ -58,6 +58,7 @@ func TestConnectCALeaf_changingRoots(t *testing.T) {
     }
 
     // Second fetch should block with set index
+    opts.MinIndex = 1
     fetchCh = TestFetchCh(t, typ, opts, req)
     select {
     case result := <-fetchCh:
@@ -156,6 +157,7 @@ func TestConnectCALeaf_expiringLeaf(t *testing.T) {
 
     // Third fetch should block since the cert is not expiring and
     // we also didn't update CA certs.
+    opts.MinIndex = 2
     fetchCh = TestFetchCh(t, typ, opts, req)
     select {
     case result := <-fetchCh:
@@ -164,6 +166,188 @@ func TestConnectCALeaf_expiringLeaf(t *testing.T) {
     }
 }
 
+// Test that once one client (e.g. the proxycfg.Manager) has fetched a cert,
+// subsequent clients get it returned immediately and don't block until it
+// expires or their request times out. Note that typically Fetches at this
+// level are de-duped by the cache higher up, but if the two clients are using
+// different ACL tokens, for example (which is common), that may not be the
+// case, and we should still deliver correct blocking semantics to both.
+//
+// Additionally, we want to make sure that clients with different tokens
+// generate distinct certs, since we might later want to revoke all certs
+// fetched with a given token but can't if a client using that token was
+// served a cert generated under a different token (say the agent token).
+func TestConnectCALeaf_multipleClientsDifferentTokens(t *testing.T) {
+    t.Parallel()
+
+    require := require.New(t)
+    rpc := TestRPC(t)
+    defer rpc.AssertExpectations(t)
+
+    typ, rootsCh := testCALeafType(t, rpc)
+    defer close(rootsCh)
+    rootsCh <- structs.IndexedCARoots{
+        ActiveRootID: "1",
+        TrustDomain:  "fake-trust-domain.consul",
+        QueryMeta:    structs.QueryMeta{Index: 1},
+    }
+
+    // Instrument ConnectCA.Sign to return a fresh cert with an incrementing index.
+    var resp *structs.IssuedCert
+    var idx uint64
+    rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
+        Run(func(args mock.Arguments) {
+            reply := args.Get(2).(*structs.IssuedCert)
+            reply.CreateIndex = atomic.AddUint64(&idx, 1)
+            reply.ModifyIndex = reply.CreateIndex
+            reply.ValidBefore = time.Now().Add(12 * time.Hour)
+            resp = reply
+        })
+
+    // We'll reuse the fetch options and requests
+    opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Minute}
+    reqA := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "A-token"}
+    reqB := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "B-token"}
+
+    // First fetch (Client A, MinIndex = 0) should return immediately
+    fetchCh := TestFetchCh(t, typ, opts, reqA)
+    var certA *structs.IssuedCert
+    select {
+    case <-time.After(100 * time.Millisecond):
+        t.Fatal("shouldn't block waiting for fetch")
+    case result := <-fetchCh:
+        require.Equal(cache.FetchResult{
+            Value: resp,
+            Index: 1,
+        }, result)
+        certA = result.(cache.FetchResult).Value.(*structs.IssuedCert)
+    }
+
+    // Second fetch (Client B, MinIndex = 0) should return immediately
+    fetchCh = TestFetchCh(t, typ, opts, reqB)
+    select {
+    case <-time.After(100 * time.Millisecond):
+        t.Fatal("shouldn't block waiting for fetch")
+    case result := <-fetchCh:
+        require.Equal(cache.FetchResult{
+            Value: resp,
+            Index: 2,
+        }, result)
+        // Different tokens should result in different certs. Note that we don't
+        // actually generate and sign real certs in this test with our mock RPC,
+        // but this is enough to be sure we actually generated a different private
+        // key for each one and that they aren't just different due to index values.
+        require.NotEqual(certA.PrivateKeyPEM,
+            result.(cache.FetchResult).Value.(*structs.IssuedCert).PrivateKeyPEM)
+    }
+
+    // Third fetch (Client A, MinIndex > 0) should block
+    opts.MinIndex = 2
+    fetchCh = TestFetchCh(t, typ, opts, reqA)
+    select {
+    case result := <-fetchCh:
+        t.Fatalf("should not return: %#v", result)
+    case <-time.After(100 * time.Millisecond):
+    }
+
+    // Fourth fetch (Client B, MinIndex > 0) should block
+    fetchCh = TestFetchCh(t, typ, opts, reqB)
+    select {
+    case result := <-fetchCh:
+        t.Fatalf("should not return: %#v", result)
+    case <-time.After(100 * time.Millisecond):
+    }
+}
+
+// Test that once one client (e.g. the proxycfg.Manager) has fetched a cert,
+// subsequent clients get it returned immediately and don't block until it
+// expires or their request times out. Note that typically Fetches at this
+// level are de-duped by the cache higher up; the test above explicitly tests
+// the case where two clients with different tokens request the same cert. Two
+// clients sharing a token _may_ share the certificate, but the cachetype
+// should not implicitly depend on the cache mechanism de-duping these clients.
+//
+// Generally we _shouldn't_ rely on implementation details of the cache
+// package's partitioning to behave correctly, as that is likely to lead to
+// subtle errors later when the implementation there changes. So this test
+// ensures that even if the cache for some reason decides not to share an
+// existing cache entry with a second client despite the same token being
+// used, we don't block that client's initial request on the assumption that
+// it has already received the in-memory, still-valid cert.
+func TestConnectCALeaf_multipleClientsSameToken(t *testing.T) {
+    t.Parallel()
+
+    require := require.New(t)
+    rpc := TestRPC(t)
+    defer rpc.AssertExpectations(t)
+
+    typ, rootsCh := testCALeafType(t, rpc)
+    defer close(rootsCh)
+    rootsCh <- structs.IndexedCARoots{
+        ActiveRootID: "1",
+        TrustDomain:  "fake-trust-domain.consul",
+        QueryMeta:    structs.QueryMeta{Index: 1},
+    }
+
+    // Instrument ConnectCA.Sign to return a fresh cert with an incrementing index.
+    var resp *structs.IssuedCert
+    var idx uint64
+    rpc.On("RPC", "ConnectCA.Sign", mock.Anything, mock.Anything).Return(nil).
+        Run(func(args mock.Arguments) {
+            reply := args.Get(2).(*structs.IssuedCert)
+            reply.CreateIndex = atomic.AddUint64(&idx, 1)
+            reply.ModifyIndex = reply.CreateIndex
+            reply.ValidBefore = time.Now().Add(12 * time.Hour)
+            resp = reply
+        })
+
+    // We'll reuse the fetch options and requests
+    opts := cache.FetchOptions{MinIndex: 0, Timeout: 10 * time.Minute}
+    reqA := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "shared-token"}
+    reqB := &ConnectCALeafRequest{Datacenter: "dc1", Service: "web", Token: "shared-token"}
+
+    // First fetch (Client A, MinIndex = 0) should return immediately
+    fetchCh := TestFetchCh(t, typ, opts, reqA)
+    select {
+    case <-time.After(100 * time.Millisecond):
+        t.Fatal("shouldn't block waiting for fetch")
+    case result := <-fetchCh:
+        require.Equal(cache.FetchResult{
+            Value: resp,
+            Index: 1,
+        }, result)
+    }
+
+    // Second fetch (Client B, MinIndex = 0) should return immediately
+    fetchCh = TestFetchCh(t, typ, opts, reqB)
+    select {
+    case <-time.After(100 * time.Millisecond):
+        t.Fatal("shouldn't block waiting for fetch")
+    case result := <-fetchCh:
+        require.Equal(cache.FetchResult{
+            Value: resp,
+            Index: 1, // Same result as last fetch
+        }, result)
+    }
+
+    // Third fetch (Client A, MinIndex > 0) should block
+    opts.MinIndex = 1
+    fetchCh = TestFetchCh(t, typ, opts, reqA)
+    select {
+    case result := <-fetchCh:
+        t.Fatalf("should not return: %#v", result)
+    case <-time.After(100 * time.Millisecond):
+    }
+
+    // Fourth fetch (Client B, MinIndex > 0) should block
+    fetchCh = TestFetchCh(t, typ, opts, reqB)
+    select {
+    case result := <-fetchCh:
+        t.Fatalf("should not return: %#v", result)
+    case <-time.After(100 * time.Millisecond):
+    }
+}
+
 // testCALeafType returns a *ConnectCALeaf that is pre-configured to
 // use the given RPC implementation for "ConnectCA.Sign" operations.
 func testCALeafType(t *testing.T, rpc RPC) (*ConnectCALeaf, chan structs.IndexedCARoots) {
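
A minimal standalone sketch (not part of the patch above) of the keying behavior the new tests rely on: issuedKey here is copied from the helper added in connect_ca_leaf.go. The same service requested under two different ACL tokens hashes to two distinct cache keys, which is what forces a distinct cert per token, while clients sharing a token map to a single key and so may share the cached cert.

// Hypothetical standalone illustration; not part of the Consul codebase.
package main

import (
    "crypto/sha256"
    "fmt"
)

// issuedKey mirrors the helper added in connect_ca_leaf.go above.
func issuedKey(service, token string) string {
    hash := sha256.New()
    hash.Write([]byte(service))
    hash.Write([]byte(token))
    return fmt.Sprintf("%x", hash.Sum(nil))
}

func main() {
    // Same service, different tokens: distinct keys, so distinct cached certs.
    fmt.Println(issuedKey("web", "A-token") != issuedKey("web", "B-token")) // true
    // Same service and token: the same key, so the cached cert can be reused.
    fmt.Println(issuedKey("web", "shared-token") == issuedKey("web", "shared-token")) // true
    // The key is a fixed-length hex digest, so the raw token never appears in the map.
    fmt.Println(len(issuedKey("web", "A-token"))) // 64
}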