Skip to content

Commit

Permalink
Use agent token for service/check deregistration during anti-entropy (#…
Browse files Browse the repository at this point in the history
…16097)

Use only the agent token for deregistration during anti-entropy

The previous behavior had the agent attempt to use the "service" token
(i.e. from the `token` field in a service definition file), and if that
was not set then it would use the agent token.

The previous behavior was problematic because, if the service token had
been deleted, the deregistration request would fail. The agent would
retry the deregistration during each anti-entropy sync, and the
situation would never resolve.

The new behavior is to only/always use the agent token for service and
check deregistration during anti-entropy. This approach is:

* Simpler: No fallback logic to try different tokens
* Faster (slightly): No time spent attempting the service token
* Correct: The agent token is able to deregister services on that
  agent's node, because:
  * node:write permissions allow deregistration of services/checks on
    that node.
  * The agent token must have node:write permission, or else the agent
    is not be able to (de)register itself into the catalog

Co-authored-by: Vesa Hagström <weeezes@gmail.com>
  • Loading branch information
Paul Glass and weeezes authored Feb 3, 2023
1 parent e40b731 commit a884d0d
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 9 deletions.
3 changes: 3 additions & 0 deletions .changelog/16097.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug-fix
agent: Only use the `agent` token for internal deregistration of checks and services during anti-entropy syncing. The service token specified in the `token` field of the check or service definition is no longer used for deregistration. This fixes an issue where the agent failed to ever deregister a service or check if the service token was deleted.
```
11 changes: 9 additions & 2 deletions agent/local/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -1293,7 +1293,12 @@ func (l *State) deleteService(key structs.ServiceID) error {
return fmt.Errorf("ServiceID missing")
}

st := l.aclTokenForServiceSync(key, l.tokens.AgentToken)
// Always use the agent token to delete without trying the service token.
// This works because the agent token really must have node:write
// permission and node:write allows deregistration of services/checks on
// that node. Because the service token may have been deleted, using the
// agent token without fallback logic is a bit faster, simpler, and safer.
st := l.tokens.AgentToken()
req := structs.DeregisterRequest{
Datacenter: l.config.Datacenter,
Node: l.config.NodeName,
Expand Down Expand Up @@ -1344,7 +1349,9 @@ func (l *State) deleteCheck(key structs.CheckID) error {
return fmt.Errorf("CheckID missing")
}

ct := l.aclTokenForCheckSync(key, l.tokens.AgentToken)
// Always use the agent token for deletion. Refer to deleteService() for
// an explanation.
ct := l.tokens.AgentToken()
req := structs.DeregisterRequest{
Datacenter: l.config.Datacenter,
Node: l.config.NodeName,
Expand Down
41 changes: 34 additions & 7 deletions agent/local/state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -829,10 +829,6 @@ func TestAgentAntiEntropy_Services_WithChecks(t *testing.T) {
}

var testRegisterRules = `
node "" {
policy = "write"
}
service "api" {
policy = "write"
}
Expand Down Expand Up @@ -863,6 +859,9 @@ func TestAgentAntiEntropy_Services_ACLDeny(t *testing.T) {
defer a.Shutdown()
testrpc.WaitForLeader(t, a.RPC, "dc1")

// The agent token is the only token used for deleteService.
setAgentToken(t, a)

token := createToken(t, a, testRegisterRules)

// Create service (disallowed)
Expand Down Expand Up @@ -1153,15 +1152,19 @@ type RPC interface {
func createToken(t *testing.T, rpc RPC, policyRules string) string {
t.Helper()

uniqueId, err := uuid.GenerateUUID()
require.NoError(t, err)
policyName := "the-policy-" + uniqueId

reqPolicy := structs.ACLPolicySetRequest{
Datacenter: "dc1",
Policy: structs.ACLPolicy{
Name: "the-policy",
Name: policyName,
Rules: policyRules,
},
WriteRequest: structs.WriteRequest{Token: "root"},
}
err := rpc.RPC(context.Background(), "ACL.PolicySet", &reqPolicy, &structs.ACLPolicy{})
err = rpc.RPC(context.Background(), "ACL.PolicySet", &reqPolicy, &structs.ACLPolicy{})
require.NoError(t, err)

token, err := uuid.GenerateUUID()
Expand All @@ -1171,7 +1174,7 @@ func createToken(t *testing.T, rpc RPC, policyRules string) string {
Datacenter: "dc1",
ACLToken: structs.ACLToken{
SecretID: token,
Policies: []structs.ACLTokenPolicyLink{{Name: "the-policy"}},
Policies: []structs.ACLTokenPolicyLink{{Name: policyName}},
},
WriteRequest: structs.WriteRequest{Token: "root"},
}
Expand All @@ -1180,6 +1183,27 @@ func createToken(t *testing.T, rpc RPC, policyRules string) string {
return token
}

// setAgentToken sets the 'agent' token for this agent. It creates a new token
// with node:write for the agent's node name, and service:write for any
// service.
func setAgentToken(t *testing.T, a *agent.TestAgent) {
var policy = fmt.Sprintf(`
node "%s" {
policy = "write"
}
service_prefix "" {
policy = "read"
}
`, a.Config.NodeName)

token := createToken(t, a, policy)

_, err := a.Client().Agent().UpdateAgentACLToken(token, &api.WriteOptions{Token: "root"})
if err != nil {
t.Fatalf("setting agent token: %v", err)
}
}

func TestAgentAntiEntropy_Checks(t *testing.T) {
if testing.Short() {
t.Skip("too slow for testing.Short")
Expand Down Expand Up @@ -1481,6 +1505,9 @@ func TestAgentAntiEntropy_Checks_ACLDeny(t *testing.T) {

testrpc.WaitForLeader(t, a.RPC, dc)

// The agent token is the only token used for deleteCheck.
setAgentToken(t, a)

token := createToken(t, a, testRegisterRules)

// Create services using the root token
Expand Down

0 comments on commit a884d0d

Please sign in to comment.