Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ACL Token Persistence and Reloading #5328

Merged
merged 8 commits into from
Feb 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions agent/acl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ func TestACL_AgentMasterToken(t *testing.T) {
}

a := NewTestACLAgent(t.Name(), TestACLConfig(), resolveFn)
a.loadTokens(a.config)
authz, err := a.resolveToken("towel")
require.NotNil(t, authz)
require.Nil(t, err)
Expand Down
109 changes: 102 additions & 7 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ const (
checksDir = "checks"
checkStateDir = "checks/state"

// Name of the file tokens will be persisted within
tokensPath = "acl-tokens.json"

// Default reasons for node/service maintenance mode
defaultNodeMaintReason = "Maintenance mode is enabled for this node, " +
"but no reason was provided. This is a default message."
Expand Down Expand Up @@ -252,6 +255,11 @@ type Agent struct {
grpcServer *grpc.Server

tlsConfigurator *tlsutil.Configurator

// persistedTokensLock is used to synchronize access to the persisted token
// store within the data directory. This will prevent loading while writing as
// well as multiple concurrent writes.
persistedTokensLock sync.RWMutex
}

func New(c *config.RuntimeConfig) (*Agent, error) {
Expand Down Expand Up @@ -286,12 +294,6 @@ func New(c *config.RuntimeConfig) (*Agent, error) {
return nil, err
}

// Set up the initial state of the token store based on the config.
a.tokens.UpdateUserToken(a.config.ACLToken)
a.tokens.UpdateAgentToken(a.config.ACLAgentToken)
a.tokens.UpdateAgentMasterToken(a.config.ACLAgentMasterToken)
a.tokens.UpdateACLReplicationToken(a.config.ACLReplicationToken)

return a, nil
}

Expand Down Expand Up @@ -365,6 +367,10 @@ func (a *Agent) Start() error {
"1 and 63 bytes.", a.config.NodeName)
}

// load the tokens - this requires the logger to be setup
// which is why we can't do this in New
a.loadTokens(a.config)

// create the local state
a.State = local.NewState(LocalConfig(c), a.logger, a.tokens)

Expand Down Expand Up @@ -588,6 +594,7 @@ func (a *Agent) listenAndServeDNS() error {
select {
case addr := <-notif:
a.logger.Printf("[INFO] agent: Started DNS server %s (%s)", addr.String(), addr.Network())

case err := <-errCh:
merr = multierror.Append(merr, err)
case <-timeout:
Expand Down Expand Up @@ -1070,7 +1077,6 @@ func (a *Agent) consulConfig() (*consul.Config, error) {
// Copy the Connect CA bootstrap config
if a.config.ConnectEnabled {
base.ConnectEnabled = true
base.ConnectReplicationToken = a.config.ConnectReplicationToken

// Allow config to specify cluster_id provided it's a valid UUID. This is
// meant only for tests where a deterministic ID makes fixtures much simpler
Expand Down Expand Up @@ -3207,6 +3213,90 @@ func (a *Agent) loadProxies(conf *config.RuntimeConfig) error {
return persistenceErr
}

type persistedTokens struct {
Replication string `json:"replication,omitempty"`
AgentMaster string `json:"agent_master,omitempty"`
Default string `json:"default,omitempty"`
Agent string `json:"agent,omitempty"`
}

func (a *Agent) getPersistedTokens() (*persistedTokens, error) {
persistedTokens := &persistedTokens{}
if !a.config.ACLEnableTokenPersistence {
return persistedTokens, nil
}

a.persistedTokensLock.RLock()
defer a.persistedTokensLock.RUnlock()

tokensFullPath := filepath.Join(a.config.DataDir, tokensPath)

buf, err := ioutil.ReadFile(tokensFullPath)
if err != nil {
if os.IsNotExist(err) {
// non-existence is not an error we care about
return persistedTokens, nil
}
return persistedTokens, fmt.Errorf("failed reading tokens file %q: %s", tokensFullPath, err)
}

if err := json.Unmarshal(buf, persistedTokens); err != nil {
return persistedTokens, fmt.Errorf("failed to decode tokens file %q: %s", tokensFullPath, err)
}

return persistedTokens, nil
}

func (a *Agent) loadTokens(conf *config.RuntimeConfig) error {
persistedTokens, persistenceErr := a.getPersistedTokens()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not hard-fail if the file is unreadable or corrupt? It seems like an unlikely event that should warrant extraordinary operational attention.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess it could be a hard fail. I modeled this off of other load* functions which did similarly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should a corrupt file prevent the agent from starting at all? Thats what would happen with a hard fail.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should corrupt persisted services prevent the agent from starting up?. I think the answer is no and we should instead just log the warning.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we go for worst case scenario, the agent isn't going to be terribly useful on restart if the sole store of tokens is that file, it's corrupt, and the operator is has a full belt-and-suspenders ACL setup configured.

Sure the agent will start, but it won't have any acl tokens with which to operate. If you had a fleet of hundreds of agents how can you tell the difference between an operational one and one that starts up but has no valid tokens?

But then I guess this is nearly indistinguishable from an agent having all of its acl tokens be rotated into the bit bucket already today.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but with the agent up you can remedy the situation by PUTing new tokens to the API. Which is better than needing to, remote in, blow away/fix the corrupt file and restart (maybe still need to repost up tokens depending on the corruption).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well if you were being secure about it, the API would only be bound to loopback anyway, so remoting in is still required.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I disagree that the only secure way to enable the API is bound to loopback. If using HTTPs + ACLs I would allow it on the LAN.

Additionally, even if not able to directly connect to the API I would say that doing a consul acl set-agent-token ... is much easier/reliable then having to hand edit files (which is still always a viable alternative).


if persistenceErr != nil {
a.logger.Printf("[WARN] unable to load persisted tokens: %v", persistenceErr)
}

if persistedTokens.Default != "" {
a.tokens.UpdateUserToken(persistedTokens.Default, token.TokenSourceAPI)

if conf.ACLToken != "" {
a.logger.Printf("[WARN] \"default\" token present in both the configuration and persisted token store, using the persisted token")
}
} else {
a.tokens.UpdateUserToken(conf.ACLToken, token.TokenSourceConfig)
}

if persistedTokens.Agent != "" {
a.tokens.UpdateAgentToken(persistedTokens.Agent, token.TokenSourceAPI)

if conf.ACLAgentToken != "" {
a.logger.Printf("[WARN] \"agent\" token present in both the configuration and persisted token store, using the persisted token")
}
} else {
a.tokens.UpdateAgentToken(conf.ACLAgentToken, token.TokenSourceConfig)
}

if persistedTokens.AgentMaster != "" {
a.tokens.UpdateAgentMasterToken(persistedTokens.AgentMaster, token.TokenSourceAPI)

if conf.ACLAgentMasterToken != "" {
a.logger.Printf("[WARN] \"agent_master\" token present in both the configuration and persisted token store, using the persisted token")
}
} else {
a.tokens.UpdateAgentMasterToken(conf.ACLAgentMasterToken, token.TokenSourceConfig)
}

if persistedTokens.Replication != "" {
a.tokens.UpdateReplicationToken(persistedTokens.Replication, token.TokenSourceAPI)

if conf.ACLReplicationToken != "" {
a.logger.Printf("[WARN] \"replication\" token present in both the configuration and persisted token store, using the persisted token")
}
} else {
a.tokens.UpdateReplicationToken(conf.ACLReplicationToken, token.TokenSourceConfig)
}

return persistenceErr
}

// unloadProxies will deregister all proxies known to the local agent.
func (a *Agent) unloadProxies() error {
a.proxyLock.Lock()
Expand Down Expand Up @@ -3371,6 +3461,11 @@ func (a *Agent) ReloadConfig(newCfg *config.RuntimeConfig) error {
}
a.unloadMetadata()

// Reload tokens - should be done before all the other loading
// to ensure the correct tokens are available for attaching to
// the checks and service registrations.
a.loadTokens(newCfg)

// Reload service/check definitions and metadata.
if err := a.loadServices(newCfg); err != nil {
return fmt.Errorf("Failed reloading services: %s", err)
Expand Down
66 changes: 55 additions & 11 deletions agent/agent_endpoint.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package agent

import (
"encoding/json"
"errors"
"fmt"
"log"
"net"
"net/http"
"path/filepath"
"strconv"
"strings"
"time"
Expand All @@ -22,9 +24,11 @@ import (
"github.com/hashicorp/consul/agent/debug"
"github.com/hashicorp/consul/agent/local"
"github.com/hashicorp/consul/agent/structs"
token_store "github.com/hashicorp/consul/agent/token"
"github.com/hashicorp/consul/api"
"github.com/hashicorp/consul/ipaddr"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/consul/lib/file"
"github.com/hashicorp/consul/logger"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/logutils"
Expand Down Expand Up @@ -1262,30 +1266,70 @@ func (s *HTTPServer) AgentToken(resp http.ResponseWriter, req *http.Request) (in
return nil, nil
}

if s.agent.config.ACLEnableTokenPersistence {
// we hold the lock around updating the internal token store
// as well as persisting the tokens because we don't want to write
// into the store to have something else wipe it out before we can
// persist everything (like an agent config reload). The token store
// lock is only held for those operations so other go routines that
// just need to read some token out of the store will not be impacted
// any more than they would be without token persistence.
s.agent.persistedTokensLock.Lock()
defer s.agent.persistedTokensLock.Unlock()
}

// Figure out the target token.
target := strings.TrimPrefix(req.URL.Path, "/v1/agent/token/")
switch target {
case "acl_token":
s.agent.tokens.UpdateUserToken(args.Token)

case "acl_agent_token":
s.agent.tokens.UpdateAgentToken(args.Token)
case "acl_token", "default":
s.agent.tokens.UpdateUserToken(args.Token, token_store.TokenSourceAPI)

case "acl_agent_master_token":
s.agent.tokens.UpdateAgentMasterToken(args.Token)
case "acl_agent_token", "agent":
s.agent.tokens.UpdateAgentToken(args.Token, token_store.TokenSourceAPI)

case "acl_replication_token":
s.agent.tokens.UpdateACLReplicationToken(args.Token)
case "acl_agent_master_token", "agent_master":
s.agent.tokens.UpdateAgentMasterToken(args.Token, token_store.TokenSourceAPI)

case "connect_replication_token":
s.agent.tokens.UpdateConnectReplicationToken(args.Token)
case "acl_replication_token", "replication":
s.agent.tokens.UpdateReplicationToken(args.Token, token_store.TokenSourceAPI)

default:
resp.WriteHeader(http.StatusNotFound)
fmt.Fprintf(resp, "Token %q is unknown", target)
return nil, nil
}

if s.agent.config.ACLEnableTokenPersistence {
tokens := persistedTokens{}

if tok, source := s.agent.tokens.UserTokenAndSource(); tok != "" && source == token_store.TokenSourceAPI {
tokens.Default = tok
}

if tok, source := s.agent.tokens.AgentTokenAndSource(); tok != "" && source == token_store.TokenSourceAPI {
tokens.Agent = tok
}

if tok, source := s.agent.tokens.AgentMasterTokenAndSource(); tok != "" && source == token_store.TokenSourceAPI {
tokens.AgentMaster = tok
}

if tok, source := s.agent.tokens.ReplicationTokenAndSource(); tok != "" && source == token_store.TokenSourceAPI {
tokens.Replication = tok
}

data, err := json.Marshal(tokens)
if err != nil {
s.agent.logger.Printf("[WARN] agent: failed to persist tokens - %v", err)
return nil, fmt.Errorf("Failed to marshal tokens for persistence: %v", err)
}

if err := file.WriteAtomicWithPerms(filepath.Join(s.agent.config.DataDir, tokensPath), data, 0600); err != nil {
s.agent.logger.Printf("[WARN] agent: failed to persist tokens - %v", err)
mkeeler marked this conversation as resolved.
Show resolved Hide resolved
return nil, fmt.Errorf("Failed to persist tokens - %v", err)
}
}

s.agent.logger.Printf("[INFO] agent: Updated agent's ACL token %q", target)
return nil, nil
}
Expand Down
Loading