Skip to content

Commit

Permalink
prevent races when reloading, fully shut down raft
Browse files Browse the repository at this point in the history
  • Loading branch information
chelseakomlo committed Nov 28, 2017
1 parent eb75cb2 commit 1288b53
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 26 deletions.
3 changes: 3 additions & 0 deletions command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,9 @@ func (a *Agent) Reload(newConfig *Config) error {

// GetConfig returns the agent's current configuration. The field is read
// while holding configLock. The returned pointer is the agent's own Config
// value, not a copy, so callers share it with the agent and should treat it
// as read-only.
func (a *Agent) GetConfig() *Config {
	a.configLock.Lock()
	defer a.configLock.Unlock()

	return a.config
}

Expand Down
15 changes: 0 additions & 15 deletions nomad/raft_rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,18 +113,3 @@ func (l *RaftLayer) Dial(address raft.ServerAddress, timeout time.Duration) (net
}
return conn, err
}

// ReloadTLS installs a new TLS wrapper on the layer and resets its close
// state. While holding closeLock it closes the current closeCh (unless the
// layer is already marked closed), then swaps in a fresh closeCh, stores
// tlsWrap, and leaves the layer marked as open.
func (l *RaftLayer) ReloadTLS(tlsWrap tlsutil.Wrapper) {
	l.closeLock.Lock()
	defer l.closeLock.Unlock()

	// Only close the existing channel once; closing an already-closed
	// channel would panic.
	if !l.closed {
		close(l.closeCh)
	}

	// Start over with a fresh channel and the new wrapper. The closed flag
	// always ends up false, so no intermediate "closed" state is recorded.
	l.closeCh = make(chan struct{})
	l.closed = false
	l.tlsWrap = tlsWrap
}
38 changes: 27 additions & 11 deletions nomad/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,21 +99,28 @@ type Server struct {
leaderCh <-chan bool
raft *raft.Raft
raftLayer *RaftLayer
raftStore *raftboltdb.BoltStore
raftInmem *raft.InmemStore
raftTransport *raft.NetworkTransport
raftLayerLock sync.Mutex

raftStore *raftboltdb.BoltStore
raftInmem *raft.InmemStore

raftTransport *raft.NetworkTransport
raftTransportLock sync.Mutex

// fsm is the state machine used with Raft
fsm *nomadFSM

// rpcListener is used to listen for incoming connections
rpcListener net.Listener
rpcListener net.Listener
rpcListenerLock sync.Mutex

rpcServer *rpc.Server
rpcAdvertise net.Addr

// rpcTLS is the TLS config for incoming TLS requests
rpcTLS *tls.Config
rpcCancel context.CancelFunc
rpcTLS *tls.Config
rpcCancel context.CancelFunc
rpcTLSLock sync.Mutex

// peers is used to track the known Nomad servers. This is
// used for region forwarding and clustering.
Expand Down Expand Up @@ -365,9 +372,8 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
s.logger.Printf("[INFO] nomad: reloading server connections due to configuration changes")

s.configLock.Lock()
defer s.configLock.Unlock()

s.config.TLSConfig = newTLSConfig
s.configLock.Unlock()

var tlsWrap tlsutil.RegionWrapper
var incomingTLS *tls.Config
Expand All @@ -390,32 +396,42 @@ func (s *Server) ReloadTLSConnections(newTLSConfig *config.TLSConfig) error {
s.logger.Printf("[ERR] nomad: No TLS Context to reset")
return fmt.Errorf("Unable to reset tls context")
}
s.rpcCancel()

s.rpcTLSLock.Lock()
s.rpcTLS = incomingTLS
s.rpcTLSLock.Unlock()

s.rpcCancel()
s.raftTransportLock.Lock()
defer s.raftTransportLock.Unlock()
s.raftTransport.Close()
s.raftLayer.Close()

s.connPool.ReloadTLS(tlsWrap)

// reinitialize our rpc listener
s.rpcListenerLock.Lock()
s.rpcListener.Close()
time.Sleep(500 * time.Millisecond)
list, err := net.ListenTCP("tcp", s.config.RPCAddr)
if err != nil || list == nil {
s.logger.Printf("[ERR] nomad: No TLS listener to reload")
return err
}

s.rpcListener = list

// reinitialize the cancel context
ctx, cancel := context.WithCancel(context.Background())
s.rpcCancel = cancel
s.rpcListenerLock.Unlock()

go s.listen(ctx)

s.raftLayerLock.Lock()
s.raftLayer.Close()
wrapper := tlsutil.RegionSpecificWrapper(s.config.Region, tlsWrap)
s.raftLayer.ReloadTLS(wrapper)
s.raftLayer = NewRaftLayer(s.rpcAdvertise, wrapper)
s.raftLayerLock.Unlock()

// re-initialize the network transport with a re-initialized stream layer
trans := raft.NewNetworkTransport(s.raftLayer, 3, s.config.RaftTimeout,
Expand Down

0 comments on commit 1288b53

Please sign in to comment.