Skip to content

Commit

Permalink
server: fix ReadyFn when waitForInit==true
Browse files Browse the repository at this point in the history
ReadyFn was never called with waitForInit set to true since we were
already past blocking when we'd initiate the call.

Caught by `pkg/cli/interactive_tests/test_init_command.tcl`.

Release note: None
  • Loading branch information
tbg committed Apr 4, 2020
1 parent 25e3426 commit e0fbd35
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 31 deletions.
4 changes: 2 additions & 2 deletions pkg/cli/demo_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ func setupTransientCluster(

// We force a wait for all servers until they are ready.
servReadyFnCh := make(chan struct{})
serv.Cfg.ReadyFn = func(_ bool) {
serv.Cfg.ReadyFn = func(bool) {
close(servReadyFnCh)
}

Expand Down Expand Up @@ -413,7 +413,7 @@ func (c *transientCluster) RestartNode(nodeID roachpb.NodeID) error {

// We want to only return after the server is ready.
readyCh := make(chan struct{})
serv.Cfg.ReadyFn = func(_ bool) {
serv.Cfg.ReadyFn = func(bool) {
close(readyCh)
}

Expand Down
9 changes: 4 additions & 5 deletions pkg/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,11 +258,10 @@ type Config struct {

// ReadyFn is called when the server has started listening on its
// sockets.
// The argument waitForInit indicates (iff true) that the
// server is not bootstrapped yet, will not bootstrap itself and
// will be waiting for an `init` command or accept bootstrapping
// from a joined node. It is set in an advisory fashion, that is,
// should be used for logging output only.
//
// The bool parameter is true if the server is not bootstrapped yet, will not
// bootstrap itself and will be waiting for an `init` command or accept
// bootstrapping from a joined node. Must not block.
ReadyFn func(waitForInit bool)

// DelayedBootstrapFn is called if the boostrap process does not complete
Expand Down
51 changes: 27 additions & 24 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1494,10 +1494,37 @@ func (s *Server) Start(ctx context.Context) error {
// This opens the main listener.
startRPCServer(workersCtx)

// The start cli command wants to print helpful information if it looks as
// though this node didn't immediately manage to connect to the cluster.
// This isn't the prettiest way to achieve this, but it gets the job done.
//
//
// TODO(tbg): we should avoid this when the node is bootstrapped.
// Unfortunately this knowledge is nicely encapsulated away in ServeAndWait.
// Perhaps that method should be split up.
haveStateCh := make(chan struct{})
if s.cfg.ReadyFn != nil {
_ = s.stopper.RunAsyncTask(ctx, "signal-readiness", func(ctx context.Context) {
waitForInit := true
tm := time.After(2 * time.Second)
select {
case <-haveStateCh:
waitForInit = false
case <-tm:
case <-ctx.Done():
return
case <-s.stopper.ShouldQuiesce():
return
}
s.cfg.ReadyFn(waitForInit)
})
}

state, err := s.initServer.ServeAndWait(ctx, s.stopper, s.engines, s.gossip)
if err != nil {
return errors.Wrap(err, "during init")
}
close(haveStateCh)

s.rpcContext.ClusterID.Set(ctx, state.clusterID)
// If there's no NodeID here, then we didn't just bootstrap. The Node will
Expand Down Expand Up @@ -1576,30 +1603,6 @@ func (s *Server) Start(ctx context.Context) error {
)
}

ready := make(chan struct{})
if s.cfg.ReadyFn != nil {
// s.cfg.ReadyFn must be called in any case because the `start`
// command requires it to signal readiness to a process manager.
//
// However we want to be somewhat precisely informative to the user
// about whether the node is waiting on init / join, or whether
// the join was successful straight away. So we spawn this goroutine
// and either:
// - its timer will fire after 2 seconds and we call ReadyFn(true)
// - bootstrap completes earlier and the ready chan gets closed,
// then we call ReadyFn(false).
go func() {
waitForInit := false
tm := time.After(2 * time.Second)
select {
case <-tm:
waitForInit = true
case <-ready:
}
s.cfg.ReadyFn(waitForInit)
}()
}

// Record a walltime that is lower than the lowest hlc timestamp this current
// instance of the node can use. We do not use startTime because it is lower
// than the timestamp used to create the bootstrap schema.
Expand Down

0 comments on commit e0fbd35

Please sign in to comment.