From 7e1bec52b01ae8874a4807bfbf47fa6ea806b167 Mon Sep 17 00:00:00 2001
From: irfan sharif
Date: Wed, 3 Jun 2020 18:35:38 -0400
Subject: [PATCH] server,roachtest: remove leftover usage of `cockroach quit --decommission`

Fixes #49635. This is fallout from #49350 that wasn't picked up by CI.

Release note: None
---
 pkg/cmd/roachtest/autoupgrade.go  |  8 ++---
 pkg/cmd/roachtest/decommission.go | 58 +++++++++++++++----------------
 pkg/server/admin.go               |  6 ----
 3 files changed, 32 insertions(+), 40 deletions(-)

diff --git a/pkg/cmd/roachtest/autoupgrade.go b/pkg/cmd/roachtest/autoupgrade.go
index 7692ed103951..9ffb596c4772 100644
--- a/pkg/cmd/roachtest/autoupgrade.go
+++ b/pkg/cmd/roachtest/autoupgrade.go
@@ -77,12 +77,10 @@ func registerAutoUpgrade(r *testRegistry) {
 	}
 
 	decommissionAndStop := func(node int) error {
-		t.WorkerStatus("decomission")
+		t.WorkerStatus("decommission")
 		port := fmt.Sprintf("{pgport:%d}", node)
-		// Note that the following command line needs to run against both v2.1
-		// and the current branch. Do not change it in a manner that is
-		// incompatible with 2.1.
-		if err := c.RunE(ctx, c.Node(node), "./cockroach quit --decommission --insecure --port="+port); err != nil {
+		if err := c.RunE(ctx, c.Node(node),
+			fmt.Sprintf("./cockroach node decommission %d --insecure --port=%s", node, port)); err != nil {
 			return err
 		}
 		t.WorkerStatus("stop")
diff --git a/pkg/cmd/roachtest/decommission.go b/pkg/cmd/roachtest/decommission.go
index dc1f24add56f..2b46c6b04639 100644
--- a/pkg/cmd/roachtest/decommission.go
+++ b/pkg/cmd/roachtest/decommission.go
@@ -382,9 +382,9 @@ func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) {
 		MaxBackoff: 5 * time.Second,
 		Multiplier: 1,
 	}
-	if err := retry.WithMaxAttempts(ctx, retryOpts, 20, func() error {
+	if err := retry.WithMaxAttempts(ctx, retryOpts, 50, func() error {
 		o, err := decommission(ctx, 2, c.Node(1),
-			"decommission", "--wait", "none", "--format", "csv")
+			"decommission", "--wait=none", "--format=csv")
 		if err != nil {
 			t.Fatalf("decommission failed: %v", err)
 		}
@@ -403,7 +403,7 @@ func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) {
 	// Check that even though the node is decommissioned, we still see it (since
 	// it remains live) in `node ls`.
 	{
-		o, err := execCLI(ctx, t, c, 2, "node", "ls", "--format", "csv")
+		o, err := execCLI(ctx, t, c, 2, "node", "ls", "--format=csv")
 		if err != nil {
 			t.Fatalf("node-ls failed: %v", err)
 		}
@@ -420,7 +420,7 @@ func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) {
 	}
 	// Ditto `node status`.
 	{
-		o, err := execCLI(ctx, t, c, 2, "node", "status", "--format", "csv")
+		o, err := execCLI(ctx, t, c, 2, "node", "status", "--format=csv")
 		if err != nil {
 			t.Fatalf("node-status failed: %v", err)
 		}
@@ -442,7 +442,7 @@ func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) {
 	t.l.Printf("decommissioning second node from third, using --wait=all\n")
 	{
 		o, err := decommission(ctx, 3, c.Node(2),
-			"decommission", "--wait", "all", "--format", "csv")
+			"decommission", "--wait=all", "--format=csv")
 		if err != nil {
 			t.Fatalf("decommission failed: %v", err)
 		}
@@ -462,20 +462,25 @@ func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) {
 		t.Fatalf("recommission failed: %v", err)
 	}
 
-	// TODO(knz): quit --decommission is deprecated in 20.1. Remove
-	// this part of the roachtest in 20.2.
- t.l.Printf("decommissioning third node via `quit --decommission`\n") + t.l.Printf("decommissioning third node (from itself)\n") func() { // This should not take longer than five minutes, and if it does, it's // likely stuck forever and we want to see the output. timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) defer cancel() - if _, err := execCLI(timeoutCtx, t, c, 3, "quit", "--decommission"); err != nil { - if timeoutCtx.Err() != nil { - t.Fatalf("quit --decommission failed: %s", err) - } - // TODO(tschottdorf): grep the process output for the string announcing success? - t.l.Errorf("WARNING: ignoring error on quit --decommission: %s\n", err) + o, err := decommission(timeoutCtx, 3, c.Node(3), + "decommission", "--wait=all", "--format=csv") + if err != nil { + t.Fatalf("decommission failed: %v", err) + } + + exp := [][]string{ + decommissionHeader, + {"3", "true", "0", "true", "false"}, + decommissionFooter, + } + if err := matchCSV(o, exp); err != nil { + t.Fatal(err) } }() @@ -485,19 +490,14 @@ func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) { t.l.Printf("checking that other nodes see node three as successfully decommissioned\n") { o, err := decommission(ctx, 2, c.Node(3), - "decommission", "--format", "csv") // wait=all is implied + "decommission", "--format=csv") // wait=all is implied if err != nil { t.Fatalf("decommission failed: %v", err) } exp := [][]string{ decommissionHeader, - // Expect the same as usual, except this time the node should be draining - // because it shut down cleanly (thanks to `quit --decommission`). It turns - // out that while it will always manage to mark itself as draining during a - // graceful shutdown, gossip may not yet have told this node. It's rare, - // but seems to happen (#41249). - {"3", "true", "0", "true", "true|false"}, + {"3", "true", "0", "true", "false"}, decommissionFooter, } if err := matchCSV(o, exp); err != nil { @@ -521,7 +521,7 @@ func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) { t.l.Printf("decommission first node, starting with it down but restarting it for verification\n") { o, err := decommission(ctx, 2, c.Node(1), - "decommission", "--wait", "live") + "decommission", "--wait=live") if err != nil { t.Fatalf("decommission failed: %v", err) } @@ -540,7 +540,7 @@ func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) { // Note that we specify "all" because even though the first node is // now running, it may not be live by the time the command runs. o, err = decommission(ctx, 2, c.Node(1), - "decommission", "--wait", "all", "--format", "csv") + "decommission", "--wait=all", "--format=csv") if err != nil { t.Fatalf("decommission failed: %v", err) } @@ -577,7 +577,7 @@ func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) { t.l.Printf("decommission first node in absentia using --wait=live\n") { o, err := decommission(ctx, 3, c.Node(1), - "decommission", "--wait", "live", "--format", "csv") + "decommission", "--wait=live", "--format=csv") if err != nil { t.Fatalf("decommission failed: %v", err) } @@ -611,7 +611,7 @@ func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) { // Check that (at least after a bit) the node disappears from `node ls` // because it is decommissioned and not live. 
 	for {
-		o, err := execCLI(ctx, t, c, 2, "node", "ls", "--format", "csv")
+		o, err := execCLI(ctx, t, c, 2, "node", "ls", "--format=csv")
 		if err != nil {
 			t.Fatalf("node-ls failed: %v", err)
 		}
@@ -630,7 +630,7 @@ func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) {
 		break
 	}
 	for {
-		o, err := execCLI(ctx, t, c, 2, "node", "status", "--format", "csv")
+		o, err := execCLI(ctx, t, c, 2, "node", "status", "--format=csv")
 		if err != nil {
 			t.Fatalf("node-status failed: %v", err)
 		}
@@ -656,8 +656,8 @@ func runDecommissionAcceptance(ctx context.Context, t *test, c *cluster) {
 			c.InternalAddr(ctx, c.Node(2))[0])))
 	}
 
-	if err := retry.WithMaxAttempts(ctx, retryOpts, 20, func() error {
-		o, err := execCLI(ctx, t, c, 2, "node", "status", "--format", "csv")
+	if err := retry.WithMaxAttempts(ctx, retryOpts, 50, func() error {
+		o, err := execCLI(ctx, t, c, 2, "node", "status", "--format=csv")
 		if err != nil {
 			t.Fatalf("node-status failed: %v", err)
 		}
@@ -724,7 +724,7 @@ WHERE "eventType" IN ($1, $2) ORDER BY timestamp`,
 	//
 	// Specify wait=none because the command would block forever (the replicas have
 	// nowhere to go).
-	if _, err := decommission(ctx, 2, c.All(), "decommission", "--wait", "none"); err != nil {
+	if _, err := decommission(ctx, 2, c.All(), "decommission", "--wait=none"); err != nil {
 		t.Fatalf("decommission failed: %v", err)
 	}
 
diff --git a/pkg/server/admin.go b/pkg/server/admin.go
index 87b19b0a5b6f..f8281dca3b68 100644
--- a/pkg/server/admin.go
+++ b/pkg/server/admin.go
@@ -1713,12 +1713,6 @@ func (s *adminServer) Decommission(
 	ctx context.Context, req *serverpb.DecommissionRequest,
 ) (*serverpb.DecommissionStatusResponse, error) {
 	nodeIDs := req.NodeIDs
-	if nodeIDs == nil {
-		// If no NodeIDs are specified, decommission the current node. This is
-		// used by `quit --decommission`.
-		// TODO(knz): This behavior is deprecated in 20.1. Remove in 20.2.
-		nodeIDs = []roachpb.NodeID{s.server.NodeID()}
-	}
 
 	// Mark the target nodes as decommissioning. They'll find out as they
 	// heartbeat their liveness.
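
Note on the replacement invocation: the calls above run inside the roachtest harness, but the same CLI command works standalone. Below is a minimal sketch (not part of the patch) that shells out to `cockroach node decommission` the way the updated tests do; the binary path, node ID, port, and the `decommissionNode` helper are illustrative assumptions, not cockroach APIs.

package main

import (
	"fmt"
	"os/exec"
)

// decommissionNode invokes `cockroach node decommission <id>`, the command
// this patch adopts in place of the deprecated `cockroach quit --decommission`.
func decommissionNode(nodeID int, port string) error {
	// Equivalent shell invocation, mirroring the updated roachtest:
	//   ./cockroach node decommission <nodeID> --insecure --port=<port>
	cmd := exec.Command("./cockroach", "node", "decommission",
		fmt.Sprint(nodeID), "--insecure", "--port="+port)
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("decommission failed: %v\n%s", err, out)
	}
	return nil
}

func main() {
	// Assumes a locally built ./cockroach binary and an insecure cluster
	// listening on the given port; both are illustrative.
	if err := decommissionNode(1, "26257"); err != nil {
		fmt.Println(err)
	}
}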