From 84ad5cd4b32407902a948c5dc075885dc2eddfcc Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 20 Jan 2021 13:08:21 -0500 Subject: [PATCH] roachtest/tpcc: reduce load during rebalance period Relates to #58298. One thing I've noticed when looking into #58298 is that we were often badly overloading the cluster during the rebalance wait period. During this time, we just want to apply a small amount of load to help instruct load-based splitting and rebalancing. But in some cases, we were completely overloading the cluster. We also weren't ramping up the load, as we had intended to. This commit fixes both of these issues. It adds a ramp period for the first quarter of the rebalance time and it scales the txn rate based on the expected max warehouse count instead of the loaded warehouse count. --- pkg/cmd/roachtest/tpcc.go | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pkg/cmd/roachtest/tpcc.go b/pkg/cmd/roachtest/tpcc.go index f14b500773e4..6a6add34f222 100644 --- a/pkg/cmd/roachtest/tpcc.go +++ b/pkg/cmd/roachtest/tpcc.go @@ -677,12 +677,18 @@ func loadTPCCBench( return err } - // Split and scatter the tables. Ramp up to the expected load in the desired - distribution. This should allow for load-based rebalancing to help - distribute load. Optionally pass some load configuration-specific flags. + // Split and scatter the tables. Ramp up to half of the expected load in + the desired distribution. This should allow for load-based rebalancing to + help distribute load. Optionally pass some load configuration-specific + flags. 
+ const txnsPerWarehousePerSecond = 12.8 * (23.0 / 10.0) * (1.0 / 60.0) // max_tpmC/warehouse * all_txns/new_order_txns * minutes/seconds + rateAtExpected := txnsPerWarehousePerSecond * float64(b.EstimatedMax) + maxRate := int(rateAtExpected / 2) + rampTime := (1 * rebalanceWait) / 4 + loadTime := (3 * rebalanceWait) / 4 cmd = fmt.Sprintf("./cockroach workload run tpcc --warehouses=%d --workers=%d --max-rate=%d "+ - "--wait=false --duration=%s --scatter --tolerate-errors {pgurl%s}", - b.LoadWarehouses, b.LoadWarehouses, b.LoadWarehouses/2, rebalanceWait, roachNodes) + "--wait=false --ramp=%s --duration=%s --scatter --tolerate-errors {pgurl%s}", + b.LoadWarehouses, b.LoadWarehouses, maxRate, rampTime, loadTime, roachNodes) if out, err := c.RunWithBuffer(ctx, c.l, loadNode, cmd); err != nil { return errors.Wrapf(err, "failed with output %q", string(out)) }