Skip to content

Commit

Permalink
Merge #29332
Browse files Browse the repository at this point in the history
29332: roachtest: tpcc with chaos r=benesch a=tschottdorf

This is an attempt to reproduce #28918.

Release note: None

Co-authored-by: Tobias Schottdorf <tobias.schottdorf@gmail.com>
  • Loading branch information
craig[bot] and tbg committed Oct 3, 2018
2 parents 8e130b7 + 8f6e003 commit 7cda4be
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 26 deletions.
21 changes: 16 additions & 5 deletions pkg/cmd/roachtest/chaos.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ package main
import (
"context"
"time"

"github.com/cockroachdb/cockroach/pkg/util/timeutil"
)

// ChaosTimer configures a chaos schedule.
Expand Down Expand Up @@ -54,22 +56,31 @@ type Chaos struct {
// setting off the monitor. The process returns without an error after the chaos
// duration.
func (ch *Chaos) Runner(c *cluster, m *monitor) func(context.Context) error {
return func(ctx context.Context) error {
return func(ctx context.Context) (err error) {
l, err := c.l.ChildLogger("CHAOS")
if err != nil {
return err
}
period, downTime := ch.Timer.Timing()
t := time.NewTicker(period)
defer func() {
l.Printf("chaos stopping: %v", err)
}()
t := timeutil.Timer{}
{
p, _ := ch.Timer.Timing()
t.Reset(p)
}
for {
select {
case <-ch.Stopper:
return nil
case <-ctx.Done():
return ctx.Err()
case <-t.C:
t.Read = true
}

period, downTime := ch.Timer.Timing()

target := ch.Target()
m.ExpectDeath()

Expand All @@ -88,8 +99,8 @@ func (ch *Chaos) Runner(c *cluster, m *monitor) func(context.Context) error {
return ctx.Err()
case <-time.After(downTime):
}

c.l.Printf("restarting %v after %s of downtime\n", target, downTime)
l.Printf("restarting %v after %s of downtime\n", target, downTime)
t.Reset(period)
c.Start(ctx, target)
}
}
Expand Down
108 changes: 87 additions & 21 deletions pkg/cmd/roachtest/tpcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"
"math"
"math/rand"
"strconv"
"strings"
"time"
Expand All @@ -34,50 +35,83 @@ import (
"github.com/cockroachdb/ttycolor"
)

func runTPCC(
ctx context.Context, t *test, c *cluster, warehouses int, duration string, extraArgs ...string,
) {
type tpccOptions struct {
Warehouses int
Extra string
Chaos func() Chaos // for late binding of stopper
Duration time.Duration
ZFS bool
}

func runTPCC(ctx context.Context, t *test, c *cluster, opts tpccOptions) {
crdbNodes := c.Range(1, c.nodes-1)
workloadNode := c.Node(c.nodes)
if c.isLocal() {
warehouses = 1
duration = `10s`
} else {
opts.Warehouses = 1
opts.Duration = 10 * time.Second
} else if !opts.ZFS {
c.RemountNoBarrier(ctx)
}

c.Put(ctx, cockroach, "./cockroach", crdbNodes)
c.Put(ctx, workload, "./workload", workloadNode)
c.Start(ctx, crdbNodes)

t.Status("loading fixture")
fixtureWarehouses := -1
for _, w := range []int{1, 10, 100, 1000, 2000, 5000, 10000} {
if w >= warehouses {
if w >= opts.Warehouses {
fixtureWarehouses = w
break
}
}
if fixtureWarehouses == -1 {
t.Fatalf("could not find fixture big enough for %d warehouses", warehouses)
t.Fatalf("could not find fixture big enough for %d warehouses", opts.Warehouses)
}
c.Run(ctx, workloadNode, fmt.Sprintf(
`./workload fixtures load tpcc --warehouses=%d {pgurl:1}`, fixtureWarehouses,
))

t.Status("running tpcc")
func() {
db := c.Conn(ctx, 1)
defer db.Close()
if opts.ZFS {
if err := c.RunE(ctx, c.Node(1), "test -d /mnt/data1/.zfs/snapshot/pristine"); err != nil {
// Use ZFS so the initial store dumps can be instantly rolled back to their
// pristine state. Useful for iterating quickly on the test, especially when
// used in a repro.
c.Reformat(ctx, crdbNodes, "zfs")

t.Status("loading dataset")
c.Start(ctx, crdbNodes)
cmd := fmt.Sprintf(
"./workload fixtures load tpcc --warehouses=%d {pgurl:1}", fixtureWarehouses)
c.Run(ctx, workloadNode, cmd)
c.Stop(ctx, crdbNodes)

c.Run(ctx, crdbNodes, "test -e /sbin/zfs && sudo zfs snapshot data1@pristine")
}
t.Status(`restoring store dumps`)
c.Run(ctx, crdbNodes, "sudo zfs rollback data1@pristine")
c.Start(ctx, crdbNodes)
} else {
c.Start(ctx, crdbNodes)
c.Run(ctx, workloadNode, fmt.Sprintf(
`./workload fixtures load tpcc --warehouses=%d {pgurl:1}`, fixtureWarehouses,
))
}
}()
t.Status("waiting")
m := newMonitor(ctx, c, crdbNodes)
m.Go(func(ctx context.Context) error {
t.WorkerStatus("running tpcc")
cmd := fmt.Sprintf(
"./workload run tpcc --warehouses=%d --duration=%s {pgurl%s}",
warehouses, duration, crdbNodes)
cmd += ` --histograms=logs/stats.json`
for _, extraArg := range extraArgs {
cmd += extraArg
}
"./workload run tpcc --warehouses=%d --histograms=logs/stats.json "+
opts.Extra+" --duration=%s {pgurl:1-%d}",
opts.Warehouses, opts.Duration, c.nodes-1)
c.Run(ctx, workloadNode, cmd)
return nil
})
if opts.Chaos != nil {
chaos := opts.Chaos()
m.Go(chaos.Runner(c, m))
}
m.Wait()
}

Expand All @@ -92,15 +126,47 @@ func registerTPCC(r *registry) {
Stable: false,
Run: func(ctx context.Context, t *test, c *cluster) {
warehouses := 1400
runTPCC(ctx, t, c, warehouses, `120m`)
runTPCC(ctx, t, c, tpccOptions{
Warehouses: warehouses, Duration: 120 * time.Minute,
})
},
})
r.Add(testSpec{
Name: "tpcc-nowait/nodes=3/w=1",
Nodes: nodes(4, cpu(16)),
Stable: false,
Run: func(ctx context.Context, t *test, c *cluster) {
runTPCC(ctx, t, c, 1, `10m`, ` --wait=false`)
runTPCC(ctx, t, c, tpccOptions{
Warehouses: 1,
Duration: 10 * time.Minute,
Extra: "--wait=false",
})
},
})

r.Add(testSpec{
Name: "tpcc/w=100/nodes=3/chaos=true",
Nodes: nodes(4),
Stable: false,
Run: func(ctx context.Context, t *test, c *cluster) {
duration := 30 * time.Minute
runTPCC(ctx, t, c, tpccOptions{
Warehouses: 100,
Duration: duration,
Extra: "--wait=false --tolerate-errors",
Chaos: func() Chaos {
return Chaos{
Timer: Periodic{
Period: 45 * time.Second,
DownTime: 10 * time.Second,
},
Target: func() nodeListOption { return c.Node(1 + rand.Intn(c.nodes-1)) },
Stopper: time.After(duration),
DrainAndQuit: false,
}
},
ZFS: false, // change to true during debugging/development
})
},
})

Expand Down

0 comments on commit 7cda4be

Please sign in to comment.