Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tools: add mixed read-write performance evaluation scripts #13038

Merged
merged 1 commit into from
May 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 152 additions & 0 deletions tools/benchmark/cmd/txn_mixed.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
// Copyright 2021 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"context"
"encoding/binary"
"fmt"
"math"
"math/rand"
"os"
"time"

v3 "go.etcd.io/etcd/client/v3"
"go.etcd.io/etcd/pkg/v3/report"

"github.com/spf13/cobra"
"golang.org/x/time/rate"
"gopkg.in/cheggaaa/pb.v1"
)

// mixedTxnCmd represents the txn-mixed command.
//
// The command takes no positional arguments: mixedTxnFunc ignores args, and
// the read range, rate, totals and read/write ratio are all configured through
// the flags registered in init. The usage line therefore shows only the
// command name.
var mixedTxnCmd = &cobra.Command{
	Use:   "txn-mixed",
	Short: "Benchmark a mixed load of txn-put & txn-range.",

	Run: mixedTxnFunc,
}

// Settings of the txn-mixed benchmark; they are bound to command-line flags
// in init.
var (
	// mixedTxnTotal is the total number of txn requests to issue and
	// mixedTxnRate the maximum number of txns per second (0 means no limit).
	mixedTxnTotal, mixedTxnRate int

	// mixedTxnReadWriteRatio is the read/write operations ratio.
	mixedTxnReadWriteRatio float64

	// mixedTxnRangeLimit is the result limit applied to each read operation.
	mixedTxnRangeLimit int64

	// mixedTxnEndKey is the range end key used by read operations.
	mixedTxnEndKey string
)

// Counters of generated operations, incremented by the request generator in
// mixedTxnFunc and printed once the benchmark finishes.
var (
	writeOpsTotal, readOpsTotal uint64
)

// init attaches the txn-mixed command to the root command and declares the
// flags that configure it.
func init() {
	RootCmd.AddCommand(mixedTxnCmd)

	fs := mixedTxnCmd.Flags()

	// Size of the generated keys and values.
	fs.IntVar(&keySize, "key-size", 8, "Key size of mixed txn")
	fs.IntVar(&valSize, "val-size", 8, "Value size of mixed txn")

	// Load volume.
	fs.IntVar(&mixedTxnRate, "rate", 0, "Maximum txns per second (0 is no limit)")
	fs.IntVar(&mixedTxnTotal, "total", 10000, "Total number of txn requests")

	// Shape of the read operations.
	fs.StringVar(&mixedTxnEndKey, "end-key", "",
		"Read operation range end key. By default, we do full range query with the default limit of 1000.")
	fs.Int64Var(&mixedTxnRangeLimit, "limit", 1000, "Read operation range result limit")

	// Key space written to, read consistency and the read/write mix.
	fs.IntVar(&keySpaceSize, "key-space-size", 1, "Maximum possible keys")
	fs.StringVar(&rangeConsistency, "consistency", "l", "Linearizable(l) or Serializable(s)")
	fs.Float64Var(&mixedTxnReadWriteRatio, "rw-ratio", 1, "Read/write ops ratio")
}

// request is one unit of benchmark work handed from the request generator to
// the client goroutines in mixedTxnFunc.
type request struct {
	// isWrite is true for a put and false for a range read; it selects which
	// report (write or read) receives the result.
	isWrite bool

	// op is the single operation executed inside the transaction.
	op v3.Op
}

// mixedTxnFunc runs the txn-mixed benchmark.
//
// It issues mixedTxnTotal transactions, each wrapping exactly one operation:
// either a range read or a put. The kind is chosen at random per request so
// that a fraction ratio/(1+ratio) of the requests are reads, where ratio is
// --rw-ratio. Results are collected in two separate reports (reads and writes)
// which are printed, together with the operation counts, when all requests
// have completed.
//
// Invalid flag values (non-positive --key-space-size, a negative or non-finite
// --rw-ratio, an unknown --consistency) terminate the process with exit
// status 1. The positional args are not used.
func mixedTxnFunc(cmd *cobra.Command, args []string) {
	if keySpaceSize <= 0 {
		fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)\n", keySpaceSize)
		os.Exit(1)
	}

	// ratio/(1+ratio) is only a probability for a finite, non-negative ratio.
	if mixedTxnReadWriteRatio < 0 || math.IsNaN(mixedTxnReadWriteRatio) || math.IsInf(mixedTxnReadWriteRatio, 0) {
		fmt.Fprintf(os.Stderr, "expected non-negative finite --rw-ratio, got (%v)\n", mixedTxnReadWriteRatio)
		os.Exit(1)
	}

	switch rangeConsistency {
	case "l":
		fmt.Println("bench with linearizable range")
	case "s":
		fmt.Println("bench with serializable range")
	default:
		fmt.Fprintf(os.Stderr, "expected --consistency to be l or s, got (%v)\n", rangeConsistency)
		// Usage writes the usage text itself; its return value is an error,
		// so it must not be printed unconditionally (that would emit "<nil>").
		if err := cmd.Usage(); err != nil {
			fmt.Fprintln(os.Stderr, err)
		}
		os.Exit(1)
	}

	requests := make(chan request, totalClients)
	if mixedTxnRate == 0 {
		// 0 means "no limit": use a rate high enough to never throttle.
		mixedTxnRate = math.MaxInt32
	}
	limit := rate.NewLimiter(rate.Limit(mixedTxnRate), 1)
	clients := mustCreateClients(totalClients, totalConns)
	k, v := make([]byte, keySize), string(mustRandBytes(valSize))

	bar = pb.New(mixedTxnTotal)
	bar.Format("Bom !")
	bar.Start()

	reportRead := newReport()
	reportWrite := newReport()
	for i := range clients {
		wg.Add(1)
		go func(c *v3.Client) {
			defer wg.Done()
			for req := range requests {
				// The error is ignored: the context is never cancelled and
				// each call asks for a single token, which the burst of 1
				// allows.
				limit.Wait(context.Background())
				st := time.Now()
				_, err := c.Txn(context.TODO()).Then(req.op).Commit()
				res := report.Result{Err: err, Start: st, End: time.Now()}
				if req.isWrite {
					reportWrite.Results() <- res
				} else {
					reportRead.Results() <- res
				}
				bar.Increment()
			}
		}(clients[i])
	}

	// Both the read probability and the read options are the same for every
	// request, so compute them once.
	readFraction := mixedTxnReadWriteRatio / (1 + mixedTxnReadWriteRatio)
	// NOTE(review): WithPrefix is applied after WithRange and appears to set
	// the range end itself, which would override --end-key — confirm against
	// the clientv3 OpOption documentation before relying on --end-key.
	readOpts := []v3.OpOption{v3.WithRange(mixedTxnEndKey)}
	if rangeConsistency == "s" {
		readOpts = append(readOpts, v3.WithSerializable())
	}
	readOpts = append(readOpts, v3.WithPrefix(), v3.WithLimit(mixedTxnRangeLimit))

	go func() {
		for i := 0; i < mixedTxnTotal; i++ {
			var req request
			if rand.Float64() < readFraction {
				req.op = v3.OpGet("", readOpts...)
				req.isWrite = false
				readOpsTotal++
			} else {
				// PutVarint only writes the bytes of the encoding, so clear
				// the reused buffer first; otherwise stale trailing bytes from
				// a longer earlier encoding leak into the key and the number
				// of distinct keys can exceed --key-space-size.
				for j := range k {
					k[j] = 0
				}
				binary.PutVarint(k, int64(i%keySpaceSize))
				req.op = v3.OpPut(string(k), v)
				req.isWrite = true
				writeOpsTotal++
			}
			requests <- req
		}
		// Closing the channel lets the client goroutines drain and exit.
		close(requests)
	}()

	rcRead := reportRead.Run()
	rcWrite := reportWrite.Run()
	wg.Wait()
	close(reportRead.Results())
	close(reportWrite.Results())
	bar.Finish()
	fmt.Printf("Total Read Ops: %d\nDetails:", readOpsTotal)
	fmt.Println(<-rcRead)
	fmt.Printf("Total Write Ops: %d\nDetails:", writeOpsTotal)
	fmt.Println(<-rcWrite)
}
26 changes: 26 additions & 0 deletions tools/rw-heatmaps/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# etcd/tools/rw-heatmaps

`etcd/tools/rw-heatmaps` is the mixed read/write performance evaluation tool for etcd clusters.

## Execute

### Benchmark
To get a mixed read/write performance evaluation result:
```sh
# run with default configurations and specify the working directory
./rw-benchmark.sh -w ${WORKING_DIR}
```
`rw-benchmark.sh` will automatically use the etcd binary compiled under the `etcd/bin/` directory.

Note: the result CSV file is saved to the current working directory. The working directory given with `-w` is where the etcd database is stored; this option exists for scenarios where a different mounted disk is preferred.

### Plot Graph
To generate an image based on the benchmark result CSV file:
```sh
# to generate an image from one data csv file
./plot_data.py ${FIRST_CSV_FILE} -t ${IMAGE_TITLE} -o ${OUTPUT_IMAGE_NAME}


# to generate an image comparing two data csv files
./plot_data.py ${FIRST_CSV_FILE} ${SECOND_CSV_FILE} -t ${IMAGE_TITLE} -o ${OUTPUT_IMAGE_NAME}
```
143 changes: 143 additions & 0 deletions tools/rw-heatmaps/plot_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/usr/bin/env python3
import six
import sys
import os
import argparse
import logging
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D


# Configure root logging at import time so every record is printed with its
# level, timestamp and logger name.
logging.basicConfig(format='[%(levelname)s %(asctime)s %(name)s] %(message)s')
# Module logger. At INFO level the logger.debug() calls in this file are
# suppressed.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Parsed command-line arguments; assigned in main() and read by plot_data().
params = None


def parse_args():
    """Parse the command line of the plotting script.

    Positional arguments are one required CSV file and one optional second
    CSV file; ``-t/--title`` and ``-o/--output-image`` are both mandatory.

    Returns:
        argparse.Namespace with ``input_file_a``, ``input_file_b`` (``None``
        when omitted), ``title`` and ``output``.
    """
    cli = argparse.ArgumentParser(description='plot graph using mixed read/write result file.')

    # Positional inputs: the second file is optional (nargs='?').
    cli.add_argument('input_file_a', type=str,
                     help='first input data files in csv format. (required)')
    cli.add_argument('input_file_b', type=str, nargs='?',
                     help='second input data files in csv format. (optional)')

    # Mandatory string options, declared as (short flag, long flag, dest, help).
    required_options = (
        ('-t', '--title', 'title', 'plot graph title string'),
        ('-o', '--output-image', 'output', 'output image filename'),
    )
    for short_flag, long_flag, dest, help_text in required_options:
        cli.add_argument(short_flag, long_flag, dest=dest, type=str, required=True,
                         help=help_text)

    namespace = cli.parse_args()
    return namespace


def load_data_files(*args):
    """Load benchmark CSV files and reduce each to averaged read/write columns.

    Every non-``None`` argument is read with ``pandas.read_csv``. Each file is
    expected to have ``ratio``, ``conn_size`` and ``value_size`` columns plus
    run columns named ``'1'`` .. ``'5'`` whose cells are ``"<read>:<write>"``
    strings. The read and write parts are averaged across the five runs.

    Args:
        *args: CSV file paths; ``None`` entries are skipped.

    Returns:
        A list with one DataFrame per loaded file, holding the columns
        ``ratio`` (float), ``conn_size`` (int), ``value_size`` (int), ``read``
        and ``write``.

    Exits the process with status 1 (after logging the error) when a file is
    missing or when any error occurs while transforming the data.
    """
    run_columns = ['1', '2', '3', '4', '5']

    def mean_of_part(split_columns, position):
        # Pick one side of every "read:write" pair and average it over the runs.
        per_run = [column.apply(lambda parts: float(parts[position]))
                   for column in split_columns]
        return sum(per_run)/len(per_run)

    loaded = []
    try:
        for path in args:
            if path is None:
                continue
            logger.debug('loading csv file {}'.format(path))
            loaded.append(pd.read_csv(path))
    except FileNotFoundError as e:
        logger.error(str(e))
        sys.exit(1)

    res = []
    try:
        for raw in loaded:
            summary = raw[['ratio', 'conn_size', 'value_size']].copy()
            split_columns = [raw[name].str.split(':') for name in run_columns]

            summary['read'] = mean_of_part(split_columns, 0)
            summary['write'] = mean_of_part(split_columns, 1)

            # Normalise the key columns to numeric types.
            summary['ratio'] = summary['ratio'].astype(float)
            summary['conn_size'] = summary['conn_size'].astype(int)
            summary['value_size'] = summary['value_size'].astype(int)
            res.append(summary)
    except Exception as e:
        logger.error(str(e))
        sys.exit(1)
    return res


def _decorate_panel():
    """Apply the shared axis scales, labels and colorbar to the current subplot."""
    plt.yscale('log', base=2)
    plt.ylabel('Value Size')
    plt.xscale('log', base=2)
    plt.xlabel('Connections Amount')
    plt.colorbar()
    plt.tight_layout()


def plot_data(title, *args):
    """Render heatmaps of total (read + write) throughput and save the figure.

    With one DataFrame, one panel per R/W ratio is drawn on a 4x2 grid. With
    two DataFrames, an 8x3 grid is drawn: the first column shows the first
    data set, the second column the second data set and the third column the
    relative difference ``(second - first) / first`` of the read and write
    columns. The image is written to ``params.output``; in the two-input case
    the column headings use the base names of ``params.input_file_a`` and
    ``params.input_file_b``.

    Args:
        title: figure title.
        *args: one or two DataFrames as produced by ``load_data_files``.

    Raises:
        Exception: when the number of DataFrames is neither one nor two.
    """
    n_inputs = len(args)
    if n_inputs not in (1, 2):
        raise Exception('invalid plot input data')

    if n_inputs == 1:
        (only,) = args
        fig = plt.figure(figsize=(12, 16))
        for slot, (val, df) in enumerate(only.groupby('ratio'), start=1):
            plt.subplot(4, 2, slot)
            plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'])
            plt.title('R/W Ratio {:.2f}'.format(val))
            _decorate_panel()
    else:
        first, second = args
        fig = plt.figure(figsize=(12, 26))
        delta = second.copy()
        delta[['read', 'write']] = (second[['read', 'write']] - first[['read', 'write']])/first[['read', 'write']]
        # Panels are numbered row-major on the 8x3 grid, so a data set shown in
        # column c (1-based) occupies slots c, c + 3, c + 6, ...
        for column, frame in enumerate((first, second, delta), start=1):
            # Only the delta column (the third one) uses the diverging colormap.
            cmap_name = 'bwr' if column == 3 else 'viridis'
            for row, (val, df) in enumerate(frame.groupby('ratio')):
                plt.subplot(8, 3, column + 3 * row)
                plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'],
                              cmap=plt.get_cmap(cmap_name))
                # Only the top panel of each column carries the column heading.
                if row > 0:
                    plt.title('R/W Ratio {:.2f}'.format(val))
                elif column == 1:
                    plt.title('{}\nR/W Ratio {:.2f}'.format(os.path.basename(params.input_file_a), val))
                elif column == 2:
                    plt.title('{}\nR/W Ratio {:.2f}'.format(os.path.basename(params.input_file_b), val))
                else:
                    plt.title('Delta\nR/W Ratio {:.2f}'.format(val))
                _decorate_panel()

    fig.suptitle(title)
    fig.subplots_adjust(top=0.95)
    plt.savefig(params.output)


def plot_data_3d(df, title):
    """Show an interactive 3D scatter plot of one benchmark data set.

    Points are placed at (connections, value size, 1/(1 + 1/ratio)) and
    coloured by the sum of the ``read`` and ``write`` columns.

    Args:
        df: DataFrame with ``conn_size``, ``value_size``, ``ratio``, ``read``
            and ``write`` columns.
        title: axes title.
    """
    figure = plt.figure(figsize=(10, 10))
    axes = figure.add_subplot(projection='3d')

    # The z coordinate is derived from the ratio as 1/(1 + 1/ratio).
    z_values = 1/(1+1/df['ratio'])
    total_throughput = df['read'] + df['write']
    axes.scatter(df['conn_size'], df['value_size'], z_values, c=total_throughput)

    axes.set_title('{}'.format(title))
    axes.set_zlabel('R/W Ratio')
    axes.set_ylabel('Value Size')
    axes.set_xlabel('Connections Amount')
    plt.show()


def main():
    """Script entry point: parse arguments, load the CSV data and plot it.

    The parsed arguments are stored in the module-level ``params`` because
    ``plot_data`` reads the input file names and the output path from it.
    """
    global params
    logging.basicConfig()
    params = parse_args()
    frames = load_data_files(params.input_file_a, params.input_file_b)
    plot_data(params.title, *frames)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Loading