etcd-io · gyuho · May 25, 2021 · May 22, 2021
diff --git a/tools/benchmark/cmd/txn_mixed.go b/tools/benchmark/cmd/txn_mixed.go
@@ -0,0 +1,152 @@
+// Copyright 2021 The etcd Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package cmd
+
+import (
+	"context"
+	"encoding/binary"
+	"fmt"
+	"math"
+	"math/rand"
+	"os"
+	"time"
+
+	v3 "go.etcd.io/etcd/client/v3"
+	"go.etcd.io/etcd/pkg/v3/report"
+
+	"github.com/spf13/cobra"
+	"golang.org/x/time/rate"
+	"gopkg.in/cheggaaa/pb.v1"
+)
+
+// mixeTxnCmd represents the mixedTxn command
+var mixedTxnCmd = &cobra.Command{
+	Use:   "txn-mixed key [end-range]",
+	Short: "Benchmark a mixed load of txn-put & txn-range.",
+
+	Run: mixedTxnFunc,
+}
+
+var (
+	mixedTxnTotal          int
+	mixedTxnRate           int
+	mixedTxnReadWriteRatio float64
+	mixedTxnRangeLimit     int64
+	mixedTxnEndKey         string
+
+	writeOpsTotal uint64
+	readOpsTotal  uint64
+)
+
+func init() {
+	RootCmd.AddCommand(mixedTxnCmd)
+	mixedTxnCmd.Flags().IntVar(&keySize, "key-size", 8, "Key size of mixed txn")
+	mixedTxnCmd.Flags().IntVar(&valSize, "val-size", 8, "Value size of mixed txn")
+	mixedTxnCmd.Flags().IntVar(&mixedTxnRate, "rate", 0, "Maximum txns per second (0 is no limit)")
+	mixedTxnCmd.Flags().IntVar(&mixedTxnTotal, "total", 10000, "Total number of txn requests")
+	mixedTxnCmd.Flags().StringVar(&mixedTxnEndKey, "end-key", "",
+		"Read operation range end key. By default, we do full range query with the default limit of 1000.")
+	mixedTxnCmd.Flags().Int64Var(&mixedTxnRangeLimit, "limit", 1000, "Read operation range result limit")
+	mixedTxnCmd.Flags().IntVar(&keySpaceSize, "key-space-size", 1, "Maximum possible keys")
+	mixedTxnCmd.Flags().StringVar(&rangeConsistency, "consistency", "l", "Linearizable(l) or Serializable(s)")
+	mixedTxnCmd.Flags().Float64Var(&mixedTxnReadWriteRatio, "rw-ratio", 1, "Read/write ops ratio")
+}
+
+type request struct {
+	isWrite bool
+	op      v3.Op
+}
+
+func mixedTxnFunc(cmd *cobra.Command, args []string) {
+	if keySpaceSize <= 0 {
+		fmt.Fprintf(os.Stderr, "expected positive --key-space-size, got (%v)", keySpaceSize)
+		os.Exit(1)
+	}
+
+	if rangeConsistency == "l" {
+		fmt.Println("bench with linearizable range")
+	} else if rangeConsistency == "s" {
+		fmt.Println("bench with serializable range")
+	} else {
+		fmt.Fprintln(os.Stderr, cmd.Usage())
+		os.Exit(1)
+	}
+
+	requests := make(chan request, totalClients)
+	if mixedTxnRate == 0 {
+		mixedTxnRate = math.MaxInt32
+	}
+	limit := rate.NewLimiter(rate.Limit(mixedTxnRate), 1)
+	clients := mustCreateClients(totalClients, totalConns)
+	k, v := make([]byte, keySize), string(mustRandBytes(valSize))
+
+	bar = pb.New(mixedTxnTotal)
+	bar.Format("Bom !")
+	bar.Start()
+
+	reportRead := newReport()
+	reportWrite := newReport()
+	for i := range clients {
+		wg.Add(1)
+		go func(c *v3.Client) {
+			defer wg.Done()
+			for req := range requests {
+				limit.Wait(context.Background())
+				st := time.Now()
+				_, err := c.Txn(context.TODO()).Then(req.op).Commit()
+				if req.isWrite {
+					reportWrite.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
+				} else {
+					reportRead.Results() <- report.Result{Err: err, Start: st, End: time.Now()}
+				}
+				bar.Increment()
+			}
+		}(clients[i])
+	}
+
+	go func() {
+		for i := 0; i < mixedTxnTotal; i++ {
+			var req request
+			if rand.Float64() < mixedTxnReadWriteRatio/(1+mixedTxnReadWriteRatio) {
+				opts := []v3.OpOption{v3.WithRange(mixedTxnEndKey)}
+				if rangeConsistency == "s" {
+					opts = append(opts, v3.WithSerializable())
+				}
+				opts = append(opts, v3.WithPrefix(), v3.WithLimit(mixedTxnRangeLimit))
+				req.op = v3.OpGet("", opts...)
+				req.isWrite = false
+				readOpsTotal++
+			} else {
+				binary.PutVarint(k, int64(i%keySpaceSize))
+				req.op = v3.OpPut(string(k), v)
+				req.isWrite = true
+				writeOpsTotal++
+			}
+			requests <- req
+		}
+		close(requests)
+	}()
+
+	rcRead := reportRead.Run()
+	rcWrite := reportWrite.Run()
+	wg.Wait()
+	close(reportRead.Results())
+	close(reportWrite.Results())
+	bar.Finish()
+	fmt.Printf("Total Read Ops: %d\nDetails:", readOpsTotal)
+	fmt.Println(<-rcRead)
+	fmt.Printf("Total Write Ops: %d\nDetails:", writeOpsTotal)
+	fmt.Println(<-rcWrite)
+}
diff --git a/tools/rw-heatmaps/README.md b/tools/rw-heatmaps/README.md
@@ -0,0 +1,26 @@
+# etcd/tools/rw-heatmaps
+
+`etcd/tools/rw-heatmaps` is the mixed read/write performance evaluation tool for etcd clusters.
+
+## Execute
+
+### Benchmark
+To get a mixed read/write performance evaluation result:
+```sh
+# run with default configurations and specify the working directory
+./rw-benchmark.sh -w ${WORKING_DIR}
+```
+`rw-benchmark.sh` will automatically use the etcd binary compiled under `etcd/bin/` directory.
+
+Note: the result csv file will be saved to current working directory. The working directory is where etcd database is saved. The working directory is designed for scenarios where a different mounted disk is preferred.
+
+### Plot Graph
+To generate a image based on the benchmark result csv file:
+```sh
+# to generate a image from one data csv file
+./plot_data.py ${FIRST_CSV_FILE} -t ${IMAGE_TITLE} -o ${OUTPUT_IMAGE_NAME}
+
+
+# to generate a image comparing two data csv files
+./plot_data.py ${FIRST_CSV_FILE} ${SECOND_CSV_FILE} -t ${IMAGE_TITLE} -o ${OUTPUT_IMAGE_NAME}
+```
diff --git a/tools/rw-heatmaps/plot_data.py b/tools/rw-heatmaps/plot_data.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python3
+import six
+import sys
+import os
+import argparse
+import logging
+import pandas as pd
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+
+
+logging.basicConfig(format='[%(levelname)s %(asctime)s %(name)s] %(message)s')
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+params = None
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='plot graph using mixed read/write result file.')
+    parser.add_argument('input_file_a', type=str,
+                        help='first input data files in csv format. (required)')
+    parser.add_argument('input_file_b', type=str, nargs='?',
+                        help='second input data files in csv format. (optional)')
+    parser.add_argument('-t', '--title', dest='title', type=str, required=True,
+                        help='plot graph title string')
+    parser.add_argument('-o', '--output-image', dest='output', type=str, required=True,
+                        help='output image filename')
+    return parser.parse_args()
+
+
+def load_data_files(*args):
+    df_list = []
+    try:
+        for i in args:
+            if i is not None:
+                logger.debug('loading csv file {}'.format(i))
+                df_list.append(pd.read_csv(i))
+    except FileNotFoundError as e:
+        logger.error(str(e))
+        sys.exit(1)
+    res = []
+    try:
+        for df in df_list:
+            new_df = df[['ratio', 'conn_size', 'value_size']].copy()
+            tmp = [df[x].str.split(':') for x in ['1', '2', '3', '4', '5']]
+
+            read_df = [x.apply(lambda x: float(x[0])) for x in tmp]
+            read_avg = sum(read_df)/len(read_df)
+            new_df['read'] = read_avg
+
+            write_df = [x.apply(lambda x: float(x[1])) for x in tmp]
+            write_avg = sum(write_df)/len(write_df)
+            new_df['write'] = write_avg
+
+            new_df['ratio'] = new_df['ratio'].astype(float)
+            new_df['conn_size'] = new_df['conn_size'].astype(int)
+            new_df['value_size'] = new_df['value_size'].astype(int)
+            res.append(new_df)
+    except Exception as e:
+        logger.error(str(e))
+        sys.exit(1)
+    return res
+
+
+def plot_data(title, *args):
+    if len(args) == 1:
+        figsize = (12, 16)
+        df0 = args[0]
+        fig = plt.figure(figsize=figsize)
+        count = 0
+        for val, df in df0.groupby('ratio'):
+            count += 1
+            plt.subplot(4, 2, count)
+            plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'])
+            plt.title('R/W Ratio {:.2f}'.format(val))
+            plt.yscale('log', base=2)
+            plt.ylabel('Value Size')
+            plt.xscale('log', base=2)
+            plt.xlabel('Connections Amount')
+            plt.colorbar()
+            plt.tight_layout()
+    elif len(args) == 2:
+        figsize = (12, 26)
+        df0 = args[0]
+        df1 = args[1]
+        fig = plt.figure(figsize=figsize)
+        count = 0
+        delta_df = df1.copy()
+        delta_df[['read', 'write']] = (df1[['read', 'write']] - df0[['read', 'write']])/df0[['read', 'write']]
+        for tmp in [df0, df1, delta_df]:
+            count += 1
+            count2 = count
+            for val, df in tmp.groupby('ratio'):
+                plt.subplot(8, 3, count2)
+                if count2 % 3 == 0:
+                    cmap_name = 'bwr'
+                else:
+                    cmap_name = 'viridis'
+                plt.tripcolor(df['conn_size'], df['value_size'], df['read'] + df['write'], cmap=plt.get_cmap(cmap_name))
+                if count2 == 1:
+                    plt.title('{}\nR/W Ratio {:.2f}'.format(os.path.basename(params.input_file_a), val))
+                elif count2 == 2:
+                    plt.title('{}\nR/W Ratio {:.2f}'.format(os.path.basename(params.input_file_b), val))
+                elif count2 == 3:
+                    plt.title('Delta\nR/W Ratio {:.2f}'.format(val))
+                else:
+                    plt.title('R/W Ratio {:.2f}'.format(val))
+                plt.yscale('log', base=2)
+                plt.ylabel('Value Size')
+                plt.xscale('log', base=2)
+                plt.xlabel('Connections Amount')
+                plt.colorbar()
+                plt.tight_layout()
+                count2 += 3
+    else:
+        raise Exception('invalid plot input data')
+    fig.suptitle(title)
+    fig.subplots_adjust(top=0.95)
+    plt.savefig(params.output)
+
+
+def plot_data_3d(df, title):
+    fig = plt.figure(figsize=(10, 10))
+    ax = fig.add_subplot(projection='3d')
+    ax.scatter(df['conn_size'], df['value_size'], 1/(1+1/df['ratio']), c=df['read'] + df['write'])
+    ax.set_title('{}'.format(title))
+    ax.set_zlabel('R/W Ratio')
+    ax.set_ylabel('Value Size')
+    ax.set_xlabel('Connections Amount')
+    plt.show()
+
+
+def main():
+    global params
+    logging.basicConfig()
+    params = parse_args()
+    result = load_data_files(params.input_file_a, params.input_file_b)
+    plot_data(params.title, *result)
+
+
+if __name__ == '__main__':
+    main()