#!/bin/bash
set -euo pipefail
# Script that runs YCSB on all loader instances
#
# Supported arguments:
#
# * Runs the write-throughput benchmark (unconditionally, right now).
# * isolation: set to "forbid_rmw" for writes without LWT (the default),
#   or to "always_use_lwt" (with LWT).
# * aws: if empty (default), benchmark the Scylla servers.
#   If "aws" is set, we test the real DynamoDB in that Amazon
#   region (e.g., "aws=us-east-1").
# * wcu: cap the aggregate write rate at wcu; each YCSB process is
#   given a target rate of wcu divided by the number of processes.
# * fieldcount, fieldlength: control the size of each item, written as
#   fieldcount attributes of fieldlength bytes each. The defaults are
#   10 and 256.
# * time: how much time (in seconds) to run the test. Defaults to 300.
# * recordcount: the number of keys in each process's key range.
#   Defaults to 1000000.
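#
# Example invocation (the values here are illustrative only):
#
#   isolation=always_use_lwt wcu=100000 time=600 ./bench.sh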
isolation="${isolation:-forbid_rmw}"
aws="${aws:-}"
wcu="${wcu:-}"
fieldcount="${fieldcount:-10}"
fieldlength="${fieldlength:-256}"
time="${time:-300}"
recordcount="${recordcount:-1000000}"
# YCSB binary path on the loader machines
YCSB_DIR='/opt/scylla/ycsb-dynamodb-binding-0.18.0-fixed'
YCSB=${YCSB_DIR}'/bin/ycsb.sh'
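# "terraform output -json" is expected to print a JSON array of IP
# strings (e.g., ["10.0.0.1","10.0.0.2"]; addresses illustrative).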
loaders_json=$(terraform output -json loader_public_ips)
readarray -t loaders < <(jq -r '.[]' <<<"$loaders_json")
declare -p loaders
TMPDIR=/tmp/bench-$$
mkdir ${TMPDIR}
echo "Keeping stats in ${TMPDIR}"
SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
nodes_json=$(terraform output -json cluster_private_ips)
readarray -t nodes < <(jq -r '.[]' <<<"$nodes_json")
declare -p nodes
# Number of YCSB loaders to use per loader/server combination.
# In other words, each loader will run MULT*nservers ycsb processes, and each
# server will be loaded by MULT*nloaders ycsb processes.
# TODO: it is unclear why, with e.g. fieldcount=1, MULT=2 takes a very
# long time to produce higher results than MULT=1.
MULT=${MULT:-1}
if test -n "$wcu"
then
nycsb=$((${#loaders[@]} * ${#nodes[@]} * MULT))
target="-target $((wcu/nycsb))"
else
target=
fi
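# Example of the arithmetic above (hypothetical numbers): with 2 loaders,
# 3 nodes and MULT=2, nycsb is 2*3*2 = 12 YCSB processes, and wcu=12000
# yields "-target 1000", i.e., 1000 ops/sec per process, 12000 aggregate.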
for loader in "${loaders[@]}"
do
# Configure bogus AWS credentials; the YCSB DynamoDB client requires
# credentials to be configured even for a non-AWS endpoint.
${SCRIPTPATH}/ec2-ssh $loader aws configure set aws_access_key_id 123123
${SCRIPTPATH}/ec2-ssh $loader aws configure set aws_secret_access_key 123123
done
# ${SCRIPTPATH}/ec2-ssh ${loaders[0]} -- aws dynamodb create-table \
# --endpoint-url http://${nodes[0]}:8000 \
# --table-name usertable \
# --attribute-definitions \
# AttributeName=p,AttributeType=S \
# --key-schema \
# AttributeName=p,KeyType=HASH \
# --billing-mode PAY_PER_REQUEST \
# --tags Key=system:write_isolation,Value=$isolation || true
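# Note: the YCSB DynamoDB binding does not create the table by itself,
# so the "usertable" table is assumed to already exist (e.g., created
# with a command like the one commented out above).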
typeset -i i=0
for loader in "${loaders[@]}"
do
for node in "${nodes[@]}"
do
# case $aws in
# "") # Scylla:
# endpoint=http://$node:8000
# ;;
# *) # AWS:
# endpoint=http://dynamodb.$aws.amazonaws.com
# ;;
# esac
endpoint=http://$node:8000
for mult in $(seq "$MULT")
do
let ++i
# For description of the following options, see
# https://github.com/brianfrankcooper/YCSB/blob/master/dynamodb/conf/dynamodb.properties
# NOTE: YCSB currently has two modes: "HASH", where each partition
# holds a single row, and "HASH_AND_RANGE", where a *single*
# partition holds all the items. The HASH_AND_RANGE mode is
# worthless for benchmarking because it has a single hot
# partition.
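# The insertstart parameter below gives each YCSB process its own key
# range (process i starts at i*recordcount), so the processes should
# not write overlapping keys.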
${SCRIPTPATH}/ec2-ssh $loader "chmod +x $YCSB"
${SCRIPTPATH}/ec2-ssh $loader "$YCSB run dynamodb -P $YCSB_DIR/workloads/workloada -threads 100 \
${target} \
-p recordcount=${recordcount} \
-p insertstart=$((i*${recordcount})) \
-p requestdistribution=uniform \
-p fieldcount=${fieldcount} \
-p fieldlength=${fieldlength} \
-p readproportion=0 \
-p updateproportion=0 \
-p scanproportion=0 \
-p insertproportion=1 \
-p maxexecutiontime=${time} \
-p operationcount=999999999 \
-p measurementtype=hdrhistogram \
-p dynamodb.endpoint=${endpoint} \
-p dynamodb.connectMax=200 \
-p dynamodb.consistentReads=true \
-p dynamodb.primaryKey=p \
-p dynamodb.primaryKeyType=HASH \
-s" > $TMPDIR/$loader-$node-$mult 2>&1 &
done
done
done
# Wait for all the loaders started above to finish.
# TODO: catch interrupt and kill all the loaders.
wait
# Add the average (over the entire run) of the throughput of each loader.
# For the result to make sense, the loaders should all start and stop at
# roughly the same time, and the run must be long enough:
# * "time" of 300 (5 minutes) seems to give around 95% of the throughput
# * "time" of 600 (10 minutes) seems to give around 98% of the throughput
# A more accurate approach would be to calculate the total throughput at
# each second and then graph (and average) those, but that's more complicated.
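# Each per-process log is expected to end with a line such as (value
# illustrative):
#   [OVERALL], Throughput(ops/sec), 4321.0
# so summing the third field across all logs gives the aggregate rate.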
grep -Fh '[OVERALL], Throughput(ops/sec)' "$TMPDIR"/* |
awk '{sum+=$3} END {print sum}'