Skip to content

Commit

Permalink
[tools] add clickbench tools (apache#11009)
Browse files Browse the repository at this point in the history
* [tools] add clickbench tools

Co-authored-by: stephen <hello-stephen@qq.com>
  • Loading branch information
2 people authored and whutpencil committed Jul 29, 2022
1 parent 8573976 commit 4edc95b
Show file tree
Hide file tree
Showing 7 changed files with 623 additions and 0 deletions.
33 changes: 33 additions & 0 deletions tools/clickbench-tools/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

# Usage

**These scripts run the [ClickBench](https://benchmark.clickhouse.com/) benchmark against Apache Doris. For more information, see the [ClickBench repository](https://github.com/ClickHouse/ClickBench).**

## Follow the steps below

### 1. create table
./create-clickbench-table.sh

### 2. load data
./load-clickbench-data.sh

### 3. run queries
./run-clickbench-queries.sh
33 changes: 33 additions & 0 deletions tools/clickbench-tools/conf/doris-cluster.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Host of any one FE (frontend) node. The scripts connect to it with the mysql
# client and send stream-load requests to its HTTP port.
export FE_HOST='127.0.0.1'
# Host of a BE (backend) node. load-clickbench-data.sh reads its /varz page.
export BE_HOST='127.0.0.1'
# http_port in fe.conf
export FE_HTTP_PORT=8030
# webserver_port in be.conf
export BE_WEBSERVER_PORT=8040
# query_port in fe.conf
export FE_QUERY_PORT=9030
# Doris username
export USER='root'
# Doris password for USER (empty by default)
export PASSWORD=''
# Name of the database that holds the ClickBench table
export DB='clickbench'
98 changes: 98 additions & 0 deletions tools/clickbench-tools/create-clickbench-table.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

##############################################################
# This script is used to create ClickBench table.
##############################################################

# Abort on any command error and on a failure anywhere in a pipeline.
set -eo pipefail

# Absolute path of the directory holding this script, so that conf/ and sql/
# are found regardless of the caller's working directory.
ROOT="$(dirname "$0")"
ROOT="$(cd "${ROOT}"; pwd)"
CURDIR="${ROOT}"

# Print the help text for this script on stdout and terminate with status 1.
usage() {
  printf '\n%s\n%s\n%s\n\n' \
    "This script is used to create ClickBench table," \
    "will use mysql client to connect Doris server which is specified in conf/doris-cluster.conf file." \
    "Usage: $0"
  exit 1
}

# Parse the command line. The only recognised option is -h (print usage).
# 'h' must be declared in the short-option string; with an empty string getopt
# rejects -h and the -h arm below can never be reached.
OPTS=$(getopt \
  -n "$0" \
  -o 'h' \
  -- "$@")

# Replace the positional parameters with getopt's normalised form, which
# always ends the option list with "--".
eval set -- "$OPTS"
HELP=0

while true; do
  case "$1" in
  -h)
    HELP=1
    shift
    ;;
  --)
    shift
    break
    ;;
  *)
    echo "Internal error"
    exit 1
    ;;
  esac
done

if [[ ${HELP} -eq 1 ]]; then
  usage
  exit
fi

# Verify that a required tool can be executed; terminate the script otherwise.
# Arguments:
#   $1 - probe command line to run (e.g. "mysql --version")
#   $2 - tool name shown in the error message
check_prerequest() {
  local probe=$1
  local tool=$2
  # Left unquoted on purpose: the probe is a command plus its arguments.
  if $probe; then
    return 0
  fi
  echo "$tool is missing. This script depends on mysql to create tables in Doris."
  exit 1
}

# Make sure the mysql client can be executed before going any further.
check_prerequest "mysql --version" "mysql"

# Load the connection settings (FE_HOST, FE_QUERY_PORT, USER, PASSWORD, DB).
# shellcheck source=/dev/null
source "${CURDIR}/conf/doris-cluster.conf"

# Hand the configured password to the mysql client through the environment:
# without it every connection below fails as soon as PASSWORD is non-empty,
# and passing it on the command line would expose it in the process list.
export MYSQL_PWD="${PASSWORD}"

echo "FE_HOST: ${FE_HOST}"
echo "FE_QUERY_PORT: ${FE_QUERY_PORT}"
echo "USER: ${USER}"
# Never print the real password; only show whether one is configured.
echo "PASSWORD: ${PASSWORD:+******}"
echo "DB: ${DB}"

# Create the database, create the table from the bundled DDL, then print the
# resulting table definition.
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -e "CREATE DATABASE IF NOT EXISTS ${DB}"
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" <"${CURDIR}/sql/create-clickbench-table.sql"
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "show create table hits;"

echo "DONE."
140 changes: 140 additions & 0 deletions tools/clickbench-tools/load-clickbench-data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

##############################################################
# This script is used to load clickbench data into Doris
##############################################################

# Abort on any command error and on a failure anywhere in a pipeline.
set -eo pipefail

# Absolute path of the directory holding this script, so that conf/ is found
# regardless of the caller's working directory.
ROOT="$(dirname "$0")"
ROOT="$(cd "${ROOT}"; pwd)"
CURDIR="${ROOT}"
# Directory the hits_split* data files are downloaded to and loaded from.
DATA_DIR="${CURDIR}/"
# DATA_DIR=/mnt/disk1/stephen/data/clickbench

# Print the help text for this script on stdout and terminate with status 1.
usage() {
  printf '\n%s\n%s\n%s\n\n' \
    "This script is used to load ClickBench data," \
    "will use mysql client to connect Doris server which is specified in conf/doris-cluster.conf file." \
    "Usage: $0"
  exit 1
}

# Parse the command line. The only recognised option is -h (print usage).
# A single short-option string is passed, and "$0" is quoted so that a script
# path containing spaces is handed to getopt as one word.
OPTS=$(getopt \
  -n "$0" \
  -o 'h' \
  -- "$@")
# Replace the positional parameters with getopt's normalised form, which
# always ends the option list with "--".
eval set -- "$OPTS"

HELP=0
while true; do
  case "$1" in
  -h)
    HELP=1
    shift
    ;;
  --)
    shift
    break
    ;;
  *)
    echo "Internal error"
    exit 1
    ;;
  esac
done

if [[ ${HELP} -eq 1 ]]; then
  usage
  exit
fi

# Verify that a required tool can be executed; terminate the script otherwise.
# Arguments:
#   $1 - probe command line to run (e.g. "curl --version")
#   $2 - tool name; named in the error message so the report matches the tool
#        that is actually missing
# Outputs: the probe's own output; the error message goes to stderr
# Exits with status 1 when the probe fails.
check_prerequest() {
  local CMD=$1
  local NAME=$2
  # Left unquoted on purpose: CMD is a command plus its arguments.
  if ! $CMD; then
    echo "$NAME is missing. This script depends on $NAME to load data to Doris." >&2
    exit 1
  fi
}

# All three tools are used below: mysql and curl for the configuration check,
# wget for the download and curl for the stream load.
check_prerequest "mysql --version" "mysql"
check_prerequest "curl --version" "curl"
check_prerequest "wget --version" "wget"

# Load the connection settings (FE_HOST, FE_HTTP_PORT, USER, PASSWORD, DB, ...).
# shellcheck source=/dev/null
source "${CURDIR}/conf/doris-cluster.conf"

echo "FE_HOST: ${FE_HOST}"
echo "FE_HTTP_PORT: ${FE_HTTP_PORT}"
echo "USER: ${USER}"
# Never print the real password; only show whether one is configured.
echo "PASSWORD: ${PASSWORD:+******}"
echo "DB: ${DB}"

#######################################
# Advisory check of two Doris settings that matter for large stream loads.
# It only prints advice and never aborts the load: a lookup that fails or
# yields a non-numeric value is reported on stderr and skipped.
# Globals (read): FE_HOST, FE_QUERY_PORT, USER, PASSWORD,
#                 BE_HOST, BE_WEBSERVER_PORT
# Outputs: advice on stdout, "could not read" warnings on stderr
# Returns: 0 always
#######################################
function check_doirs_conf() {
  local cv

  # `|| true`: the script runs under `set -eo pipefail`, so without it a failing
  # mysql or a grep without a match would terminate the whole script here.
  # The password is passed via MYSQL_PWD so a non-empty PASSWORD works.
  cv=$(MYSQL_PWD="${PASSWORD:-}" mysql -h"${FE_HOST}" -P"${FE_QUERY_PORT}" -u"${USER}" \
    -e 'admin show frontend config' |
    grep 'stream_load_default_timeout_second' | awk '{print $2}') || true
  cv=${cv//[$' \t\r']/}
  if [[ "${cv}" =~ ^[0-9]+$ ]]; then
    # 10# forces base 10 so a value with leading zeros is not read as octal.
    if ((10#${cv} < 3600)); then
      echo "advise: revise your Doris FE's conf to set 'stream_load_default_timeout_second=3600' or above"
    fi
  else
    echo "warning: could not read 'stream_load_default_timeout_second' from FE ${FE_HOST}:${FE_QUERY_PORT}, skip this check" >&2
  fi

  cv=$(curl "${BE_HOST}:${BE_WEBSERVER_PORT}/varz" 2>/dev/null |
    grep 'streaming_load_max_mb' | awk -F'=' '{print $2}') || true
  cv=${cv//[$' \t\r']/}
  if [[ "${cv}" =~ ^[0-9]+$ ]]; then
    if ((10#${cv} < 16000)); then
      echo "advise: revise your Doris BE's conf to set 'streaming_load_max_mb=16000' or above and 'flush_thread_num_per_store=5' to speed up load."
    fi
  else
    echo "warning: could not read 'streaming_load_max_mb' from BE ${BE_HOST}:${BE_WEBSERVER_PORT}, skip this check" >&2
  fi

  return 0
}

#######################################
# Download any missing ClickBench split files, then stream-load all ten of
# them into the Doris table `hits`.
# Globals (read): DATA_DIR, USER, PASSWORD, FE_HOST, FE_HTTP_PORT, DB
# Outputs: progress messages and the curl responses on stdout
# Returns: 1 if any download fails (nothing is loaded in that case)
#######################################
function load() {
  local i pid
  local failed=0
  local -a pids=()

  echo "(1/2) prepare clickbench data file"
  # NOTE: only the presence of a file is checked, so a partial file left by an
  # interrupted download is treated as already downloaded.
  for i in {0..9}; do
    if [[ ! -f "${DATA_DIR}/hits_split${i}" ]]; then
      echo "will download hits_split${i} to $DATA_DIR"
      # Each download runs in its own subshell so the caller's working
      # directory is never changed; the PID is kept to check the result.
      (cd "${DATA_DIR}" && wget --continue "https://doris-test-data.oss-cn-hongkong.aliyuncs.com/ClickBench/hits_split${i}") &
      pids+=("$!")
      # wget --continue "https://doris-test-data.oss-cn-hongkong-internal.aliyuncs.com/ClickBench/hits_split${i}" &
    fi
  done

  echo "wait for download task done..."
  # A bare `wait` always succeeds, and `set -e` ignores failing background
  # jobs, so every download is waited for individually.
  for pid in "${pids[@]}"; do
    wait "${pid}" || failed=1
  done
  if ((failed)); then
    echo "failed to download one or more hits_split files to $DATA_DIR" >&2
    return 1
  fi

  echo "(2/2) load clickbench data file $DATA_DIR/hits_split[0-9] into Doris"
  for i in {0..9}; do
    printf '\nstart loading hits_split%s\n' "${i}"
    curl --location-trusted \
      -u "${USER}:${PASSWORD}" \
      -T "${DATA_DIR}/hits_split${i}" \
      -H "columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \
      "http://${FE_HOST}:${FE_HTTP_PORT}/api/${DB}/hits/_stream_load"
  done
}

# Run the configuration check followed by the load, and report how many
# wall-clock seconds the two steps took together.
printf '%s\n' "start..."
start="$(date '+%s')"
check_doirs_conf
load
end="$(date '+%s')"
echo "load cost time: $((end - start)) seconds"
Loading

0 comments on commit 4edc95b

Please sign in to comment.