#!/bin/sh
# Use the Phoenix bulk loader to load data.
# Data needs to have been generated using generate_data.sh prior to this step.
PSQL=/usr/hdp/current/phoenix-client/bin/psql.py
CLIENT=/usr/hdp/current/phoenix-client/phoenix-client.jar
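# Note: the paths above assume an HDP layout ("/usr/hdp/current"); adjust
# PSQL and CLIENT to match your distribution if they differ.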
usage() {
  echo "Usage: load_data.sh scale_factor zookeeper_quorum [temp_directory]"
  echo "Ex: load_data.sh 10 sandbox.hortonworks.com:2181:/hbase-unsecure"
  exit 1
}
SCALE=$1
ZOOKEEPER=$2
DIR=$3
if [ X"$SCALE" = "X" ]; then
usage
fi
if [ X"$ZOOKEEPER" = "X" ]; then
usage
fi
if [ X"$DIR" = "X" ]; then
DIR=/tmp/phoenix-generate
fi
# Create the tables in Phoenix.
echo "Creating tables in Phoenix"
${PSQL} ${ZOOKEEPER} ddl/CreateTables.sql
# Pre-split STORE_SALES 128 ways.
echo "Pre-splitting STORE_SALES"
# Ensure the table does not keep deleted records.
cat ddl/disableKeepDeleted | hbase shell
# Load some data so we can split the table.
${PSQL} -t STORE_SALES ${ZOOKEEPER} ddl/50kRecords.csv
# Run the split and balancer
cat ddl/presplit | hbase shell
# Delete the data we added
${PSQL} -t STORE_SALES ${ZOOKEEPER} ddl/DeleteFromStoreSales.sql
# Run a major compaction
cat ddl/majorCompactStoreSales | hbase shell
# Bulk load the tables.
HADOOP_COMPAT=/usr/hdp/current/hbase-client/lib/hbase-hadoop-compat.jar
HADOOP2_COMPAT=/usr/hdp/current/hbase-client/lib/hbase-hadoop2-compat.jar
export LIBJARS=$HADOOP_COMPAT,$HADOOP2_COMPAT
export HADOOP_CLASSPATH=/etc/hbase/conf:/usr/hdp/current/hbase-client/lib/hbase-protocol.jar
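# The compat jars and hbase-protocol.jar must be on the job classpath, or
# the bulk load MapReduce job may fail with class-loading errors.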
TABLES="store_sales"
for t in $TABLES; do
  echo "Loading $t"
  if ! hdfs dfs -ls "${DIR}/${SCALE}/${t}" > /dev/null; then
    echo "Data for $t has not been generated; run generate_data.sh first"
    exit 1
  fi
  UCT=$(echo "$t" | tr '[:lower:]' '[:upper:]')
  INPUT=${DIR}/${SCALE}/${t}/${t}
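  # CsvBulkLoadTool flags: -g (--ignore-errors) skips malformed input rows,
  # and -d '|' matches the pipe delimiter emitted by the data generator.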
  hadoop jar ${CLIENT} \
    org.apache.phoenix.mapreduce.CsvBulkLoadTool \
    -libjars ${LIBJARS} \
    --table ${UCT} \
    --input ${INPUT} \
    --zookeeper=${ZOOKEEPER} \
    -g \
    -d '|'
done
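# Optional sanity check after the loop (count.sql is hypothetical; it would
# contain "SELECT COUNT(*) FROM STORE_SALES;"):
#   ${PSQL} ${ZOOKEEPER} count.sql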
# Load the date dimension. A special workaround is needed here because DATE
# values are expected to carry a time component.
hadoop fs -copyFromLocal data/date_dim.txt /tmp
hadoop jar ${CLIENT} \
  org.apache.phoenix.mapreduce.CsvBulkLoadTool \
  -libjars ${LIBJARS} \
  --table DATE_DIM \
  --input /tmp/date_dim.txt \
  --zookeeper=${ZOOKEEPER} \
  -g \
  -d '|'
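# Optionally remove the staged copy once the load has succeeded:
#   hadoop fs -rm /tmp/date_dim.txt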