-
Notifications
You must be signed in to change notification settings - Fork 448
/
insert_sample_data
executable file
·118 lines (98 loc) · 2.64 KB
/
insert_sample_data
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env bash
# Interactive helper that fetches sample datasets and loads them with
# ./bin/heavysql.
set -e

# Defaults; each one can be preset through the environment.
MAPD_TCP_PORT=${MAPD_TCP_PORT:=6274}
DATA_URL=${DATA_URL:="https://data.mapd.com"}
ALLOW_DOWNLOADS=${ALLOW_DOWNLOADS:=true}

# Pick the command prefix used to fetch a URL into the current directory.
# GETTER is expanded unquoted at its call sites so that it splits into the
# command and its options.
if hash wget 2>/dev/null; then
  GETTER="wget --continue"
elif hash curl 2>/dev/null; then
  # Use the documented option name (--continue-at -) rather than relying on
  # curl resolving the abbreviation "--continue".
  GETTER="curl --continue-at - --remote-name --location"
else
  # No downloader available: only print what the user has to fetch by hand.
  GETTER="echo Please download: "
fi
#######################################
# Fetch an archive into SAMPLE_PATH and unpack it there.
# Globals:   SAMPLE_PATH (read) - directory to work in
#            GETTER (read)      - command prefix used to fetch the URL
#            DATA_URL (read)    - base URL the archive name is appended to
# Arguments: $1 - archive file name
# Outputs:   progress messages plus the output of GETTER, tar, pushd, popd
# Returns:   non-zero if SAMPLE_PATH cannot be entered; otherwise the status
#            of the final popd (under `set -e` a failing step aborts earlier)
#######################################
download_and_extract_file() {
  # Quoted so that a sample path containing spaces is still one argument.
  pushd "$SAMPLE_PATH" || return 1
  echo "- downloading and extracting $1"
  # GETTER is intentionally unquoted: it holds a command plus its options.
  $GETTER "$DATA_URL/$1"
  tar xvf "$1"
  popd
}
#######################################
# Parse the command-line options.
# Recognised options:
#   --port VALUE   sets MAPD_TCP_PORT
#   --url VALUE    sets DATA_URL
#   --path VALUE   sets SAMPLE_PATH
#   --data VALUE   sets MAPD_DATA
#   --no-download  sets ALLOW_DOWNLOADS=false
# Parsing stops silently at the first argument that is not one of these.
# Globals:   MAPD_TCP_PORT, DATA_URL, SAMPLE_PATH, MAPD_DATA,
#            ALLOW_DOWNLOADS (written)
# Arguments: the script's arguments
# Outputs:   an error message on stderr when an option lacks its value
# Returns:   0 on success, 1 when a value-taking option has no value
#######################################
parse_args() {
  while (( $# )); do
    # A value-taking option given last used to store an empty value and then
    # abort on the trailing shift without any message; report it instead.
    case "$1" in
      --port|--url|--path|--data)
        if (( $# < 2 )); then
          printf 'Option %s requires a value\n' "$1" >&2
          return 1
        fi
        ;;
    esac
    case "$1" in
      --port)
        MAPD_TCP_PORT=$2
        shift
        ;;
      --url)
        DATA_URL=$2
        shift
        ;;
      --path)
        SAMPLE_PATH=$2
        shift
        ;;
      --no-download)
        ALLOW_DOWNLOADS=false
        ;;
      --data)
        MAPD_DATA=$2
        shift
        ;;
      *)
        break
        ;;
    esac
    shift
  done
}
parse_args "$@" || exit 1
# Use ./storage under the current directory when no data directory was given.
if [ -z "${MAPD_DATA}" ]; then
  MAPD_DATA=${MAPD_DATA:="$PWD/storage"}
  echo "Using default storage directory: \"$MAPD_DATA\" if file path is not whitelisted use '--data /path_to_server_data_directory'"
fi
SAMPLE_PATH=${SAMPLE_PATH:="$MAPD_DATA/import/sample_datasets"}

# Create the sample directory, falling back to a fresh temporary directory
# when it cannot be created or is not writable. The mkdir must only run inside
# the `if` condition: a bare `VAR=$(mkdir ...)` beforehand would abort the
# script under `set -e` before this fallback could be reached.
if ! mkdir -p "$SAMPLE_PATH" || [ ! -w "$SAMPLE_PATH" ]; then
  SAMPLE_PATH2=$(mktemp -d)
  echo "Cannot write sample data to: $SAMPLE_PATH"
  echo "Saving instead to: $SAMPLE_PATH2"
  echo
  SAMPLE_PATH=$SAMPLE_PATH2
  mkdir -p "$SAMPLE_PATH"
fi

# With downloads disabled the getter only echoes what would be used.
if [ "$ALLOW_DOWNLOADS" = false ]; then
  GETTER="echo Using: "
fi

# Fetch a fresh manifest; the old copy is removed first.
if [ "$ALLOW_DOWNLOADS" = true ]; then
  pushd "$SAMPLE_PATH"
  rm -f manifest.tsv
  # GETTER is intentionally unquoted: it holds a command plus its options.
  $GETTER "$DATA_URL/manifest.tsv"
  popd
fi
# Read the manifest: one dataset per line with tab-separated fields
# (name, size, table name, file name). Entries are numbered from 1.
# The `|| [ -n "$name" ]` keeps a final line that lacks a trailing newline.
counter=1
while IFS=$'\t' read -r name size tablename filename || [ -n "$name" ]; do
  names[$counter]=$name
  sizes[$counter]=$size
  tables[$counter]=$tablename
  files[$counter]=$filename
  counter=$((counter+1))
done < "$SAMPLE_PATH/manifest.tsv"

# Show the menu as an aligned table.
echo "Enter dataset number to download, or 'q' to quit:"
table=" # | Dataset | Rows | Table Name | File Name"
for key in "${!files[@]}"; do
  table="$table
$key) | ${names[$key]} | ${sizes[$key]} | ${tables[$key]} | ${files[$key]}"
done
column -t -s'|' <(echo "$table")

# Read the selection as an array of whitespace-separated tokens. An empty
# answer selects every dataset; iterating the array (instead of expanding a
# bare $idxs, which yields only element 0) makes that actually load all of
# them.
read -r -a idxs
if [ "${#idxs[@]}" -eq 0 ]; then
  idxs=("${!files[@]}")
fi

for idx in "${idxs[@]}"; do
  # Honour the advertised 'q' explicitly.
  if [ "$idx" = q ]; then
    break
  fi
  # Array subscripts are evaluated arithmetically, so only plain digits may
  # reach them.
  case "$idx" in
    ''|*[!0-9]*)
      echo "Skipping invalid selection: $idx" >&2
      continue
      ;;
  esac
  # Force base 10 so input such as "08" is not parsed as octal.
  idx=$((10#$idx))
  if [ -n "${files[$idx]}" ]; then
    filename="${files[$idx]}"
    download_and_extract_file "$filename"
    # Directory name is the archive name up to its first dot.
    filebase="${filename%%.*}"
    echo "- adding schema"
    ./bin/heavysql heavyai -u admin -p HyperInteractive --port "$MAPD_TCP_PORT" < "$SAMPLE_PATH/$filebase"/*.sql
    table=${tables[$idx]}
    # Only the glob is left unquoted so paths with spaces stay intact.
    for csv in "$SAMPLE_PATH/$filebase"/*csv; do
      echo "- inserting file: $csv"
      echo "copy $table from '${csv}' with (quoted='true');" | ./bin/heavysql heavyai -u admin -p HyperInteractive --port "$MAPD_TCP_PORT"
    done
  fi
done