Skip to content

Commit

Permalink
Add script for dumping and restoring table
Browse files Browse the repository at this point in the history
  • Loading branch information
hellais committed Feb 4, 2024
1 parent 6ede85e commit 60accae
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 0 deletions.
45 changes: 45 additions & 0 deletions scripts/dump-tables-ch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash
# Dump the schema and the data of a fixed list of ClickHouse tables.
#
# This is to be run manually on the clickhouse host to dump schemas and table
# dumps. You may want to make some tweaks to the dumping rules in order to
# avoid dumping too much data (eg. fastpath).
# You should then scp the data over to the target host manually, by running:
# $ scp * clickhouse-instance2:/var/lib/clickhouse/ooni-dumps/
#
# Output (one pair of files per table, written to ./dumps):
#   <YYYYMMDD>-<table>_schema.sql        output of SHOW CREATE TABLE
#   <YYYYMMDD>-<table>_dump.clickhouse   table rows in ClickHouse Native format
#
# Every table is attempted even when an earlier one fails. Failures are
# reported on stderr and the script exits with status 1 if any step failed.

# Treat the use of an unset variable as an error instead of an empty string.
set -u

TABLES=(
  "fastpath"
  "jsonl"
  "url_priorities"
  "citizenlab"
  "citizenlab_flip"
  "test_groups"
  "accounts"
  "session_expunge"
  "msmt_feedback"
  "fingerprints_dns"
  "fingerprints_http"
  "asnmeta"
  "counters_test_list"
  "counters_asn_test_list"
  "incidents"
  "oonirun"
)

dump_dir="./dumps"
current_date=$(date +%Y%m%d)
failed=0

# Directory to store the dumps; nothing can be written without it.
if ! mkdir -p "$dump_dir"; then
  echo "[!] cannot create dump directory $dump_dir" >&2
  exit 1
fi

# Iterate over each table
for table in "${TABLES[@]}"; do
  # Define file names for schema and data dump
  schema_file="${dump_dir}/${current_date}-${table}_schema.sql"
  data_file="${dump_dir}/${current_date}-${table}_dump.clickhouse"

  # Dump the table schema
  echo "[+] dumping schema $schema_file"
  if ! clickhouse-client --query="SHOW CREATE TABLE ${table} FORMAT TabSeparatedRaw" > "$schema_file"; then
    echo "[!] failed to dump schema of ${table}" >&2
    # The redirection has already created/truncated the file; do not leave an
    # empty or partial schema around for the restore step to pick up.
    rm -f -- "$schema_file"
    failed=1
  fi

  # Dump the table data in ClickHouse native format.
  # NOTE: per the ClickHouse docs, INTO OUTFILE fails when the target file
  # already exists, so re-running on the same day requires removing old dumps.
  echo "[+] dumping table data $data_file"
  if ! clickhouse-client --query="SELECT * FROM ${table} INTO OUTFILE '${data_file}' FORMAT Native"; then
    echo "[!] failed to dump data of ${table}" >&2
    failed=1
  fi
done

exit "$failed"
34 changes: 34 additions & 0 deletions scripts/restore-dumps.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Restore schema and sampled dumps to a running clickhouse instance.
#
# This script is to be run manually with the CWD set to the directory that
# contains the dumps and schema files generated by `dump-tables-ch.sh`.
# See dump-tables-ch.sh for instructions on its usage.
#
# Optional environment variables (defaults match the original hard-coded
# values):
#   DUMP_TS        date prefix of the `<ts>-<table>_dump.clickhouse` files
#   FASTPATH_DUMP  gzip-compressed Native dump to load into `fastpath`
#
# Every step is attempted even when an earlier one fails. Failures are
# reported on stderr and the script exits with status 1 if any step failed.

# -u: an unset variable is an error; pipefail: a failing `gzip` is not hidden
# by a succeeding `clickhouse-client` at the end of the pipeline.
set -uo pipefail
# An unmatched glob expands to nothing instead of the literal pattern.
shopt -s nullglob

dump_ts="${DUMP_TS:-20240202}"
fastpath_dump="${FASTPATH_DUMP:-20240109T1314-fastpath.clickhouse.gz}"
failed=0

# Feed every schema file found in the CWD to the server.
schema_files=(*schema.sql)
if (( ${#schema_files[@]} == 0 )); then
  echo "[!] no *schema.sql files found in $PWD" >&2
  failed=1
fi
for schema_file in "${schema_files[@]}"; do
  if ! clickhouse-client < "$schema_file"; then
    echo "[!] failed to apply schema $schema_file" >&2
    failed=1
  fi
done

TABLES=(
  "jsonl"
  "url_priorities"
  "citizenlab"
  "citizenlab_flip"
  "test_groups"
  "accounts"
  "session_expunge"
  "msmt_feedback"
  "fingerprints_dns"
  "fingerprints_http"
  "asnmeta"
  "counters_test_list"
  "counters_asn_test_list"
  "incidents"
  "oonirun"
)

# Load each table's Native dump; `fastpath` is handled separately below.
for table in "${TABLES[@]}"; do
  echo "Restoring ${table}"
  dump_file="${dump_ts}-${table}_dump.clickhouse"
  if [[ ! -r "$dump_file" ]]; then
    echo "[!] missing dump file $dump_file" >&2
    failed=1
    continue
  fi
  if ! clickhouse-client --query="INSERT INTO ${table} FORMAT Native" < "$dump_file"; then
    echo "[!] failed to restore ${table}" >&2
    failed=1
  fi
done

echo "Restoring fastpath"
# The fastpath dump is a separate, gzip-compressed file, so it is streamed
# through gzip rather than read directly.
if [[ ! -r "$fastpath_dump" ]]; then
  echo "[!] missing dump file $fastpath_dump" >&2
  failed=1
elif ! gzip -cd -- "$fastpath_dump" | clickhouse-client --query="INSERT INTO fastpath FORMAT Native"; then
  echo "[!] failed to restore fastpath" >&2
  failed=1
fi

exit "$failed"

0 comments on commit 60accae

Please sign in to comment.