catchup.sh (forked from satoshipay/stellar-core-parallel-catchup)
#!/bin/bash
# Hacky parallel catchup
# see also https://github.com/stellar/docs/blob/3d060c0f1afb2eaff8a4076f673a8688d36e4aa5/software/known-issues.md
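#
# Splits the ledger range [LEDGER_MIN, LEDGER_MAX] into chunks of CHUNK_SIZE ledgers,
# catches up each chunk in its own docker-compose project (in parallel, WORKERS at a time),
# and then merges the per-chunk databases and local history archives into a single result
# database (compose project catchup-result) and the ./history-result directory.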
set -eu
set -o pipefail
if [ "$#" -ne 5 ]; then
echo "Usage: ./catchup.sh DOCKER_COMPOSE_FILE LEDGER_MIN LEDGER_MAX CHUNK_SIZE WORKERS"
exit 1
fi
DOCKER_COMPOSE_FILE=$1
LEDGER_MIN=$2
LEDGER_MAX=$3
CHUNK_SIZE=$4
WORKERS=$5
# temporary files, job queue, and locks
PREFIX=$(mktemp -u -t catchup-XXXX)
JOB_QUEUE=${PREFIX}-job-queue
JOB_QUEUE_LOCK=${PREFIX}-job-queue-lock
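# the FIFO acts as the job queue; the lock file ensures only one worker reads a job line at a time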
touch $JOB_QUEUE_LOCK
mkfifo $JOB_QUEUE
cleanup() {
  rm $JOB_QUEUE
  rm $JOB_QUEUE_LOCK
}
trap cleanup 0
log() {
  echo "$(date +'%Y-%m-%d %H:%M:%S') $1"
}
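# run-catchup-job JOB_ID LEDGER_MIN LEDGER_MAX
# Brings up an isolated postgres + stellar-core stack for this job, initializes a fresh
# database and a local history archive, catches up the given ledger range, publishes the
# history, and shuts the stack down again (its volumes are kept for the merge step).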
run-catchup-job() {
  JOB_ID=$1
  CATCHUP_LEDGER_MIN=$2
  CATCHUP_LEDGER_MAX=$3
  JOB_LOG_FILE=${PREFIX}-job-${JOB_ID}.log

  docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-job-${JOB_ID} up -d stellar-core-postgres
  # give postgres time to start accepting connections
  sleep 30

  # log all output (stdout and stderr) of this job to its own log file
  docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-job-${JOB_ID} run stellar-core stellar-core new-db --conf /stellar-core.cfg > $JOB_LOG_FILE 2>&1
  docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-job-${JOB_ID} run stellar-core stellar-core new-hist local --conf /stellar-core.cfg >> $JOB_LOG_FILE 2>&1
  docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-job-${JOB_ID} run stellar-core stellar-core catchup $CATCHUP_LEDGER_MAX/$(($CATCHUP_LEDGER_MAX - $CATCHUP_LEDGER_MIN)) --conf /stellar-core.cfg >> $JOB_LOG_FILE 2>&1
  docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-job-${JOB_ID} run stellar-core stellar-core publish --conf /stellar-core.cfg >> $JOB_LOG_FILE 2>&1

  # free up resources (RAM, networks), volumes are retained
  docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-job-${JOB_ID} down

  touch ${PREFIX}-job-${JOB_ID}-finished
}
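# worker WORKER_ID
# Opens the job queue (fd 201) and its lock (fd 202), then repeatedly takes the next job
# line from the queue (the flock ensures only one worker reads at a time) and runs the
# catchup for that chunk, until the producer closes the queue.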
worker() {
  WORKER=$1
  exec 201<$JOB_QUEUE
  exec 202<$JOB_QUEUE_LOCK
  touch ${PREFIX}-worker-$WORKER-started
  log "Worker $WORKER: started."
  while true; do
    flock 202
    read -u 201 JOB_ID JOB_LEDGER_MIN JOB_LEDGER_MAX || { log "Worker $WORKER: finished."; exit 0; }
    flock -u 202
    log "Worker $WORKER: starting job $JOB_ID (ledgers ${JOB_LEDGER_MIN}–${JOB_LEDGER_MAX})."
    run-catchup-job $JOB_ID $JOB_LEDGER_MIN $JOB_LEDGER_MAX
    log "Worker $WORKER: finished job $JOB_ID (ledgers ${JOB_LEDGER_MIN}–${JOB_LEDGER_MAX})."
  done
}
# start workers
for WORKER in $(seq 1 $WORKERS); do
  worker $WORKER &
done
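# number of chunks; add one more job if the range is not an exact multiple of CHUNK_SIZE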
MAX_JOB_ID=$(( ( LEDGER_MAX - LEDGER_MIN ) / CHUNK_SIZE ))
if [ "$(( LEDGER_MIN - 1 + MAX_JOB_ID * CHUNK_SIZE ))" -lt "$LEDGER_MAX" ]; then
MAX_JOB_ID=$(( MAX_JOB_ID + 1 ))
fi
log "Running $MAX_JOB_ID jobs with $WORKERS workers"
# job producer
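# writes one "JOB_ID JOB_LEDGER_MIN JOB_LEDGER_MAX" line per chunk into the job queue once
# all workers are listening; runs in the background alongside the merge loop below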
{
  exec 201>$JOB_QUEUE
  log "wait for workers"
  # wait for workers to start
  for WORKER in $(seq 1 $WORKERS); do
    while [ ! -f ${PREFIX}-worker-$WORKER-started ]; do
      sleep 1
    done
  done
  # produce jobs
  for JOB_ID in $(seq 1 $MAX_JOB_ID); do
    JOB_LEDGER_MIN=$(( LEDGER_MIN + (JOB_ID - 1) * CHUNK_SIZE ))
    JOB_LEDGER_MAX=$(( LEDGER_MIN - 1 + JOB_ID * CHUNK_SIZE ))
    if [ "$JOB_LEDGER_MAX" -ge "$LEDGER_MAX" ]; then
      JOB_LEDGER_MAX=$LEDGER_MAX
    fi
    echo "$JOB_ID $JOB_LEDGER_MIN $JOB_LEDGER_MAX" >&201
  done
  exec 201>&-
} &
# merge results
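# The result database receives the ledger chain of every job in ascending order; before a
# job is appended, the previous-hash of its first ledger is checked against the last ledger
# already in the result database.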
log "Starting result database..."
docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-result up -d stellar-core-postgres
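# give the result database time to start accepting connections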
sleep 60
docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-result run stellar-core stellar-core new-db --conf /stellar-core.cfg
# wipe data to prevent conflicts with job 1
for TABLE in ledgerheaders txhistory txfeehistory upgradehistory; do
  docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-result exec -T stellar-core-postgres \
    psql stellar-core postgres -c "DELETE FROM $TABLE"
done
for JOB_ID in $(seq 1 $MAX_JOB_ID); do
  log "Waiting for job $JOB_ID..."
  while [ ! -f ${PREFIX}-job-${JOB_ID}-finished ]; do
    sleep 10
  done
  rm -f ${PREFIX}-job-${JOB_ID}-finished

  log "Job $JOB_ID finished, recreating database container..."
  docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-job-${JOB_ID} up -d stellar-core-postgres
  sleep 15

  JOB_LEDGER_MIN=$(( LEDGER_MIN + (JOB_ID - 1) * CHUNK_SIZE ))
  JOB_LEDGER_MAX=$(( LEDGER_MIN - 1 + JOB_ID * CHUNK_SIZE ))
  if [ "$JOB_LEDGER_MAX" -ge "$LEDGER_MAX" ]; then
    JOB_LEDGER_MAX=$LEDGER_MAX
  fi

  if [ "$JOB_ID" != "1" ]; then
    log "Match last hash of result data with previous hash of the first ledger of job $JOB_ID"
    LAST_RESULT_LEDGER=$(( JOB_LEDGER_MIN - 1 ))
    LAST_RESULT_HASH=$(docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-result exec stellar-core-postgres psql -t stellar-core postgres -c "SELECT ledgerhash FROM ledgerheaders WHERE ledgerseq = $LAST_RESULT_LEDGER")
    PREVIOUS_JOB_HASH=$(docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-job-${JOB_ID} exec stellar-core-postgres psql -t stellar-core postgres -c "SELECT prevhash FROM ledgerheaders WHERE ledgerseq = $JOB_LEDGER_MIN")
    if [ "$LAST_RESULT_HASH" != "$PREVIOUS_JOB_HASH" ]; then
      log "Last result hash $LAST_RESULT_HASH (ledger $LAST_RESULT_LEDGER) does not match previous hash $PREVIOUS_JOB_HASH of first ledger of job $JOB_ID (ledger $JOB_LEDGER_MIN)"
      exit 1
    fi
  fi

  log "Merging database of job $JOB_ID in result database..."
  for TABLE in ledgerheaders txhistory txfeehistory upgradehistory; do
    docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-job-${JOB_ID} exec -T stellar-core-postgres \
      psql stellar-core postgres -c "COPY (SELECT * FROM $TABLE WHERE ledgerseq >= $JOB_LEDGER_MIN AND ledgerseq <= $JOB_LEDGER_MAX) TO STDOUT WITH (FORMAT BINARY)" |
      docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-result exec -T stellar-core-postgres \
        psql stellar-core postgres -c "COPY $TABLE FROM STDIN WITH (FORMAT BINARY)"
  done

  if [ "$JOB_ID" = "$MAX_JOB_ID" ]; then
    log "Copy state from job $JOB_ID to result database..."
    for TABLE in accountdata accounts ban offers peers publishqueue pubsub quoruminfo scphistory scpquorums storestate trustlines; do
      # wipe existing data
      docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-result exec -T stellar-core-postgres \
        psql stellar-core postgres -c "DELETE FROM $TABLE"
      # copy state
      docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-job-${JOB_ID} exec -T stellar-core-postgres \
        psql stellar-core postgres -c "COPY $TABLE TO STDOUT WITH (FORMAT BINARY)" |
        docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-result exec -T stellar-core-postgres \
          psql stellar-core postgres -c "COPY $TABLE FROM STDIN WITH (FORMAT BINARY)"
    done
  fi

  log "Merging history of job $JOB_ID..."
  docker container create --name catchup-job-${JOB_ID} -v catchup-job-${JOB_ID}_core-data:/data hello-world
  docker cp catchup-job-${JOB_ID}:/data/history ./history-${JOB_ID}
  if [ "$JOB_ID" = "$MAX_JOB_ID" ]; then
    log "Copy all data from job ${JOB_ID}..."
    docker cp catchup-job-${JOB_ID}:/data ./data-result
  fi
  docker rm catchup-job-${JOB_ID}

  rsync -a ./history-${JOB_ID}/ ./history-result/
  rm -rf ./history-${JOB_ID}

  # clean up job containers and volumes
  docker-compose -f $DOCKER_COMPOSE_FILE -p catchup-job-${JOB_ID} down -v
done
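# wait for the job producer and all workers to exit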
wait
log "Done"