-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsetup_etl_web_server.sh
66 lines (51 loc) · 2.43 KB
/
setup_etl_web_server.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env bash
# Stop at the first command that exits with a non-zero status.
set -o errexit

# Ensure the directory for processed data exists (no error if it already does).
processed_dir="data/processed"
mkdir -p -- "$processed_dir"
# ------------------------------------------------------------------------
# Launch etl_pipeline.py in the background unless it is already running.
#
# The launch is delayed until the start of the next wall-clock minute so the
# pipeline lines up with the data source refresh schedule. While waiting, a
# spinner and the current timestamp are redrawn on a single console line.
#
# Outputs: progress messages on stdout; the failure message on stderr.
# Files:   the pipeline's stdout and stderr are appended to output.log.
# Exit:    non-zero if the launched process is gone after the grace period.

# Regex handed to `pgrep -f`; the dot is escaped so that it only matches a
# literal "." rather than any character.
etl_pattern='etl_pipeline\.py'

# Run pgrep once and reuse its output: a second lookup could come back empty
# if the process exits in between, and under `set -e` that failed command
# substitution would abort the script.
if running_pids=$(pgrep -f "$etl_pattern"); then
  # pgrep prints one PID per line; join them with spaces so the suggested
  # kill command stays valid when several processes match.
  pid=${running_pids//$'\n'/ }
  printf 'ETL pipeline is already running. Skipping the startup command. \nYou can kill the process by executing: kill %s\n' "$pid"
else
  # ----------------------------------------------------------------------
  # Seconds remaining until the next minute boundary (always 1..60).
  current_seconds=$(date +%s)
  next_minute_seconds=$(( (current_seconds / 60 + 1) * 60 ))
  delay_seconds=$(( next_minute_seconds - current_seconds ))

  # Announce how long we will wait and why.
  timestamp=$(date +"%Y-%m-%d %H:%M:%S")
  printf '%s\n' "[$timestamp] Sleeping for $delay_seconds seconds before starting the ETL pipeline to sync with the data source refresh schedule."

  # Frames of the rolling cursor animation.
  cursor_chars=("◐" "◓" "◑" "◒")
  frame_count=${#cursor_chars[@]}

  # Each frame is shown for an equal share of the wait. Integer division
  # yields 0 when the wait is shorter than the number of frames, which would
  # make the index computation below divide by zero, so clamp to one second.
  frame_duration=$(( delay_seconds / frame_count ))
  if (( frame_duration < 1 )); then
    frame_duration=1
  fi

  # Wait out the delay one second at a time, redrawing the spinner and the
  # timestamp in place (the trailing \r returns to the start of the line).
  for (( i = 0; i < delay_seconds; i++ )); do
    animation_index=$(( i / frame_duration % frame_count ))
    timestamp=$(date +"%Y-%m-%d %H:%M:%S")
    printf '[%s] %s\r' "$timestamp" "${cursor_chars[animation_index]}"
    sleep 1
  done

  # Start the pipeline detached from the terminal at reduced priority,
  # unbuffered (-u), appending stdout and stderr to output.log.
  nohup nice -n 10 python3 -u etl_pipeline.py >> output.log 2>&1 &
  # $! is the PID of the job just launched. Using it avoids a pgrep lookup,
  # which would abort the script under `set -e` when nothing matches and
  # could return unrelated or multiple PIDs.
  pid=$!

  echo "Waiting for a few seconds to allow the process to start..."
  # Grace period so an immediate crash is detected below.
  sleep 5

  # `kill -0` sends no signal; it only reports whether the PID still exists.
  if kill -0 "$pid" 2>/dev/null; then
    printf 'ETL pipeline is running. PID: %s \nYou can kill the process by executing: kill %s\n' "$pid" "$pid"
    # Resource snapshot. Best effort: the process may exit between the check
    # above and this call, and that must not abort the script.
    ps -p "$pid" -o user,pid,ppid,ni,time,state,start,%cpu,%mem || true
  else
    echo "ETL pipeline failed to start." >&2
    exit 1
  fi
fi