forked from rfmiotto/parallel-remote-rsync
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parallel_transfer
111 lines (92 loc) · 4.24 KB
/
parallel_transfer
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/bin/bash
: '
This bash script automates the data transfer from a remote server to
the local host. The transfer is performed in parallel, since the network
transfer rate is usually the bottleneck.
DEPENDENCIES: GNU parallel and rsync
Before using this script, make sure you have a SSH passwordless login.
INSTRUCTIONS:
1) Generate authentication keys on your local machine:
$ ssh-keygen
2) Copy public key to the remote server:
$ ssh-copy-id -i ~/.ssh/id_rsa.pub UserName@RemoteServer
(ssh-copy-id connects to the remote machine and installs your public key by
adding it to the authorized_keys file.)
3) Add a private key to the authentication agent on the local server:
$ ssh-add
Author: Renato Fuzaro Miotto Date: Aug 04 2019
'
################################################################################
# INPUTS #
################################################################################
remote_serv="user@server.edu"                     # SSH destination of the remote server
remote_dir="/home/miotto/teste_script"            # directory on the remote server to copy from
local_dir="/home/miotto/Desktop/TESTE_BASH/local" # directory on this machine to copy into
# File-name patterns that won't be transferred. They are quoted so that the
# local shell keeps them as patterns instead of expanding them against
# whatever happens to be in the current working directory.
ignore_files=('*.o' '*.mod' '*.a' '*.h' '*.cgns')
nrstfiles=3              # number of last rstfiles to be transferred
nthreads=4               # number of parallel processes
deleteFromRemote=false   # if true, rsync is run with --remove-source-files
simulationIsRunning=true # if true, ignore the last modified file
compress=false           # set as true only if your bandwidth is really bad. Compression takes time.
oldest_1st=false         # set as true to transfer data ranked by the oldest to the newest (use `false` otherwise)
################################################################################
#######################################
# Add a slash at the end of a directory path, in case it does not have one.
# Arguments: $1 - directory path
# Outputs:   the path on stdout, followed by "/" unless its last character
#            already is "/". An empty argument therefore becomes "/".
#######################################
function adjust_format () {
  local arg=$1
  if [[ ${arg: -1} != '/' ]]; then
    arg+=/
  fi
  # Quoted printf keeps the path byte-for-byte: no word splitting (runs of
  # spaces are preserved) and no pathname expansion of glob characters.
  printf '%s\n' "$arg"
}
# Make sure both directories end with "/": the prefix stripping below and the
# "<remote_dir>./<file>" rsync source built later depend on it.
remote_dir=$(adjust_format "$remote_dir")
local_dir=$(adjust_format "$local_dir")

# Ignore all restart files for now and add only the desired ones later.
# The pattern is quoted so the local shell does not expand it against the
# current working directory.
ignore_files+=('rstfile.*')

# rstfiles=($(ssh -q $remote_serv find $remote_dir -type f -name "rstfile.*" | sort -r | head -$nrstfiles))
# List the remote restart files sorted by modification time, newest first
# (ls -t), and keep the first $nrstfiles of them. ssh joins its arguments into
# one string that the remote shell parses again, so the -name pattern carries
# its own single quotes to stay a pattern on the remote side. Lines are read
# one by one so that paths are neither word-split nor glob-expanded locally.
rstfiles=()
while IFS= read -r rst_path; do
  rstfiles+=("$rst_path")
done < <(ssh -q "$remote_serv" find "$remote_dir" -type f -name "'rstfile.*'" -exec ls -t {} + \
  | head -n "$nrstfiles")

# Relative path: strip the leading remote directory. Quoting $remote_dir
# inside the expansion makes it a literal prefix rather than a glob pattern.
rel_rstfiles=("${rstfiles[@]##"$remote_dir"}")
# If the simulation is running, we don't want to transfer a file that is being
# written. The most recently modified .cgns file on the remote side is looked
# up and its file name is added to the ignore list.
# The variable must be expanded ("$simulationIsRunning"); comparing the bare
# word would always be false and silently disable this protection.
if [[ "$simulationIsRunning" == true ]]; then
  # The remote find prints "<mtime-epoch> <path>" per file; sorting numerically
  # and keeping the last line selects the newest one, and cut drops the
  # timestamp. Both the -name pattern and the -printf format carry their own
  # quotes because the remote shell parses the command line a second time.
  lastModified=$(ssh -q "$remote_serv" find "$remote_dir" -type f -name "'*.cgns'" -printf '"%T@ %p\n"' \
    | sort -n | tail -n 1 | cut -d' ' -f2-)
  lastModified=${lastModified##*/} # get only the filename
  # Nothing to ignore when no .cgns file exists on the remote side.
  if [[ -n "$lastModified" ]]; then
    ignore_files+=("$lastModified")
  fi
fi
# Create an array to be expanded in the find command that is run over ssh.
# Every entry of ignore_files becomes one `! -name "<pattern>"` test, so that
# the formats passed in the input section of this script are skipped by find.
# The double quotes are literal characters on purpose: they are meant for the
# remote shell, which parses the command line again.
prefix='! -name '
quoted_patterns=()
for pattern in "${ignore_files[@]}"; do
  # Built element by element so that a pattern is never word-split or
  # glob-expanded by the local shell.
  quoted_patterns+=("\"${pattern}\"")
done
ignore_files=("${quoted_patterns[@]}")    # each element now wrapped in double quotes
ignore=("${ignore_files[@]/#/${prefix}}") # include prefix

# Flag passed to ls to order the files by last modified time:
# -tr lists the oldest first, -t the newest first.
if [[ "$oldest_1st" == true ]]; then
  flags='-tr'
else
  flags='-t'
fi
# Search for all files but those whose formats were ignored, ordered by
# modification time according to $flags.
# selected_files=($(ssh -q $remote_serv find $remote_dir -type f ${ignore[@]} | sort))
# ssh joins all of its arguments with spaces before handing them to the remote
# shell, so passing the ignore tests as quoted elements produces the same
# remote command while protecting them from local pathname expansion.
# The result is read line by line so paths are not word-split or globbed.
selected_files=()
while IFS= read -r remote_path; do
  selected_files+=("$remote_path")
done < <(ssh -q "$remote_serv" find "$remote_dir" -type f "${ignore[@]}" -exec ls "$flags" {} +)

# Relative path: strip the leading remote directory (taken literally, not as a
# glob pattern).
rel_selected_files=("${selected_files[@]##"$remote_dir"}")

# Add the restart files to the selected files (relative path)
rel_selected_files+=("${rel_rstfiles[@]}")

# Test the number of elements: an unquoted -z test would only look at the
# first element of the array.
if (( ${#rel_selected_files[@]} == 0 )); then
  # No files to be transferred
  printf '%s\n' 'No files to be transferred' >&2
  exit 1
fi
# Prepare the "transfer.log" file: one complete rsync command per selected
# file, which GNU parallel then executes $nthreads at a time. The combined
# output of all jobs is written to "result.log".
deleteFlag=''
if [[ "$deleteFromRemote" == true ]]; then
  deleteFlag='--remove-source-files'
fi

# -a archive mode, -m prune empty directories, -z compress during transfer.
flags="-am -vv $deleteFlag"
if [[ "$compress" == true ]]; then
  flags="-azm -vv $deleteFlag"
fi

# With --relative, the "/./" between the remote directory and the file marks
# where the path that is recreated under $local_dir starts.
prefix="rsync $flags --stats --relative --safe-links --human-readable -e \"ssh -q\" ${remote_serv}:${remote_dir}./"
suffix=" $local_dir"

# Write the command lines directly instead of patching the file with sed:
# sed would interpret "~", "&" and "\" inside the paths as part of its
# substitution syntax and corrupt or reject the command.
for rel_path in "${rel_selected_files[@]}"; do
  printf '%s%s%s\n' "$prefix" "$rel_path" "$suffix"
done > transfer.log

parallel --will-cite -j "$nthreads" {} < transfer.log > result.log