#!/bin/bash
#
# curl_dir
# A curl wrapper to transfer (get) all [matched] files from a directory.
#
# Synopsis:
# curl_dir [options] ftpDir [saveDir]
#
# Usage:
# curl_dir --help
#
# History:
# Nov 2013. Created. Yaswant Pradhan
# Dec 2014. Added batch download function + some optimisation. YP.
# Jan 2015. Deal with http:// directory too (use --filter option). YP.
# Jul 2018. Parse URL encoding (replace %chars) and bug fix. YP.
# Sep 2018. Add --dry-run option. YP.
# -----------------------------------------------------------------------------
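# Note: this script relies on GNU extensions (grep -P, mktemp --tmpdir), so
# it assumes a GNU/Linux userland rather than strictly POSIX tools.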
BLDON=$(tput bold 2>/dev/null)
BLDOF=$(tput sgr0 2>/dev/null)
# utf2asc(){ iconv -c -f utf-8 -t ascii $1; }
msg() { echo -e "$(date -u +'%F %R:%S') $@"; }
hr() { for i in {1..38}; do echo -e "-\c"; done; echo -e "\n"; }
decode_url() { ## replace % encoding with the corresponding characters in a URL listfile
  tmp=$(mktemp "$(basename "$0")".flist.XXXX --tmpdir="${TMPDIR:-/tmp}/${USER}")
  sed 's@+@ @g;s@%@\\x@g' "$1" | xargs -0 printf "%b" > "$tmp"
  mv "$tmp" "$1"
}
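# Decoding sketch with a hypothetical listfile entry (not taken from any
# real server):
#   before:             pub/data%202018/file%2Bname.nc
#   after sed:          pub/data\x202018/file\x2Bname.nc
#   after printf "%b":  pub/data 2018/file+name.nc
# i.e. '+' becomes a space and each %HH escape is rewritten as \xHH, which
# printf's %b expansion then turns into the literal character.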
urlstat() { ## check URL validity: prints 0 if $1 carries a *tp:// scheme, else 1
  echo "$1" | grep -q 'tp://'
  echo $?
}
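# urlstat examples (hostname is hypothetical):
#   urlstat "ftp://ftp.example.com/pub/a.nc"   -> prints 0 (full URL)
#   urlstat "a.nc"                             -> prints 1 (bare file name)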
usage(){
cat << EOF
${BLDON}NAME${BLDOF}
  ${0##*/}
  A cURL wrapper to transfer (fetch) all files from an FTP directory.
  Existing files are updated (see the curl -C flag) by automatically finding
  where to resume the transfer.

${BLDON}SYNOPSIS${BLDOF}
  ${0##*/} [OPTION] ftpDir [saveDir]

${BLDON}DESCRIPTION${BLDOF}
  Options:
  -h, --help, -?
      Show this message.
  -b, --batch <num>
      Number of files in each batch. The download moves on to the next batch
      once all files in the previous batch have finished. (default: 10 files)
  -k, --keep
      Keep the temporary listFile for diagnostic purposes
      (the default action is to auto-clean the tmp file).
  -f, --filter <pat>
      Filter the files with a regular expression (advisable).
  -fx, --fext <pat>
      Filter the files by file suffix.
  -t, --tries <num>
      Retry the request <num> times if transient problems occur. (default: 3)
  -m, --maxtime <sec>
      Maximum time in <seconds> that the whole operation is allowed to take.
      This is useful for preventing batch jobs from hanging for hours due to
      slow networks or links going down. If this option is given several
      times, the last one is used. (default: 5 min)
  -n, --dry-run
      Print a list of [matching] files on the server; don't download.
  -v, --verbose
      Make the operation more talkative. (default: quiet)
  -x, --proxy <proxyhost[:port]>
      Use the specified HTTP proxy. (default: no proxy)

  <ftpDir> URL/PATH
      Full path to the ftp directory, including the server address.
  <saveDir> optional
      saveDir is the directory where all files and subdirectories will be
      saved, i.e. the top of the retrieval tree. The default is the base
      directory of the ftp address (in the current directory).

${BLDON}COPYRIGHT${BLDOF}
  Copyright (C) 2011 Free Software Foundation, Inc. License GPLv3+:
  GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>.
  This is free software: you are free to change and redistribute it.
  There is NO WARRANTY, to the extent permitted by law.

${BLDON}AUTHOR${BLDOF}
  Written by Yaswant Pradhan.

${BLDON}SEE ALSO${BLDOF}
  curl(1).
EOF
}
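# Example invocations (server and paths are hypothetical):
#   curl_dir -f '2018.*\.nc' ftp://ftp.example.com/pub/sat ./sat_data
#   curl_dir -fx .nc -n ftp://ftp.example.com/pub/sat   # dry-run listing only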
#------------------------------------------------------------------------------
# Parse default settings
OPT= # cURL options
FILTER="" # Download files with matching regular pattern
BATCH=10 # Number of files in each batch
TRIES=3 # Number of retries for each file
MTIME=300 # Maximum time in seconds for each operation
quiet=1 # quiet mode
KEEPTMP= # Keep temporary fileList
PROXY=
DRY_RUN=0
while [[ $1 == -* ]]; do
  case "$1" in
    -h|--help|-\?) usage | more; exit 0 ;;
    -b|--batch) BATCH="$2"; shift 2 ;;
    -f|--filter) FILTER="$2"; shift 2 ;;
    -fx|--fext) FILTER="${2}\$"; shift 2 ;;  # anchor pattern to end of name
    -k|--keep) KEEPTMP=1; shift 1 ;;
    -m|--maxtime) MTIME="$2"; shift 2 ;;
    -n|--dry-run) DRY_RUN=1; shift 1 ;;
    -t|--tries) TRIES="$2"; shift 2 ;;
    -v|--verbose) quiet=0; OPT="$OPT -v "; shift 1 ;;
    -x|--proxy) PROXY="$2"; shift 2 ;;
    --) shift; break ;;
    -*) echo "invalid option: $1" 1>&2; usage | more; exit 1 ;;
  esac
done
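# For instance, `curl_dir -b 5 -fx .nc ftp://host/dir` leaves BATCH=5 and
# FILTER='.nc$', with "$1" pointing at ftp://host/dir when the loop exits
# (hypothetical host).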
[[ "$quiet" -eq 1 ]] && OPT="$OPT -s "
# Parse arguments
[[ $# -lt 1 ]] && { usage | more; exit 0; }
saveDir=${2:-${1##*/}}
# Create a temporary listfile (plain text) listing each item to download
# from the server  #fileList="/var/tmp/$USER/curl.files.list"
mkdir -p "${TMPDIR:-/tmp}/${USER}"  # ensure the tmpdir exists for mktemp
fileList=$(mktemp "${0##*/}".flist.XXXXXX --tmpdir="${TMPDIR:-/tmp}/${USER}")
[[ "$quiet" -eq 0 ]] && echo -e "Listfile: $fileList"
# Curl the FTP directory content (apply filters if necessary, see -f flag)
# and store the file names in $fileList
[ -n "$FILTER" ] && echo -e "File pattern: $FILTER"
# Check validity of the remote URL
if ! curl "${PROXY_OPT[@]}" --fail "${1}/" &>/dev/null; then
  echo "E: invalid dir ${1}" 1>&2
  rm -f "$fileList"
  exit 1
fi
# Add files matching the prescribed filter to the list file
if [ -z "$FILTER" ]; then
  curl "${PROXY_OPT[@]}" -s "${1}/" | tail -n+6 | grep -Po '(?<=href=").*' \
    | cut -d'"' -f 1 > "$fileList"
else
  curl "${PROXY_OPT[@]}" -s "${1}/" | grep -Po '(?<=href=").*' | cut -d'"' -f 1 \
    | grep "$FILTER" > "$fileList"
fi
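# The listing parse assumes an HTTP-style directory index, where each entry
# appears as an anchor, e.g. (illustrative):
#   <a href="file1.nc">file1.nc</a>
# grep -Po '(?<=href=").*' keeps everything after `href="`, and cutting at
# the next double quote leaves just file1.nc on each listfile line.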
# Check number of files (in the list file) to download
nFiles=$(wc -l < "$fileList")
[[ "$nFiles" -eq 0 ]] \
&& echo -e "\nE: $1 --Invalid URI or File Filter--\n" \
&& exit 1
[[ "$nFiles" -gt "$BATCH" && "$quiet" -eq 0 ]] \
&& echo -e "\nW: number of files to download $nFiles
I: batch_dnld ON; process will wait between each batch of $BATCH files\n"
decode_url "$fileList"
if (( DRY_RUN )); then
  msg "[DRY-RUN] Listing $nFiles matching files on remote server:"
  sed 's!.*/!!' "$fileList" | sort -u | nl
  rm -f "$fileList"
  echo -e "\nNumber of matching files = $nFiles"
  exit 0
fi
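# A dry run prints only the basenames, e.g. (hypothetical session):
#   $ curl_dir -n -fx .nc ftp://ftp.example.com/pub/sat
#   2018-09-01 12:00:00 [DRY-RUN] Listing 2 matching files on remote server:
#        1  a.nc
#        2  b.nc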
# Create the save directory and cd into it; all files are stored there
mkdir -p "$saveDir" && cd "$saveDir" || exit $?
msg "Downloading $nFiles files to: $saveDir/"
cnt=0
# Download files listed in fileList (in batches of $BATCH files) --------------
while read -r file; do
  printf "."
  # Check if the entry is already a full URL
  code=$(urlstat "$file")
  if [[ "$code" -eq 0 ]]; then
    # `file` includes the full remote path
    curl "${PROXY_OPT[@]}" -C - -OS --max-time "$MTIME" --retry "$TRIES" \
      $OPT -L "$file" &
  else
    # `file` does not include the remote address; prefix it with ftpDir
    curl "${PROXY_OPT[@]}" -C - -OS --max-time "$MTIME" --retry "$TRIES" \
      $OPT -L "${1}/${file##*/}" &
  fi
  cnt=$((cnt + 1))
  # Pause until the current batch of background jobs finishes
  [[ $(( cnt % BATCH )) -eq 0 ]] && wait && echo -e " \c"
done < "$fileList"
wait
echo; msg "Download complete."; hr
# Housekeep -------------------------------------------------------------------
if [[ "$KEEPTMP" -eq 1 ]]; then
read -rp "Do you want to remove the listFile $fileList? " yn
case $yn in
[Yy]* ) rm -f "$fileList"; exit;;
[Nn]*| * ) exit;;
esac
else
rm -f "$fileList"
fi