-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy patheuroparl-meps-collect
executable file
·59 lines (55 loc) · 1.76 KB
/
europarl-meps-collect
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/bash
# Collect all websites and social media for MEPs based on https://www.europarl.europa.eu/meps/en/full-list/all
# Writes to several file descriptors:
# - Info about what it's doing to stderr
# - Extracted URLs to FD 3
# - Warnings about EP Newshub links to FD 4
# Precondition: the caller must have opened FD 3 and FD 4 before starting the
# script. An empty command with a `>&N` redirection fails when N is closed;
# technique from https://unix.stackexchange.com/a/206848
# The probe's own "Bad file descriptor" message is discarded, and our error
# is written to stderr so it cannot be mistaken for regular output on stdout.
for fd in 3 4
do
  if ! { >&"${fd}"; } 2>/dev/null
  then
    echo "Error: FD ${fd} not open" >&2
    exit 1
  fi
done
# Resolve the directory that holds this script as a physical path (`pwd -P`
# resolves symlinked directories), then put it at the front of PATH so that
# commands located next to the script are found first.
scriptpath="$(
  cd "$(dirname "$0")"
  pwd -P
)"
PATH="${scriptpath}:${PATH}"
export PATH
# Print one classification line, "<Label>: <url>", on stdout for the URL in $1.
# The label is Facebook, Instagram, Twitter or YouTube when the URL contains
# "//<host>/" for one of the host patterns below, and Other in every other case
# (including a known host that is not followed by a slash).
classify_url() {
  local url="$1"
  if [[ "${url}" =~ //((www|[a-z][a-z]-[a-z][a-z])\.)?facebook\.com/ ]]
  then
    echo "Facebook: ${url}"
  elif [[ "${url}" =~ //(www\.)?instagram\.com/ ]]
  then
    echo "Instagram: ${url}"
  elif [[ "${url}" =~ //(www\.)?twitter\.com/ ]]
  then
    echo "Twitter: ${url}"
  elif [[ "${url}" =~ //([^/]+\.)?youtube\.com/ || "${url}" =~ //youtu\.be/ ]]
  then
    echo "YouTube: ${url}"
  else
    echo "Other: ${url}"
  fi
}

# Main pipeline: download the full MEP list, extract each /meps/en/<id> profile
# path, and process the profiles one by one.
# Progress messages go to stderr, as the file header documents; extracted URLs
# go to FD 3 and EP Newshub warnings to FD 4.
# curl-archivebot-ua is an external command looked up via PATH (presumably a
# curl wrapper shipped next to this script -- verify).
echo "Fetching MEP list" >&2
curl-archivebot-ua -s "https://www.europarl.europa.eu/meps/en/full-list/all" | \
  grep -Po '<a class="ep_content" href="\K/meps/en/\d+(?=")' | \
  while read -r profileUrl
  do
    profileUrl="https://www.europarl.europa.eu${profileUrl}"
    echo "Fetching ${profileUrl}" >&2

    # Report a failed download instead of ignoring it silently, but still
    # process whatever output was returned (best effort, never abort the run).
    profilePage="$(curl-archivebot-ua -sL "${profileUrl}")" || \
      echo "Warning: fetching ${profileUrl} failed" >&2

    # Join the page into a single line so the non-greedy match can span the
    # whole social-network block up to its closing </ul>, then extract every
    # href in that block except mailto: links.
    mapfile -t urls < <(tr -d '\r\n' <<< "${profilePage}" | \
      grep -Po '<div class="ep-a_share ep-layout_socialnetwok">.*?</ul>' | \
      grep -Po '<a\s+([^>]*\s+)?href="\K(?!mailto:)[^"]+')

    # Classification: one labelled line per extracted URL, written to FD 3.
    for url in "${urls[@]}"
    do
      classify_url "${url}"
    done >&3

    # Check if there's a newshub mention and print a warning about that if necessary
    if [[ "${profilePage}" == *container_header_newshub* ]]
    then
      echo "Has EP Newshub link: ${profileUrl}" >&4
    fi
  done