-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetcher.mjs
executable file
·75 lines (59 loc) · 1.93 KB
/
fetcher.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!node_modules/.bin/zx
import { argv, fs, path, sleep } from 'zx';
import _ from 'lodash';
import { spawn } from 'child_process';
import { parseHTML } from 'linkedom';
// Two-letter country codes, in processing order. Each code is substituted
// into the `countryCode` query parameter of the MEP search URL and is also
// used to name the per-country files under MEPs/.
const listwlcc = [
  'BE', 'BG', 'CZ', 'DK', 'DE', 'EE', 'IE', 'GR', 'ES',
  'FR', 'HR', 'IT', 'CY', 'LV', 'LT', 'LU', 'HU', 'MT',
  'NL', 'AT', 'PL', 'PT', 'RO', 'SI', 'SK', 'FI', 'SE',
];
// Stage 1: download the MEP search-result page of every country in
// `listwlcc` to MEPs/<code>.html. A page that already exists on disk is
// kept as-is, so re-running the script only fetches what is missing.

// curl's -o does not create missing parent directories, so make sure the
// output directory exists before the first download.
await fs.ensureDir('MEPs');

for (const TLC of listwlcc) {
  const url = `https://www.europarl.europa.eu/meps/en/search/advanced?name=&euPoliticalGroupBodyRefNum=&countryCode=${TLC}&bodyType=ALL`;
  console.log(url);
  const destpath = path.join('MEPs', `${TLC}.html`);
  if (fs.existsSync(destpath)) {
    console.log(`File ${destpath} exists, skipping`);
    continue;
  }
  console.log(`Fetching via CURL ${url}`);
  // spawn() returns a ChildProcess, not a promise: awaiting it directly
  // resolves immediately and lets the script continue while curl is still
  // running. Wrap it so the loop really waits for each download and a
  // failed download stops the script instead of going unnoticed.
  await new Promise((resolve, reject) => {
    // --fail makes curl exit non-zero on HTTP errors instead of saving the
    // server's error page as if it were the result page.
    const curl = spawn('curl', ['--fail', '-o', destpath, url], {
      stdio: 'inherit',
    });
    // 'error' fires when the process could not be started (e.g. curl is
    // not installed).
    curl.on('error', reject);
    curl.on('close', (code, signal) => {
      if (code === 0) {
        resolve();
        return;
      }
      const reason = signal === null ? `exit code ${code}` : `signal ${signal}`;
      reject(new Error(`curl failed for ${url} (${reason})`));
    });
  });
}
// Stage 2: for every country in `listwlcc`, parse the saved result page
// MEPs/<code>.html and write the extracted member records to
// MEPs/ppls-<code>.json. The directory MEPs/ppls-<code> is created as well;
// nothing in this loop writes into it.
for (const TLC of listwlcc) {
  const sourcepage = path.join('MEPs', `${TLC}.html`);
  const pplsdir = path.join('MEPs', `ppls-${TLC}`);
  const pplsdirjson = path.join('MEPs', `ppls-${TLC}.json`);
  // ensureDir is asynchronous; await it so a failure surfaces here rather
  // than as an unhandled rejection later on.
  await fs.ensureDir(pplsdir);
  const htmlpage = await fs.readFile(sourcepage, 'utf-8');
  // Only the parsed document is needed from linkedom's result.
  const { document } = parseHTML(htmlpage);
  const items = document.querySelectorAll('.erpl_member-list-item');
  const mepdets = Array.from(items, (item) => {
    const titleEl = item.querySelector('.erpl_title-h4');
    const imgEl = item.querySelector('img[loading="lazy"]');
    if (titleEl === null || imgEl === null) {
      // Fail with context instead of a bare TypeError on null.
      throw new Error(`Member entry without title or image in ${sourcepage}`);
    }
    const name = titleEl.textContent;
    const urlimg = imgEl.getAttribute('src');
    const infos = item.querySelectorAll('.sln-additional-info');
    // The three info entries are read positionally below as group, nation
    // and party; any other count means the page layout is not the expected
    // one, so abort rather than emit mislabelled data.
    if (infos.length !== 3) {
      throw new Error(
        `Odd: ${infos.length} size (expected 3 .sln-additional-info entries for "${name}" in ${sourcepage})`,
      );
    }
    return {
      name,
      urlimg,
      group: infos[0].textContent,
      nation: infos[1].textContent,
      TLC,
      party: infos[2].textContent,
    };
  });
  fs.writeFileSync(pplsdirjson, JSON.stringify(mepdets, null, 2), 'utf-8');
  console.log(`Saved ${mepdets.length} MEPs info in ${pplsdirjson}`);
}