-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
122 lines (109 loc) · 2.9 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import * as cheerio from 'cheerio';
import express from 'express';
import fetch from 'node-fetch';
// Base URL of the scraped site; extractData appends the requested page name to it.
const mainUrl = 'https://www.me.gov.dz/fr/';

// Markers used to classify a scraped text:
//   "Loi n°"             -> legislative text (law)
//   "Décret exécutif n°" -> regulatory text (executive decree)
const legislativePattern = /Loi n°/;
const regulatoryPattern = /Décret exécutif n°/;

// HTTP server: the Express application and the port it listens on.
const port = 5000;
const app = express();
/**
 * Removes tabs and line terminators from scraped text so a description that
 * was spread over several source lines collapses onto one line.
 *
 * Only whitespace layout characters are removed; every other character
 * (including a literal "|") is kept.
 *
 * @param {string} text - Raw text content of an element.
 * @returns {string} The text without tabs and line terminators.
 */
function stripLayoutWhitespace(text) {
  return text.replace(/[\t\n\r\u2028\u2029]/g, '');
}

/**
 * Builds one result entry for a matched element.
 *
 * @param {object} element - Cheerio selection wrapping the matched element.
 * @param {string} description - Text to expose for the entry.
 * @returns {{description: string, link: (string|undefined)}} The entry; `link`
 *   is the `href` of the first anchor found inside the element, if any.
 */
function buildEntry(element, description) {
  return {
    description,
    link: element.find('a').attr('href'),
  };
}

/**
 * Fetches the page `${mainUrl}${name}` and extracts the legislative and
 * regulatory texts found inside the `.elementor-element-b7198e9` container.
 *
 * The page layout is not uniform, so three passes are made over the container:
 *   1. `<p>` elements: laws and decrees; a decree immediately followed by a
 *      `<ul>` gets that list's items attached as `stops` (arrêtés).
 *   2. `div.elementor-widget-container` elements whose text mentions a law.
 *   3. `<li>` items of `<ol>` lists inside those widgets that mention a decree.
 * An element can be reported by more than one pass.
 *
 * @param {string} name - Page name appended verbatim to the base URL.
 * @returns {Promise<{legislative: object[], regulatory_texts: object[]}>}
 *   Entries of shape `{description, link}`; regulatory entries from pass 1 may
 *   also carry `stops`, an array of `{description, link}`. Both lists are
 *   empty when the upstream site answers with a non-2xx status.
 */
async function extractData(name) {
  const data = {
    legislative: [],
    regulatory_texts: [],
  };

  const response = await fetch(`${mainUrl}${name}`);
  // An error page is not content: skip parsing it and report nothing found.
  if (!response.ok) {
    return data;
  }

  const body = await response.text();
  const $ = cheerio.load(body);

  // Container holding every paragraph of interest.
  const mainContainer = $('.elementor-element-b7198e9');

  // Pass 1: texts laid out as <p> elements.
  mainContainer.find('p').each((_, p) => {
    const paragraph = $(p);
    const text = paragraph.text();

    if (legislativePattern.test(text)) {
      data.legislative.push(buildEntry(paragraph, text));
    }

    if (regulatoryPattern.test(text)) {
      const entry = buildEntry(paragraph, text);
      const sibling = paragraph.next();
      // A <ul> right after the decree lists its stops (arrêtés).
      if (sibling.is('ul')) {
        entry.stops = [];
        sibling.children().each((__, li) => {
          const item = $(li);
          entry.stops.push(buildEntry(item, item.text()));
        });
      }
      data.regulatory_texts.push(entry);
    }
  });

  // Pass 2: laws laid out directly inside widget <div> elements.
  const widgets = mainContainer.find('div.elementor-widget-container');
  widgets.each((_, div) => {
    const widget = $(div);
    const text = widget.text();
    if (legislativePattern.test(text)) {
      data.legislative.push(buildEntry(widget, stripLayoutWhitespace(text)));
    }
  });

  // Pass 3: decrees laid out as <ol> items inside widget <div> elements.
  widgets
    .find('ol')
    .find('li')
    .each((_, li) => {
      const item = $(li);
      const text = item.text();
      if (regulatoryPattern.test(text)) {
        data.regulatory_texts.push(buildEntry(item, stripLayoutWhitespace(text)));
      }
    });

  return data;
}
// Landing route: points the caller at the documentation.
app.get('/', (req, res) => {
  res.send('Read docs for more infos');
});

// Scrapes the page named by :category and returns the extracted texts.
app.get('/:category', async (req, res) => {
  try {
    const data = await extractData(req.params.category);
    res.send(data);
  } catch (error) {
    // Without this guard a rejected extractData (e.g. network failure) would be
    // an unhandled rejection: the request would never be answered.
    console.error(`Failed to extract data for "${req.params.category}":`, error);
    res.status(502).send({ error: 'Failed to fetch data from the upstream site' });
  }
});

app.listen(port, () => {
  console.log(`Server Started on port : ${port}`);
});