-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.js
44 lines (41 loc) · 1.82 KB
/
main.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
const axios = require('axios');
const cheerio = require ('cheerio');
const fs = require('fs');
// Site origin used to turn the start path and scraped hrefs into absolute URLs.
const baseUrl = 'https://www.idealista.com';
// Path (relative to baseUrl) of the first listing page requested when the script runs.
const initialUrl = '/alquiler-viviendas/madrid/centro/sol/';
/**
 * Resolves an href scraped from a page against `baseUrl`.
 *
 * Plain string concatenation breaks when the href is missing (it yields
 * "...undefined") or already absolute (it yields a doubled origin), so the
 * WHATWG URL resolver is used instead.
 *
 * @param {string|undefined} href - Raw attribute value; may be relative, absolute or missing.
 * @returns {string|null} Absolute URL, or null when the href is missing or cannot be parsed.
 */
function toAbsoluteUrl(href) {
  if (!href) {
    return null;
  }
  try {
    return new URL(href, baseUrl).href;
  } catch (error) {
    // An unparsable href is treated like a missing one so that a single bad
    // link cannot discard every other item found on the page.
    return null;
  }
}

/**
 * Requests a listing page, extracts one record per `.items-container .item`
 * element and keeps following the `.pagination .next a` link until there is none.
 *
 * @param {string} url - Absolute URL of the page to request.
 * @param {Array<Object>} [items=[]] - Records accumulated from previous pages.
 * @returns {Promise<Array<Object>>} Resolves with the accumulated records. The
 *   promise never rejects: on a request or parsing error the error is logged
 *   and the records gathered before the failing page are returned.
 */
function scrapeUrl(url, items = []) {
  console.log('requesting', url);
  return axios.get(url)
    .then(response => {
      const $ = cheerio.load(response.data);
      const pageItems = $('.items-container .item').toArray()
        .map(item => {
          const $item = $(item);
          const $link = $item.find('.item-link');
          return {
            id: $item.attr('data-adid'),
            title: $link.text(),
            // null (instead of a "...undefined" string) when the item has no link.
            link: toAbsoluteUrl($link.attr('href')),
            image: $item.find('.gallery-fallback img').attr('data-ondemand-img'),
            price: $item.find('.item-price').text(),
            // The value sits next to a <small> unit label, so the parent's text is read.
            rooms: $item.find(".item-detail small:contains('hab.')").parent().text(),
            squareMeters: $item.find(".item-detail small:contains('m²')").parent().text(),
          };
        });
      const allItems = items.concat(pageItems);
      console.log(pageItems.length, 'items retrieved', allItems.length, 'acumulated');
      // Works for both root-relative and absolute "next" links.
      const nextUrl = toAbsoluteUrl($('.pagination .next a').attr('href'));
      return nextUrl ? scrapeUrl(nextUrl, allItems) : allItems;
    })
    .catch(error => {
      // Best effort: keep whatever was collected before the failing page.
      console.log('error', error);
      return items;
    });
}
// Script entry point: scrape every page starting at the initial listing URL,
// then serialize the collected records to ./items.json.
const persistItems = (scraped) => {
  console.log('finish!');
  const payload = JSON.stringify(scraped);
  fs.writeFile('./items.json', payload, 'utf8', (writeError) => {
    if (writeError) {
      console.log('error', writeError);
      return;
    }
    console.log(scraped.length, 'items saved');
  });
};

scrapeUrl(baseUrl + initialUrl).then(persistItems);