-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscrap.js
executable file
·117 lines (102 loc) · 3.26 KB
/
scrap.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env node
const csv = require('csvtojson');
const dateFormat = require('dateformat');
const xlsx = require('xlsx');
const facebook = require('./networks/facebook');
const instagram = require('./networks/instagram');
const youtube = require('./networks/youtube');
/**
 * Command-line entry point.
 *
 * Usage: scrap-social-networks --input in.csv --output out.xlsx
 *
 * Reads a ';'-delimited CSV whose rows may carry the columns 'PrenomNom',
 * 'username Facebook', 'username Instagram' and 'Chaine Youtube', queries the
 * matching network module for every non-empty column, and writes an XLSX
 * workbook with two sheets:
 *   - 'accounts': one row per (person, network) pair, each network filling
 *     only its own columns;
 *   - 'posts': one row per Instagram post / Youtube video returned.
 *
 * The process exit status is non-zero when the arguments are invalid or when
 * any step throws, so callers (shell scripts, CI) can detect the failure.
 */
(async () => {
  // dateformat masks are case-sensitive: `mm` is the month and `MM` the
  // minutes, `hh` is the 12-hour clock and `HH` the 24-hour clock. The mask
  // previously used here ('dd/mm/yyyy hh:mm') printed the month in place of
  // the minutes.
  const DATE_MASK = 'dd/mm/yyyy HH:MM';

  try {
    let inputFile;
    let outputFile;
    // Each flag takes the argument that follows it; if a flag is repeated
    // the last occurrence wins.
    for (let i = 0; i < process.argv.length; ++i) {
      if (process.argv[i] === '--input') {
        inputFile = process.argv[i + 1];
      }
      if (process.argv[i] === '--output') {
        outputFile = process.argv[i + 1];
      }
    }

    if (!inputFile || !outputFile || !inputFile.endsWith('.csv') || !outputFile.endsWith('.xlsx')) {
      console.error('Usage: scrap-social-networks --input in.csv --output out.xlsx');
      // Invalid invocation is an error: report it through the exit status.
      process.exit(1);
    }

    const inputData = await csv({delimiter: ';'}).fromFile(inputFile);
    const accountsArray = [];
    const postsArray = [];

    // Header rows. The first cell of the accounts header is the run timestamp.
    accountsArray.push([
      dateFormat(new Date(), DATE_MASK),
      'Facebook / Page URL',
      'Facebook / Like count',
      'Instagram / Account URL',
      'Instagram / Follower count',
      'Instagram / Post count',
      'Youtube / Channel URL',
      'Youtube / Video count',
      'Youtube / View count',
      'Youtube / Subscriber count'
    ]);
    postsArray.push([
      'User',
      'URL',
      'Date de publication',
      'Type',
      'View count',
      'Like count',
      'Dislike count',
      'Favorite count',
      'Comment count',
      'Engagement'
    ]);

    // Rows are processed one after another (no parallel requests).
    for (const row of inputData) {
      const name = row['PrenomNom'];

      if (row['username Facebook']) {
        console.log(`${name} : retrieving Facebook page likes...`);
        const likeCount = await facebook.getFacebookPageLikes(row['username Facebook']);
        accountsArray.push([
          name,
          'https://www.facebook.com/' + row['username Facebook'],
          likeCount
        ]);
      }

      if (row['username Instagram']) {
        console.log(`${name} : retrieving Instagram account data...`);
        const account = await instagram.retrieveInfluencerData(row['username Instagram']);
        // Empty strings skip the Facebook columns.
        accountsArray.push([
          name,
          '',
          '',
          account.url,
          account.followerCount,
          account.postCount
        ]);
        for (const post of account.posts) {
          // View, dislike and favorite counts are left blank for Instagram.
          postsArray.push([
            name,
            post.url,
            dateFormat(post.publicationDate, DATE_MASK),
            post.type,
            '',
            post.likeCount,
            '',
            '',
            post.commentCount,
            post.engagement
          ]);
        }
      }

      if (row['Chaine Youtube']) {
        console.log(`${name} : retrieving Youtube channel data...`);
        const channel = await youtube.retrieveChannelData(row['Chaine Youtube']);
        // Empty strings skip the Facebook and Instagram columns.
        accountsArray.push([
          name,
          '',
          '',
          '',
          '',
          '',
          channel.url,
          channel.videoCount,
          channel.viewCount,
          channel.subscriberCount
        ]);
        for (const video of channel.videos) {
          postsArray.push([
            name,
            video.url,
            dateFormat(video.publicationDate, DATE_MASK),
            'Youtube',
            video.viewCount,
            video.likeCount,
            video.dislikeCount,
            video.favoriteCount,
            video.commentCount,
            video.engagement
          ]);
        }
      }
    }

    const workbook = xlsx.utils.book_new();
    xlsx.utils.book_append_sheet(workbook, xlsx.utils.aoa_to_sheet(accountsArray), 'accounts');
    xlsx.utils.book_append_sheet(workbook, xlsx.utils.aoa_to_sheet(postsArray), 'posts');
    xlsx.writeFile(workbook, outputFile);
  } catch (e) {
    console.error(e);
    // Signal the failure without cutting off pending stderr output.
    process.exitCode = 1;
  }
})();