This repository has been archived by the owner on Mar 8, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathnewsapi.js
85 lines (73 loc) · 2.66 KB
/
newsapi.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
const axios = require("axios");
const { Readability } = require("@mozilla/readability");
const { JSDOM } = require('jsdom');
const fs = require('fs')
// Matches strings made up solely of ASCII letters and digits.
// NOTE(review): not referenced anywhere in the visible code — confirm before removing.
const english = /^[a-zA-Z0-9]+$/;
// Values allArticle passes as the `query` option of getURL (sent as the `domains`
// parameter); one NewsAPI request is made per entry. Empty as committed.
const queries = []
// NOTE(review): not referenced in the visible code; presumably meant to feed the
// `country` option of getURL — confirm.
const countries = [];
const createDOMPurify = require('dompurify');
// NewsAPI keys; getURL picks one at random for every URL it builds. Empty as
// committed, in which case the generated URL carries the literal `apiKey=undefined`.
const apiKeys = [];

/**
 * Builds a request URL for the NewsAPI v2 REST API.
 *
 * @param {object} [data] - Request options.
 * @param {string} [data.category] - Endpoint path segment, e.g. 'everything'.
 * @param {string} [data.query] - Sent as the `domains` parameter when truthy.
 * @param {string} [data.country] - Sent as the `country` parameter when truthy.
 * @returns {string} The URL, always sorted by `publishedAt`.
 */
const getURL = (data) => {
  const { category, query, country } = data ?? {};
  // Pick one of the configured keys at random.
  const apiKey = apiKeys[Math.floor(Math.random() * apiKeys.length)];

  // Values are percent-encoded so reserved characters (&, =, #, spaces, ...) cannot
  // break out of their parameter. The string is assembled by hand rather than with
  // the global URL class so this function has no dependency on that global.
  const params = ['sortBy=publishedAt', `apiKey=${encodeURIComponent(apiKey)}`];
  if (query) {
    params.push(`domains=${encodeURIComponent(query)}`);
  }
  if (country) {
    params.push(`country=${encodeURIComponent(country)}`);
  }
  return `https://newsapi.org/v2/${category}?${params.join('&')}`;
};
const cache = require('./cache.js');
// Prefix prepended to the `link` field of every article returned by allArticle.
// Empty as committed, so `link` is the same relative path as `url`.
const baseURL = '';
/**
 * Flattens a text snippet: collapses every run of whitespace into a single space,
 * then removes anything that looks like an HTML tag (`<...>`).
 *
 * @param {?string} content - Raw text, possibly containing markup.
 * @returns {string} The cleaned text; '' when `content` is null or undefined.
 */
const regex = (content) => {
  // Missing text (e.g. an article without a description) is treated as empty
  // instead of throwing on `.replace`.
  if (content == null) {
    return '';
  }
  return String(content).replace(/\s+/g, ' ').replace(/(<([^>]+)>)/gi, '');
};
/**
 * Extracts the readable text of an HTML page: parses it with JSDOM, runs Readability
 * over the document, sanitizes the resulting text with DOMPurify and finally
 * flattens it with `regex`.
 *
 * Best-effort: never throws.
 *
 * @param {string} link - URL of the page; handed to JSDOM as the document URL.
 * @param {string} contentData - Raw HTML of the page.
 * @returns {string|undefined} The cleaned article text, or undefined when the page
 *   could not be parsed or no article was found.
 */
const contentFilter = (link, contentData) => {
  try {
    const dom = new JSDOM(contentData, { url: link });
    const article = new Readability(dom.window.document).parse();
    // parse() yields null when Readability finds no article content in the page.
    if (!article) {
      return undefined;
    }
    const DOMPurify = createDOMPurify(new JSDOM('').window);
    const finalContent = DOMPurify.sanitize(article.textContent);
    return `${regex(finalContent)}`;
  } catch (e) {
    // Report the failure instead of discarding it, but keep the undefined result
    // that callers already have to handle.
    console.error(`contentFilter: could not extract article text from ${link}:`, e);
    return undefined;
  }
};
/**
 * Returns a promise that settles (with no value) once `ms` milliseconds have elapsed.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const wait = (ms) =>
  new Promise((done) => {
    setTimeout(done, ms);
  });
/**
 * Fetches the newest articles for every entry in `queries`, registers each article
 * in the cache under a slug derived from its title, and returns trimmed-down article
 * summaries whose `url`/`link` point at the local `/article/<slug>` route.
 *
 * When `doCache` is true the body of every article is additionally downloaded and
 * stored in the cache up front, pausing 5 seconds before each download.
 *
 * @param {boolean} [doCache=true] - Whether to pre-fetch and cache article bodies.
 * @param {object} [options]
 * @param {number} [options.n=100] - Accepted for interface compatibility; not used
 *   by this implementation.
 * @returns {Promise<object[]>} The summaries of all queries, in request order.
 *   `source`, `content`, `author` and `urlToImage` are present but undefined.
 * @throws Rejects when a NewsAPI listing request fails; failures while pre-fetching
 *   individual article bodies are logged and skipped.
 */
const allArticle = async (doCache = true, {n = 100} = {}) => {
  const allData = [];

  // Block-scoped for...of bindings: nothing leaks into (or overwrites) globals.
  for (const domain of queries) {
    const response = await axios.get(getURL({ category: 'everything', query: domain }));
    // A payload without an `articles` array contributes nothing.
    const articles = response.data?.articles ?? [];

    // Original source URLs of this query's articles, kept for the pre-fetch step
    // because the summaries below replace `url` with the local route.
    const pending = [];

    for (const article of articles) {
      const slug = `${article?.title?.split(" ").join("-")}`;
      const sourceUrl = `${article.url}`;

      // Until the body is cached, the slug maps to the source URL so that
      // getArticle can download it on demand.
      cache.set(slug, sourceUrl);
      pending.push({ slug, sourceUrl });

      allData.push({
        ...article,
        source: undefined,
        content: undefined,
        author: undefined,
        urlToImage: undefined,
        url: `/article/${slug}`,
        link: `${baseURL}/article/${slug}`,
        // Articles may come without a description; treat that as empty text.
        description: regex(article.description ?? ''),
      });
    }

    if (!doCache) {
      continue;
    }

    // Only this query's new articles are pre-fetched; earlier ones are already done.
    for (const { slug, sourceUrl } of pending) {
      try {
        // Throttle outbound requests.
        await wait(5000);
        const page = await axios.get(sourceUrl);
        const text = contentFilter(sourceUrl, page.data);
        // Keep the source URL in the cache when extraction failed, so the article
        // can still be retried later instead of caching a bogus body.
        if (text !== undefined) {
          cache.set(slug, `${text}`);
        }
      } catch (e) {
        console.error(`allArticle: could not pre-cache ${sourceUrl}:`, e);
      }
    }
  }

  return allData;
};
/**
 * Returns the text of a cached article.
 *
 * The cache entry for `id` holds either the extracted article text or, when the
 * body has not been downloaded yet, the article's source URL. In the latter case
 * the page is fetched, run through `contentFilter`, stored back in the cache and
 * returned.
 *
 * Best-effort: never rejects.
 *
 * @param {string} id - Cache key of the article (the title-derived slug).
 * @returns {Promise<string|undefined>} The article text, or undefined when the id
 *   is unknown or the article could not be fetched/extracted.
 */
const getArticle = async (id) => {
  try {
    const data = cache.get(id);
    if (!data) {
      return undefined;
    }

    // A pending entry is a single http(s) URL token. Extracted text has its
    // whitespace collapsed to spaces, so real content is not mistaken for a URL
    // merely because it begins with one.
    const isSourceUrl = /^https?:\/\/\S+$/i.test(data);
    if (!isSourceUrl) {
      return data;
    }

    const content = await axios.get(`${data}`);
    const articleContent = contentFilter(data, content.data);
    // Leave the source URL in the cache when extraction failed so a later call
    // can retry, instead of overwriting it with the string "undefined".
    if (articleContent === undefined) {
      return undefined;
    }
    cache.set(`${id}`, `${articleContent}`);
    return articleContent;
  } catch (e) {
    console.error(`getArticle: could not load article "${id}":`, e);
    return undefined;
  }
};
// Exported functions: allArticle (list articles and register them in the cache) and
// getArticle (resolve a cache key to article text).
module.exports = { allArticle, getArticle };