diff --git a/client/public/imgs/ABCNews_icon.png b/client/public/imgs/ABCNews_icon.png
new file mode 100644
index 0000000..9fc9092
Binary files /dev/null and b/client/public/imgs/ABCNews_icon.png differ
diff --git a/client/public/imgs/BusinessInsider_icon.png b/client/public/imgs/BusinessInsider_icon.png
new file mode 100644
index 0000000..eb04d18
Binary files /dev/null and b/client/public/imgs/BusinessInsider_icon.png differ
diff --git a/client/public/imgs/Engadget_icon.png b/client/public/imgs/Engadget_icon.png
new file mode 100644
index 0000000..ce582be
Binary files /dev/null and b/client/public/imgs/Engadget_icon.png differ
diff --git a/client/public/imgs/TheNewYorkTimes_icon.png b/client/public/imgs/TheNewYorkTimes_icon.png
new file mode 100644
index 0000000..9c09325
Binary files /dev/null and b/client/public/imgs/TheNewYorkTimes_icon.png differ
diff --git a/client/src/pages/Main.js b/client/src/pages/Main.js
index 7f316c2..4d7ae87 100644
--- a/client/src/pages/Main.js
+++ b/client/src/pages/Main.js
@@ -161,7 +161,15 @@ class Main extends Component {
       sortByState = "MostRecent";
     }
 
-    let allPlatforms = ["Reddit", "Twitter", "Facebook"];
+    let allPlatforms = [
+      "Reddit",
+      "Twitter",
+      "Facebook",
+      "Business Insider",
+      "The New York Times",
+      "ABC News",
+      "Engadget",
+    ];
     let switchStates = [];
     allPlatforms.forEach((item) => {
       switchStates[item] = splitSelectedPlatforms.includes(item);
@@ -512,7 +520,7 @@ class Main extends Component {
                 {" "}
diff --git a/server/.env.template b/server/.env.template
index 58e8bc9..3383235 100644
--- a/server/.env.template
+++ b/server/.env.template
@@ -9,4 +9,6 @@ Reddit_userAgent=
 Reddit_clientId=
 Reddit_clientSecret=
 Reddit_refreshToken=
+newsApiKey=
+nyt_key=
 mentionsCrawlerPW=
\ No newline at end of file
diff --git a/server/crawlers/index.js b/server/crawlers/index.js
index 2a250cd..f638972 100644
--- a/server/crawlers/index.js
+++ b/server/crawlers/index.js
@@ -1,9 +1,11 @@
-const redditCrawler = require("./redditCrawler");
-const twitterCrawler = require("./twitterCrawler");
-const nytCrawler = require('./nytCrawler')
+const redditCrawler = require('./redditCrawler');
+const twitterCrawler = require('./twitterCrawler');
+const nytCrawler = require('./nytCrawler');
+const newsApiCrawler = require('./newsApiCrawler');
 
 module.exports = {
   redditCrawler,
   twitterCrawler,
-  nytCrawler
+  nytCrawler,
+  newsApiCrawler,
 };
diff --git a/server/crawlers/newsApiCrawler.js b/server/crawlers/newsApiCrawler.js
new file mode 100644
index 0000000..2f22285
--- /dev/null
+++ b/server/crawlers/newsApiCrawler.js
@@ -0,0 +1,44 @@
+const axios = require("axios");
+const moment = require("moment");
+
+// url params
+const lastWeek = moment().subtract(1, "weeks").format("YYYY-MM-DD");
+const sources =
+  "business-insider,cnn,abc-news,ars-technica,associated-press,cbs-news,engadget,fortune,hacker-news,techcrunch";
+const pageSize = 100;
+const page = 1;
+const key = process.env.newsApiKey;
+
+const createEndpoint = (company, source) =>
+  `http://newsapi.org/v2/everything?q=${company}&sources=${source}&from=${lastWeek}&pageSize=${pageSize}&page=${page}&sortBy=publishedAt&apiKey=${key}`;
+
+module.exports = async function newsApiScrape(company) {
+  let output = [];
+  let url = createEndpoint(company, sources);
+  let {
+    data: { articles },
+  } = await axios.get(url);
+
+  output = articles.map((article) => {
+    const destructuredUrl = article.url.split("/");
+    // get the last portion of the destructured url and use it as the mention's id
+    const [id] = destructuredUrl.slice(-1);
+    const summary = article.description
+      ? article.description.replace(/(<([^>]+)>)/gi, "").trim() // strip html tags and extra whitespace
+      : "";
+
+    return {
+      id,
+      summary,
+      image: article.urlToImage,
+      title: article.title,
+      content: article.content,
+      date: article.publishedAt,
+      platform: article.source.name,
+      url: article.url,
+      popularity: 0, // temporary val
+    };
+  });
+
+  return output;
+};
diff --git a/server/models/mention.js b/server/models/mention.js
index 4b1adf4..f7927de 100644
--- a/server/models/mention.js
+++ b/server/models/mention.js
@@ -1,9 +1,9 @@
-const { DataTypes } = require("sequelize");
-const db = require("../db");
+const { DataTypes } = require('sequelize');
+const db = require('../db');
 
-const Mention = db.define("Mention", {
+const Mention = db.define('Mention', {
   id: {
-    type: DataTypes.STRING,
+    type: DataTypes.TEXT,
     primaryKey: true,
   },
   title: {
@@ -20,13 +20,13 @@
     defaultValue: Date.now(),
   },
   imageUrl: {
-    type: DataTypes.STRING,
+    type: DataTypes.TEXT,
   },
   popularity: {
     type: DataTypes.INTEGER,
   },
   url: {
-    type: DataTypes.STRING,
+    type: DataTypes.TEXT,
   },
   summary: {
     type: DataTypes.TEXT,
diff --git a/server/models/userMentions.js b/server/models/userMentions.js
index 44369fe..a882f01 100644
--- a/server/models/userMentions.js
+++ b/server/models/userMentions.js
@@ -1,22 +1,22 @@
-const { DataTypes } = require("sequelize");
-const db = require("../db");
-const User = require("./user");
-const Mention = require("./mention");
+const { DataTypes } = require('sequelize');
+const db = require('../db');
+const User = require('./user');
+const Mention = require('./mention');
 
-const UserMentions = db.define("UserMentions", {
+const UserMentions = db.define('UserMentions', {
   // model reference attributes
   UserId: {
     type: DataTypes.INTEGER,
     references: {
       model: User,
-      key: "id",
+      key: 'id',
     },
   },
   MentionId: {
-    type: DataTypes.STRING,
+    type: DataTypes.TEXT,
    references: {
      model: Mention,
-      key: "id",
+      key: 'id',
    },
  },
diff --git a/server/scraper/scraper.js b/server/scraper/scraper.js
index abd1249..c5a4ece 100644
--- a/server/scraper/scraper.js
+++ b/server/scraper/scraper.js
@@ -1,4 +1,9 @@
-const { redditCrawler, twitterCrawler, nytCrawler } = require("../crawlers");
+const {
+  redditCrawler,
+  twitterCrawler,
+  nytCrawler,
+  newsApiCrawler,
+} = require("../crawlers");
 
 module.exports = function ScraperManager() {
   this.run = async function run(companyName) {
@@ -10,8 +15,14 @@
     const redditMentions = await redditCrawler(companyName);
     // const twitterMentions = await twitterCrawler(companyName);
     const twitterMentions = [];
-    const nytMentions = await nytCrawler(companyName)
-    results = results.concat(redditMentions, twitterMentions, nytMentions);
+    const nytMentions = await nytCrawler(companyName);
+    const newsApiMentions = await newsApiCrawler(companyName);
+    results = results.concat(
+      redditMentions,
+      twitterMentions,
+      nytMentions,
+      newsApiMentions
+    );
     return results;
   };
 };
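
For reference, a minimal sketch of how the new crawler could be exercised on its own from the server directory. It assumes the project loads server/.env with dotenv (the NewsAPI key is read from process.env.newsApiKey when the module is required, so it must be loaded first); the query "Tesla" and the script location are only illustrative, not part of this change:

// sketch: run the NewsAPI crawler standalone (assumes newsApiKey is set in server/.env)
require("dotenv").config();
const { newsApiCrawler } = require("./crawlers");

(async () => {
  // "Tesla" is just an example query; any company name works
  const mentions = await newsApiCrawler("Tesla");
  console.log(`fetched ${mentions.length} mentions from NewsAPI sources`);
  // each mention has: id, summary, image, title, content, date, platform, url, popularity
  console.log(mentions[0]);
})();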