Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(route): add nber route: NBER working paper #12008

Merged
merged 18 commits into from
Mar 3, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docs/en/journal.md
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,20 @@ Only some journals are supported.

<RouteEn author="TonyRL" example="/nature/siteindex" path="/nature/siteindex"/>

## National Bureau of Economic Research (NBER) Working Papers
5upernova-heng marked this conversation as resolved.
Show resolved Hide resolved

### All Papers

<RouteEn author="5upernova-heng" example="/nber/papers/20" path="/nber/papers/:perPage?" :paramsDesc="['Number of papers per page']"/>
5upernova-heng marked this conversation as resolved.
Show resolved Hide resolved

- The page size must be one of 20, 50, or 100. It defaults to 50.

### New Papers

<RouteEn author="5upernova-heng" example="/nber/news" path="/nber/news"/>
5upernova-heng marked this conversation as resolved.
Show resolved Hide resolved

Papers that are labeled "new" on the website.

5upernova-heng marked this conversation as resolved.
Show resolved Hide resolved
## Network and Distributed System Security (NDSS) Symposium

### Accepted papers
Expand Down
14 changes: 14 additions & 0 deletions docs/journal.md
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,20 @@ You can get all short name of a journal from <https://www.nature.com/siteindex>

<Route author="TonyRL" example="/nature/siteindex" path="/nature/siteindex"/>

## National Bureau of Economic Research (NBER) Working Papers

### 全部论文

<Route author="5upernova-heng" example="/nber/papers/20" path="/nber/papers/:perPage?" :paramsDesc="['每页论文数量']"/>

- 每页论文数量必须为以下值之一：20、50、100，默认为 50。

### 新论文

<Route author="5upernova-heng" example="/nber/news" path="/nber/news"/>

在网站上被标记为 "new" 的论文

5upernova-heng marked this conversation as resolved.
Show resolved Hide resolved
## Network and Distributed System Security (NDSS) Symposium

### Accepted papers
Expand Down
4 changes: 4 additions & 0 deletions lib/v2/nber/maintainer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
module.exports = {
'/articles/:perPage?': ['5upernova-heng'],
'/news': ['5upernova-heng'],
};
39 changes: 39 additions & 0 deletions lib/v2/nber/news.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
const { get_html, get_elements } = require('./utils');
const got = require('@/utils/got');
5upernova-heng marked this conversation as resolved.
Show resolved Hide resolved
const cheerio = require('cheerio');

module.exports = async (ctx) => {
url = `https://www.nber.org/papers?page=1&perPage=20&sortBy=public_date`;
Fixed Show fixed Hide fixed
const html = await get_html(url);
Fixed Show fixed Hide fixed
const elements = await get_elements(html, '.digest-card.is-new .digest-card__title a');

// Get Author and Abstarct
const baseUrl = 'https://www.nber.org';
const items = await Promise.all(
elements.map((item) =>
ctx.cache.tryGet(item.link, async () => {
const detailResponse = await got({
method: 'get',
url: `${baseUrl}${item.link}`,
});
const content = cheerio.load(detailResponse.data);
const authors = [];
content('.page-header__author-item a').each((index, element) => {
const text = content(element).text();
const link = content(element).attr('href');
authors.push({ name: text, link });
});
item.authors = authors;
item.abstract = content('.page-header__intro-inner p').text();
return item;
})
)
);

ctx.state.data = {
title: 'NBER Working Paper News',
link: url,
Fixed Show fixed Hide fixed
item: items,
description: 'National Bureau of Economic Research Working Papers -- News',
};
};
47 changes: 47 additions & 0 deletions lib/v2/nber/papers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
const { get_html, get_elements } = require('./utils');
const got = require('@/utils/got');
const cheerio = require('cheerio');

module.exports = async (ctx) => {
let { perPage = '50' } = ctx.params;
perPage = parseInt(perPage);
// perPage has to be one of 20, 50, 100
if (perPage <= 35) {perPage = 20;}
if (35 < perPage && perPage <= 70) {perPage = 50;}
if (70 < perPage) {perPage = 100;}

// Get title and link
url = `https://www.nber.org/papers?page=1&perPage=${perPage}&sortBy=public_date`;
Fixed Show fixed Hide fixed
const html = await get_html(url);
Fixed Show fixed Hide fixed
const elements = await get_elements(html, '.digest-card .digest-card__title a');

// Get Author and Abstarct
const baseUrl = 'https://www.nber.org';
const items = await Promise.all(
elements.map((item) =>
ctx.cache.tryGet(item.link, async () => {
const detailResponse = await got({
method: 'get',
url: `${baseUrl}${item.link}`,
});
const content = cheerio.load(detailResponse.data);
const authors = [];
content('.page-header__author-item a').each((index, element) => {
const text = content(element).text();
const link = content(element).attr('href');
authors.push({ name: text, link });
});
item.authors = authors;
item.abstract = content('.page-header__intro-inner p').text();
return item;
})
)
);

ctx.state.data = {
title: 'NBER Working Paper',
link: url,
Fixed Show fixed Hide fixed
item: items,
description: `National Bureau of Economic Research Working Papers -- ${perPage} articles`,
};
};
19 changes: 19 additions & 0 deletions lib/v2/nber/radar.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
module.exports = {
'nber.org': {
__name: 'NBER',
'.': [
{
title: 'New working paper',
docs: 'todo',
source: ['/news'],
5upernova-heng marked this conversation as resolved.
Show resolved Hide resolved
target: '/nber/news',
},
{
title: 'All working paper',
docs: 'todo',
source: ['/papers'],
target: '/nber/papers',
},
],
},
};
4 changes: 4 additions & 0 deletions lib/v2/nber/router.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
module.exports = (router) => {
router.get('/papers/:perPage?', require('./papers'));
router.get('/news', require('./news'));
};
29 changes: 29 additions & 0 deletions lib/v2/nber/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
const cheerio = require('cheerio');

async function get_html(url) {
const browser = await require('@/utils/puppeteer')();
5upernova-heng marked this conversation as resolved.
Show resolved Hide resolved
const page = await browser.newPage();
await page.goto(url);
await page.waitForSelector('.promo-grid');
const html = await page.content();
await browser.close();

return html;
// Get title and link
}

/**
 * Collect the text and `href` of every node in `html` matched by `selector`.
 *
 * @param {string} html - Markup to parse with cheerio.
 * @param {string} selector - Selector for the nodes to collect (anchors, given that `href` is read).
 * @returns {Promise<Array<{title: string, link: string}>>} One entry per match, in document order.
 */
async function get_elements(html, selector) {
    const $ = cheerio.load(html);
    return $(selector)
        .toArray()
        .map((node) => {
            const anchor = $(node);
            return { title: anchor.text(), link: anchor.attr('href') };
        });
}

module.exports = {
get_html,
get_elements,
};