diff --git a/docs/en/journal.md b/docs/en/journal.md index 894986b25f2616..f7be7bfd8822a4 100644 --- a/docs/en/journal.md +++ b/docs/en/journal.md @@ -331,21 +331,18 @@ Return results from 2020 -## Proceedings of The National Academy of Sciences (PNAS) +## Proceedings of The National Academy of Sciences -### Latest Articles - Articles by Topic - -### Proceedings of The National Academy of Sciences (PNAS) - Latest Articles +### Journal - + -- Using router (`/pnas/` + Topic of Interest) to query latest research paper for a certain topic from PNAS journal. - If the `:topic` parameter is blank, or equal to 'latest', then all the latest papers will return. +::: tip Tips +Some topics require adding `topic/` to `topicPath`, like [`/pnas/topic/app-math`](https://rsshub.app/pnas/topic/app-math), while others do not, like [`/pnas/biophysics-and-computational-biology`](https://rsshub.app/pnas/biophysics-and-computational-biology). +::: - - ## PubMed ### Trending articles diff --git a/docs/journal.md b/docs/journal.md index 73f54496a23101..44eee8800eb5b8 100644 --- a/docs/journal.md +++ b/docs/journal.md @@ -302,14 +302,15 @@ You can get all short name of a journal from -## PNAS +## Proceedings of The National Academy of Sciences -### 最新文章(可筛选领域) +### 期刊 - + -- 通过 `/pnas/` + “领域名称” 来获取对应 “领域” 的最新文章(Latest Research)。 - 若参数置空(`/pnas`)或为 latest(`/pnas/latest`),则默认获取全部文章。 +::: tip Tips +有些领域需要在 `topicPath` 中添加 `topic/`,如 [`/pnas/topic/app-math`](https://rsshub.app/pnas/topic/app-math),有些则不需要,如 [`/pnas/biophysics-and-computational-biology`](https://rsshub.app/pnas/biophysics-and-computational-biology) +::: diff --git a/docs/other.md b/docs/other.md index b2211dfa026813..c79f045a790f06 100644 --- a/docs/other.md +++ b/docs/other.md @@ -721,7 +721,7 @@ type 为 all 时,category 参数不支持 cost 和 free ### 今日油价查询 - + ::: tip 提示 diff --git a/lib/router.js b/lib/router.js index 7ed0478e4bf16c..500d796041eb0e 100644 --- a/lib/router.js +++ b/lib/router.js @@ -2165,7 +2165,7 @@ router.get('/elife/:tid', 
lazyloadRouteHandler('./routes/elife/index')); router.get('/ieee/author/:aid/:sortType/:count?', lazyloadRouteHandler('./routes/ieee/author')); // PNAS [Sci Journal] -router.get('/pnas/:topic?', lazyloadRouteHandler('./routes/pnas/index')); +// router.get('/pnas/:topic?', lazyloadRouteHandler('./routes/pnas/index')); // cell [Sci Journal] router.get('/cell/cell/:category', lazyloadRouteHandler('./routes/cell/cell/index')); diff --git a/lib/routes/pnas/index.js b/lib/routes/pnas/index.js deleted file mode 100644 index bec26221f7dd52..00000000000000 --- a/lib/routes/pnas/index.js +++ /dev/null @@ -1,66 +0,0 @@ -const cheerio = require('cheerio'); -const got = require('@/utils/got'); - -module.exports = async (ctx) => { - const baseUrl = `https://www.pnas.org`; - - const topic = ctx.params.topic; - - let url = `${baseUrl}/content/early/recent`; - if (topic && topic !== 'latest') { - url = `${baseUrl}/content/by/section/${ctx.params.topic}`; - } else { - ctx.params.topic = 'Latest Research'; - } - - const res = await got.get(url); - const $ = cheerio.load(res.data); - const list = $('.highwire-citation-pnas-list-complete').get(); - - const out = await Promise.all( - list.map(async (item) => { - const $ = cheerio.load(item); - const title = $('.highwire-cite-title').text(); - const partial = $('.highwire-cite-linked-title').attr('href'); - const address = `${baseUrl}${partial}`; - let author; - if ($('.highwire-citation-authors span').length > 3) { - author = $('.highwire-citation-author.first').text() + ' et al.'; - } else { - author = $('.highwire-citation-authors span').text(); - } - const cache = await ctx.cache.get(address); - if (cache) { - return Promise.resolve(JSON.parse(cache)); - } - const res = await got.get(address); - const capture = cheerio.load(res.data); - - const significance = capture('.executive-summary').html(); - const abstract = capture('.section.abstract').html(); - let contents; - if (abstract !== null) { - contents = significance + abstract; - 
} else { - contents = significance; - } - - const single = { - title, - author, - description: contents, - link: address, - guid: address, - doi: capture('meta[name="DC.Identifier"]')[0].attribs.content, - pubDate: new Date(capture('meta[name="DC.Date"]')[0].attribs.content).toUTCString(), - }; - ctx.cache.set(address, JSON.stringify(single)); - return Promise.resolve(single); - }) - ); - ctx.state.data = { - title: `PNAS | ${ctx.params.topic}`, - link: url, - item: out, - }; -}; diff --git a/lib/v2/pnas/index.js b/lib/v2/pnas/index.js new file mode 100644 index 00000000000000..5cf8576f9876d2 --- /dev/null +++ b/lib/v2/pnas/index.js @@ -0,0 +1,101 @@ +const cheerio = require('cheerio'); +const got = require('@/utils/got'); +const { parseDate } = require('@/utils/parse-date'); +const { art } = require('@/utils/render'); +const path = require('path'); +const { setCookies } = require('@/utils/puppeteer-utils'); +const { CookieJar } = require('tough-cookie'); +const logger = require('@/utils/logger'); + +module.exports = async (ctx) => { + const baseUrl = 'https://www.pnas.org'; + const { topicPath } = ctx.params; + const link = `${baseUrl}/${topicPath ? topicPath : 'latest'}`; + + let cookieJar = await ctx.cache.get('pnas:cookieJar'); + const cacheMiss = !cookieJar; + cookieJar = cacheMiss ? 
new CookieJar() : CookieJar.fromJSON(cookieJar); + const { data: res } = await got(link, { + cookieJar, + }); + if (cacheMiss) { + await ctx.cache.set('pnas:cookieJar', cookieJar.toJSON()); + } + + const $ = cheerio.load(res); + const list = $('.card--row-reversed .card-content') + .toArray() + .map((item) => { + item = $(item); + const a = item.find('.article-title a'); + return { + title: a.text(), + link: new URL(a.attr('href'), baseUrl).href, + pubDate: parseDate(item.find('.card__meta__date').text()), + }; + }); + + const browser = await require('@/utils/puppeteer')(); + + const out = await Promise.all( + list.map((item) => + ctx.cache.tryGet(item.link, async () => { + const page = await browser.newPage(); + await setCookies(page, await cookieJar.getCookieString(item.link), '.pnas.org'); + await page.setRequestInterception(true); + page.on('request', (request) => { + request.resourceType() === 'document' ? request.continue() : request.abort(); + }); + logger.debug(`Requesting ${item.link}`); + await page.goto(item.link, { + waitUntil: 'domcontentloaded', + referer: link, + }); + await page.waitForSelector('.core-container'); + + const res = await page.evaluate(() => document.documentElement.innerHTML); + await page.close(); + + const $ = cheerio.load(res); + const PNASdataLayer = JSON.parse( + $('script') + .text() + .match(/PNASdataLayer =(.*?);/)[1] + ); + + $('.signup-alert-ad, .citations-truncation button').remove(); + + const { keywords, topic } = PNASdataLayer.page.attributes; + + item.category = [...keywords, topic]; + item.author = PNASdataLayer.page.pageInfo.author; + item.doi = PNASdataLayer.page.pageInfo.DOI; + item.description = art(path.join(__dirname, 'templates', 'article.art'), { + access: PNASdataLayer.user.access === 'yes', + // + abstracts: $('#abstracts .core-container').html(), + // + articleBody: $('[property=articleBody]').html(), + // + dataAvailability: $('#data-availability').html(), + acknowledgments: $('#acknowledgments').html(), + 
supplementaryMaterials: $('#supplementary-materials').html(), + bibliography: $('#bibliography').html(), + }); + + return item; + }) + ) + ); + + browser.close(); + + ctx.state.data = { + title: `${$('.banner-widget__content h1').text()} - PNAS`, + description: $('.banner-widget__content p').text(), + image: 'https://www.pnas.org/favicon.ico', + language: 'en-US', + link, + item: out, + }; +}; diff --git a/lib/v2/pnas/maintainer.js b/lib/v2/pnas/maintainer.js new file mode 100644 index 00000000000000..9ec543e5fa4ebf --- /dev/null +++ b/lib/v2/pnas/maintainer.js @@ -0,0 +1,3 @@ +module.exports = { + '/:topicPath*': ['emdoe', 'y9c'], +}; diff --git a/lib/v2/pnas/radar.js b/lib/v2/pnas/radar.js new file mode 100644 index 00000000000000..fc9f5b0e833c42 --- /dev/null +++ b/lib/v2/pnas/radar.js @@ -0,0 +1,13 @@ +module.exports = { + 'pnas.org': { + _name: 'Proceedings of the National Academy of Sciences', + '.': [ + { + title: '期刊', + docs: 'https://docs.rsshub.app/journal.html#proceedings-of-the-national-academy-of-sciences', + source: ['/*topicPath'], + target: '/pnas/:topicPath', + }, + ], + }, +}; diff --git a/lib/v2/pnas/router.js b/lib/v2/pnas/router.js new file mode 100644 index 00000000000000..434e366b24f716 --- /dev/null +++ b/lib/v2/pnas/router.js @@ -0,0 +1,3 @@ +module.exports = (router) => { + router.get('/:topicPath*', require('./index')); +}; diff --git a/lib/v2/pnas/templates/article.art b/lib/v2/pnas/templates/article.art new file mode 100644 index 00000000000000..e947183a9d8c80 --- /dev/null +++ b/lib/v2/pnas/templates/article.art @@ -0,0 +1,8 @@ +{{ if abstracts }}{{@ abstracts }}{{ /if }} + +{{ if access }}{{@ articleBody }}{{ /if }} + +{{ if dataAvailability }}{{@ dataAvailability }}{{ /if }} +{{ if acknowledgments }}{{@ acknowledgments }}{{ /if }} +{{ if supplementaryMaterials }}{{@ supplementaryMaterials }}{{ /if }} +{{ if bibliography }}{{@ bibliography }}{{ /if }}