From 68e972464ce44104f2ceaae445448884bfe20bc0 Mon Sep 17 00:00:00 2001 From: Tony Date: Sat, 25 Feb 2023 05:12:06 -0200 Subject: [PATCH 1/3] feat(route): instagram from cookie (#11952) * feat(route): instagram 2 * docs: add docs * fix: get tabs * docs: fix tag --- docs/en/install/README.md | 7 +- docs/en/social-media.md | 12 ++-- docs/install/README.md | 7 +- docs/social-media.md | 10 ++- lib/config.js | 1 + lib/v2/instagram/common-utils.js | 59 ++++++++++++++++ lib/v2/instagram/maintainer.js | 1 + lib/v2/instagram/private-api/index.js | 63 ++--------------- lib/v2/instagram/router.js | 1 + lib/v2/instagram/web-api/index.js | 70 +++++++++++++++++++ lib/v2/instagram/web-api/utils.js | 99 +++++++++++++++++++++++++++ 11 files changed, 259 insertions(+), 71 deletions(-) create mode 100644 lib/v2/instagram/common-utils.js create mode 100644 lib/v2/instagram/web-api/index.js create mode 100644 lib/v2/instagram/web-api/utils.js diff --git a/docs/en/install/README.md b/docs/en/install/README.md index 2f58df7263dbd9..7e6c5192d659b7 100644 --- a/docs/en/install/README.md +++ b/docs/en/install/README.md @@ -677,9 +677,10 @@ See docs of the specified route and `lib/config.js` for detailed information. - Instagram: - - `IG_USERNAME`: Your Instagram username - - `IG_PASSWORD`: Your Instagram password - - `IG_PROXY`: Proxy URL for Instagram + - `IG_USERNAME`: Your Instagram username (Private API only) + - `IG_PASSWORD`: Your Instagram password (Private API only) + - `IG_PROXY`: Proxy URL for Instagram (Private API only, optional) + - `IG_COOKIE`: Your Instagram cookie (Cookie only) Warning: Two Factor Authentication is **not** supported. diff --git a/docs/en/social-media.md b/docs/en/social-media.md index 33d0ebf8f5fd08..c7f80753eb2fbf 100644 --- a/docs/en/social-media.md +++ b/docs/en/social-media.md @@ -148,15 +148,15 @@ Type ::: warning -Due to Instagram API restrictions, you have to setup your credentials on the server. See deployment guide for more. +Due to Instagram Private API restrictions, you have to setup your credentials on the server. 2FA is not supported. See [deployment guide](https://docs.rsshub.app/en/install/) for more. -If you don't want to setup credentials, use Picuki. +If you don't want to setup credentials, you can use [Picuki](#picuki). ::: -### User Profile / Hashtag +### User Profile / Hashtag - Private API - + | User timeline | Hashtag | | ---------- | ---- | @@ -168,6 +168,10 @@ It's highly recommended to deploy with Redis cache enabled. +### User Profile / Hashtag - Cookie + + + ## Lofter ### User diff --git a/docs/install/README.md b/docs/install/README.md index ced5ffe5057efe..f8c475196c6efa 100644 --- a/docs/install/README.md +++ b/docs/install/README.md @@ -712,9 +712,10 @@ RSSHub 支持使用访问密钥 / 码,白名单和黑名单三种方式进行 - Instagram: - - `IG_USERNAME`: Instagram 用户名。 - - `IG_PASSWORD`: Instagram 密码。 - - `IG_PROXY`: Instagram 代理 URL。 + - `IG_USERNAME`: Instagram 用户名(仅 Private API) + - `IG_PASSWORD`: Instagram 密码(仅 Private API) + - `IG_PROXY`: Instagram 代理 URL(仅 Private API,可选) + - `IG_COOKIE`: Instagram 登录后的 Cookie(仅 Cookie) 注意,暂**不支持**两步验证。 diff --git a/docs/social-media.md b/docs/social-media.md index 864003685b9960..06ea9e7cd345fc 100644 --- a/docs/social-media.md +++ b/docs/social-media.md @@ -488,13 +488,13 @@ Tiny Tiny RSS 会给所有 iframe 元素添加 `sandbox="allow-scripts"` 属性 ::: warning 注意 -由于 Instagram API 限制,必须在服务器上设置你的用户名和密码。暂不支持两步验证。步骤见部署指南。 +由于 Instagram Private API 限制,必须在服务器上设置你的用户名和密码。暂不支持两步验证。步骤见[部署指南](https://docs.rsshub.app/install/)。 -如需无登录的 feed,请用 Picuki。 +如需无登录的 feed,请用 [Picuki](#picuki)。 ::: -### 用户 / 标签 +### 用户 / 标签 - Private API @@ -508,6 +508,10 @@ Tiny Tiny RSS 会给所有 iframe 元素添加 `sandbox="allow-scripts"` 属性 +### 用户 / 标签 - Cookie + + + ## Keep ### 运动日记 diff --git a/lib/config.js b/lib/config.js index 124ef72a023647..beed36b5e666f6 100644 --- a/lib/config.js +++ b/lib/config.js @@ -185,6 +185,7 @@ const calculateValue = () => { username: envs.IG_USERNAME, password: envs.IG_PASSWORD, proxy: envs.IG_PROXY, + cookie: envs.IG_COOKIE, }, iwara: { cookie: envs.IWARA_COOKIE, diff --git a/lib/v2/instagram/common-utils.js b/lib/v2/instagram/common-utils.js new file mode 100644 index 00000000000000..8ab6c32e9c10bd --- /dev/null +++ b/lib/v2/instagram/common-utils.js @@ -0,0 +1,59 @@ +const { parseDate } = require('@/utils/parse-date'); +const { art } = require('@/utils/render'); +const path = require('path'); + +const renderItems = (items) => + items.map((item) => { + const { product_type } = item; // carousel_container, feed, clips, igtv + // Content + const summary = item.caption?.text ?? ''; + + let description = ''; + switch (product_type) { + case 'carousel_container': { + const images = item.carousel_media.map((i) => i.image_versions2.candidates[0]); + description = art(path.join(__dirname, 'templates/images.art'), { + summary, + images, + }); + break; + } + case 'clips': + case 'igtv': + description = art(path.join(__dirname, 'templates/video.art'), { + summary, + image: item.image_versions2.candidates[0].url, + video: item.video_versions[0], + }); + break; + case 'feed': { + const images = [item.image_versions2.candidates[0]]; + description = art(path.join(__dirname, 'templates/images.art'), { + summary, + images, + }); + break; + } + default: + throw Error(`Instagram: Unhandled feed type: ${product_type}`); + } + + // Metadata + const url = `https://www.instagram.com/p/${item.code}/`; + const pubDate = parseDate(item.taken_at, 'X'); + const title = summary.split('\n')[0]; + + return { + title, + id: item.pk, + pubDate, + author: item.user.username, + link: url, + summary, + description, + }; + }); + +module.exports = { + renderItems, +}; diff --git a/lib/v2/instagram/maintainer.js b/lib/v2/instagram/maintainer.js index 9e31b9d5323a7d..1d19aac1817850 100644 --- a/lib/v2/instagram/maintainer.js +++ b/lib/v2/instagram/maintainer.js @@ -1,3 +1,4 @@ module.exports = { '/:category/:key': ['oppilate', 'DIYgod'], + '/2/:category/:key': ['TonyRL'], }; diff --git a/lib/v2/instagram/private-api/index.js b/lib/v2/instagram/private-api/index.js index ab2b7a43f86470..bd188d57fa25be 100644 --- a/lib/v2/instagram/private-api/index.js +++ b/lib/v2/instagram/private-api/index.js @@ -1,9 +1,7 @@ const { ig, login } = require('./utils'); const logger = require('@/utils/logger'); const config = require('@/config').value; -const { parseDate } = require('@/utils/parse-date'); -const { art } = require('@/utils/render'); -const path = require('path'); +const { renderItems } = require('../common-utils'); // loadContent pulls the desired user/tag/etc async function loadContent(category, nameOrId, tryGet) { @@ -24,7 +22,8 @@ async function loadContent(category, nameOrId, tryGet) { } feedDescription = userInfo.biography; - feedLogo = userInfo.hd_profile_pic_url_info?.url ?? userInfo.profile_pic_url; + // exists in web api ?? exist in private api ?? exist in both + feedLogo = userInfo.profile_pic_url_hd ?? userInfo.hd_profile_pic_url_info?.url ?? userInfo.profile_pic_url; const fullName = userInfo.full_name; feedTitle = `${fullName} (@${username}) - Instagram`; feedLink = `https://www.instagram.com/${username}`; @@ -41,9 +40,8 @@ async function loadContent(category, nameOrId, tryGet) { itemsRaw = await tryGet(`instagram:tags:${tag}`, () => ig.feed.tags(tag, 'recent').items(), config.cache.routeExpire, false); break; } - default: { + default: break; - } } return { @@ -83,62 +81,11 @@ module.exports = async (ctx) => { throw e; } - const items = data.itemsRaw.map((item) => { - const { product_type } = item; // carousel_container, feed, clips, igtv - // Content - const summary = item.caption?.text ?? ''; - - let description = ''; - switch (product_type) { - case 'carousel_container': { - const images = item.carousel_media.map((i) => i.image_versions2.candidates[0]); - description = art(path.join(__dirname, '../templates/images.art'), { - summary, - images, - }); - break; - } - case 'clips': - case 'igtv': - description = art(path.join(__dirname, '../templates/video.art'), { - summary, - image: item.image_versions2.candidates[0].url, - video: item.video_versions[0], - }); - break; - case 'feed': { - const images = [item.image_versions2.candidates[0]]; - description = art(path.join(__dirname, '../templates/images.art'), { - summary, - images, - }); - break; - } - default: - throw Error(`Instagram: Unhandled feed type: ${product_type}`); - } - - // Metadata - const url = `https://www.instagram.com/p/${item.code}/`; - const pubDate = parseDate(item.taken_at, 'X'); - const title = summary.split('\n')[0]; - - return { - title, - id: item.pk, - pubDate, - author: item.user.username, - link: url, - summary, - description, - }; - }); - ctx.state.data = { title: data.feedTitle, link: data.feedLink, description: data.feedDescription, - item: items, + item: renderItems(data.itemsRaw), icon: 'https://www.instagram.com/static/images/ico/xxhdpi_launcher.png/99cf3909d459.png', logo: data.feedLogo, image: data.feedLogo, diff --git a/lib/v2/instagram/router.js b/lib/v2/instagram/router.js index eda1408aa6b0b3..9672c20cd58020 100644 --- a/lib/v2/instagram/router.js +++ b/lib/v2/instagram/router.js @@ -1,3 +1,4 @@ module.exports = (router) => { router.get('/:category/:key', require('./private-api/index')); + router.get('/2/:category/:key', require('./web-api/index')); }; diff --git a/lib/v2/instagram/web-api/index.js b/lib/v2/instagram/web-api/index.js new file mode 100644 index 00000000000000..658b8162a29457 --- /dev/null +++ b/lib/v2/instagram/web-api/index.js @@ -0,0 +1,70 @@ +const { CookieJar } = require('tough-cookie'); +const config = require('@/config').value; +const { renderItems } = require('../common-utils'); +const { baseUrl, COOKIE_URL, getUserInfo, getUserFeedItems, getTagsFeedItems } = require('./utils'); + +module.exports = async (ctx) => { + if (!config.instagram || !config.instagram.cookie) { + throw Error('Instagram RSS is disabled due to the lack of relevant config'); + } + const availableCategories = ['user', 'tags']; + const { category, key } = ctx.params; + const { cookie } = config.instagram; + if (!availableCategories.includes(category)) { + throw Error('Such feed is not supported.'); + } + + let cookieJar = await ctx.cache.get('instagram:cookieJar'); + const cacheMiss = !cookieJar; + if (cacheMiss) { + cookieJar = new CookieJar(); + for await (const c of cookie.split('; ')) { + await cookieJar.setCookie(c, COOKIE_URL); + } + } else { + cookieJar = CookieJar.fromJSON(cookieJar); + } + + let feedTitle, feedLink, feedDescription, feedLogo; + let items; + switch (category) { + case 'user': { + const userInfo = await getUserInfo(key, cookieJar, ctx.cache); + + // User feed metadata + const { biography, full_name, id, username } = userInfo; + feedTitle = `${full_name} (@${username}) - Instagram`; + feedDescription = biography; + // exists in web api ?? exist in private api ?? exist in both + feedLogo = userInfo.profile_pic_url_hd ?? userInfo.hd_profile_pic_url_info?.url ?? userInfo.profile_pic_url; + feedLink = `${baseUrl}/${username}`; + + items = await getUserFeedItems(id, username, cookieJar, ctx.cache.tryGet); + break; + } + case 'tags': { + const tag = key; + + feedTitle = `#${tag} - Instagram`; + feedLink = `${baseUrl}/explore/tags/${tag}`; + + items = await getTagsFeedItems(tag, 'recent', cookieJar, ctx.cache.tryGet); + break; + } + default: + break; + } + + await ctx.cache.set('instagram:cookieJar', cookieJar.toJSON(), 31536000); + + ctx.state.data = { + title: feedTitle, + link: feedLink, + description: feedDescription, + item: renderItems(items), + icon: `${baseUrl}/static/images/ico/xxhdpi_launcher.png/99cf3909d459.png`, + logo: feedLogo, + image: feedLogo, + allowEmpty: true, + }; +}; diff --git a/lib/v2/instagram/web-api/utils.js b/lib/v2/instagram/web-api/utils.js new file mode 100644 index 00000000000000..2bf6bc47412663 --- /dev/null +++ b/lib/v2/instagram/web-api/utils.js @@ -0,0 +1,99 @@ +const got = require('@/utils/got'); +const config = require('@/config').value; + +const baseUrl = 'https://www.instagram.com'; +const COOKIE_URL = 'https://instagram.com'; +let igWwwClaim; + +const getCSRFTokenFromJar = async (cookieJar) => { + const cookieString = await cookieJar.getCookieString(COOKIE_URL); + return cookieString.match(/csrftoken=([^;]+)/)?.[1]; +}; + +const getHeaders = async (cookieJar) => ({ + 'X-ASBD-ID': 198387, + 'X-CSRFToken': await getCSRFTokenFromJar(cookieJar), + 'X-IG-App-ID': 936619743392459, + 'X-IG-WWW-Claim': igWwwClaim, +}); + +const getUserInfo = async (username, cookieJar, cache) => { + let webProfileInfo; + let id = await cache.get(`instagram:getIdByUsername:${username}`); + let userInfoCache = await cache.get(`instagram:userInfo:${id}`); + + if (!userInfoCache) { + const response = await got(`${baseUrl}/api/v1/users/web_profile_info/`, { + cookieJar, + headers: await getHeaders(cookieJar), + searchParams: { + username, + }, + }); + if (response.url.includes('/accounts/login/')) { + throw Error('Invalid cookie'); + } + igWwwClaim = response.headers['x-ig-set-www-claim'] || igWwwClaim; + + webProfileInfo = response.data.data.user; + id = webProfileInfo.id; + + await cache.set(`instagram:getIdByUsername:${username}`, id, 31536000); // 1 year since it will never change + await cache.set(`instagram:userInfo:${id}`, webProfileInfo); + } + + userInfoCache = typeof userInfoCache === 'string' ? JSON.parse(userInfoCache) : userInfoCache; + + return userInfoCache || webProfileInfo; +}; + +const getUserFeedItems = (id, username, cookieJar, tryGet) => + tryGet( + `instagram:feed:${id}`, + async () => { + const response = await got(`${baseUrl}/api/v1/feed/user/${username}/username/`, { + cookieJar, + headers: await getHeaders(cookieJar), + searchParams: { + count: 30, + }, + }); + // 401 Unauthorized if cookie does not match with IP + igWwwClaim = response.headers['x-ig-set-www-claim'] || igWwwClaim; + + return response.data.items; + }, + config.cache.routeExpire, + false + ); + +const getTagsFeedItems = (tag, tab, cookieJar, tryGet) => + tryGet( + `instagram:tags:${tag}`, + async () => { + const response = await got(`${baseUrl}/api/v1/tags/web_info/`, { + // cookieJar, cookieJar is behaving weirdly here, so we use cookie header instead + headers: { + cookie: await cookieJar.getCookieString(COOKIE_URL), + ...(await getHeaders(cookieJar)), + }, + searchParams: { + tag_name: tag, + }, + }); + // Looks like cookie IP check is not applied to tags + igWwwClaim = response.headers['x-ig-set-www-claim'] || igWwwClaim; + + return response.data.data[tab].sections.flatMap((section) => section.layout_content.medias.map((media) => media.media)); + }, + config.cache.routeExpire, + false + ); + +module.exports = { + baseUrl, + COOKIE_URL, + getUserInfo, + getUserFeedItems, + getTagsFeedItems, +}; From d9826eb3bc19a11c1fa089546bdc4fc41edc27d8 Mon Sep 17 00:00:00 2001 From: Goren G Date: Sat, 25 Feb 2023 22:58:29 +0800 Subject: [PATCH 2/3] fix(router): parse images from `.pattl` (#11951) * fix: parse images from .pattl * chore: use for of * refactor: update route path * docs: update table mapping --------- --- docs/multimedia.md | 14 +- lib/v2/dsndsht23/index.js | 121 ---------------- lib/v2/sehuatang/index.js | 129 ++++++++++++++++++ lib/v2/{dsndsht23 => sehuatang}/maintainer.js | 0 lib/v2/{dsndsht23 => sehuatang}/radar.js | 4 +- lib/v2/{dsndsht23 => sehuatang}/router.js | 0 6 files changed, 138 insertions(+), 130 deletions(-) delete mode 100644 lib/v2/dsndsht23/index.js create mode 100644 lib/v2/sehuatang/index.js rename lib/v2/{dsndsht23 => sehuatang}/maintainer.js (100%) rename lib/v2/{dsndsht23 => sehuatang}/radar.js (81%) rename lib/v2/{dsndsht23 => sehuatang}/router.js (100%) diff --git a/docs/multimedia.md b/docs/multimedia.md index 417cef71af6b7c..46d4e85edd0011 100644 --- a/docs/multimedia.md +++ b/docs/multimedia.md @@ -1561,19 +1561,19 @@ JavDB 有多个备用域名,本路由默认使用永久域名 + **原创 BT 电影** -| 每日合集 | 国产原创 | 亚洲无码原创 | 亚洲有码原创 | 高清中文字幕 | 三级写真 | 亚洲名站有码 | VR 系列 | 欧美无码 | 动漫原创 | AI 换脸电影 | 原档收藏 WMV | 综合讨论区 | -| ---- | ---- | ------ | ------ | ------ | ---- | ------ | ----- | ---- | ---- | ------- | -------- | ----- | -| mrhj | gcyc | yzwmyc | yzymyc | gqzwzm | sjxz | yzmzym | vr | omwm | dmyc | ai | ydsc | zhtlq | +| 国产原创 | 亚洲无码原创 | 亚洲有码原创 | 高清中文字幕 | 三级写真 | VR 视频 | 素人有码 | 欧美无码 | 韩国主播 | 动漫原创 | 综合讨论 | +| ---- | ------ | ------ | ------ | ---- | ----- | ---- | ---- | ---- | ---- | ---- | +| gcyc | yzwmyc | yzymyc | gqzwzm | sjxz | vr | srym | omwm | hgzb | dmyc | zhtl | **色花图片** -| 华人性爱自拍 | 华人街拍区 | 亚洲性爱 | 欧美性爱 | 卡通动漫 | -| ------ | ----- | ---- | ---- | ---- | -| hrxazp | hrjpq | yzxa | omxa | ktdm | +| 原创自拍 | 转贴自拍 | 华人街拍 | 亚洲性爱 | 欧美性爱 | 卡通动漫 | 套图下载 | +| ---- | ---- | ---- | ---- | ---- | ---- | ---- | +| yczp | ztzp | hrjp | yzxa | omxa | ktdm | ttxz | diff --git a/lib/v2/dsndsht23/index.js b/lib/v2/dsndsht23/index.js deleted file mode 100644 index 1aac5fe1f1d44a..00000000000000 --- a/lib/v2/dsndsht23/index.js +++ /dev/null @@ -1,121 +0,0 @@ -const got = require('@/utils/got'); -const cheerio = require('cheerio'); -const { parseDate } = require('@/utils/parse-date'); - -const host = 'https://www.sehuatang.net/'; - -const forumIdMaps = { - mrhj: '106', - gcyc: '2', - yzwmyc: '36', - yzymyc: '37', - gqzwzm: '103', - sjxz: '107', - yzmzym: '104', - vr: '102', - omwm: '38', - dmyc: '39', - ai: '113', - ydsc: '111', - hrxazp: '98', - hrjpq: '50', - yzxa: '48', - omxa: '49', - ktdm: '117', - zhtlq: '95', -}; - -module.exports = async (ctx) => { - const subformName = ctx.params.subforumid ?? 'gqzwzm'; - const subformId = subformName in forumIdMaps ? forumIdMaps[subformName] : subformName; - const typefilter = ctx.params.type ? `&filter=typeid&typeid=${ctx.params.type}` : ''; - const link = `${host}forum.php?mod=forumdisplay&orderby=dateline&fid=${subformId}${typefilter}`; - const headers = { - 'Accept-Encoding': 'gzip, deflate, br', - 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', - }; - let c; - const response = await got(link, { - headers, - hooks: { - beforeRedirect: [ - (options, response) => { - const cookie = response.headers['set-cookie']; - if (cookie) { - const cook = cookie.map((c) => c.split(';')[0]).join('; '); - options.headers.Cookie = cook; - c = cook; - options.headers.Referer = response.url; - } - }, - ], - }, - }); - headers.Cookie = c; - const $ = cheerio.load(response.data); - - const list = $('#threadlisttableid tbody[id^=normalthread]') - .slice(0, ctx.query.limit ? parseInt(ctx.query.limit) : 25) - .map(function () { - const info = { - title: '[' + $(this).find('th em a').text() + '] ' + $(this).find('a.xst').text(), - link: $(this).find('a.xst').attr('href'), - date: $(this).find('td.by').find('em span span').attr('title') || $(this).find('td.by').find('em span').first().text(), - }; - return info; - }) - .get(); - - const out = await Promise.all( - list.map((info) => { - const title = info.title; - const date = info.date; - const itemUrl = host + info.link; - - return ctx.cache.tryGet(itemUrl, async () => { - const response = await got(itemUrl, { - headers, - }); - - const $ = cheerio.load(response.data); - const postMessage = $("td[id^='postmessage']").slice(0, 1); - const images = $(postMessage).find('img'); - for (let k = 0; k < images.length; k++) { - if (!$(images[k]).attr('file') || $(images[k]).attr('file') === 'undefined') { - $(images[k]).replaceWith(''); - } else { - $(images[k]).replaceWith(``); - } - } - const description = (postMessage.html() || '抓取原帖失败').replace(/ignore_js_op/g, 'div'); - - const single = { - title, - link: itemUrl, - description, - pubDate: date ? parseDate(date) : null, - }; - const magnet = postMessage.find('div.blockcode li').first().text(); - const isMag = magnet.startsWith('magnet'); - const torrent = postMessage.find('a[href^=forum\\.php\\?mod\\=attachment]:not([href$=nothumb\\=yes])').attr('href'); - - const hasEnclosureUrl = isMag || torrent !== undefined; - if (hasEnclosureUrl) { - const enclosureUrl = isMag ? magnet : new URL(torrent, host).href; - const enclosure = { - enclosure_url: enclosureUrl, - enclosure_type: isMag ? 'application/x-bittorrent' : 'application/octet-stream', - }; - Object.assign(single, enclosure); - } - return single; - }); - }) - ); - - ctx.state.data = { - title: `色花堂 - ${$('#pt > div:nth-child(1) > a:last-child').text()}`, - link, - item: out, - }; -}; diff --git a/lib/v2/sehuatang/index.js b/lib/v2/sehuatang/index.js new file mode 100644 index 00000000000000..06451d87b3daae --- /dev/null +++ b/lib/v2/sehuatang/index.js @@ -0,0 +1,129 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const { parseDate } = require('@/utils/parse-date'); +const timezone = require('@/utils/timezone'); +const { CookieJar } = require('tough-cookie'); +const cookieJar = new CookieJar(); + +const host = 'https://www.sehuatang.net/'; + +const forumIdMaps = { + // 原创 BT 电影 + gcyc: '2', // 国产原创 + yzwmyc: '36', // 亚洲无码原创 + yzymyc: '37', // 亚洲有码原创 + gqzwzm: '103', // 高清中文字幕 + sjxz: '107', // 三级写真 + vr: '160', // VR 视频 + srym: '104', // 素人有码 + omwm: '38', // 欧美无码 + '4k': '151', // 4K 原版 + hgzb: '152', // 韩国主播 + dmyc: '39', // 动漫原创 + // 色花图片 + yczp: '155', // 原创自拍 + ztzp: '125', // 转贴自拍 + hrjp: '50', // 华人街拍 + yzxa: '48', // 亚洲性爱 + omxa: '49', // 欧美性爱 + ktdm: '117', // 卡通动漫 + ttxz: '165', // 套图下载 + + zhtl: '95', // 综合讨论 + // no longer updated/available + mrhj: '106', // 每日合集 + ai: '113', // AI 换脸电影 + ydsc: '111', // 原档收藏 WMV + hrxazp: '98', // 华人性爱自拍 +}; + +module.exports = async (ctx) => { + const subformName = ctx.params.subforumid ?? 'gqzwzm'; + const subformId = subformName in forumIdMaps ? forumIdMaps[subformName] : subformName; + const { type } = ctx.params; + const typefilter = type ? `&filter=typeid&typeid=${type}` : ''; + const link = `${host}forum.php?mod=forumdisplay&orderby=dateline&fid=${subformId}${typefilter}`; + const headers = { + 'Accept-Encoding': 'gzip, deflate, br', + 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', + }; + const response = await got(link, { + cookieJar, + headers, + }); + const $ = cheerio.load(response.data); + + const list = $('#threadlisttableid tbody[id^=normalthread]') + .slice(0, ctx.query.limit ? parseInt(ctx.query.limit) : 25) + .toArray() + .map((item) => { + item = $(item); + const hasCategory = item.find('th em a').length; + return { + title: `${hasCategory ? `[${item.find('th em a').text()}]` : ''} ${item.find('a.xst').text()}`, + link: host + item.find('a.xst').attr('href'), + pubDate: parseDate(item.find('td.by').find('em span span').attr('title')), + author: item.find('td.by cite a').first().text(), + }; + }); + + const out = await Promise.all( + list.map((info) => + ctx.cache.tryGet(info.link, async () => { + const response = await got(info.link, { + cookieJar, + headers, + }); + + const $ = cheerio.load(response.data); + const postMessage = $("td[id^='postmessage']").slice(0, 1); + const images = $(postMessage).find('img'); + for (const image of images) { + const file = $(image).attr('file'); + if (!file || file === 'undefined') { + $(image).replaceWith(''); + } else { + $(image).replaceWith($(``)); + } + } + // if postMessage does not have any images, try to parse image url from `.pattl` + if (images.length === 0) { + const pattl = $('.pattl'); + const pattlImages = $(pattl).find('img'); + for (const pattlImage of pattlImages) { + const file = $(pattlImage).attr('file'); + if (!file || file === 'undefined') { + $(pattlImage).replaceWith(''); + } else { + $(pattlImage).replaceWith($(``)); + } + } + postMessage.append($(pattl)); + } + $('em[onclick]').remove(); + + info.description = (postMessage.html() || '抓取原帖失败').replace(/ignore_js_op/g, 'div'); + info.pubDate = timezone(parseDate($('.authi em span').attr('title')), 8); + + const magnet = postMessage.find('div.blockcode li').first().text(); + const isMag = magnet.startsWith('magnet'); + const torrent = postMessage.find('p.attnm a').attr('href'); + + const hasEnclosureUrl = isMag || torrent !== undefined; + if (hasEnclosureUrl) { + const enclosureUrl = isMag ? magnet : new URL(torrent, host).href; + info.enclosure_url = enclosureUrl; + info.enclosure_type = isMag ? 'application/x-bittorrent' : 'application/octet-stream'; + } + + return info; + }) + ) + ); + + ctx.state.data = { + title: `色花堂 - ${$('#pt > div:nth-child(1) > a:last-child').text()}`, + link, + item: out, + }; +}; diff --git a/lib/v2/dsndsht23/maintainer.js b/lib/v2/sehuatang/maintainer.js similarity index 100% rename from lib/v2/dsndsht23/maintainer.js rename to lib/v2/sehuatang/maintainer.js diff --git a/lib/v2/dsndsht23/radar.js b/lib/v2/sehuatang/radar.js similarity index 81% rename from lib/v2/dsndsht23/radar.js rename to lib/v2/sehuatang/radar.js index 1037947f05137a..592f8fae1903c6 100644 --- a/lib/v2/dsndsht23/radar.js +++ b/lib/v2/sehuatang/radar.js @@ -8,10 +8,10 @@ module.exports = { source: ['/:category', '/'], target: (params, url) => { const theUrl = new URL(url); - const matches = String(theUrl).match(/forum-(\d)+-\d+/); + const matches = theUrl.href.match(/forum-(\d)+-\d+/); const fid = theUrl.searchParams.get('fid') || (matches ? matches[1] : ''); const tid = theUrl.searchParams.get('typeid'); - return `/dsndsht23${fid ? `/${fid}` : ''}${tid ? `/${tid}` : ''}`; + return `/sehuatang${fid ? `/${fid}` : ''}${tid ? `/${tid}` : ''}`; }, }, ], diff --git a/lib/v2/dsndsht23/router.js b/lib/v2/sehuatang/router.js similarity index 100% rename from lib/v2/dsndsht23/router.js rename to lib/v2/sehuatang/router.js From 7505eb6dcd1bd24ac8298a232d8e12cee7834de8 Mon Sep 17 00:00:00 2001 From: Tony Date: Sat, 25 Feb 2023 14:10:24 -0200 Subject: [PATCH 3/3] fix(route): apkpure (#11955) --- docs/en/program-update.md | 6 ++--- docs/program-update.md | 8 +++--- lib/router.js | 2 +- lib/routes/apkpure/versions.js | 23 ---------------- lib/v2/apkpure/maintainer.js | 3 +++ lib/v2/apkpure/radar.js | 13 +++++++++ lib/v2/apkpure/router.js | 3 +++ lib/v2/apkpure/versions.js | 48 ++++++++++++++++++++++++++++++++++ 8 files changed, 75 insertions(+), 31 deletions(-) delete mode 100644 lib/routes/apkpure/versions.js create mode 100644 lib/v2/apkpure/maintainer.js create mode 100644 lib/v2/apkpure/radar.js create mode 100644 lib/v2/apkpure/router.js create mode 100644 lib/v2/apkpure/versions.js diff --git a/docs/en/program-update.md b/docs/en/program-update.md index e206db39589665..a85083b2524104 100644 --- a/docs/en/program-update.md +++ b/docs/en/program-update.md @@ -22,11 +22,11 @@ pageClass: routes -## Apkpure +## APKPure ### Versions - + ## App Center @@ -116,7 +116,7 @@ Language ### BlueStacks 5 Release Notes - + ## Brave diff --git a/docs/program-update.md b/docs/program-update.md index 66ed5c4fd080e0..8074916f321b71 100644 --- a/docs/program-update.md +++ b/docs/program-update.md @@ -32,11 +32,11 @@ pageClass: routes -## Apkpure +## APKPure -### Versions +### 所有版本 - + ## App Center @@ -114,7 +114,7 @@ pageClass: routes ### BlueStacks 5 版本日誌 - + ## Brave diff --git a/lib/router.js b/lib/router.js index f99202a55b4e11..9b1d94e06fd601 100644 --- a/lib/router.js +++ b/lib/router.js @@ -1088,7 +1088,7 @@ router.get('/anigamer/new_anime', lazyloadRouteHandler('./routes/anigamer/new_an router.get('/anigamer/anime/:sn', lazyloadRouteHandler('./routes/anigamer/anime')); // Apkpure -router.get('/apkpure/versions/:region/:pkg', lazyloadRouteHandler('./routes/apkpure/versions')); +// router.get('/apkpure/versions/:region/:pkg', lazyloadRouteHandler('./routes/apkpure/versions')); // 豆瓣美女 migrated to v2 // router.get('/dbmv/:category?', lazyloadRouteHandler('./routes/dbmv/index')); diff --git a/lib/routes/apkpure/versions.js b/lib/routes/apkpure/versions.js deleted file mode 100644 index f108a85c6dd6b1..00000000000000 --- a/lib/routes/apkpure/versions.js +++ /dev/null @@ -1,23 +0,0 @@ -const got = require('@/utils/got'); -const cheerio = require('cheerio'); - -module.exports = async (ctx) => { - const { region, pkg } = ctx.params; - const link = `https://apkpure.com/${region}/${pkg}/versions`; - const $ = await got.get(link).then((r) => cheerio.load(r.data)); - const img = new URL($('.ver-top img').attr('src')); - img.searchParams.delete('w'); // get full resolution icon - ctx.state.data = { - title: $('.ver-top-h1').text(), - description: ` ` + $('.ver-top-title>h2').text(), - link: decodeURI(link), - item: $('.ver li') - .toArray() - .map((ver) => ({ - title: $(ver).find('.ver-item-n').text(), - description: $(ver).find('a').attr('title'), - link: `https://apkpure.com${decodeURI($(ver).find('a').attr('href').split('?from')[0])}`, - pubDate: new Date($(ver).find('.update-on').text().replace(/年|月/g, '-').replace('日', '')).toUTCString(), - })), - }; -}; diff --git a/lib/v2/apkpure/maintainer.js b/lib/v2/apkpure/maintainer.js new file mode 100644 index 00000000000000..b95ea3b28c2939 --- /dev/null +++ b/lib/v2/apkpure/maintainer.js @@ -0,0 +1,3 @@ +module.exports = { + '/versions/:pkg/:region?': ['maple3142'], +}; diff --git a/lib/v2/apkpure/radar.js b/lib/v2/apkpure/radar.js new file mode 100644 index 00000000000000..18ef4d5d648617 --- /dev/null +++ b/lib/v2/apkpure/radar.js @@ -0,0 +1,13 @@ +module.exports = { + 'apkpure.com': { + _name: 'APKPure', + '.': [ + { + title: '所有版本', + docs: 'https://docs.rsshub.app/program-update.html#apkpure', + source: ['/:region/:stuff/:pkg/versions', '/:stuff/:pkg/versions', '/:stuff/:pkg'], + target: (params) => `/apkpure/versions/${params.pkg}${params.region ? `/${params.region}` : ''}`, + }, + ], + }, +}; diff --git a/lib/v2/apkpure/router.js b/lib/v2/apkpure/router.js new file mode 100644 index 00000000000000..6e779c25c4eb55 --- /dev/null +++ b/lib/v2/apkpure/router.js @@ -0,0 +1,3 @@ +module.exports = (router) => { + router.get('/versions/:pkg/:region?', require('./versions')); +}; diff --git a/lib/v2/apkpure/versions.js b/lib/v2/apkpure/versions.js new file mode 100644 index 00000000000000..263b75d906d210 --- /dev/null +++ b/lib/v2/apkpure/versions.js @@ -0,0 +1,48 @@ +const cheerio = require('cheerio'); +const logger = require('@/utils/logger'); +const { parseDate } = require('@/utils/parse-date'); + +module.exports = async (ctx) => { + const { pkg, region = 'en' } = ctx.params; + const baseUrl = 'https://apkpure.com'; + const link = `${baseUrl}/${region}/${pkg}/versions`; + + const browser = await require('@/utils/puppeteer')(); + const page = await browser.newPage(); + await page.setRequestInterception(true); + page.on('request', (request) => { + request.resourceType() === 'document' ? request.continue() : request.abort(); + }); + logger.debug(`Requesting ${link}`); + await page.goto(link, { + waitUntil: 'domcontentloaded', + }); + + const r = await page.evaluate(() => document.documentElement.innerHTML); + browser.close(); + + const $ = cheerio.load(r); + const img = new URL($('.ver-top img').attr('src')); + img.searchParams.delete('w'); // get full resolution icon + + const items = $('.ver li') + .toArray() + .map((ver) => { + ver = $(ver); + return { + title: ver.find('.ver-item-n').text(), + description: ver.html(), + link: `${baseUrl}${ver.find('a').attr('href')}`, + pubDate: parseDate(ver.find('.update-on').text().replace(/年|月/g, '-').replace('日', '')), + }; + }); + + ctx.state.data = { + title: $('.ver-top-h1').text(), + description: $('.ver-top-title p').text(), + image: img.href, + language: region ? region : 'en', + link, + item: items, + }; +};