feat(route): add GDUFS news route and GDUFS xwxy news (DIYgod#17822)

* feat(route):add GDUFS news route && GDUFS xwxy news * fix(xwxy-news): update authors extraction to use toArray() method * fix(xwxy-news): improve article detail fetching by explicitly passing item to fetchArticleDetail * fix(xwxy-news): reorder authors extraction to trim text after converting to array * refactor(routes): remove protocol from URLs in gdufs/news.ts Co-authored-by: Tony <TonyRL@users.noreply.github.com> * refactor(routes): simplify route name in gdufs/news.ts Co-authored-by: Tony <TonyRL@users.noreply.github.com> * refactor(routes): remove protocol from URLs in gdufs/news.ts Co-authored-by: Tony <TonyRL@users.noreply.github.com> * refactor(routes): simplify route name in gdufs/xwxy-news.ts Co-authored-by: Tony <TonyRL@users.noreply.github.com> * refactor(routes): use cache to optimize article content fetching and author extraction in gdufs/news.ts and gdufs/xwxy-news.ts * refactor(routes): Cache the entire item object in /gdufs/news & /gdufs/xwxy-news route ---------
Jiang10086 · Dec 16, 2024 · 07c1e88 · 07c1e88
1 parent 030924a
commit 07c1e88
Show file tree

Hide file tree

Showing 3 changed files with 191 additions and 0 deletions.
diff --git a/lib/routes/gdufs/namespace.ts b/lib/routes/gdufs/namespace.ts
@@ -0,0 +1,7 @@
+import type { Namespace } from '@/types';
+
+/** Namespace metadata shared by the routes in this directory: display name, site host (no protocol) and language tag. */
+export const namespace: Namespace = {
+    name: '广东外语外贸大学',
+    url: 'gdufs.edu.cn',
+    lang: 'zh-CN',
+};
diff --git a/lib/routes/gdufs/news.ts b/lib/routes/gdufs/news.ts
@@ -0,0 +1,94 @@
+import { Route } from '@/types';
+import { load } from 'cheerio';
+import cache from '@/utils/cache';
+import got from '@/utils/got';
+import { parseDate } from '@/utils/parse-date';
+
+// Site origin; the handler below uses it as the base when resolving relative article hrefs.
+const site = 'https://www.gdufs.edu.cn';
+
+/**
+ * Route definition for the GDUFS news feed.
+ *
+ * Declared at path `/news` (example URL `/gdufs/news`), takes no parameters and is served by `handler`.
+ */
+export const route: Route = {
+    path: '/news',
+    categories: ['university'],
+    example: '/gdufs/news',
+    parameters: {},
+    // Every feature flag is off for this route.
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    // Pages declared as radar sources for this route (written without protocol).
+    radar: [
+        {
+            source: ['www.gdufs.edu.cn/gwxw/gwxw1.htm', 'www.gdufs.edu.cn/'],
+        },
+    ],
+    name: '新闻',
+    maintainers: ['gz4zzxc'],
+    handler,
+    // The list page this route is built from (written without protocol).
+    url: 'www.gdufs.edu.cn/gwxw/gwxw1.htm',
+};
+
+/**
+ * Builds the feed for the GDUFS news list page.
+ *
+ * Fetches the list page, reads one entry per `ul.list_luntan li`, then loads each
+ * article page through the cache to fill in its HTML body and author.
+ *
+ * When an article cannot be loaded, a placeholder item is returned for that entry.
+ * The placeholder is built outside `cache.tryGet` so that it is never stored in the
+ * cache and the article is fetched again on the next request.
+ * NOTE(review): this relies on `cache.tryGet` not caching anything when its callback
+ * throws — confirm against the cache utility.
+ *
+ * @returns Feed data with `title`, `link`, `description` and the resolved `item` array.
+ * @throws If the list page itself cannot be fetched.
+ */
+async function handler() {
+    const link = 'https://www.gdufs.edu.cn/gwxw/gwxw1.htm';
+
+    const response = await got(link);
+    const $ = load(response.body);
+    const list = $('ul.list_luntan li');
+
+    const items = await Promise.all(
+        list.toArray().map(async (element) => {
+            const item = $(element);
+            const href = item.find('a').attr('href') || '';
+            const title = item.find('h5').text().trim();
+            // The date is split across two elements on the list page: <h6> and <h3> are joined as "<h6>/<h3>".
+            const day = item.find('h3').text().trim();
+            const yearMonth = item.find('h6').text().trim();
+            const dateString = yearMonth + '/' + day;
+            // Absolute hrefs are used as-is; anything else is resolved against the site origin.
+            const fullLink = href.startsWith('http') ? href : new URL(href, site).href;
+            const pubDate = parseDate(dateString).toUTCString();
+
+            try {
+                // The article URL is the cache key; only a successfully built item is returned from the callback.
+                return await cache.tryGet(fullLink, async () => {
+                    const articleRes = await got(fullLink);
+                    const $$ = load(articleRes.body);
+                    const description = $$('.v_news_content').html()?.trim() || '';
+
+                    // Scan the metadata spans for an editor/writer label; the last matching span wins.
+                    let author = '';
+                    const authorSpans = $$('.nav01 h6 .ll span');
+                    authorSpans.each((_, el) => {
+                        const text = $$(el).text().trim();
+                        if (text.includes('责任编辑：')) {
+                            author = text.replace('责任编辑：', '').trim();
+                        } else if (text.includes('文字：')) {
+                            author = text.replace('文字：', '').trim();
+                        }
+                    });
+
+                    return {
+                        title,
+                        link: fullLink,
+                        description,
+                        pubDate,
+                        author,
+                    };
+                });
+            } catch {
+                // Best-effort: keep the entry in the feed with a placeholder body instead of failing the whole feed.
+                return {
+                    title,
+                    link: fullLink,
+                    description: '内容获取失败。',
+                    pubDate,
+                    author: '',
+                };
+            }
+        })
+    );
+
+    return {
+        title: '广外-大学要闻',
+        link,
+        description: '广东外语外贸大学-大学要闻',
+        item: items,
+    };
+}
diff --git a/lib/routes/gdufs/xwxy/xwxy-news.ts b/lib/routes/gdufs/xwxy/xwxy-news.ts
@@ -0,0 +1,90 @@
+import { Route } from '@/types';
+import { load } from 'cheerio';
+import cache from '@/utils/cache';
+import got from '@/utils/got';
+import { parseDate } from '@/utils/parse-date';
+
+/**
+ * Route definition for the GDUFS xwxy news feed.
+ *
+ * Declared at path `/xwxy-news` (example URL `/gdufs/xwxy-news`), takes no parameters and is served by `handler`.
+ */
+export const route: Route = {
+    path: '/xwxy-news',
+    categories: ['university'],
+    example: '/gdufs/xwxy-news',
+    parameters: {},
+    // Every feature flag is off for this route.
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    // Pages declared as radar sources for this route (written without protocol).
+    radar: [
+        {
+            source: ['xwxy.gdufs.edu.cn/xwzx/xyxw', 'xwxy.gdufs.edu.cn/'],
+        },
+    ],
+    name: '新闻学院-学院新闻',
+    maintainers: ['gz4zzxc'],
+    handler,
+    // The list page this route is built from (written without protocol).
+    url: 'xwxy.gdufs.edu.cn/xwzx/xyxw',
+};
+
+/**
+ * Builds the feed for the GDUFS xwxy news list page.
+ *
+ * Fetches the list page, reads one entry per `div.flex-center a.clearfix`, then loads
+ * each article page through the cache to add its HTML body and author text.
+ *
+ * When an article cannot be loaded, a placeholder item is returned for that entry.
+ * The placeholder is built outside `cache.tryGet` so that it is never stored in the
+ * cache and the article is fetched again on the next request.
+ * NOTE(review): this relies on `cache.tryGet` not caching anything when its callback
+ * throws — confirm against the cache utility.
+ *
+ * @returns Feed data with `title`, `link`, `description` and the resolved `item` array.
+ * @throws If the list page cannot be fetched or comes back with an empty body.
+ */
+async function handler() {
+    const BASE_URL = 'https://xwxy.gdufs.edu.cn';
+    const link = `${BASE_URL}/xwzx/xyxw.htm`;
+
+    const response = await got(link);
+    if (!response.body) {
+        throw new Error('No response body');
+    }
+    const $ = load(response.body);
+    const list = $('div.flex-center a.clearfix');
+
+    // First pass: everything that can be read from the list page alone.
+    const items = list.toArray().map((element) => {
+        const item = $(element);
+        const href = item.attr('href') || '';
+        const dateText = item.find('i').text().trim();
+        const pubDate = parseDate(dateText).toUTCString();
+        return {
+            title: item.find('h5').text().trim(),
+            // Absolute hrefs are used as-is; anything else is resolved against the site origin.
+            link: href.startsWith('http') ? href : new URL(href, BASE_URL).href,
+            pubDate,
+        };
+    });
+
+    // Second pass: enrich each entry from its article page, using the article URL as the cache key.
+    const enhancedItems = await Promise.all(
+        items.map(async (item) => {
+            try {
+                // Only a successfully built item is returned from the callback.
+                return await cache.tryGet(item.link, async () => {
+                    const articleResponse = await got(item.link);
+                    if (!articleResponse.body) {
+                        throw new Error('No article body');
+                    }
+                    const $$ = load(articleResponse.body);
+                    const content = $$('#vsb_content .v_news_content').html() || '';
+                    const authors = $$('.show01 p i')
+                        .toArray()
+                        .map((el) => $$(el).text().trim());
+
+                    return {
+                        ...item,
+                        description: content,
+                        author: authors.join(' '),
+                    };
+                });
+            } catch {
+                // Best-effort: keep the entry in the feed with a placeholder body instead of failing the whole feed.
+                return {
+                    ...item,
+                    description: '无法获取内容',
+                    author: '',
+                };
+            }
+        })
+    );
+
+    return {
+        title: '广外新传学院-学院新闻',
+        link,
+        description: '广东外语外贸大学新闻与传播学院官网-学院新闻',
+        item: enhancedItems,
+    };
+}