Merge pull request #202 from NekoAria/2.0
V2.4.3
Quan authored Oct 9, 2021
2 parents 2827eef + 67c7aa0 commit 3dec5ce
Showing 6 changed files with 124 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .env
@@ -1,2 +1,2 @@
ENVIRONMENT=prod
-VERSION='v2.4.2'
+VERSION='v2.4.3'
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "ELF_RSS"
-version = "2.4.2"
+version = "2.4.3"
description = "ELF_RSS"
authors = ["Quan666"]
license = "GPL v3"
2 changes: 1 addition & 1 deletion setup.py
@@ -7,7 +7,7 @@

setuptools.setup(
    name="ELF_RSS",
-    version="2.4.2",
+    version="2.4.3",
    author="Quan666",
    author_email="i@oy.mk",
    description="QQ bot RSS subscription plugin; RSSHub is the recommended feed source",
5 changes: 4 additions & 1 deletion src/plugins/ELF_RSS2/RSS/routes/Parsing/cache_manage.py
@@ -7,6 +7,7 @@
from pyquery import PyQuery as Pq
from tinydb import TinyDB

+from .check_update import get_item_date
from .handle_images import download_image
from ... import rss_class
from ....config import config
@@ -63,7 +64,9 @@ async def cache_db_manage(conn: sqlite3.connect) -> None:
async def cache_json_manage(db: TinyDB, new_data_length: int) -> None:
    # Keep at most config.limit + new_data_length records
    limit = config.limit + new_data_length
-    retains = db.all()[-limit:]
+    retains = db.all()
+    retains.sort(key=get_item_date)
+    retains = retains[-limit:]
    db.truncate()
    db.insert_multiple(retains)

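The revised cache_json_manage sorts the cached records by item date before trimming, so the newest items by publication time are retained even when entries were inserted out of order. A minimal sketch of the difference, using made-up records and a plain key function standing in for get_item_date:

records = [
    {"link": "a", "date": 3},
    {"link": "b", "date": 1},
    {"link": "c", "date": 2},
]
limit = 2

# Old behavior: keep the last `limit` records in insertion order
old_retained = records[-limit:]  # [b, c] -- the newest item, a, is dropped

# New behavior: sort by item date first, then trim
new_retained = sorted(records, key=lambda r: r["date"])[-limit:]  # [c, a]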
2 changes: 1 addition & 1 deletion src/plugins/ELF_RSS2/RSS/routes/__init__.py
@@ -1 +1 @@
-from . import nga, pixiv, south_plus, weibo # noqa
+from . import nga, pixiv, south_plus, weibo, danbooru # noqa
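Importing danbooru for its side effects is what activates the new route: the @ParsingBase.append_handler and @ParsingBase.append_before_handler decorators in danbooru.py register their functions when the module is imported. A rough sketch of this registration pattern, not the actual ParsingBase implementation (the handlers dict and the decorator signature here are assumptions for illustration):

class ParsingBase:
    # Maps a parsing type (e.g. "picture") to a list of (regex, handler) pairs
    handlers: dict = {}

    @classmethod
    def append_handler(cls, parsing_type: str, rex: str = "(.*)"):
        def decorator(func):
            # Registration happens at import time, as a decorator side effect
            cls.handlers.setdefault(parsing_type, []).append((rex, func))
            return func

        return decorator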
116 changes: 116 additions & 0 deletions src/plugins/ELF_RSS2/RSS/routes/danbooru.py
@@ -0,0 +1,116 @@
import httpx
import sqlite3

from nonebot import logger
from pyquery import PyQuery as Pq

from .Parsing import (
    ParsingBase,
    get_proxy,
    write_item,
    cache_db_manage,
    duplicate_exists,
)
from .Parsing.handle_images import handle_img_combo
from ..rss_class import Rss
from ...config import DATA_PATH


# Picture handler
@ParsingBase.append_handler(parsing_type="picture", rex="danbooru")
async def handle_picture(
    rss: Rss, state: dict, item: dict, item_msg: str, tmp: str, tmp_state: dict
) -> str:

    # Check whether title-only pushing is enabled
    if rss.only_title:
        return ""

    res = await handle_img(
        url=item["link"],
        img_proxy=rss.img_proxy,
    )

    # Check whether picture-only pushing is enabled
    if rss.only_pic:
        return f"{res}\n"

    return f"{tmp + res}\n"


# Handle images and videos
async def handle_img(url: str, img_proxy: bool) -> str:
    img_str = ""

    # Extract the image
    async with httpx.AsyncClient(proxies=get_proxy(img_proxy)) as client:
        response = await client.get(url)
        d = Pq(response.text)
        img = d("img#image")
        if img:
            url = img.attr("src")
        else:
            img_str += "Video cover: "
            url = d("meta[property='og:image']").attr("content")
        img_str += await handle_img_combo(url, img_proxy)

    return img_str


# If deduplication mode is enabled, filter the push list
@ParsingBase.append_before_handler(priority=12, rex="danbooru")
async def handle_check_update(rss: Rss, state: dict):
    change_data = state.get("change_data")
    conn = state.get("conn")
    db = state.get("tinydb")

    # Check whether deduplication is enabled via the duplicate_filter_mode field
    if not rss.duplicate_filter_mode:
        return {"change_data": change_data}

    if not conn:
        conn = sqlite3.connect(DATA_PATH / "cache.db")
        conn.set_trace_callback(logger.debug)

    await cache_db_manage(conn)

    delete = []
    for index, item in enumerate(change_data):
        summary = await get_summary(item, rss.img_proxy)
        is_duplicate, image_hash = await duplicate_exists(
            rss=rss,
            conn=conn,
            link=item["link"],
            title=item["title"],
            summary=summary,
        )
        if is_duplicate:
            # Duplicates are written to the cache but dropped from the push list
            write_item(db, item)
            delete.append(index)
        else:
            change_data[index]["image_hash"] = str(image_hash)

    change_data = [
        item for index, item in enumerate(change_data) if index not in delete
    ]

    return {
        "change_data": change_data,
        "conn": conn,
    }


# Get the post body
async def get_summary(item: dict, img_proxy: bool) -> str:
    summary = (
        item["content"][0].get("value") if item.get("content") else item["summary"]
    )
    # If the image is not a video cover, replace it with a higher-resolution preview
    summary_doc = Pq(summary)
    async with httpx.AsyncClient(proxies=get_proxy(img_proxy)) as client:
        response = await client.get(item["link"])
        d = Pq(response.text)
        img = d("img#image")
        if img:
            summary_doc("img").attr("src", img.attr("src"))
    return str(summary_doc)
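
For reference, the selector logic shared by handle_img and get_summary can be exercised on its own. A minimal standalone sketch, assuming a reachable Danbooru post page (the URL is illustrative, and extract_image_url is a hypothetical helper, not part of the plugin):

import asyncio

import httpx
from pyquery import PyQuery as Pq


async def extract_image_url(post_url: str) -> str:
    # Fetch the post page; prefer the full image (img#image) and fall back
    # to the Open Graph thumbnail for video posts, as handle_img does above.
    async with httpx.AsyncClient() as client:
        response = await client.get(post_url)
    d = Pq(response.text)
    img = d("img#image")
    if img:
        return img.attr("src")
    return d("meta[property='og:image']").attr("content")


print(asyncio.run(extract_image_url("https://danbooru.donmai.us/posts/1")))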
