
Commit

Merge pull request #150 from NekoAria/2.0
Refactor `dict_hash()` and related logic
Quan authored Jul 8, 2021
2 parents d874e90 + 9d9733a commit a375c6d
Showing 3 changed files with 35 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .env
@@ -1,2 +1,2 @@
 ENVIRONMENT=prod
-VERSION='v2.2.6'
+VERSION='v2.2.7'
29 changes: 13 additions & 16 deletions src/plugins/ELF_RSS2/RSS/rss_parsing.py
@@ -182,7 +182,10 @@ async def start(rss: rss_class.Rss) -> None:
         item_msg += await handle_source(source=item["link"])

         # Handle the date
-        item_msg += await handle_date(date=item.get("published_parsed"))
+        date = item.get("published_parsed")
+        if not date:
+            date = item.get("updated_parsed")
+        item_msg += await handle_date(date=date)

         # Handle the torrent
         try:
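
Note on the date fallback above: feedparser entries do not always carry `published_parsed`; some feeds only set `updated_parsed`, so the new code tries the former first and falls back to the latter. A minimal runnable sketch of the same logic (the `entry` dict is a made-up example, not plugin data):

    import time

    # Hypothetical entry that only carries an "updated" timestamp.
    entry = {"updated_parsed": time.strptime("2021-07-08", "%Y-%m-%d")}

    date = entry.get("published_parsed")
    if not date:
        date = entry.get("updated_parsed")

    print(time.strftime("%Y-%m-%d", date))  # 2021-07-08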
@@ -802,15 +805,8 @@ async def handle_translation(content: str) -> str:

 # Convert the dict to a JSON string, then compute its hash for later comparison
 def dict_hash(dictionary: Dict[str, Any]) -> str:
-    dictionary_temp = dictionary.copy()
-    # Filter out published_parsed to keep entries missing it from breaking the update check
-    if dictionary.get("published_parsed"):
-        dictionary_temp.pop("published_parsed")
-    # In some cases (e.g. Weibo posts with video) the body text can differ, so filter it out first
-    if dictionary.get("summary"):
-        dictionary_temp.pop("summary")
-    if dictionary.get("summary_detail"):
-        dictionary_temp.pop("summary_detail")
+    keys = ["id", "link", "published", "updated", "title"]
+    dictionary_temp = {k: dictionary[k] for k in keys if k in dictionary}
     d_hash = hashlib.md5()
     encoded = json.dumps(dictionary_temp, sort_keys=True).encode()
     d_hash.update(encoded)
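
The refactor replaces the old copy-then-pop approach, which had to enumerate every volatile field, with an allow-list: only the stable fields `id`, `link`, `published`, `updated`, and `title` feed into the hash. A self-contained sketch of the resulting behavior, assuming the function ends by returning the MD5 hexdigest (its return line is collapsed in this diff); the sample entries are made up:

    import hashlib
    import json
    from typing import Any, Dict

    def dict_hash(dictionary: Dict[str, Any]) -> str:
        keys = ["id", "link", "published", "updated", "title"]
        dictionary_temp = {k: dictionary[k] for k in keys if k in dictionary}
        d_hash = hashlib.md5()
        encoded = json.dumps(dictionary_temp, sort_keys=True).encode()
        d_hash.update(encoded)
        return d_hash.hexdigest()  # assumed return value, collapsed in the diff

    # Two versions of the same post whose bodies differ (e.g. a Weibo post with
    # video whose summary was re-rendered) now hash identically, because
    # "summary" is not in the allow-list.
    a = {"id": "1", "link": "https://example.com/1", "title": "t", "summary": "x"}
    b = {"id": "1", "link": "https://example.com/1", "title": "t", "summary": "y"}
    assert dict_hash(a) == dict_hash(b)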
@@ -828,14 +824,15 @@ async def check_update(new: list, old: list) -> list:
i["hash"] = hash_temp
temp.append(i)
# 将结果进行去重,避免消息重复发送
temp = [value for index, value in enumerate(temp) if value not in temp[index + 1 :]]
# 因为最新的消息会在最上面,所以要反转处理(主要是为了那些缺失 published_parsed 的消息)
result = []
for t in temp:
result.insert(0, t)
result = [
value for index, value in enumerate(temp) if value not in temp[index + 1 :]
]
# 对结果按照发布时间排序
result_with_date = [
(await handle_date(i.get("published_parsed")), i) for i in result
(await handle_date(i.get("updated_parsed")), i)
if i.get("updated_parsed")
else (await handle_date(i.get("published_parsed")), i)
for i in result
]
result_with_date.sort(key=lambda tup: tup[0])
result = [i for key, i in result_with_date]
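
The manual reversal loop is gone: the deduplication comprehension keeps the last occurrence of each duplicate, and ordering now comes from an explicit sort on the rendered date, preferring `updated_parsed` when it exists. A simplified synchronous sketch of the same dedup-then-sort pattern (`render_date` is a stand-in for the plugin's async `handle_date`, and the entries are made up):

    import time

    def render_date(date) -> str:
        # Stand-in for handle_date(): render a lexicographically sortable string.
        return time.strftime("%Y-%m-%d %H:%M:%S", date) if date else ""

    temp = [
        {"id": 2, "published_parsed": time.strptime("2021-07-08", "%Y-%m-%d")},
        {"id": 1, "published_parsed": time.strptime("2021-07-07", "%Y-%m-%d")},
        {"id": 2, "published_parsed": time.strptime("2021-07-08", "%Y-%m-%d")},
    ]

    # Keep an entry only if it does not appear again later in the list,
    # i.e. deduplicate by keeping the last occurrence.
    result = [v for i, v in enumerate(temp) if v not in temp[i + 1 :]]

    # Pair each entry with its rendered date (updated_parsed wins if present),
    # then sort oldest-first so messages go out in chronological order.
    result_with_date = [
        (render_date(e.get("updated_parsed") or e.get("published_parsed")), e)
        for e in result
    ]
    result_with_date.sort(key=lambda tup: tup[0])
    result = [e for _, e in result_with_date]

    print([e["id"] for e in result])  # [1, 2]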
21 changes: 21 additions & 0 deletions src/plugins/ELF_RSS2/start.py
@@ -1,11 +1,31 @@
+import codecs
 import nonebot
+import os
+import re
 from nonebot import logger, on_metaevent
 from nonebot.adapters.cqhttp import Bot, Event, LifecycleMetaEvent
+from pathlib import Path

 from .config import config
 from .RSS import rss_class
 from .RSS import my_trigger as rt

+FILE_PATH = str(str(Path.cwd()) + os.sep + "data" + os.sep)
+
+
+def hash_clear():
+    json_paths = list(Path(FILE_PATH).glob("*.json"))
+
+    # Compare by file name; a bare `i != "rss.json"` (Path vs. str) is always true.
+    for j in [str(i) for i in json_paths if i.name != "rss.json"]:
+
+        with codecs.open(j, "r", "utf-8") as f:
+            lines = f.readlines()
+
+        with codecs.open(j, "w", "utf-8") as f:
+            for line in lines:
+                if not re.search(r'"hash": "[0-9a-zA-Z]{32}",', line):
+                    f.write(line)


 async def start():
     (bot,) = nonebot.get_bots().values()
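
Why `hash_clear()` exists: `dict_hash()` now hashes a different field set, so hashes cached in the per-feed JSON files no longer match what the new code computes. On startup, every cached "hash" line (in every data file except rss.json, the subscription list) is stripped, and the hashes are recomputed on the next fetch. The line-based regex assumes the cache is written as indented JSON with one key per line; the sample line below is made up:

    import re

    # A cached entry line as it would appear in an indented JSON dump.
    line = '        "hash": "9e107d9d372bb6826bd81d3542a419d6",'

    # Same pattern as in hash_clear(): match any 32-character hash value.
    print(bool(re.search(r'"hash": "[0-9a-zA-Z]{32}",', line)))  # True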
@@ -29,6 +49,7 @@ async def start():
             ),
         )
         logger.info("ELF_RSS 订阅器启动成功!")
+        hash_clear()
     except Exception as e:
         await bot.send_msg(
             message_type="private",
Expand Down
