Skip to content

Commit

Permalink
Merge pull request #342 from yajiwa/main
Browse files: browse the repository at this point in the history
fix bug
  • Loading branch information
HibiKier authored May 19, 2022
2 parents cb643c0 + 53e5723 commit 89b87a3
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 1,700 deletions.
16 changes: 5 additions & 11 deletions plugins/word_clouds/__init__.py
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,14 @@
import re
from datetime import datetime, timedelta
from typing import Tuple, Union

try:
from zoneinfo import ZoneInfo
except ImportError:
from backports.zoneinfo import ZoneInfo # type: ignore

import pytz
from nonebot import on_command, get_driver
from nonebot.adapters.onebot.v11 import Message, MessageSegment
from nonebot.adapters.onebot.v11.event import GroupMessageEvent
from nonebot.matcher import Matcher
from nonebot.params import Arg, Command, CommandArg, Depends
from nonebot.typing import T_State
from .data_source import draw_word_cloud, get_list_msg
from .data_source import draw_word_cloud, get_list_msg
from configs.config import Config

__zx_plugin_name__ = "词云"
Expand Down Expand Up @@ -191,11 +186,10 @@ async def handle_message(
user_id = int(event.user_id)
else:
user_id = None

# 排除机器人自己发的消息
# 将时间转换到 UTC 时区
# 将时间转换到 东八 时区
messages = await get_list_msg(user_id, int(event.group_id),
days=[start.astimezone(ZoneInfo("UTC")), stop.astimezone(ZoneInfo("UTC"))])
days=(start.astimezone(pytz.timezone("Asia/Shanghai")),
stop.astimezone(pytz.timezone("Asia/Shanghai"))))
if messages:
image_bytes = await draw_word_cloud(messages, get_driver().config)
if image_bytes:
Expand Down
36 changes: 9 additions & 27 deletions plugins/word_clouds/data_source.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,6 @@
import random
import jieba.analyse
import re
from collections import Counter
from typing import List
from PIL import Image as IMG
import jieba
Expand All @@ -12,18 +11,18 @@
import numpy as np
import matplotlib.pyplot as plt
from io import BytesIO
from configs.path_config import IMAGE_PATH, FONT_PATH, TEXT_PATH
from configs.path_config import IMAGE_PATH, FONT_PATH
from utils.http_utils import AsyncHttpx
from models.chat_history import ChatHistory
from configs.config import Config


async def pre_precess(msg: List[str], wordcloud_stopwords_dir: str, config) -> str:
async def pre_precess(msg: List[str], config) -> str:
return await asyncio.get_event_loop().run_in_executor(
None, _pre_precess, msg, wordcloud_stopwords_dir, config)
None, _pre_precess, msg,config)


def _pre_precess(msg: List[str], wordcloud_stopwords_dir, config) -> str:
def _pre_precess(msg: List[str],config) -> str:
"""对消息进行预处理"""
# 过滤掉命令
command_start = tuple([i for i in config.command_start if i])
Expand All @@ -33,38 +32,27 @@ def _pre_precess(msg: List[str], wordcloud_stopwords_dir, config) -> str:
msg = re.sub(r"https?://[\w/:%#\$&\?\(\)~\.=\+\-]+", "", msg)

# 去除 \u200b
msg = re.sub(r"\u200b", "", msg)
msg = re.sub(r"[\u200b]", "", msg)

# 去除cq码
msg = re.sub(r"\[CQ:.*?]", "", msg)

# 去除&#91&#93
msg = re.sub("[&#9(1|3);]", "", msg)

# 去除 emoji
# https://github.com/carpedm20/emoji
msg = replace_emoji(msg)
# 分词
msg = "".join(cut_message(msg, wordcloud_stopwords_dir))
return msg


def cut_message(msg: str, wordcloud_stopwords_dir) -> List[str]:
"""分词"""
with wordcloud_stopwords_dir.open("r", encoding="utf8") as f:
stopwords = [word.strip() for word in f.readlines()]
f.close()
words = jieba.lcut(msg)
return [word.strip() for word in words if word.strip() not in stopwords]


async def draw_word_cloud(messages, config):
wordcloud_dir = IMAGE_PATH / "wordcloud"
wordcloud_dir.mkdir(exist_ok=True, parents=True)
# 默认用真寻图片
zx_logo_path = wordcloud_dir / "default.png"
wordcloud_ttf = FONT_PATH / "STKAITI.TTF"

wordcloud_test_dir = TEXT_PATH / "wordcloud"
wordcloud_test_dir.mkdir(exist_ok=True, parents=True)
wordcloud_stopwords_dir = wordcloud_test_dir / "stopwords.txt"
if not os.listdir(wordcloud_dir):
url = "https://ghproxy.com/https://raw.githubusercontent.com/HibiKier/zhenxun_bot/main/resources/image/wordcloud/default.png"
try:
Expand All @@ -77,15 +65,9 @@ async def draw_word_cloud(messages, config):
await AsyncHttpx.download_file(ttf_url, wordcloud_ttf)
except:
return False
if not wordcloud_stopwords_dir.exists():
stopword_url = 'https://ghproxy.com/https://raw.githubusercontent.com/HibiKier/zhenxun_bot/main/resources/text/wordcloud/stopwords.txt'
try:
await AsyncHttpx.download_file(stopword_url, wordcloud_stopwords_dir)
except:
return False

topK = min(int(len(messages)), 100000)
read_name = jieba.analyse.extract_tags(await pre_precess(messages, wordcloud_stopwords_dir, config), topK=topK,
read_name = jieba.analyse.extract_tags(await pre_precess(messages, config), topK=topK,
withWeight=True,
allowPOS=())
name = []
Expand Down
Loading

0 comments on commit 89b87a3

Please sign in to comment.