From 0b885c9cc1501a4cf7a3037544ef48266bb4e0b3 Mon Sep 17 00:00:00 2001 From: Rongrong Date: Thu, 25 Apr 2024 02:41:24 +0800 Subject: [PATCH 1/6] feat!(db): new field {Sub,User}.display_entry_tags With corresponding migration files. Co-authored-by: maooyer Signed-off-by: Rongrong --- .../models/4_20240425020849_display_entry_tags.py | 13 +++++++++++++ .../models/4_20240425020849_display_entry_tags.py | 13 +++++++++++++ src/db/models.py | 5 +++++ 3 files changed, 31 insertions(+) create mode 100644 src/db/migrations_pgsql/models/4_20240425020849_display_entry_tags.py create mode 100644 src/db/migrations_sqlite/models/4_20240425020849_display_entry_tags.py diff --git a/src/db/migrations_pgsql/models/4_20240425020849_display_entry_tags.py b/src/db/migrations_pgsql/models/4_20240425020849_display_entry_tags.py new file mode 100644 index 0000000000..f8ba32fcb4 --- /dev/null +++ b/src/db/migrations_pgsql/models/4_20240425020849_display_entry_tags.py @@ -0,0 +1,13 @@ +from tortoise import BaseDBAsyncClient + + +async def upgrade(db: BaseDBAsyncClient) -> str: + return """ + ALTER TABLE "sub" ADD "display_entry_tags" SMALLINT NOT NULL DEFAULT -100; + ALTER TABLE "user" ADD "display_entry_tags" SMALLINT NOT NULL DEFAULT -1;""" + + +async def downgrade(db: BaseDBAsyncClient) -> str: + return """ + ALTER TABLE "sub" DROP COLUMN "display_entry_tags"; + ALTER TABLE "user" DROP COLUMN "display_entry_tags";""" diff --git a/src/db/migrations_sqlite/models/4_20240425020849_display_entry_tags.py b/src/db/migrations_sqlite/models/4_20240425020849_display_entry_tags.py new file mode 100644 index 0000000000..f8ba32fcb4 --- /dev/null +++ b/src/db/migrations_sqlite/models/4_20240425020849_display_entry_tags.py @@ -0,0 +1,13 @@ +from tortoise import BaseDBAsyncClient + + +async def upgrade(db: BaseDBAsyncClient) -> str: + return """ + ALTER TABLE "sub" ADD "display_entry_tags" SMALLINT NOT NULL DEFAULT -100; + ALTER TABLE "user" ADD "display_entry_tags" SMALLINT NOT NULL DEFAULT -1;""" + + 
+async def downgrade(db: BaseDBAsyncClient) -> str: + return """ + ALTER TABLE "sub" DROP COLUMN "display_entry_tags"; + ALTER TABLE "user" DROP COLUMN "display_entry_tags";""" diff --git a/src/db/models.py b/src/db/models.py index 63f4e01936..9a65005dc3 100644 --- a/src/db/models.py +++ b/src/db/models.py @@ -42,6 +42,7 @@ class User(Model, Base): display_author = fields.SmallIntField(default=0) display_via = fields.SmallIntField(default=0) display_title = fields.SmallIntField(default=0) + display_entry_tags = fields.SmallIntField(default=-1) style = fields.SmallIntField(default=0) display_media = fields.SmallIntField(default=0) @@ -88,6 +89,8 @@ def __str__(self): return self.link +# TODO: migrate the default value of all fields after `notify` (inclusive) to -100 +# TODO: description causes a lot of trouble on SQLite; remove the description of all fields after `notify` (inclusive) class Sub(Model, Base): """ Sub model. @@ -122,6 +125,8 @@ class Sub(Model, Base): '0=auto, 1=force display') display_title = fields.SmallIntField(default=0, description='Display title or not?' '-1=disable, 0=auto, 1=force display') + # new field, use the de facto default value (-100) and with description unset to avoid future migration + display_entry_tags = fields.SmallIntField(default=-100) style = fields.SmallIntField(default=0, description='Style of posts: ' '0=RSStT, 1=flowerss') display_media = fields.SmallIntField(default=0, description='Display media or not?' 
From 6211835f45e4beba5f4c8d5e1154b46abe8a8e4e Mon Sep 17 00:00:00 2001 From: Rongrong Date: Thu, 25 Apr 2024 02:43:28 +0800 Subject: [PATCH 2/6] feat(parsing): display_entry_tags to include tags in feed entry Co-authored-by: maooyer Signed-off-by: Rongrong --- src/parsing/post.py | 41 +++++++++++++++++++++++++---------- src/parsing/post_formatter.py | 12 +++++++++- src/parsing/utils.py | 13 +++++++++++ 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/src/parsing/post.py b/src/parsing/post.py index 3219840067..9fb51fb0e2 100644 --- a/src/parsing/post.py +++ b/src/parsing/post.py @@ -1,7 +1,7 @@ from __future__ import annotations from typing import Optional -from .. import db, env +from .. import db from ..errors_collection import MediaSendFailErrors from .utils import parse_entry, logger, Enclosure from .post_formatter import PostFormatter @@ -10,8 +10,16 @@ async def get_post_from_entry(entry, feed_title: str, feed_link: str = None) -> 'Post': entry_parsed = await parse_entry(entry, feed_link) - return Post(entry_parsed.content, entry_parsed.title, feed_title, entry_parsed.link, entry_parsed.author, - feed_link=feed_link, enclosures=entry_parsed.enclosures) + return Post( + html=entry_parsed.content, + title=entry_parsed.title, + feed_title=feed_title, + link=entry_parsed.link, + author=entry_parsed.author, + tags=entry_parsed.tags, + feed_link=feed_link, + enclosures=entry_parsed.enclosures + ) class Post: @@ -21,6 +29,7 @@ def __init__(self, feed_title: Optional[str] = None, link: Optional[str] = None, author: Optional[str] = None, + tags: Optional[list[str]] = None, feed_link: Optional[str] = None, enclosures: list[Enclosure] = None): """ @@ -29,6 +38,7 @@ def __init__(self, :param feed_title: feed title :param link: post link :param author: post author + :param tags: post tags :param feed_link: the url of the feed where the post from """ self.html = html @@ -36,16 +46,20 @@ def __init__(self, self.feed_title = feed_title self.link = link 
self.author = author + self.tags = tags self.feed_link = feed_link self.enclosures = enclosures - self.post_formatter = PostFormatter(html=self.html, - title=self.title, - feed_title=self.feed_title, - link=self.link, - author=self.author, - feed_link=self.feed_link, - enclosures=self.enclosures) + self.post_formatter = PostFormatter( + html=self.html, + title=self.title, + feed_title=self.feed_title, + link=self.link, + author=self.author, + tags=self.tags, + feed_link=self.feed_link, + enclosures=self.enclosures + ) async def send_formatted_post_according_to_sub(self, sub: db.Sub): if not isinstance(sub.feed, db.User): @@ -61,6 +75,7 @@ async def send_formatted_post_according_to_sub(self, sub: db.Sub): display_author=sub.display_author if sub.display_author != -100 else user.display_author, display_via=sub.display_via if sub.display_via != -100 else user.display_via, display_title=sub.display_title if sub.display_title != -100 else user.display_title, + display_entry_tags=sub.display_entry_tags if sub.display_entry_tags != -100 else user.display_entry_tags, style=sub.style if sub.style != -100 else user.style, display_media=sub.display_media if sub.display_media != -100 else user.display_media, silent=not (sub.notify if sub.notify != -100 else user.notify) @@ -76,6 +91,7 @@ async def send_formatted_post(self, display_author: int = 0, display_via: int = 0, display_title: int = 0, + display_entry_tags: int = -1, style: int = 0, display_media: int = 0, silent: bool = False): @@ -92,6 +108,7 @@ async def send_formatted_post(self, :param display_author: -1=disable, 0=auto, 1=force display :param display_via: -2=completely disable, -1=disable but display link, 0=auto, 1=force display :param display_title: -1=disable, 0=auto, 1=force display + :param display_entry_tags: -1=disable, 1=force display :param style: 0=RSStT, 1=flowerss :param display_media: -1=disable, 0=enable :param silent: whether to send with notification sound @@ -107,6 +124,7 @@ async def 
send_formatted_post(self, display_author=display_author, display_via=display_via, display_title=display_title, + display_entry_tags=display_entry_tags, style=style, display_media=display_media) @@ -168,6 +186,7 @@ async def test_format(self, user_id: int): display_title=user.display_title, style=user.style, display_media=user.display_media, - silent=not user.notify + silent=not user.notify, + display_entry_tags=user.display_entry_tags, ) return await self.send_formatted_post_according_to_sub(sub=sub) diff --git a/src/parsing/post_formatter.py b/src/parsing/post_formatter.py index c46e34a2a4..0c2d6d1394 100644 --- a/src/parsing/post_formatter.py +++ b/src/parsing/post_formatter.py @@ -66,6 +66,7 @@ def __init__(self, feed_title: Optional[str] = None, link: Optional[str] = None, author: Optional[str] = None, + tags: Optional[list[str]] = None, feed_link: str = None, enclosures: list[utils.Enclosure] = None): """ @@ -74,6 +75,7 @@ def __init__(self, :param feed_title: feed title :param link: post link :param author: post author + :param tags: post tags :param feed_link: the url of the feed where the post from """ self.html = html @@ -81,6 +83,7 @@ def __init__(self, self.feed_title = feed_title self.link = link self.author = author + self.tags = tags self.feed_link = feed_link self.enclosures = enclosures @@ -117,6 +120,7 @@ async def get_formatted_post(self, display_author: int = 0, display_via: int = 0, display_title: int = 0, + display_entry_tags: int = -1, style: int = 0, display_media: int = 0) -> Optional[tuple[str, bool, bool]]: """ @@ -132,6 +136,7 @@ async def get_formatted_post(self, :param display_via: -3=disable but display link as post title, -2=completely disable, -1=disable but display link at the end, 0=feed title and link, 1=feed title and link as post title :param display_title: -1=disable, 0=auto, 1=force display + :param display_entry_tags: -1=disable, 1=force display :param style: 0=RSStT, 1=flowerss :param display_media: -1=disable, 0=enable 
:return: (formatted post, need media, need linkpreview) @@ -143,6 +148,7 @@ async def get_formatted_post(self, assert display_via in {NO_FEED_TITLE_BUT_LINK_AS_POST_TITLE, COMPLETELY_DISABLE, NO_FEED_TITLE_BUT_TEXT_LINK, NO_FEED_TITLE_BUT_BARE_LINK, FEED_TITLE_AND_LINK, FEED_TITLE_AND_LINK_AS_POST_TITLE} assert display_title in {DISABLE, AUTO, FORCE_DISPLAY} + assert display_entry_tags in {DISABLE, FORCE_DISPLAY} assert display_media in {DISABLE, AUTO, ONLY_MEDIA_NO_CONTENT} assert style in {RSSTT, FLOWERSS} @@ -150,7 +156,7 @@ async def get_formatted_post(self, tags = tags or [] param_hash = f'{sub_title}|{tags}|{send_mode}|{length_limit}|{link_preview}|' \ - f'{display_author}|{display_via}|{display_title}|{display_media}|{style}' + f'{display_author}|{display_via}|{display_title}|{display_entry_tags}|{display_media}|{style}' if param_hash in self.__param_to_option_cache: option_hash = self.__param_to_option_cache[param_hash] @@ -215,6 +221,10 @@ async def get_formatted_post(self, ) ) + # ---- determine tags ---- + if display_entry_tags == FORCE_DISPLAY: + tags = utils.merge_tags(tags, self.tags) + # ---- determine message_style ---- if style == FLOWERSS: message_style = FLOWERSS_STYLE diff --git a/src/parsing/utils.py b/src/parsing/utils.py index f7ac7a9a4c..8787e9826f 100644 --- a/src/parsing/utils.py +++ b/src/parsing/utils.py @@ -11,6 +11,7 @@ from functools import partial from urllib.parse import urljoin from os import path +from itertools import chain from .. 
import log from ..aio_helper import run_async @@ -156,6 +157,7 @@ class EntryParsed: content: str = '' link: Optional[str] = None author: Optional[str] = None + tags: Optional[list[str]] = None title: Optional[str] = None enclosures: list[Enclosure] = None @@ -184,6 +186,8 @@ class EntryParsed: title = entry.get('title') title = html_space_stripper(title, enable_emojify=True) if title else None EntryParsed.title = title or None # reject empty string + if (tags := entry.get('tags')) and isinstance(tags, list): + EntryParsed.tags = list(filter(None, (tag.get('term') for tag in tags))) enclosures = [] @@ -299,3 +303,12 @@ def merge_contiguous_entities(entities: Sequence[TypeMessageEntity]) -> list[Typ entity.length = new_end_pos - new_start_pos merged_entities.append(entity) return merged_entities + + +def merge_tags(*tag_lists: Optional[list[str]]) -> list[str]: + non_empty_tag_lists: list[list[str]] = list(filter(None, tag_lists)) + if not non_empty_tag_lists: + return [] + if len(non_empty_tag_lists) == 1: + return non_empty_tag_lists[0] + return list(dict.fromkeys(chain(*non_empty_tag_lists))) From bd539210f984a0619c987dd0bd3d49f5a94ff770 Mon Sep 17 00:00:00 2001 From: Rongrong Date: Thu, 25 Apr 2024 02:54:14 +0800 Subject: [PATCH 3/6] feat(command): display_entry_tags to include tags in feed entry Co-authored-by: maooyer Signed-off-by: Rongrong --- src/command/customization.py | 7 ++++--- src/command/inner/customization.py | 19 +++++++++++++++++-- src/command/inner/sub.py | 29 +++++++++++++++++------------ 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/src/command/customization.py b/src/command/customization.py index ef84aa9033..5eac76d6fd 100644 --- a/src/command/customization.py +++ b/src/command/customization.py @@ -56,6 +56,7 @@ async def callback_set(event: events.CallbackQuery.Event, display_author: -1=disable, 0=auto, 1=force display display_via: -2=completely disable, -1=disable but display link, 0=auto, 1=force display display_title: 
-1=disable, 0=auto, 1=force display + display_entry_tags: -1=disable, 1=force display style: 0=RSStT, 1=flowerss """ chat_id = chat_id or event.chat_id @@ -162,7 +163,7 @@ async def callback_reset(event: events.CallbackQuery.Event, sub.interval = None update_interval_flag = True sub.length_limit = sub.notify = sub.send_mode = sub.link_preview = sub.display_author = sub.display_media = \ - sub.display_title = sub.display_via = sub.style = -100 + sub.display_title = sub.display_entry_tags = sub.display_via = sub.style = -100 await sub.save() if update_interval_flag: await inner.utils.update_interval(sub) @@ -206,9 +207,9 @@ async def callback_reset_all(event: events.CallbackQuery.Event, tasks.append(inner.utils.update_interval(sub)) sub.interval = None sub.length_limit = sub.notify = sub.send_mode = sub.link_preview = sub.display_author = sub.display_media = \ - sub.display_title = sub.display_via = sub.style = -100 + sub.display_title = sub.display_entry_tags = sub.display_via = sub.style = -100 await db.Sub.bulk_update(subs, ('interval', 'length_limit', 'notify', 'send_mode', 'link_preview', 'display_author', - 'display_media', 'display_title', 'display_via', 'style')) + 'display_media', 'display_title', 'display_entry_tags', 'display_via', 'style')) for task in tasks: env.loop.create_task(task) await event.edit(i18n[lang]['reset_all_successful']) diff --git a/src/command/inner/customization.py b/src/command/inner/customization.py index 349ed61249..ba4a84489d 100644 --- a/src/command/inner/customization.py +++ b/src/command/inner/customization.py @@ -19,6 +19,7 @@ "display_author": (0, 1, -1), "display_via": (0, 1, -3, -1, -4, -2), "display_title": (0, 1, -1), + "display_entry_tags": (1, -1), "style": (0, 1) } @@ -54,7 +55,7 @@ async def get_customization_buttons(sub_or_user: Union[db.Sub, db.User], is_user = isinstance(sub_or_user, db.User) if is_user: interval_d = length_limit_d = notify_d = send_mode_d = link_preview_d = display_media_d = display_author_d = \ - 
display_via_d = display_title_d = style_d = False + display_via_d = display_title_d = display_entry_tags_d = style_d = False all_default = None else: if not isinstance(sub_or_user.user, db.User): @@ -68,9 +69,10 @@ async def get_customization_buttons(sub_or_user: Union[db.Sub, db.User], display_author_d = sub_or_user.display_author == -100 display_via_d = sub_or_user.display_via == -100 display_title_d = sub_or_user.display_title == -100 + display_entry_tags_d = sub_or_user.display_entry_tags == -100 style_d = sub_or_user.style == -100 all_default = all((interval_d, length_limit_d, notify_d, send_mode_d, link_preview_d, display_media_d, - display_author_d, display_via_d, display_title_d, style_d)) + display_author_d, display_via_d, display_title_d, display_entry_tags_d, style_d)) interval = sub_or_user.user.interval if interval_d else sub_or_user.interval length_limit = sub_or_user.user.length_limit if length_limit_d else sub_or_user.length_limit notify = sub_or_user.user.notify if notify_d else sub_or_user.notify @@ -80,6 +82,7 @@ async def get_customization_buttons(sub_or_user: Union[db.Sub, db.User], display_author = sub_or_user.user.display_author if display_author_d else sub_or_user.display_author display_via = sub_or_user.user.display_via if display_via_d else sub_or_user.display_via display_title = sub_or_user.user.display_title if display_title_d else sub_or_user.display_title + display_entry_tags = sub_or_user.user.display_entry_tags if display_entry_tags_d else sub_or_user.display_entry_tags style = sub_or_user.user.style if style_d else sub_or_user.style buttons = ( ( @@ -180,6 +183,18 @@ async def get_customization_buttons(sub_or_user: Union[db.Sub, db.User], ), ), ), + ( + Button.inline( + f"{i18n[lang]['display_entry_tags']}: " + + (FALLBACK_TO_USER_DEFAULT_EMOJI if display_entry_tags_d else '') + + i18n[lang][f'display_entry_tags_{display_entry_tags}'], + data=( + f'set_default=display_entry_tags{tail}' + if is_user + else 
f'set={sub_or_user.id},display_entry_tags|{page}{tail}' + ), + ), + ), ( Button.inline( f"{i18n[lang]['display_via']}: " diff --git a/src/command/inner/sub.py b/src/command/inner/sub.py index fa4dd0d241..786cde5356 100644 --- a/src/command/inner/sub.py +++ b/src/command/inner/sub.py @@ -92,18 +92,23 @@ async def sub(user_id: int, sub_title = sub_title if feed.title != sub_title else None if not _sub: # create a new sub if needed - _sub, created_new_sub = await db.Sub.get_or_create(user_id=user_id, feed=feed, - defaults={'title': sub_title if sub_title else None, - 'interval': None, - 'notify': -100, - 'send_mode': -100, - 'length_limit': -100, - 'link_preview': -100, - 'display_author': -100, - 'display_via': -100, - 'display_title': -100, - 'style': -100, - 'display_media': -100}) + _sub, created_new_sub = await db.Sub.get_or_create( + user_id=user_id, feed=feed, + defaults={ + 'title': sub_title if sub_title else None, + 'interval': None, + 'notify': -100, + 'send_mode': -100, + 'length_limit': -100, + 'link_preview': -100, + 'display_author': -100, + 'display_via': -100, + 'display_title': -100, + 'display_entry_tags': -100, + 'style': -100, + 'display_media': -100 + } + ) if not created_new_sub: if _sub.title == sub_title and _sub.state == 1: From 179296d0974ca711e568af3e62443d7b6b1c9d41 Mon Sep 17 00:00:00 2001 From: Rongrong Date: Thu, 25 Apr 2024 03:06:39 +0800 Subject: [PATCH 4/6] chore(i18n): display_entry_tags for `en`, `yue`, `zh-Hans`, `zh-Hant` Signed-off-by: Rongrong --- src/i18n/en.json | 3 +++ src/i18n/yue.json | 3 +++ src/i18n/zh-Hans.json | 3 +++ src/i18n/zh-Hant.json | 3 +++ 4 files changed, 12 insertions(+) diff --git a/src/i18n/en.json b/src/i18n/en.json index ead4b7213b..f0ff21f058 100644 --- a/src/i18n/en.json +++ b/src/i18n/en.json @@ -126,6 +126,9 @@ "display_title_-1": "Disable", "display_title_0": "Auto", "display_title_1": "Enable", + "display_entry_tags": "Hashtags from post (feed entry)", + "display_entry_tags_-1": "Disable", + 
"display_entry_tags_1": "Enable", "style": "Style", "style_0": "RSStT", "style_1": "flowerss", diff --git a/src/i18n/yue.json b/src/i18n/yue.json index d910476d4b..c1a1d80b95 100644 --- a/src/i18n/yue.json +++ b/src/i18n/yue.json @@ -126,6 +126,9 @@ "display_title_-1": "閂", "display_title_0": "自動", "display_title_1": "開", + "display_entry_tags": "嚟自文章 (源條目) 嘅 hashtag", + "display_entry_tags_-1": "閂", + "display_entry_tags_1": "開", "style": "格式", "style_0": "", "style_1": "", diff --git a/src/i18n/zh-Hans.json b/src/i18n/zh-Hans.json index 3702634ed9..6aa70ad456 100644 --- a/src/i18n/zh-Hans.json +++ b/src/i18n/zh-Hans.json @@ -126,6 +126,9 @@ "display_title_-1": "禁用", "display_title_0": "自动", "display_title_1": "启用", + "display_entry_tags": "来自文章 (源条目) 的 hashtag", + "display_entry_tags_-1": "禁用", + "display_entry_tags_1": "启用", "style": "风格", "style_0": "", "style_1": "", diff --git a/src/i18n/zh-Hant.json b/src/i18n/zh-Hant.json index 5087c09b4f..ec4c0e0ba4 100644 --- a/src/i18n/zh-Hant.json +++ b/src/i18n/zh-Hant.json @@ -126,6 +126,9 @@ "display_title_-1": "禁用", "display_title_0": "自動", "display_title_1": "啟用", + "display_entry_tags": "來自文章 (源條目) 的 hashtag", + "display_entry_tags_-1": "禁用", + "display_entry_tags_1": "啟用", "style": "樣式", "style_0": "", "style_1": "", From 412dc1a7698ea8c4fac5a37866206c93edf70e03 Mon Sep 17 00:00:00 2001 From: Rongrong Date: Fri, 26 Apr 2024 01:35:23 +0800 Subject: [PATCH 5/6] fix(parsing): punctuations break entry hashtags Fix entry hashtags by replacing any punctuations and whitespaces by '_'. 
Signed-off-by: Rongrong --- src/parsing/post_formatter.py | 10 +++++++--- src/parsing/utils.py | 29 +++++++++++++++++++---------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/parsing/post_formatter.py b/src/parsing/post_formatter.py index 0c2d6d1394..0fb857fa81 100644 --- a/src/parsing/post_formatter.py +++ b/src/parsing/post_formatter.py @@ -94,6 +94,7 @@ def __init__(self, self.parsed_html: Optional[str] = None self.plain_length: Optional[int] = None self.telegraph_link: Optional[Union[str, False]] = None # if generating failed, will be False + self.tags_escaped: Optional[list[str]] = None self.__title_similarity: Optional[int] = None @@ -223,7 +224,10 @@ async def get_formatted_post(self, # ---- determine tags ---- if display_entry_tags == FORCE_DISPLAY: - tags = utils.merge_tags(tags, self.tags) + if self.tags_escaped is None: + self.tags_escaped = list(utils.escape_hashtags(self.tags)) + if self.tags_escaped: + tags = utils.merge_tags(tags, self.tags_escaped) if tags else self.tags_escaped # ---- determine message_style ---- if style == FLOWERSS: @@ -352,7 +356,7 @@ async def get_formatted_post(self, def get_post_header_and_footer(self, sub_title: Optional[str], - tags: list, + tags: list[str], title_type: TypePostTitleType, via_type: TypeViaType, need_author: bool, @@ -496,7 +500,7 @@ def get_post_header_and_footer(self, def generate_formatted_post(self, sub_title: Optional[str], - tags: list, + tags: list[str], title_type: TypePostTitleType, via_type: TypeViaType, need_author: bool, diff --git a/src/parsing/utils.py b/src/parsing/utils.py index 8787e9826f..93219a3d8a 100644 --- a/src/parsing/utils.py +++ b/src/parsing/utils.py @@ -1,8 +1,9 @@ from __future__ import annotations -from typing import Optional, Sequence, Union +from typing import Optional, Sequence, Union, Final, Iterable import re import json +import string from contextlib import suppress from bs4.element import Tag from html import unescape @@ -20,7 +21,7 @@ logger = 
log.getLogger('RSStT.parsing') # noinspection SpellCheckingInspection -SPACES = ( +SPACES: Final[str] = ( # all characters here, except for \u200c, \u200d and \u2060, are converted to space on TDesktop, but Telegram # Android preserves all ' ' # '\x20', SPACE @@ -42,7 +43,7 @@ # '\u2060' # WORD JOINER '\u3000' # IDEOGRAPHIC SPACE ) -INVALID_CHARACTERS = ( +INVALID_CHARACTERS: Final[str] = ( # all characters here are converted to space server-side '\x00' # NULL '\x01' # START OF HEADING @@ -77,6 +78,10 @@ '\u2028' # LINE SEPARATOR '\u2029' # PARAGRAPH SEPARATOR ) +CHARACTERS_TO_ESCAPE_IN_HASHTAG: Final[str] = ''.join( + # all characters here will be replaced with '_' + sorted(set(SPACES + INVALID_CHARACTERS + string.punctuation + string.whitespace)) +) # load emoji dict with open(path.join(path.dirname(__file__), 'emojify.json'), 'r', encoding='utf-8') as emojify_json: @@ -88,6 +93,7 @@ stripLineEnd = partial(re.compile(rf'[{SPACES}]+\n').sub, '\n') # use firstly stripNewline = partial(re.compile(r'\n{3,}').sub, '\n\n') # use secondly stripAnySpace = partial(re.compile(r'\s+').sub, ' ') +escapeHashtag = partial(re.compile(rf'[{CHARACTERS_TO_ESCAPE_IN_HASHTAG}]+').sub, '_') isAbsoluteHttpLink = re.compile(r'^https?://').match isSmallIcon = re.compile(r'(width|height): ?(([012]?\d|30)(\.\d)?px|([01](\.\d)?|2)r?em)').search @@ -305,10 +311,13 @@ def merge_contiguous_entities(entities: Sequence[TypeMessageEntity]) -> list[Typ return merged_entities -def merge_tags(*tag_lists: Optional[list[str]]) -> list[str]: - non_empty_tag_lists: list[list[str]] = list(filter(None, tag_lists)) - if not non_empty_tag_lists: - return [] - if len(non_empty_tag_lists) == 1: - return non_empty_tag_lists[0] - return list(dict.fromkeys(chain(*non_empty_tag_lists))) +def escape_hashtag(tag: str) -> str: + return escapeHashtag(tag).strip('_') + + +def escape_hashtags(tags: Optional[Iterable[str]]) -> Iterable[str]: + return filter(None, map(escape_hashtag, tags)) if tags else () + + +def 
merge_tags(*tag_lists: Optional[Iterable[str]]) -> list[str]: + return list(dict.fromkeys(chain(*tag_lists))) From d81ace418fc483a82c49305bd5c7d881eda06e83 Mon Sep 17 00:00:00 2001 From: Rongrong Date: Fri, 26 Apr 2024 03:17:37 +0800 Subject: [PATCH 6/6] docs(CHANGELOG): add hashtags from posts to message Signed-off-by: Rongrong --- docs/CHANGELOG.md | 1 + docs/CHANGELOG.zh.md | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 5c026dc833..3eb624b491 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -8,6 +8,7 @@ ### Highlights +- **#Hashtags from post (feed entry)**: If enabled in `/set` or `/set_default`, hashtags from posts (feed entries), merged with the custom hashtags of the feed, will be added to the message. The term "hashtags from post" refers to `<category>` elements in RSS `<item>` or Atom `<entry>`. This feature is disabled by default. Thanks [@maooyer](https://github.com/maooyer) for their first contribution in [#449](https://github.com/Rongronggg9/RSS-to-Telegram-Bot/pull/449). - **Support Python 3.12**: Minor fixes have been made to support Python 3.12. The official Docker image is now based on Python 3.12 as well. - **Helper scripts to make contributions easier**: When performing contributions that update database models, creating database migration files is not an easy job. [scripts/aerich_helper.py](../scripts/aerich_helper.py) is a helper script that can simplify the process. Passing `--help` to the script to see a detailed usage guide. 
diff --git a/docs/CHANGELOG.zh.md b/docs/CHANGELOG.zh.md index 42683a745d..a75588622c 100644 --- a/docs/CHANGELOG.zh.md +++ b/docs/CHANGELOG.zh.md @@ -8,6 +8,7 @@ ### 亮点 +- **来自文章 (源条目) 的 #hashtag**: 如果在 `/set` 或 `/set_default` 中启用,来自文章 (源条目) 的 hashtag,与源的自定义 hashtag 合并后,将被添加到消息中。术语 "来自文章的 hashtag" 指的是 RSS `<item>` 或 Atom `<entry>` 中的 `<category>` 元素。此功能默认禁用。感谢 [@maooyer](https://github.com/maooyer) 在 [#449](https://github.com/Rongronggg9/RSS-to-Telegram-Bot/pull/449) 中作出的初次贡献。 - **支持 Python 3.12**: 进行了一些小的修复以支持 Python 3.12。官方 Docker 镜像现在也基于 Python 3.12。 - **使贡献更容易的辅助脚本**: 在进行更新数据库模型的贡献时,创建数据库迁移文件并不是一件容易的事。[scripts/aerich_helper.py](../scripts/aerich_helper.py) 是一个可以简化这个流程的辅助脚本。将 `--help` 传递给脚本以查看详细的使用指南。