From ab1511074c6eb03063b9258e8e42a117a57f5853 Mon Sep 17 00:00:00 2001 From: Isaac Muse Date: Tue, 25 Feb 2025 20:57:51 -0700 Subject: [PATCH] Replaced regex with backrefs package in search plugin (#8034) Use Re in all places in the search plugin except where Unicode properties are desired (checking for Chinese chars). Use Backrefs' `bre` to check for Unicode properties. To minimize any differences, explicitly use `script` as Backrefs uses `script_extensions` (or `scx`) by default with `Is*` properties. --- material/plugins/search/plugin.py | 5 +++-- requirements.txt | 2 +- src/plugins/search/plugin.py | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/material/plugins/search/plugin.py b/material/plugins/search/plugin.py index 146d89cea07..ce23c15cb7e 100644 --- a/material/plugins/search/plugin.py +++ b/material/plugins/search/plugin.py @@ -21,7 +21,8 @@ import json import logging import os -import regex as re +import re +from backrefs import bre from html import escape from html.parser import HTMLParser @@ -285,7 +286,7 @@ def _find_toc_by_id(self, toc, id): # Find and segment Chinese characters in string def _segment_chinese(self, data): - expr = re.compile(r"(\p{IsHan}+)", re.UNICODE) + expr = bre.compile(r"(\p{script: Han}+)", bre.UNICODE) # Replace callback def replace(match): diff --git a/requirements.txt b/requirements.txt index 380f6bc705f..5cf8dad769f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,5 +30,5 @@ pymdown-extensions~=10.2 babel~=2.10 colorama~=0.4 paginate~=0.5 -regex>=2022.4 +backrefs~=5.8 requests~=2.26 diff --git a/src/plugins/search/plugin.py b/src/plugins/search/plugin.py index 146d89cea07..7630774257e 100644 --- a/src/plugins/search/plugin.py +++ b/src/plugins/search/plugin.py @@ -21,7 +21,8 @@ import json import logging import os -import regex as re +import re +from backrefs import bre from html import escape from html.parser import HTMLParser @@ -285,7 +286,7 @@ def _find_toc_by_id(self, toc, id): # Find 
and segment Chinese characters in string def _segment_chinese(self, data): - expr = re.compile(r"(\p{IsHan}+)", re.UNICODE) + expr = bre.compile(r"(\p{script: Han}+)", bre.UNICODE) # Replace callback def replace(match):