From b10bf34ec355bceb6b37c62ecfc47b7e0c704610 Mon Sep 17 00:00:00 2001 From: Tasos Katsoulas Date: Tue, 8 Oct 2024 16:44:26 +0300 Subject: [PATCH] WIP: deprecate py-wikimarkup --- kitsune/sumo/parser.py | 147 ++++++++------------- kitsune/sumo/templatetags/jinja_helpers.py | 3 - kitsune/wiki/events.py | 4 +- kitsune/wiki/parser.py | 8 +- poetry.lock | 63 +++++---- pyproject.toml | 2 +- 6 files changed, 99 insertions(+), 128 deletions(-) diff --git a/kitsune/sumo/parser.py b/kitsune/sumo/parser.py index 77e79c45873..186d95a7c6a 100644 --- a/kitsune/sumo/parser.py +++ b/kitsune/sumo/parser.py @@ -1,18 +1,31 @@ -import re from os.path import basename from urllib.parse import parse_qs, urlparse +import mwparserfromhell from django.conf import settings from django.template.loader import render_to_string -from django.utils.translation import gettext as _ from django.utils.translation import gettext_lazy as _lazy from sentry_sdk import capture_exception -from wikimarkup.parser import ALLOWED_TAGS, Parser from kitsune.gallery.models import Image, Video from kitsune.sumo import email_utils from kitsune.sumo.urlresolvers import reverse +ALLOWED_TAGS = [ + "a", + "div", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "li", + "span", + "img", + "video", + "source", +] ALLOWED_ATTRIBUTES = { "a": ["href", "title", "class", "rel", "data-mozilla-ui-reset", "data-mozilla-ui-preferences"], "div": ["id", "class", "style", "data-for", "title", "data-target", "data-modal"], @@ -54,11 +67,7 @@ def wiki_to_html( """Wiki Markup -> HTML""" return WikiParser().parse( wiki_markup, - show_toc=False, locale=locale, - nofollow=nofollow, - tags=tags, - attributes=attributes, ) @@ -197,41 +206,20 @@ def build_hook_params(string, locale, allowed_params=[], allowed_param_values={} return (title, params) -class WikiParser(Parser): - """Wrapper for wikimarkup which adds Kitsune-specific callbacks - and setup. - """ +class WikiParser: + """Wrapper for mwparserfromhell which adds Kitsune-specific parsing and setup.""" image_template = "wikiparser/hook_image.html" def __init__(self, base_url=None): - super(WikiParser, self).__init__(base_url) - - # Register default hooks - self.registerInternalLinkHook(None, self._hook_internal_link) - self.registerInternalLinkHook("Image", self._hook_image_tag) - self.registerInternalLinkHook("Video", self._hook_video) - self.registerInternalLinkHook("V", self._hook_video) - self.registerInternalLinkHook("Button", self._hook_button) - self.registerInternalLinkHook("UI", self._hook_ui_component) - - # Register the abbr and acronym tags - self.registerTagHook("abbr", self._abbr_tag_hook) - + self.locale = settings.WIKI_DEFAULT_LANGUAGE self.youtube_videos = set() self.ui_components = set() def parse( self, text, - show_toc=None, - tags=None, - attributes=None, - styles=None, locale=settings.WIKI_DEFAULT_LANGUAGE, - nofollow=False, - youtube_embeds=True, - ui_component_embeds=True, **kwargs, ): """Given wiki markup, return HTML. @@ -265,14 +253,8 @@ def parse( @email_utils.safe_translation def _parse(locale): try: - return super(WikiParser, self).parse( + return mwparserfromhell.parse( text, - show_toc=show_toc, - tags=tags or ALLOWED_TAGS, - attributes=attributes or ALLOWED_ATTRIBUTES, - styles=styles or ALLOWED_STYLES, - nofollow=nofollow, - strip_comments=True, **kwargs, ) except TypeError as e: @@ -281,13 +263,10 @@ def _parse(locale): capture_exception(e) return "� There was an error parsing this content. �" - html = _parse(locale) + html = str(_parse(locale)) - if youtube_embeds: - html = self.add_youtube_embeds(html) - - if ui_component_embeds: - html = self.add_ui_component_embeds(html) + html = self.add_youtube_embeds(html) + html = self.add_ui_component_embeds(html) return html @@ -325,58 +304,40 @@ def add_ui_component_embeds(self, html): return html - def _hook_internal_link(self, parser, space, name): + def _hook_internal_link(self, wikicode): """Parses text and returns internal link.""" - text = False - title = name - - # Split on pipe -- [[href|name]] - if "|" in name: - title, text = title.split("|", 1) - title = re.sub(r"\s+", " ", title).strip() - - hash = "" - if "#" in title: - title, hash = title.split("#", 1) - - # Sections use _, page names use + - if hash != "": - hash = "#" + hash.replace(" ", "_") - - # Links to this page can just contain href="#hash" - if title == "" and hash != "": - if not text: - text = hash.replace("_", " ") - return '%s' % (hash, text) - - link = _get_wiki_link(title, self.locale) - extra_a_attr = "" - if not link["found"]: - extra_a_attr += ' class="new" title="{tooltip}"'.format( - tooltip=_("Page does not exist.") + for link in wikicode.filter_wikilinks(): + title = str(link.title) + if "|" in title: + title, text = title.split("|", 1) + else: + text = title + + # Get the correct link info + link_info = _get_wiki_link(title, self.locale) + link_html = f'{text}' + wikicode.replace(link, link_html) + + return str(wikicode) + + def _hook_image_tag(self, wikicode): + for template in wikicode.filter_templates(matches="Image"): + title, params = build_hook_params( + template.get("title"), self.locale, IMAGE_PARAMS, IMAGE_PARAM_VALUES + ) + image = get_object_fallback( + Image, title, self.locale, _lazy('The image "%s" does not exist.') % title ) - if not text: - text = link["text"] - return '{text}'.format( - url=link["url"], hash=hash, extra=extra_a_attr, text=text - ) - def _hook_image_tag(self, parser, space, name): - """Adds syntax for inserting images.""" - title, params = build_hook_params(name, self.locale, IMAGE_PARAMS, IMAGE_PARAM_VALUES) - - message = _lazy('The image "%s" does not exist.') % title - image = get_object_fallback(Image, title, self.locale, message) - if isinstance(image, str): - return image - - return render_to_string( - self.image_template, - { - "image": image, - "params": params, - }, - ) + if isinstance(image, str): + wikicode.replace(template, image) + else: + image_html = render_to_string( + self.image_template, {"image": image, "params": params} + ) + wikicode.replace(template, image_html) + + return str(wikicode) # Videos are objects that can have one or more files attached to them # diff --git a/kitsune/sumo/templatetags/jinja_helpers.py b/kitsune/sumo/templatetags/jinja_helpers.py index f82a12c23d8..fa6442f47c3 100644 --- a/kitsune/sumo/templatetags/jinja_helpers.py +++ b/kitsune/sumo/templatetags/jinja_helpers.py @@ -7,7 +7,6 @@ import bleach import jinja2 -import wikimarkup.parser from babel.dates import format_date, format_datetime, format_time from babel.numbers import format_decimal from django.conf import settings @@ -137,8 +136,6 @@ def wiki_to_safe_html(wiki_markup, locale=settings.WIKI_DEFAULT_LANGUAGE, nofoll wiki_markup, locale=locale, nofollow=nofollow, - tags=wikimarkup.parser.ALLOWED_TAGS + ["abbr"], - attributes=wikimarkup.parser.ALLOWED_ATTRIBUTES | {"abbr": ["title"]}, ) return Markup( bleach.clean(html, tags=ALLOWED_BIO_TAGS, attributes=ALLOWED_BIO_ATTRIBUTES, strip=True) diff --git a/kitsune/wiki/events.py b/kitsune/wiki/events.py index 9cba76dd575..5dd1b1f3091 100644 --- a/kitsune/wiki/events.py +++ b/kitsune/wiki/events.py @@ -5,11 +5,11 @@ from django.conf import settings from django.contrib.sites.models import Site from django.urls import reverse as django_reverse -from django.utils.translation import gettext_lazy as _lazy from django.utils.translation import gettext as _ -from wikimarkup.parser import ALLOWED_ATTRIBUTES, ALLOWED_TAGS +from django.utils.translation import gettext_lazy as _lazy from kitsune.sumo import email_utils +from kitsune.sumo.parser import ALLOWED_ATTRIBUTES, ALLOWED_TAGS from kitsune.sumo.templatetags.jinja_helpers import add_utm from kitsune.sumo.urlresolvers import reverse from kitsune.tidings.events import Event, EventUnion, InstanceEvent diff --git a/kitsune/wiki/parser.py b/kitsune/wiki/parser.py index 38ce27043fa..f506cf06f70 100644 --- a/kitsune/wiki/parser.py +++ b/kitsune/wiki/parser.py @@ -14,11 +14,15 @@ from html5lib.treebuilders import getTreeBuilder from html5lib.treewalkers import getTreeWalker from lxml.etree import Element -from wikimarkup.parser import ALLOWED_TAGS from kitsune.gallery.models import Image from kitsune.sumo import parser as sumo_parser -from kitsune.sumo.parser import ALLOWED_ATTRIBUTES, ALLOWED_STYLES, get_object_fallback +from kitsune.sumo.parser import ( + ALLOWED_ATTRIBUTES, + ALLOWED_STYLES, + ALLOWED_TAGS, + get_object_fallback, +) from kitsune.wiki.models import Document # block elements wikimarkup knows about (and thus preserves) diff --git a/poetry.lock b/poetry.lock index 2e90f1d796b..a8bbed7e54e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2781,6 +2781,41 @@ Django = ">=3.2" josepy = "*" requests = "*" +[[package]] +name = "mwparserfromhell" +version = "0.6.6" +description = "MWParserFromHell is a parser for MediaWiki wikicode." +optional = false +python-versions = ">= 3.8" +files = [ + {file = "mwparserfromhell-0.6.6-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:d6995b9cfe6ec79556db0232a39210ac11aa69ee304cfc95b29c51be381e202b"}, + {file = "mwparserfromhell-0.6.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebc70f8a24aa60e54728be740f1c12a4acb1b12d1cc947d87b067cc1c83339fd"}, + {file = "mwparserfromhell-0.6.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9136696d6b29838adcf8f428e3f7028b2c6e788fc05fe1beeb4b135429c356df"}, + {file = "mwparserfromhell-0.6.6-cp310-cp310-win32.whl", hash = "sha256:6b11dea3bcdebe4554933169eade815e9d6b898175faa5a20a744524fd99210f"}, + {file = "mwparserfromhell-0.6.6-cp310-cp310-win_amd64.whl", hash = "sha256:6a89edf53f15877223d923e122e9a97f3f7b85f56dc56d91a3d77b89c9dd4126"}, + {file = "mwparserfromhell-0.6.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:fff66e97f7c02aa0fd57ff8f702977a9c5a1d72ef55b64ee9b146291e4c41057"}, + {file = "mwparserfromhell-0.6.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59633d3cc09993af75ced8dfbd6800e1e38e64620851a095575621548448875c"}, + {file = "mwparserfromhell-0.6.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:007d0859e5467241b73c6e974df039a074609ce4e2b9df8c2263a8920554d032"}, + {file = "mwparserfromhell-0.6.6-cp311-cp311-win32.whl", hash = "sha256:dbe5976b1b524e26aa2eb71b6219960f2578f56b536c68e0a79deb63e3b7f710"}, + {file = "mwparserfromhell-0.6.6-cp311-cp311-win_amd64.whl", hash = "sha256:063c1e79befd1f55d77c358e0f5006f5ecf88ddf218ff6af55188d686139330e"}, + {file = "mwparserfromhell-0.6.6-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:910d36bc70e8bea758380e75c12fd47626b295abec9f73a6099d8f937a649e77"}, + {file = "mwparserfromhell-0.6.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2febd92a55a3f19b461833267726cb81429c3d6cb0006ad1691dfa849789e5d"}, + {file = "mwparserfromhell-0.6.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b75fae6d01c8fda19dbf127175122d7aa2964ef6454690e6868bbc3d80a7bc1"}, + {file = "mwparserfromhell-0.6.6-cp312-cp312-win32.whl", hash = "sha256:19e9a4bcd85707c83172405eb2a9a046eff9d38dd7f1a56a5e5ecbbfef4a640a"}, + {file = "mwparserfromhell-0.6.6-cp312-cp312-win_amd64.whl", hash = "sha256:cdc46c115b2495d4025920b7b30a6885a96d2b797ccc4009bf3cc02940ae55d3"}, + {file = "mwparserfromhell-0.6.6-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:fd05481adc0806f4b8f8f8cb309ec56924b17ce386cb1c2f73919d8a012e6b16"}, + {file = "mwparserfromhell-0.6.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03e03b8bec729af850457d045b04d0c9d3e296ff8bf66b455f754cccb29c3bea"}, + {file = "mwparserfromhell-0.6.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d2422659abb29191a0fa096cf8bead837ac3ecd343065569b2acc7a84ecf866"}, + {file = "mwparserfromhell-0.6.6-cp38-cp38-win32.whl", hash = "sha256:a58251a5d5c77abdfd061624dc05667c2774e93e8178a2fbd1a3b45f8673f1a9"}, + {file = "mwparserfromhell-0.6.6-cp38-cp38-win_amd64.whl", hash = "sha256:e28ffa9a7e0748ec64002a84234201ef69c2d4a710508baf9cc25f4ee274c6bd"}, + {file = "mwparserfromhell-0.6.6-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:746bad799179684994ecee72a26352e0bbe2b697f6a7e35dc5ad151606bcb8ab"}, + {file = "mwparserfromhell-0.6.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50c482e703d2d51401f7e36a71ae9493901f170225940196292f97398713dde5"}, + {file = "mwparserfromhell-0.6.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1915fe4f5e5ae34f16242d4cd98da2adc81a810ab94105ec2af3dc95d7ce74aa"}, + {file = "mwparserfromhell-0.6.6-cp39-cp39-win32.whl", hash = "sha256:54e2dd30edc1a358408d14343b30dcca0b4613227781e4bbee968bd4395d94ff"}, + {file = "mwparserfromhell-0.6.6-cp39-cp39-win_amd64.whl", hash = "sha256:1960bcc5115ea57427df130150edf1dbfc2fb03465e548e630bb6eb37976d793"}, + {file = "mwparserfromhell-0.6.6.tar.gz", hash = "sha256:71afec1e9784ba576e95d6f34845582d3c733a3a52ba770dd8a9c3a40e5b649f"}, +] + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -3337,21 +3372,6 @@ files = [ {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, ] -[[package]] -name = "py-wikimarkup" -version = "2.3.0" -description = "A basic MediaWiki markup parser." -optional = false -python-versions = ">=3.7" -files = [ - {file = "py-wikimarkup-2.3.0.tar.gz", hash = "sha256:7081ab990afb95bf9d55b762a3db6f779838844a8de0bf8d2589443a6e378e1c"}, - {file = "py_wikimarkup-2.3.0-py3-none-any.whl", hash = "sha256:90060fbdfda0e2c611d04fe963be4a9e894f12cb28a21d2d515fe504925fc55b"}, -] - -[package.dependencies] -bleach = {version = "==6.*", extras = ["css"]} -unidecode = "==1.*" - [[package]] name = "pyasn1" version = "0.5.1" @@ -4862,17 +4882,6 @@ files = [ {file = "ua_parser-0.18.0-py2.py3-none-any.whl", hash = "sha256:9d94ac3a80bcb0166823956a779186c746b50ea4c9fd9bf30fdb758553c38950"}, ] -[[package]] -name = "unidecode" -version = "1.3.8" -description = "ASCII transliterations of Unicode text" -optional = false -python-versions = ">=3.5" -files = [ - {file = "Unidecode-1.3.8-py3-none-any.whl", hash = "sha256:d130a61ce6696f8148a3bd8fe779c99adeb4b870584eeb9526584e9aa091fd39"}, - {file = "Unidecode-1.3.8.tar.gz", hash = "sha256:cfdb349d46ed3873ece4586b96aa75258726e2fa8ec21d6f00a591d98806c2f4"}, -] - [[package]] name = "urllib3" version = "1.26.19" @@ -5306,4 +5315,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "fea53028c4e4ddabf04699afcfa6c7a578b206abbe24032297429a3129e5c3d8" +content-hash = "621b3c71d3b90787816ff98aa32a1b51293f6b8e9bc9ca767584f8408b38d8f6" diff --git a/pyproject.toml b/pyproject.toml index db610479749..4b8d521f22f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,6 @@ zenpy = "^2.0.47" idna = "^3.7" django-allow-cidr = "^0.6.0" html5lib = "^1.1" -py-wikimarkup = "^2.3.0" python-dateutil = "^2.8.2" python-memcached = "^1.61" redis = "^4.6.0" @@ -92,6 +91,7 @@ wagtail = "6.1.3" wagtail-localize = "1.9" django-silk = "^5.1.0" requests = "^2.32.3" +mwparserfromhell = "^0.6.6" [tool.poetry.group.dev.dependencies] ipdb = "^0.13.11"