Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: deprecate py-wikimarkup #6279

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 54 additions & 93 deletions kitsune/sumo/parser.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,31 @@
import re
from os.path import basename
from urllib.parse import parse_qs, urlparse

import mwparserfromhell
from django.conf import settings
from django.template.loader import render_to_string
from django.utils.translation import gettext as _
from django.utils.translation import gettext_lazy as _lazy
from sentry_sdk import capture_exception
from wikimarkup.parser import ALLOWED_TAGS, Parser

from kitsune.gallery.models import Image, Video
from kitsune.sumo import email_utils
from kitsune.sumo.urlresolvers import reverse

# Tag names that make up this module's ALLOWED_TAGS list.
ALLOWED_TAGS = [
    "a",
    "div",
    *(f"h{level}" for level in range(1, 7)),  # "h1" through "h6"
    "li",
    "span",
    "img",
    "video",
    "source",
]
ALLOWED_ATTRIBUTES = {
"a": ["href", "title", "class", "rel", "data-mozilla-ui-reset", "data-mozilla-ui-preferences"],
"div": ["id", "class", "style", "data-for", "title", "data-target", "data-modal"],
Expand Down Expand Up @@ -54,11 +67,7 @@ def wiki_to_html(
"""Wiki Markup -> HTML"""
return WikiParser().parse(
wiki_markup,
show_toc=False,
locale=locale,
nofollow=nofollow,
tags=tags,
attributes=attributes,
)


Expand Down Expand Up @@ -197,41 +206,20 @@ def build_hook_params(string, locale, allowed_params=[], allowed_param_values={}
return (title, params)


class WikiParser(Parser):
"""Wrapper for wikimarkup which adds Kitsune-specific callbacks
and setup.
"""
class WikiParser:
"""Wrapper for mwparserfromhell which adds Kitsune-specific parsing and setup."""

image_template = "wikiparser/hook_image.html"

def __init__(self, base_url=None):
super(WikiParser, self).__init__(base_url)

# Register default hooks
self.registerInternalLinkHook(None, self._hook_internal_link)
self.registerInternalLinkHook("Image", self._hook_image_tag)
self.registerInternalLinkHook("Video", self._hook_video)
self.registerInternalLinkHook("V", self._hook_video)
self.registerInternalLinkHook("Button", self._hook_button)
self.registerInternalLinkHook("UI", self._hook_ui_component)

# Register the abbr and acronym tags
self.registerTagHook("abbr", self._abbr_tag_hook)

self.locale = settings.WIKI_DEFAULT_LANGUAGE
self.youtube_videos = set()
self.ui_components = set()

def parse(
self,
text,
show_toc=None,
tags=None,
attributes=None,
styles=None,
locale=settings.WIKI_DEFAULT_LANGUAGE,
nofollow=False,
youtube_embeds=True,
ui_component_embeds=True,
**kwargs,
):
"""Given wiki markup, return HTML.
Expand Down Expand Up @@ -265,14 +253,8 @@ def parse(
@email_utils.safe_translation
def _parse(locale):
try:
return super(WikiParser, self).parse(
return mwparserfromhell.parse(
text,
show_toc=show_toc,
tags=tags or ALLOWED_TAGS,
attributes=attributes or ALLOWED_ATTRIBUTES,
styles=styles or ALLOWED_STYLES,
nofollow=nofollow,
strip_comments=True,
**kwargs,
)
except TypeError as e:
Expand All @@ -281,13 +263,10 @@ def _parse(locale):
capture_exception(e)
return "� There was an error parsing this content. �"

html = _parse(locale)
html = str(_parse(locale))

if youtube_embeds:
html = self.add_youtube_embeds(html)

if ui_component_embeds:
html = self.add_ui_component_embeds(html)
html = self.add_youtube_embeds(html)
html = self.add_ui_component_embeds(html)

return html

Expand Down Expand Up @@ -325,58 +304,40 @@ def add_ui_component_embeds(self, html):

return html

def _hook_internal_link(self, parser, space, name):
def _hook_internal_link(self, wikicode):
"""Parses text and returns internal link."""
text = False
title = name

# Split on pipe -- [[href|name]]
if "|" in name:
title, text = title.split("|", 1)
title = re.sub(r"\s+", " ", title).strip()

hash = ""
if "#" in title:
title, hash = title.split("#", 1)

# Sections use _, page names use +
if hash != "":
hash = "#" + hash.replace(" ", "_")

# Links to this page can just contain href="#hash"
if title == "" and hash != "":
if not text:
text = hash.replace("_", " ")
return '<a href="%s">%s</a>' % (hash, text)

link = _get_wiki_link(title, self.locale)
extra_a_attr = ""
if not link["found"]:
extra_a_attr += ' class="new" title="{tooltip}"'.format(
tooltip=_("Page does not exist.")
for link in wikicode.filter_wikilinks():
title = str(link.title)
if "|" in title:
title, text = title.split("|", 1)
else:
text = title

# Get the correct link info
link_info = _get_wiki_link(title, self.locale)
link_html = f'<a href="{link_info["url"]}">{text}</a>'
wikicode.replace(link, link_html)

return str(wikicode)

def _hook_image_tag(self, wikicode):
for template in wikicode.filter_templates(matches="Image"):
title, params = build_hook_params(
template.get("title"), self.locale, IMAGE_PARAMS, IMAGE_PARAM_VALUES
)
image = get_object_fallback(
Image, title, self.locale, _lazy('The image "%s" does not exist.') % title
)
if not text:
text = link["text"]
return '<a href="{url}{hash}"{extra}>{text}</a>'.format(
url=link["url"], hash=hash, extra=extra_a_attr, text=text
)

def _hook_image_tag(self, parser, space, name):
"""Adds syntax for inserting images."""
title, params = build_hook_params(name, self.locale, IMAGE_PARAMS, IMAGE_PARAM_VALUES)

message = _lazy('The image "%s" does not exist.') % title
image = get_object_fallback(Image, title, self.locale, message)
if isinstance(image, str):
return image

return render_to_string(
self.image_template,
{
"image": image,
"params": params,
},
)
if isinstance(image, str):
wikicode.replace(template, image)
else:
image_html = render_to_string(
self.image_template, {"image": image, "params": params}
)
wikicode.replace(template, image_html)

return str(wikicode)

# Videos are objects that can have one or more files attached to them
#
Expand Down
3 changes: 0 additions & 3 deletions kitsune/sumo/templatetags/jinja_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import bleach
import jinja2
import wikimarkup.parser
from babel.dates import format_date, format_datetime, format_time
from babel.numbers import format_decimal
from django.conf import settings
Expand Down Expand Up @@ -137,8 +136,6 @@ def wiki_to_safe_html(wiki_markup, locale=settings.WIKI_DEFAULT_LANGUAGE, nofoll
wiki_markup,
locale=locale,
nofollow=nofollow,
tags=wikimarkup.parser.ALLOWED_TAGS + ["abbr"],
attributes=wikimarkup.parser.ALLOWED_ATTRIBUTES | {"abbr": ["title"]},
)
return Markup(
bleach.clean(html, tags=ALLOWED_BIO_TAGS, attributes=ALLOWED_BIO_ATTRIBUTES, strip=True)
Expand Down
4 changes: 2 additions & 2 deletions kitsune/wiki/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
from django.conf import settings
from django.contrib.sites.models import Site
from django.urls import reverse as django_reverse
from django.utils.translation import gettext_lazy as _lazy
from django.utils.translation import gettext as _
from wikimarkup.parser import ALLOWED_ATTRIBUTES, ALLOWED_TAGS
from django.utils.translation import gettext_lazy as _lazy

from kitsune.sumo import email_utils
from kitsune.sumo.parser import ALLOWED_ATTRIBUTES, ALLOWED_TAGS
from kitsune.sumo.templatetags.jinja_helpers import add_utm
from kitsune.sumo.urlresolvers import reverse
from kitsune.tidings.events import Event, EventUnion, InstanceEvent
Expand Down
8 changes: 6 additions & 2 deletions kitsune/wiki/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,15 @@
from html5lib.treebuilders import getTreeBuilder
from html5lib.treewalkers import getTreeWalker
from lxml.etree import Element
from wikimarkup.parser import ALLOWED_TAGS

from kitsune.gallery.models import Image
from kitsune.sumo import parser as sumo_parser
from kitsune.sumo.parser import ALLOWED_ATTRIBUTES, ALLOWED_STYLES, get_object_fallback
from kitsune.sumo.parser import (
ALLOWED_ATTRIBUTES,
ALLOWED_STYLES,
ALLOWED_TAGS,
get_object_fallback,
)
from kitsune.wiki.models import Document

# block elements wikimarkup knows about (and thus preserves)
Expand Down
63 changes: 36 additions & 27 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ zenpy = "^2.0.47"
idna = "^3.7"
django-allow-cidr = "^0.6.0"
html5lib = "^1.1"
py-wikimarkup = "^2.3.0"
python-dateutil = "^2.8.2"
python-memcached = "^1.61"
redis = "^4.6.0"
Expand Down Expand Up @@ -92,6 +91,7 @@ wagtail = "6.1.3"
wagtail-localize = "1.9"
django-silk = "^5.1.0"
requests = "^2.32.3"
mwparserfromhell = "^0.6.6"

[tool.poetry.group.dev.dependencies]
ipdb = "^0.13.11"
Expand Down