Skip to content

Commit

Permalink
Reuse markdown_excerpt for <meta name="description">s on profiles…
Browse files Browse the repository at this point in the history
… and submissions
  • Loading branch information
charmander committed May 14, 2024
1 parent 19a6150 commit bfe1277
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 47 deletions.
32 changes: 0 additions & 32 deletions libweasyl/html.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,4 @@
"""
Utilities for dealing with HTML.
Specifically, utilities for creating HTML and utilities for removing HTML.
"""

import json
from html.parser import HTMLParser


class _HtmlToText(HTMLParser):

def __init__(self, handle_data):
super().__init__()
self.handle_data = handle_data

def handle_starttag(self, tag, attrs):
if tag == "img":
alt = next((value for key, value in attrs if key == "alt"), None)

if alt:
self.handle_data(f"[{alt}]")


def html_to_text(markdown: str) -> str:
    """
    Reduce an HTML string to plain text suitable for summaries.

    The input is run through `_HtmlToText`, the collected text pieces are
    concatenated, and every run of whitespace is collapsed to a single space
    with leading and trailing whitespace removed.
    """
    fragments = []
    extractor = _HtmlToText(fragments.append)
    extractor.feed(markdown)
    extractor.close()

    # `str.split()` with no arguments splits on any whitespace run and drops
    # empty edge pieces, so re-joining with one space normalizes the text.
    words = "".join(fragments).split()
    return " ".join(words)


def inline_json(obj):
Expand Down
10 changes: 0 additions & 10 deletions libweasyl/test/test_html.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
from libweasyl import html


def test_html_to_text():
    """Check `html.html_to_text` against a table of input/output pairs."""
    # Each entry: (input, expected plain text, message shown on failure).
    cases = [
        (
            '<div style="text-align: center">**foo**</div><!-- comment -->',
            "**foo**",
            "`html_to_text` should strip HTML tags",
        ),
        (
            '1 < 3 > 2 "foo"',
            '1 < 3 > 2 "foo"',
            "the output of `html_to_text` should be plain text, not HTML",
        ),
        (
            "&copy;",
            "©",
            "`html_to_text` should decode named character references",
        ),
        (
            "&#xec;",
            "ì",
            "`html_to_text` should decode numeric character references",
        ),
        (
            'foo <img alt="bar"> baz',
            "foo [bar] baz",
            "`html_to_text` should replace images with their alt text",
        ),
        (
            " foo\nbar baz\t",
            "foo bar baz",
            "`html_to_text` should normalize whitespace",
        ),
        (
            "a<![/b]>c",
            "ac",
            "`html_to_text` shouldn’t throw on invalid HTML",
        ),
    ]

    for source, expected, message in cases:
        assert html.html_to_text(source) == expected, message


def test_inline_json():
assert html.inline_json('</script>') == r'"<\/script>"'
assert html.inline_json('</SCRIPT>') == r'"<\/SCRIPT>"'
Expand Down
7 changes: 7 additions & 0 deletions libweasyl/test/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,13 @@ def test_tag_stripping():
(u'single-codepoint graphemes😊😊😊😊', u'single-codepoint graphemes😊😊😊😊'),
(u'single-codepoint graphemes😊😊😊😊😊', u'single-codepoint graphemes😊😊😊…'),
(u'test\n - lists\n - of\n - items\n\ntest', u'test lists of items test'),
('<div style="text-align: center">**foo**</div>\n<!-- comment -->', "foo"),
('1 < 3 > 2 "foo"', '1 < 3 > 2 "foo"'),
("&copy;", "©"),
("&#xec;", "ì"),
('foo <!bar> baz', "foo [bar] baz"),
(" foo\nbar baz\t", "foo bar baz"),
("<![/b]>", "<![/b]>"),
]


Expand Down
2 changes: 2 additions & 0 deletions libweasyl/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,8 @@ def markdown(target):
def _itertext_spaced(element):
if element.text:
yield element.text
elif element.tag == "img" and (alt := element.get("alt")):
yield "[%s]" % (alt,)

for child in element:
is_block = child.tag in _EXCERPT_BLOCK_ELEMENTS
Expand Down
5 changes: 2 additions & 3 deletions weasyl/controllers/detail.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
from pyramid import httpexceptions
from pyramid.response import Response

from libweasyl.html import html_to_text
from libweasyl.models.content import Submission
from libweasyl.text import slug_for
from libweasyl.text import markdown_excerpt, slug_for
from weasyl import (
character, define, journal, macro, media, profile, searchtag, submission)
from weasyl.controllers.decorators import moderator_only
Expand Down Expand Up @@ -69,7 +68,7 @@ def submission_(request):
else:
twitter_meta['title'] = title_with_attribution

meta_description = define.summarize(html_to_text(item['content']).strip())
meta_description = markdown_excerpt(item['content'])
if meta_description:
twitter_meta['description'] = ogp['description'] = meta_description

Expand Down
4 changes: 2 additions & 2 deletions weasyl/controllers/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from pyramid.response import Response

from libweasyl import staff
from libweasyl.html import html_to_text
from libweasyl.text import markdown_excerpt

from weasyl import (
character, collection, commishinfo, define, favorite, folder,
Expand Down Expand Up @@ -60,7 +60,7 @@ def profile_(request):
username = userprofile["username"]
canonical_path = request.route_path("profile_tilde", name=define.get_sysname(username))
title = f"{username}’s profile"
meta_description = define.summarize(html_to_text(userprofile["profile_text"]).strip())
meta_description = markdown_excerpt(userprofile["profile_text"])
avatar_url = define.absolutify_url(userprofile['user_media']['avatar'][0]['display_url'])
twitter_meta = {
"card": "summary",
Expand Down

0 comments on commit bfe1277

Please sign in to comment.