Skip to content

Commit

Permalink
Allow embedded iframe elements from YouTube and Vimeo videos
Browse files Browse the repository at this point in the history
  • Loading branch information
passiomatic committed Aug 7, 2023
1 parent d66a934 commit 5738cae
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 7 deletions.
29 changes: 25 additions & 4 deletions coldsweat/markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


# Allow iframe elements in FeedParser
#_HTMLSanitizer.acceptable_elements.add("iframe")
_HTMLSanitizer.acceptable_elements.add("iframe")

def _normalize_attrs(attrs):
'''
Expand Down Expand Up @@ -46,6 +46,8 @@ def unknown_endtag(self, tag):
pass


# Hosts from which embedded iframe elements are accepted (checked by is_allowed)
DOMAIN_WHITELIST = {
    "www.youtube.com",
    "www.youtube-nocookie.com",
    "player.vimeo.com",
}

class BaseProcessor(BaseParser):
'''
Parse and partially reconstruct the input document
Expand All @@ -58,6 +60,7 @@ class BaseProcessor(BaseParser):

def __init__(self, xhtml_mode=False):
    '''
    Initialize the underlying parser, then the processor flags
    '''
    BaseParser.__init__(self)
    self.xhtml_mode = xhtml_mode
    # Set by start_iframe when an iframe is kept, cleared by end_iframe
    self.allowed_iframe = False

# @@NOTE: reset is called implicitly by base class
Expand All @@ -66,12 +69,30 @@ def reset(self):
BaseParser.reset(self)
self.pieces = []

def output(self):
def get_output(self):
'''
Return processed HTML as a single string
'''
return ''.join(self.pieces)

def start_iframe(self, attrs):
    '''
    Reconstruct an iframe start tag only if its source is whitelisted

    attrs is a list of (attr, value) tuples. An iframe whose src is
    rejected by is_allowed, or which has no src at all, is left out
    of the output.
    '''
    d = dict(_normalize_attrs(attrs))
    # An iframe may carry no src attribute (e.g. srcdoc only, or broken
    # markup): treat it as not allowed instead of raising KeyError
    src = d.get('src')
    if src and self.is_allowed(src):
        # Remember the decision so end_iframe emits the matching end tag
        self.allowed_iframe = True
        self.unknown_starttag('iframe', attrs)

def end_iframe(self):
    '''
    Emit the iframe end tag only when the start tag was kept
    '''
    if not self.allowed_iframe:
        return
    # Clear the flag so it does not leak to the next iframe element
    self.allowed_iframe = False
    self.unknown_endtag('iframe')

def is_allowed(self, url):
    '''
    Return True if the network location of url is in DOMAIN_WHITELIST

    The network location is compared as is, so it must match a
    whitelisted entry exactly.
    '''
    netloc = urlparse.urlparse(url).netloc
    return netloc in DOMAIN_WHITELIST

def unknown_starttag(self, tag, attrs):
# Called for each unhandled tag, where attrs is a list of
# (attr, value) tuples
Expand Down Expand Up @@ -274,7 +295,7 @@ def strip_html(data):
'''
p = Stripper()
_parse(p, data)
return p.output()
return p.get_output()


def scrub_html(data, blacklist):
Expand All @@ -283,4 +304,4 @@ def scrub_html(data, blacklist):
'''
p = Scrubber(blacklist)
_parse(p, data)
return p.output()
return p.get_output()
2 changes: 1 addition & 1 deletion makefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ create-test-data:
sqlite3 ./instance/coldsweat-test.db ".dump" > tests/test-data.sql

test:
python -m pytest
python -m pytest -s

# Build Wheel/PyPI Support

Expand Down
27 changes: 27 additions & 0 deletions tests/markup/iframe-in.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" version="2.0">
<channel>
<title>Red Punk</title>
<atom:link href="https://redpunk.com/feed/" rel="self" type="application/rss+xml" />
<link>https://redpunk.com</link>
<description>Me against the machine.</description>
<lastBuildDate>Mon, 07 Aug 2023 10:00:04 +0000</lastBuildDate>
<language>it-IT</language>
<sy:updatePeriod>hourly</sy:updatePeriod>
<sy:updateFrequency>1</sy:updateFrequency>
<generator>https://wordpress.org/?v=6.2.2</generator>
<item>
<title>Test YouTube Embed</title>
<link>https://redpunk.com/articoli/test-youtube-embed/</link>
<comments>https://redpunk.com/articoli/test-youtube-embed/#respond</comments>
<dc:creator><![CDATA[Andrea Peltrin]]></dc:creator>
<pubDate>Mon, 07 Aug 2023 09:57:28 +0000</pubDate>
<category><![CDATA[Internet]]></category>
<guid isPermaLink="false">https://redpunk.com/?p=1575</guid>
<description><![CDATA[Test :)]]></description>
<content:encoded><![CDATA[<iframe width="560" height="315" src="https://www.youtube-nocookie.com/embed/jfKfPfyJRdk" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>]]></content:encoded>
<wfw:commentRss>https://redpunk.com/articoli/test-youtube-embed/feed/</wfw:commentRss>
<slash:comments>0</slash:comments>
</item>
</channel>
</rss>
27 changes: 27 additions & 0 deletions tests/markup/iframe-out.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:wfw="http://wellformedweb.org/CommentAPI/" version="2.0">
<channel>
<title>Red Punk</title>
<atom:link href="https://redpunk.com/feed/" rel="self" type="application/rss+xml" />
<link>https://redpunk.com</link>
<description>Me against the machine.</description>
<lastBuildDate>Mon, 07 Aug 2023 10:00:04 +0000</lastBuildDate>
<language>it-IT</language>
<sy:updatePeriod>hourly</sy:updatePeriod>
<sy:updateFrequency>1</sy:updateFrequency>
<generator>https://wordpress.org/?v=6.2.2</generator>
<item>
<title>Test YouTube Embed</title>
<link>https://redpunk.com/articoli/test-youtube-embed/</link>
<comments>https://redpunk.com/articoli/test-youtube-embed/#respond</comments>
<dc:creator><![CDATA[Andrea Peltrin]]></dc:creator>
<pubDate>Mon, 07 Aug 2023 09:57:28 +0000</pubDate>
<category><![CDATA[Internet]]></category>
<guid isPermaLink="false">https://redpunk.com/?p=1575</guid>
<description><![CDATA[Test :)]]></description>
<content:encoded><![CDATA[<iframe width="560" height="315" src="https://www.youtube-nocookie.com/embed/jfKfPfyJRdk" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen></iframe>]]></content:encoded>
<wfw:commentRss>https://redpunk.com/articoli/test-youtube-embed/feed/</wfw:commentRss>
<slash:comments>0</slash:comments>
</item>
</channel>
</rss>
5 changes: 3 additions & 2 deletions tests/test_markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ def test_stripping_html(value, wanted):


@pytest.mark.parametrize("file_in, file_out", [
('markup/in.xml', 'markup/out.xml')
('markup/in.xml', 'markup/out.xml'),
('markup/iframe-in.xml', 'markup/iframe-out.xml')
]
)
def test_processor(file_in, file_out):
Expand All @@ -52,4 +53,4 @@ def test_processor(file_in, file_out):
entry_out = soup_out.entries[0]
processor.reset()
processor.feed(entry_in.content[0].value)
assert processor.output() == entry_out.content[0].value
assert processor.get_output() == entry_out.content[0].value

0 comments on commit 5738cae

Please sign in to comment.