Skip to content

Commit

Permalink
Transforms to text/xxx should deal with bytes and text. Output is text
Browse files Browse the repository at this point in the history
  • Loading branch information
pbauer committed May 17, 2018
1 parent fdcef5d commit 5e297b6
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 7 deletions.
2 changes: 2 additions & 0 deletions Products/PortalTransforms/libtransforms/retransform.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from Products.CMFPlone.utils import safe_unicode
from Products.PortalTransforms.interfaces import ITransform
from zope.interface import implementer

Expand All @@ -25,6 +26,7 @@ def addRegex(self, pat, repl):
self.regexes.append((r, repl))

def convert(self, orig, data, **kwargs):
orig = safe_unicode(orig)

This comment has been minimized.

Copy link
@ale-rt

ale-rt Oct 19, 2018

Member

It seems this has to be:

if not six.PY2:
    orig = safe_unicode(orig)

This comment has been minimized.

Copy link
@petschki

petschki Oct 19, 2018

Member

Maybe use Products.CMFPlone.utils.safe_nativestring instead of safe_unicode?

This comment has been minimized.

Copy link
@ale-rt

ale-rt Oct 19, 2018

Member

Definitely the way to go in the long run, but I am not sure this will be present in Plone 5.1.4.

This comment has been minimized.

Copy link
@petschki

petschki Oct 19, 2018

Member

Yes, you're right. Not available in 5.1.x ... sorry for the noise.

for r, repl in self.regexes:
orig = r.sub(repl, orig)
data.setData(orig)
Expand Down
3 changes: 3 additions & 0 deletions Products/PortalTransforms/libtransforms/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from Products.CMFPlone.utils import safe_unicode
from Products.PortalTransforms.utils import log

import os
Expand Down Expand Up @@ -76,7 +77,9 @@ def bodyfinder(text):
""" Return body or unchanged text if no body tags found.
Always use html_headcheck() first.
Accepts bytes or text. Returns text.
"""
text = safe_unicode(text)
lowertext = text.lower()
bodystart = lowertext.find('<body')
if bodystart == -1:
Expand Down
12 changes: 6 additions & 6 deletions Products/PortalTransforms/transforms/html_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ def sub_func(matchobj):
return res.encode('utf-8')

return html_to_text("html_to_text",
('<script [^>]>.*</script>(?im)', ' '),
('<style [^>]>.*</style>(?im)', ' '),
('<head [^>]>.*</head>(?im)', ' '),
('(?im)</?(font|em|i|strong|b)(?=\W)[^>]*>', ''),
('<[^>]*>(?i)(?m)', ' '),
(r'&([a-zA-Z0-9#]*?);', sub_func),
(u'<script [^>]>.*</script>(?im)', ' '),
(u'<style [^>]>.*</style>(?im)', ' '),
(u'<head [^>]>.*</head>(?im)', ' '),
(u'(?im)</?(font|em|i|strong|b)(?=\W)[^>]*>', ''),
(u'<[^>]*>(?i)(?m)', ' '),
(ur'&([a-zA-Z0-9#]*?);', sub_func),
)
2 changes: 1 addition & 1 deletion Products/PortalTransforms/transforms/pdf_to_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def invokeCommand(self, tmpdir, fullname):
subprocess.run(cmd, shell=True)
try:
htmlfilename = os.path.join(tmpdir, sansext(fullname) + '.html')
htmlfile = open(htmlfilename, 'r')
htmlfile = open(htmlfilename, 'rb')
html = htmlfile.read()
htmlfile.close()
except:
Expand Down

0 comments on commit 5e297b6

Please sign in to comment.