diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py index 5beb8439686f..286c9c136f1f 100644 --- a/src/calibre/srv/render_book.py +++ b/src/calibre/srv/render_book.py @@ -34,13 +34,17 @@ from calibre.utils.serialize import json_dumps, json_loads, msgpack_dumps, msgpack_loads from calibre.utils.short_uuid import uuid4 from calibre_extensions.fast_css_transform import transform_properties -from calibre_extensions.speedup import get_element_char_length from polyglot.binary import as_base64_unicode as encode_component from polyglot.binary import from_base64_bytes from polyglot.binary import from_base64_unicode as decode_component from polyglot.builtins import as_bytes, iteritems from polyglot.urllib import quote, urlparse +try: + from calibre_extensions.speedup import get_num_of_significant_chars +except ImportError: # running from source without updated binary + def get_num_of_significant_chars(elem): + return len(getattr(elem, 'text', None) or '') + len(getattr(elem, 'tail', None) or '') RENDER_VERSION = 1 BLANK_JPEG = b'\xff\xd8\xff\xdb\x00C\x00\x03\x02\x02\x02\x02\x02\x03\x02\x02\x02\x03\x03\x03\x03\x04\x06\x04\x04\x04\x04\x04\x08\x06\x06\x05\x06\t\x08\n\n\t\x08\t\t\n\x0c\x0f\x0c\n\x0b\x0e\x0b\t\t\r\x11\r\x0e\x0f\x10\x10\x11\x10\n\x0c\x12\x13\x12\x10\x13\x0f\x10\x10\x10\xff\xc9\x00\x0b\x08\x00\x01\x00\x01\x01\x01\x11\x00\xff\xcc\x00\x06\x00\x10\x10\x05\xff\xda\x00\x08\x01\x01\x00\x00?\x00\xd2\xcf \xff\xd9' # noqa @@ -142,17 +146,10 @@ def anchor_map(root): def get_length(root): ans = 0 - - def count(elem): - tag = getattr(elem, 'tag', count) - if callable(tag): - return get_element_char_length('', None, getattr(elem, 'tail', None)) - return get_element_char_length(tag, elem.text, elem.tail) - for body in root.iterchildren(XHTML('body')): - ans += count(body) + ans += get_num_of_significant_chars(body) for elem in body.iterdescendants(): - ans += count(elem) + ans += get_num_of_significant_chars(elem) return ans diff --git 
a/src/calibre/srv/tests/fast_css_transform.py b/src/calibre/srv/tests/fast_css_transform.py index bf53b3e145c8..33e2d4896195 100644 --- a/src/calibre/srv/tests/fast_css_transform.py +++ b/src/calibre/srv/tests/fast_css_transform.py @@ -10,6 +10,20 @@ class TestTransform(SimpleTest): + def test_counting_chars_in_elems(self): + from lxml import etree + + from calibre.ebooks.oeb.polish.parsing import parse + from calibre.srv.render_book import get_length + def t(html, expected): + root = parse(html, force_html5_parse=True) + self.assertEqual(expected, get_length(root), etree.tostring(root, encoding=str)) + t('
abcdefx yz