Skip to content

Commit c4dd677

Browse files
committed
Move a whole bunch of private modules to be underscore prefixed
This moves: html5lib.ihatexml -> html5lib._ihatexml; html5lib.inputstream -> html5lib._inputstream; html5lib.tokenizer -> html5lib._tokenizer; html5lib.trie -> html5lib._trie; html5lib.utils -> html5lib._utils.
1 parent 8db5828 commit c4dd677

21 files changed

+82
-82
lines changed
File renamed without changes.

Diff for: html5lib/inputstream.py renamed to html5lib/_inputstream.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
1212
from .constants import ReparseException
13-
from . import utils
13+
from . import _utils
1414

1515
from io import StringIO
1616

@@ -28,7 +28,7 @@
2828

2929
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa
3030

31-
if utils.supports_lone_surrogates:
31+
if _utils.supports_lone_surrogates:
3232
# Use one extra step of indirection and create surrogates with
3333
# eval. Not using this indirection would introduce an illegal
3434
# unicode literal on platforms not supporting such lone
@@ -176,7 +176,7 @@ def __init__(self, source):
176176
177177
"""
178178

179-
if not utils.supports_lone_surrogates:
179+
if not _utils.supports_lone_surrogates:
180180
# Such platforms will have already checked for such
181181
# surrogate errors, so no need to do this checking.
182182
self.reportCharacterErrors = None
@@ -304,9 +304,9 @@ def characterErrorsUCS2(self, data):
304304
codepoint = ord(match.group())
305305
pos = match.start()
306306
# Pretty sure there should be endianness issues here
307-
if utils.isSurrogatePair(data[pos:pos + 2]):
307+
if _utils.isSurrogatePair(data[pos:pos + 2]):
308308
# We have a surrogate pair!
309-
char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
309+
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
310310
if char_val in non_bmp_invalid_codepoints:
311311
self.errors.append("invalid-codepoint")
312312
skip = True

Diff for: html5lib/tokenizer.py renamed to html5lib/_tokenizer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
from .constants import tokenTypes, tagTokenTypes
1212
from .constants import replacementCharacters
1313

14-
from .inputstream import HTMLInputStream
14+
from ._inputstream import HTMLInputStream
1515

16-
from .trie import Trie
16+
from ._trie import Trie
1717

1818
entitiesTrie = Trie(entities)
1919

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

Diff for: html5lib/utils.py renamed to html5lib/_utils.py

File renamed without changes.

Diff for: html5lib/html5parser.py

+45-45
Large diffs are not rendered by default.

Diff for: html5lib/serializer.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from .constants import voidElements, booleanAttributes, spaceCharacters
99
from .constants import rcdataElements, entities, xmlEntities
10-
from . import treewalkers, utils
10+
from . import treewalkers, _utils
1111
from xml.sax.saxutils import escape
1212

1313
spaceCharacters = "".join(spaceCharacters)
@@ -33,7 +33,7 @@
3333
continue
3434
if v != "&":
3535
if len(v) == 2:
36-
v = utils.surrogatePairToCodepoint(v)
36+
v = _utils.surrogatePairToCodepoint(v)
3737
else:
3838
v = ord(v)
3939
if v not in encode_entity_map or k.islower():
@@ -51,8 +51,8 @@ def htmlentityreplace_errors(exc):
5151
skip = False
5252
continue
5353
index = i + exc.start
54-
if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
55-
codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
54+
if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
55+
codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
5656
skip = True
5757
else:
5858
codepoint = ord(c)

Diff for: html5lib/tests/test_encoding.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@
55
import pytest
66

77
from .support import get_data_files, test_dir, errorMessage, TestData as _TestData
8-
from html5lib import HTMLParser, inputstream
8+
from html5lib import HTMLParser, _inputstream
99

1010

1111
def test_basic_prescan_length():
1212
data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
1313
pad = 1024 - len(data) + 1
1414
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
1515
assert len(data) == 1024 # Sanity
16-
stream = inputstream.HTMLBinaryInputStream(data, useChardet=False)
16+
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
1717
assert 'utf-8' == stream.charEncoding[0].name
1818

1919

@@ -22,7 +22,7 @@ def test_parser_reparse():
2222
pad = 10240 - len(data) + 1
2323
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
2424
assert len(data) == 10240 # Sanity
25-
stream = inputstream.HTMLBinaryInputStream(data, useChardet=False)
25+
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
2626
assert 'windows-1252' == stream.charEncoding[0].name
2727
p = HTMLParser(namespaceHTMLElements=False)
2828
doc = p.parse(data, useChardet=False)
@@ -47,7 +47,7 @@ def test_parser_reparse():
4747
("windows-1252", b"", {}),
4848
])
4949
def test_parser_args(expected, data, kwargs):
50-
stream = inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
50+
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
5151
assert expected == stream.charEncoding[0].name
5252
p = HTMLParser()
5353
p.parse(data, useChardet=False, **kwargs)
@@ -85,7 +85,7 @@ def runParserEncodingTest(data, encoding):
8585

8686

8787
def runPreScanEncodingTest(data, encoding):
88-
stream = inputstream.HTMLBinaryInputStream(data, useChardet=False)
88+
stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
8989
encoding = encoding.lower().decode("ascii")
9090

9191
# Very crude way to ignore irrelevant tests
@@ -111,6 +111,6 @@ def test_encoding():
111111
else:
112112
def test_chardet():
113113
with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
114-
encoding = inputstream.HTMLInputStream(fp.read()).charEncoding
114+
encoding = _inputstream.HTMLInputStream(fp.read()).charEncoding
115115
assert encoding[0].name == "big5"
116116
# pylint:enable=wrong-import-position

Diff for: html5lib/tests/test_stream.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
import six
1212
from six.moves import http_client, urllib
1313

14-
from html5lib.inputstream import (BufferedStream, HTMLInputStream,
15-
HTMLUnicodeInputStream, HTMLBinaryInputStream)
16-
from html5lib.utils import supports_lone_surrogates
14+
from html5lib._inputstream import (BufferedStream, HTMLInputStream,
15+
HTMLUnicodeInputStream, HTMLBinaryInputStream)
16+
from html5lib._utils import supports_lone_surrogates
1717

1818

1919
def test_basic():

Diff for: html5lib/tests/tokenizer.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
import pytest
99
from six import unichr
1010

11-
from html5lib.tokenizer import HTMLTokenizer
12-
from html5lib import constants, utils
11+
from html5lib._tokenizer import HTMLTokenizer
12+
from html5lib import constants, _utils
1313

1414

1515
class TokenizerTestParser(object):
@@ -156,7 +156,7 @@ def repl(m):
156156
except ValueError:
157157
# This occurs when unichr throws ValueError, which should
158158
# only be for a lone-surrogate.
159-
if utils.supports_lone_surrogates:
159+
if _utils.supports_lone_surrogates:
160160
raise
161161
return None
162162

Diff for: html5lib/treebuilders/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
from __future__ import absolute_import, division, unicode_literals
3030

31-
from ..utils import default_etree
31+
from .._utils import default_etree
3232

3333
treeBuilderCache = {}
3434

Diff for: html5lib/treebuilders/dom.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from . import base
99
from .. import constants
1010
from ..constants import namespaces
11-
from ..utils import moduleFactoryFactory
11+
from .._utils import moduleFactoryFactory
1212

1313

1414
def getDomBuilder(DomImplementation):

Diff for: html5lib/treebuilders/etree.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
import re
77

88
from . import base
9-
from .. import ihatexml
9+
from .. import _ihatexml
1010
from .. import constants
1111
from ..constants import namespaces
12-
from ..utils import moduleFactoryFactory
12+
from .._utils import moduleFactoryFactory
1313

1414
tag_regexp = re.compile("{([^}]*)}(.*)")
1515

@@ -259,7 +259,7 @@ def serializeElement(element, indent=0):
259259
def tostring(element): # pylint:disable=unused-variable
260260
"""Serialize an element and its child nodes to a string"""
261261
rv = []
262-
filter = ihatexml.InfosetFilter()
262+
filter = _ihatexml.InfosetFilter()
263263

264264
def serializeElement(element):
265265
if isinstance(element, ElementTree.ElementTree):

Diff for: html5lib/treebuilders/etree_lxml.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from ..constants import DataLossWarning
2121
from .. import constants
2222
from . import etree as etree_builders
23-
from .. import ihatexml
23+
from .. import _ihatexml
2424

2525
import lxml.etree as etree
2626

@@ -54,7 +54,7 @@ def _getChildNodes(self):
5454

5555
def testSerializer(element):
5656
rv = []
57-
infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
57+
infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
5858

5959
def serializeElement(element, indent=0):
6060
if not hasattr(element, "tag"):
@@ -182,7 +182,7 @@ class TreeBuilder(base.TreeBuilder):
182182

183183
def __init__(self, namespaceHTMLElements, fullTree=False):
184184
builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
185-
infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
185+
infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
186186
self.namespaceHTMLElements = namespaceHTMLElements
187187

188188
class Attributes(dict):

Diff for: html5lib/treewalkers/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from __future__ import absolute_import, division, unicode_literals
1212

1313
from .. import constants
14-
from ..utils import default_etree
14+
from .._utils import default_etree
1515

1616
__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"]
1717

Diff for: html5lib/treewalkers/etree.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from six import string_types
1414

1515
from . import base
16-
from ..utils import moduleFactoryFactory
16+
from .._utils import moduleFactoryFactory
1717

1818
tag_regexp = re.compile("{([^}]*)}(.*)")
1919

Diff for: html5lib/treewalkers/etree_lxml.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from . import base
88

9-
from .. import ihatexml
9+
from .. import _ihatexml
1010

1111

1212
def ensure_str(s):
@@ -132,7 +132,7 @@ def __init__(self, tree):
132132
self.fragmentChildren = set()
133133
tree = Root(tree)
134134
base.NonRecursiveTreeWalker.__init__(self, tree)
135-
self.filter = ihatexml.InfosetFilter()
135+
self.filter = _ihatexml.InfosetFilter()
136136

137137
def getNodeDetails(self, node):
138138
if isinstance(node, tuple): # Text node

Diff for: parse.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from html5lib import html5parser
1212
from html5lib import treebuilders, serializer, treewalkers
1313
from html5lib import constants
14-
from html5lib import utils
14+
from html5lib import _utils
1515

1616

1717
def parse():
@@ -116,7 +116,7 @@ def printOutput(parser, document, opts):
116116
import lxml.etree
117117
sys.stdout.write(lxml.etree.tostring(document, encoding="unicode"))
118118
elif tb == "etree":
119-
sys.stdout.write(utils.default_etree.tostring(document, encoding="unicode"))
119+
sys.stdout.write(_utils.default_etree.tostring(document, encoding="unicode"))
120120
elif opts.tree:
121121
if not hasattr(document, '__getitem__'):
122122
document = [document]

0 commit comments

Comments
 (0)