Move a whole bunch of private modules to be underscore prefixed

gsnedders · gsnedders · commit c4dd67718368 · 2016-07-14T02:47:22.000+01:00
This moves: html5lib.ihatexml -> html5lib._ihatexml
            html5lib.inputstream -> html5lib._inputstream
            html5lib.tokenizer -> html5lib._tokenizer
            html5lib.trie -> html5lib._trie
            html5lib.utils -> html5lib._utils
diff --git a/html5lib/_ihatexml.py b/html5lib/_ihatexml.py
diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py
@@ -10,7 +10,7 @@
 
 from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
 from .constants import ReparseException
-from . import utils
+from . import _utils
 
 from io import StringIO
 
@@ -28,7 +28,7 @@
 
 invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]"  # noqa
 
-if utils.supports_lone_surrogates:
+if _utils.supports_lone_surrogates:
     # Use one extra step of indirection and create surrogates with
     # eval. Not using this indirection would introduce an illegal
     # unicode literal on platforms not supporting such lone
@@ -176,7 +176,7 @@ def __init__(self, source):
 
         """
 
-        if not utils.supports_lone_surrogates:
+        if not _utils.supports_lone_surrogates:
             # Such platforms will have already checked for such
             # surrogate errors, so no need to do this checking.
             self.reportCharacterErrors = None
@@ -304,9 +304,9 @@ def characterErrorsUCS2(self, data):
             codepoint = ord(match.group())
             pos = match.start()
             # Pretty sure there should be endianness issues here
-            if utils.isSurrogatePair(data[pos:pos + 2]):
+            if _utils.isSurrogatePair(data[pos:pos + 2]):
                 # We have a surrogate pair!
-                char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2])
+                char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
                 if char_val in non_bmp_invalid_codepoints:
                     self.errors.append("invalid-codepoint")
                 skip = True
diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py
@@ -11,9 +11,9 @@
 from .constants import tokenTypes, tagTokenTypes
 from .constants import replacementCharacters
 
-from .inputstream import HTMLInputStream
+from ._inputstream import HTMLInputStream
 
-from .trie import Trie
+from ._trie import Trie
 
 entitiesTrie = Trie(entities)
 
diff --git a/html5lib/_trie/__init__.py b/html5lib/_trie/__init__.py
diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py
diff --git a/html5lib/_trie/datrie.py b/html5lib/_trie/datrie.py
diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py
diff --git a/html5lib/_utils.py b/html5lib/_utils.py
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
diff --git a/html5lib/serializer.py b/html5lib/serializer.py
@@ -7,7 +7,7 @@
 
 from .constants import voidElements, booleanAttributes, spaceCharacters
 from .constants import rcdataElements, entities, xmlEntities
-from . import treewalkers, utils
+from . import treewalkers, _utils
 from xml.sax.saxutils import escape
 
 spaceCharacters = "".join(spaceCharacters)
@@ -33,7 +33,7 @@
         continue
     if v != "&":
         if len(v) == 2:
-            v = utils.surrogatePairToCodepoint(v)
+            v = _utils.surrogatePairToCodepoint(v)
         else:
             v = ord(v)
         if v not in encode_entity_map or k.islower():
@@ -51,8 +51,8 @@ def htmlentityreplace_errors(exc):
                 skip = False
                 continue
             index = i + exc.start
-            if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
-                codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
+            if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
+                codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
                 skip = True
             else:
                 codepoint = ord(c)
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
@@ -5,15 +5,15 @@
 import pytest
 
 from .support import get_data_files, test_dir, errorMessage, TestData as _TestData
-from html5lib import HTMLParser, inputstream
+from html5lib import HTMLParser, _inputstream
 
 
 def test_basic_prescan_length():
     data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
     pad = 1024 - len(data) + 1
     data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
     assert len(data) == 1024  # Sanity
-    stream = inputstream.HTMLBinaryInputStream(data, useChardet=False)
+    stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
     assert 'utf-8' == stream.charEncoding[0].name
 
 
@@ -22,7 +22,7 @@ def test_parser_reparse():
     pad = 10240 - len(data) + 1
     data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
     assert len(data) == 10240  # Sanity
-    stream = inputstream.HTMLBinaryInputStream(data, useChardet=False)
+    stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
     assert 'windows-1252' == stream.charEncoding[0].name
     p = HTMLParser(namespaceHTMLElements=False)
     doc = p.parse(data, useChardet=False)
@@ -47,7 +47,7 @@ def test_parser_reparse():
     ("windows-1252", b"", {}),
 ])
 def test_parser_args(expected, data, kwargs):
-    stream = inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
+    stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False, **kwargs)
     assert expected == stream.charEncoding[0].name
     p = HTMLParser()
     p.parse(data, useChardet=False, **kwargs)
@@ -85,7 +85,7 @@ def runParserEncodingTest(data, encoding):
 
 
 def runPreScanEncodingTest(data, encoding):
-    stream = inputstream.HTMLBinaryInputStream(data, useChardet=False)
+    stream = _inputstream.HTMLBinaryInputStream(data, useChardet=False)
     encoding = encoding.lower().decode("ascii")
 
     # Very crude way to ignore irrelevant tests
@@ -111,6 +111,6 @@ def test_encoding():
 else:
     def test_chardet():
         with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
-            encoding = inputstream.HTMLInputStream(fp.read()).charEncoding
+            encoding = _inputstream.HTMLInputStream(fp.read()).charEncoding
             assert encoding[0].name == "big5"
 # pylint:enable=wrong-import-position
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
@@ -11,9 +11,9 @@
 import six
 from six.moves import http_client, urllib
 
-from html5lib.inputstream import (BufferedStream, HTMLInputStream,
-                                  HTMLUnicodeInputStream, HTMLBinaryInputStream)
-from html5lib.utils import supports_lone_surrogates
+from html5lib._inputstream import (BufferedStream, HTMLInputStream,
+                                   HTMLUnicodeInputStream, HTMLBinaryInputStream)
+from html5lib._utils import supports_lone_surrogates
 
 
 def test_basic():
diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py
@@ -8,8 +8,8 @@
 import pytest
 from six import unichr
 
-from html5lib.tokenizer import HTMLTokenizer
-from html5lib import constants, utils
+from html5lib._tokenizer import HTMLTokenizer
+from html5lib import constants, _utils
 
 
 class TokenizerTestParser(object):
@@ -156,7 +156,7 @@ def repl(m):
         except ValueError:
             # This occurs when unichr throws ValueError, which should
             # only be for a lone-surrogate.
-            if utils.supports_lone_surrogates:
+            if _utils.supports_lone_surrogates:
                 raise
             return None
 
diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py
@@ -28,7 +28,7 @@
 
 from __future__ import absolute_import, division, unicode_literals
 
-from ..utils import default_etree
+from .._utils import default_etree
 
 treeBuilderCache = {}
 
diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
@@ -8,7 +8,7 @@
 from . import base
 from .. import constants
 from ..constants import namespaces
-from ..utils import moduleFactoryFactory
+from .._utils import moduleFactoryFactory
 
 
 def getDomBuilder(DomImplementation):
diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
@@ -6,10 +6,10 @@
 import re
 
 from . import base
-from .. import ihatexml
+from .. import _ihatexml
 from .. import constants
 from ..constants import namespaces
-from ..utils import moduleFactoryFactory
+from .._utils import moduleFactoryFactory
 
 tag_regexp = re.compile("{([^}]*)}(.*)")
 
@@ -259,7 +259,7 @@ def serializeElement(element, indent=0):
     def tostring(element):  # pylint:disable=unused-variable
         """Serialize an element and its child nodes to a string"""
         rv = []
-        filter = ihatexml.InfosetFilter()
+        filter = _ihatexml.InfosetFilter()
 
         def serializeElement(element):
             if isinstance(element, ElementTree.ElementTree):
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
@@ -20,7 +20,7 @@
 from ..constants import DataLossWarning
 from .. import constants
 from . import etree as etree_builders
-from .. import ihatexml
+from .. import _ihatexml
 
 import lxml.etree as etree
 
@@ -54,7 +54,7 @@ def _getChildNodes(self):
 
 def testSerializer(element):
     rv = []
-    infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
+    infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
 
     def serializeElement(element, indent=0):
         if not hasattr(element, "tag"):
@@ -182,7 +182,7 @@ class TreeBuilder(base.TreeBuilder):
 
     def __init__(self, namespaceHTMLElements, fullTree=False):
         builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
-        infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
+        infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
         self.namespaceHTMLElements = namespaceHTMLElements
 
         class Attributes(dict):
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
@@ -11,7 +11,7 @@
 from __future__ import absolute_import, division, unicode_literals
 
 from .. import constants
-from ..utils import default_etree
+from .._utils import default_etree
 
 __all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"]
 
diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
@@ -13,7 +13,7 @@
 from six import string_types
 
 from . import base
-from ..utils import moduleFactoryFactory
+from .._utils import moduleFactoryFactory
 
 tag_regexp = re.compile("{([^}]*)}(.*)")
 
diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py
@@ -6,7 +6,7 @@
 
 from . import base
 
-from .. import ihatexml
+from .. import _ihatexml
 
 
 def ensure_str(s):
@@ -132,7 +132,7 @@ def __init__(self, tree):
             self.fragmentChildren = set()
             tree = Root(tree)
         base.NonRecursiveTreeWalker.__init__(self, tree)
-        self.filter = ihatexml.InfosetFilter()
+        self.filter = _ihatexml.InfosetFilter()
 
     def getNodeDetails(self, node):
         if isinstance(node, tuple):  # Text node
diff --git a/parse.py b/parse.py
@@ -11,7 +11,7 @@
 from html5lib import html5parser
 from html5lib import treebuilders, serializer, treewalkers
 from html5lib import constants
-from html5lib import utils
+from html5lib import _utils
 
 
 def parse():
@@ -116,7 +116,7 @@ def printOutput(parser, document, opts):
                 import lxml.etree
                 sys.stdout.write(lxml.etree.tostring(document, encoding="unicode"))
             elif tb == "etree":
-                sys.stdout.write(utils.default_etree.tostring(document, encoding="unicode"))
+                sys.stdout.write(_utils.default_etree.tostring(document, encoding="unicode"))
         elif opts.tree:
             if not hasattr(document, '__getitem__'):
                 document = [document]