From c2aa3af8443348a95ff7a046162cd5d31cec0546 Mon Sep 17 00:00:00 2001 From: Diego Date: Mon, 27 Aug 2018 18:13:37 +0200 Subject: [PATCH 1/8] [Python3] Add folders lib2, lib3, ext2, ext3 into sys path --- medusa/app.py | 4 ++ medusa/init/__init__.py | 24 +++++------ medusa/server/web/config/post_processing.py | 2 +- tests/__init__.py | 48 +++++++++++++++++++-- 4 files changed, 61 insertions(+), 17 deletions(-) diff --git a/medusa/app.py b/medusa/app.py index 6270a53f51..f493adb869 100644 --- a/medusa/app.py +++ b/medusa/app.py @@ -13,7 +13,11 @@ SRC_FOLDER = __name__ LEGACY_SRC_FOLDERS = ('sickbeard', 'sickrage', 'gui') LIB_FOLDER = 'lib' +LIB2_FOLDER = 'lib2' +LIB3_FOLDER = 'lib3' EXT_FOLDER = 'ext' +EXT2_FOLDER = 'ext2' +EXT3_FOLDER = 'ext3' STATIC_FOLDER = 'static' UNKNOWN_RELEASE_GROUP = 'Medusa' BACKUP_DIR = 'backup' diff --git a/medusa/init/__init__.py b/medusa/init/__init__.py index 4566fef165..6d056b427f 100644 --- a/medusa/init/__init__.py +++ b/medusa/init/__init__.py @@ -11,6 +11,8 @@ import site import sys +from medusa import app + def initialize(): """Initialize all fixes and workarounds.""" @@ -39,12 +41,8 @@ def _check_python_version(): sys.exit(1) -def _lib_location(): - return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'lib')) - - -def _ext_lib_location(): - return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'ext')) +def _get_lib_location(relative_path): + return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', relative_path)) def _configure_syspath(): @@ -55,21 +53,21 @@ def _configure_syspath(): # For example: [ cwd, pathN, ..., path1, path0, ] paths_to_insert = [ - _lib_location(), - _ext_lib_location() + _get_lib_location(app.LIB_FOLDER), + _get_lib_location(app.EXT_FOLDER) ] if sys.version_info[0] == 2: # Add Python 2-only vendored libraries paths_to_insert.extend([ - # path_to_lib2, - # path_to_ext2 + _get_lib_location(app.LIB2_FOLDER), + _get_lib_location(app.EXT2_FOLDER) ]) 
elif sys.version_info[0] == 3: # Add Python 3-only vendored libraries paths_to_insert.extend([ - # path_to_lib3, - # path_to_ext3 + _get_lib_location(app.LIB3_FOLDER), + _get_lib_location(app.EXT3_FOLDER) ]) # Insert paths into `sys.path` and handle `.pth` files @@ -212,7 +210,7 @@ def _configure_knowit(): from knowit.utils import detect_os os_family = detect_os() - suggested_path = os.path.join(_lib_location(), 'native', os_family) + suggested_path = os.path.join(_get_lib_location(app.LIB_FOLDER), 'native', os_family) if os_family == 'windows': subfolder = 'x86_64' if sys.maxsize > 2 ** 32 else 'i386' suggested_path = os.path.join(suggested_path, subfolder) diff --git a/medusa/server/web/config/post_processing.py b/medusa/server/web/config/post_processing.py index 8f320c9932..6b611efd96 100644 --- a/medusa/server/web/config/post_processing.py +++ b/medusa/server/web/config/post_processing.py @@ -178,7 +178,7 @@ def isRarSupported(): - Simulating in memory rar extraction on test.rar file """ try: - rar_path = os.path.join(app.PROG_DIR, 'lib', 'unrar2', 'test.rar') + rar_path = os.path.join(app.PROG_DIR, app.LIB_FOLDER, 'unrar2', 'test.rar') testing = RarFile(rar_path).read_files('*test.txt') if testing[0][1] == b'This is only a test.': return 'supported' diff --git a/tests/__init__.py b/tests/__init__.py index 7820ec0660..2f065a7556 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,9 +1,51 @@ # coding=utf-8 import os +import site import sys -sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), '../lib'))) -sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), '../ext'))) -sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +from medusa import app + + +def _get_lib_location(relative_path): + return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', relative_path)) + + +def _configure_syspath(): + """Add the vendored libraries into `sys.path`.""" + # Note: These
paths will be inserted into `sys.path` in reverse order (LIFO) + # So the last path on this list will be inserted as the first path on `sys.path` + # right after the current working dir. + # For example: [ cwd, pathN, ..., path1, path0, ] + + paths_to_insert = [ + _get_lib_location(app.LIB_FOLDER), + _get_lib_location(app.EXT_FOLDER) + ] + + if sys.version_info[0] == 2: + # Add Python 2-only vendored libraries + paths_to_insert.extend([ + _get_lib_location(app.LIB2_FOLDER), + _get_lib_location(app.EXT2_FOLDER) + ]) + elif sys.version_info[0] == 3: + # Add Python 3-only vendored libraries + paths_to_insert.extend([ + _get_lib_location(app.LIB3_FOLDER), + _get_lib_location(app.EXT3_FOLDER) + ]) + + # Insert paths into `sys.path` and handle `.pth` files + # Inspired by: https://bugs.python.org/issue7744 + for dirpath in paths_to_insert: + # Clear `sys.path` + sys.path, remainder = sys.path[:1], sys.path[1:] + # Add directory as a site-packages directory and handle `.pth` files + site.addsitedir(dirpath) + # Restore rest of `sys.path` + sys.path.extend(remainder) + + +_configure_syspath() sys._called_from_test = True From 6f487383b3381801a26297f529a91b6263d4239c Mon Sep 17 00:00:00 2001 From: ngosang Date: Tue, 28 Aug 2018 22:25:25 +0200 Subject: [PATCH 2/8] [Python3] Move backported python2 libs to ext2 folder --- {ext => ext2}/_dummy_thread/__init__.py | 0 {ext => ext2}/_markupbase/__init__.py | 0 {ext => ext2}/_thread/__init__.py | 0 {ext => ext2}/builtins/__init__.py | 0 {ext => ext2}/concurrent/__init__.py | 0 {ext => ext2}/concurrent/futures/__init__.py | 0 {ext => ext2}/concurrent/futures/_base.py | 0 {ext => ext2}/concurrent/futures/process.py | 0 {ext => ext2}/concurrent/futures/thread.py | 0 {ext => ext2}/copyreg/__init__.py | 0 {ext => ext2}/future/__init__.py | 0 {ext => ext2}/future/backports/__init__.py | 0 {ext => ext2}/future/backports/_markupbase.py | 0 {ext => ext2}/future/backports/datetime.py | 0 {ext => ext2}/future/backports/email/__init__.py | 0 
{ext => ext2}/future/backports/email/_encoded_words.py | 0 .../future/backports/email/_header_value_parser.py | 0 {ext => ext2}/future/backports/email/_parseaddr.py | 0 {ext => ext2}/future/backports/email/_policybase.py | 0 {ext => ext2}/future/backports/email/base64mime.py | 0 {ext => ext2}/future/backports/email/charset.py | 0 {ext => ext2}/future/backports/email/encoders.py | 0 {ext => ext2}/future/backports/email/errors.py | 0 {ext => ext2}/future/backports/email/feedparser.py | 0 {ext => ext2}/future/backports/email/generator.py | 0 {ext => ext2}/future/backports/email/header.py | 0 {ext => ext2}/future/backports/email/headerregistry.py | 0 {ext => ext2}/future/backports/email/iterators.py | 0 {ext => ext2}/future/backports/email/message.py | 0 {ext => ext2}/future/backports/email/mime/__init__.py | 0 {ext => ext2}/future/backports/email/mime/application.py | 0 {ext => ext2}/future/backports/email/mime/audio.py | 0 {ext => ext2}/future/backports/email/mime/base.py | 0 {ext => ext2}/future/backports/email/mime/image.py | 0 {ext => ext2}/future/backports/email/mime/message.py | 0 {ext => ext2}/future/backports/email/mime/multipart.py | 0 {ext => ext2}/future/backports/email/mime/nonmultipart.py | 0 {ext => ext2}/future/backports/email/mime/text.py | 0 {ext => ext2}/future/backports/email/parser.py | 0 {ext => ext2}/future/backports/email/policy.py | 0 {ext => ext2}/future/backports/email/quoprimime.py | 0 {ext => ext2}/future/backports/email/utils.py | 0 {ext => ext2}/future/backports/html/__init__.py | 0 {ext => ext2}/future/backports/html/entities.py | 0 {ext => ext2}/future/backports/html/parser.py | 0 {ext => ext2}/future/backports/http/__init__.py | 0 {ext => ext2}/future/backports/http/client.py | 0 {ext => ext2}/future/backports/http/cookiejar.py | 0 {ext => ext2}/future/backports/http/cookies.py | 0 {ext => ext2}/future/backports/http/server.py | 0 {ext => ext2}/future/backports/misc.py | 0 {ext => ext2}/future/backports/socket.py | 0 {ext => 
ext2}/future/backports/socketserver.py | 0 {ext => ext2}/future/backports/test/__init__.py | 0 {ext => ext2}/future/backports/test/badcert.pem | 0 {ext => ext2}/future/backports/test/badkey.pem | 0 {ext => ext2}/future/backports/test/dh512.pem | 0 .../future/backports/test/https_svn_python_org_root.pem | 0 {ext => ext2}/future/backports/test/keycert.passwd.pem | 0 {ext => ext2}/future/backports/test/keycert.pem | 0 {ext => ext2}/future/backports/test/keycert2.pem | 0 {ext => ext2}/future/backports/test/nokia.pem | 0 {ext => ext2}/future/backports/test/nullbytecert.pem | 0 {ext => ext2}/future/backports/test/nullcert.pem | 0 {ext => ext2}/future/backports/test/pystone.py | 0 {ext => ext2}/future/backports/test/sha256.pem | 0 {ext => ext2}/future/backports/test/ssl_cert.pem | 0 {ext => ext2}/future/backports/test/ssl_key.passwd.pem | 0 {ext => ext2}/future/backports/test/ssl_key.pem | 0 {ext => ext2}/future/backports/test/ssl_servers.py | 0 {ext => ext2}/future/backports/test/support.py | 0 {ext => ext2}/future/backports/total_ordering.py | 0 {ext => ext2}/future/backports/urllib/__init__.py | 0 {ext => ext2}/future/backports/urllib/error.py | 0 {ext => ext2}/future/backports/urllib/parse.py | 0 {ext => ext2}/future/backports/urllib/request.py | 0 {ext => ext2}/future/backports/urllib/response.py | 0 {ext => ext2}/future/backports/urllib/robotparser.py | 0 {ext => ext2}/future/backports/xmlrpc/__init__.py | 0 {ext => ext2}/future/backports/xmlrpc/client.py | 0 {ext => ext2}/future/backports/xmlrpc/server.py | 0 {ext => ext2}/future/builtins/__init__.py | 0 {ext => ext2}/future/builtins/disabled.py | 0 {ext => ext2}/future/builtins/iterators.py | 0 {ext => ext2}/future/builtins/misc.py | 0 {ext => ext2}/future/builtins/newnext.py | 0 {ext => ext2}/future/builtins/newround.py | 0 {ext => ext2}/future/builtins/newsuper.py | 0 {ext => ext2}/future/moves/__init__.py | 0 {ext => ext2}/future/moves/_dummy_thread.py | 0 {ext => ext2}/future/moves/_markupbase.py | 0 {ext => 
ext2}/future/moves/_thread.py | 0 {ext => ext2}/future/moves/builtins.py | 0 {ext => ext2}/future/moves/collections.py | 0 {ext => ext2}/future/moves/configparser.py | 0 {ext => ext2}/future/moves/copyreg.py | 0 {ext => ext2}/future/moves/dbm/__init__.py | 0 {ext => ext2}/future/moves/dbm/dumb.py | 0 {ext => ext2}/future/moves/dbm/gnu.py | 0 {ext => ext2}/future/moves/dbm/ndbm.py | 0 {ext => ext2}/future/moves/html/__init__.py | 0 {ext => ext2}/future/moves/html/entities.py | 0 {ext => ext2}/future/moves/html/parser.py | 0 {ext => ext2}/future/moves/http/__init__.py | 0 {ext => ext2}/future/moves/http/client.py | 0 {ext => ext2}/future/moves/http/cookiejar.py | 0 {ext => ext2}/future/moves/http/cookies.py | 0 {ext => ext2}/future/moves/http/server.py | 0 {ext => ext2}/future/moves/itertools.py | 0 {ext => ext2}/future/moves/pickle.py | 0 {ext => ext2}/future/moves/queue.py | 0 {ext => ext2}/future/moves/reprlib.py | 0 {ext => ext2}/future/moves/socketserver.py | 0 {ext => ext2}/future/moves/subprocess.py | 0 {ext => ext2}/future/moves/sys.py | 0 {ext => ext2}/future/moves/test/__init__.py | 0 {ext => ext2}/future/moves/test/support.py | 0 {ext => ext2}/future/moves/tkinter/__init__.py | 0 {ext => ext2}/future/moves/tkinter/colorchooser.py | 0 {ext => ext2}/future/moves/tkinter/commondialog.py | 0 {ext => ext2}/future/moves/tkinter/constants.py | 0 {ext => ext2}/future/moves/tkinter/dialog.py | 0 {ext => ext2}/future/moves/tkinter/dnd.py | 0 {ext => ext2}/future/moves/tkinter/filedialog.py | 0 {ext => ext2}/future/moves/tkinter/font.py | 0 {ext => ext2}/future/moves/tkinter/messagebox.py | 0 {ext => ext2}/future/moves/tkinter/scrolledtext.py | 0 {ext => ext2}/future/moves/tkinter/simpledialog.py | 0 {ext => ext2}/future/moves/tkinter/tix.py | 0 {ext => ext2}/future/moves/tkinter/ttk.py | 0 {ext => ext2}/future/moves/urllib/__init__.py | 0 {ext => ext2}/future/moves/urllib/error.py | 0 {ext => ext2}/future/moves/urllib/parse.py | 0 {ext => 
ext2}/future/moves/urllib/request.py | 0 {ext => ext2}/future/moves/urllib/response.py | 0 {ext => ext2}/future/moves/urllib/robotparser.py | 0 {ext => ext2}/future/moves/winreg.py | 0 {ext => ext2}/future/moves/xmlrpc/__init__.py | 0 {ext => ext2}/future/moves/xmlrpc/client.py | 0 {ext => ext2}/future/moves/xmlrpc/server.py | 0 {ext => ext2}/future/standard_library/__init__.py | 0 {ext => ext2}/future/tests/__init__.py | 0 {ext => ext2}/future/tests/base.py | 0 {ext => ext2}/future/types/__init__.py | 0 {ext => ext2}/future/types/newbytes.py | 0 {ext => ext2}/future/types/newdict.py | 0 {ext => ext2}/future/types/newint.py | 0 {ext => ext2}/future/types/newlist.py | 0 {ext => ext2}/future/types/newmemoryview.py | 0 {ext => ext2}/future/types/newobject.py | 0 {ext => ext2}/future/types/newopen.py | 0 {ext => ext2}/future/types/newrange.py | 0 {ext => ext2}/future/types/newstr.py | 0 {ext => ext2}/future/utils/__init__.py | 0 {ext => ext2}/future/utils/surrogateescape.py | 0 {ext => ext2}/html/__init__.py | 0 {ext => ext2}/html/entities.py | 0 {ext => ext2}/html/parser.py | 0 {ext => ext2}/http/__init__.py | 0 {ext => ext2}/http/client.py | 0 {ext => ext2}/http/cookiejar.py | 0 {ext => ext2}/http/cookies.py | 0 {ext => ext2}/http/server.py | 0 {ext => ext2}/libfuturize/__init__.py | 0 {ext => ext2}/libfuturize/fixer_util.py | 0 {ext => ext2}/libfuturize/fixes/__init__.py | 0 {ext => ext2}/libfuturize/fixes/fix_UserDict.py | 0 {ext => ext2}/libfuturize/fixes/fix_absolute_import.py | 0 .../fix_add__future__imports_except_unicode_literals.py | 0 {ext => ext2}/libfuturize/fixes/fix_basestring.py | 0 {ext => ext2}/libfuturize/fixes/fix_bytes.py | 0 {ext => ext2}/libfuturize/fixes/fix_cmp.py | 0 {ext => ext2}/libfuturize/fixes/fix_division.py | 0 {ext => ext2}/libfuturize/fixes/fix_division_safe.py | 0 {ext => ext2}/libfuturize/fixes/fix_execfile.py | 0 {ext => ext2}/libfuturize/fixes/fix_future_builtins.py | 0 .../libfuturize/fixes/fix_future_standard_library.py | 0 
.../fixes/fix_future_standard_library_urllib.py | 0 {ext => ext2}/libfuturize/fixes/fix_metaclass.py | 0 {ext => ext2}/libfuturize/fixes/fix_next_call.py | 0 {ext => ext2}/libfuturize/fixes/fix_object.py | 0 {ext => ext2}/libfuturize/fixes/fix_oldstr_wrap.py | 0 .../libfuturize/fixes/fix_order___future__imports.py | 0 {ext => ext2}/libfuturize/fixes/fix_print.py | 0 {ext => ext2}/libfuturize/fixes/fix_print_with_import.py | 0 {ext => ext2}/libfuturize/fixes/fix_raise.py | 0 .../libfuturize/fixes/fix_remove_old__future__imports.py | 0 {ext => ext2}/libfuturize/fixes/fix_unicode_keep_u.py | 0 .../libfuturize/fixes/fix_unicode_literals_import.py | 0 {ext => ext2}/libfuturize/fixes/fix_xrange_with_import.py | 0 {ext => ext2}/libfuturize/main.py | 0 {ext => ext2}/libpasteurize/__init__.py | 0 {ext => ext2}/libpasteurize/fixes/__init__.py | 0 {ext => ext2}/libpasteurize/fixes/feature_base.py | 0 .../libpasteurize/fixes/fix_add_all__future__imports.py | 0 .../libpasteurize/fixes/fix_add_all_future_builtins.py | 0 .../fixes/fix_add_future_standard_library_import.py | 0 {ext => ext2}/libpasteurize/fixes/fix_annotations.py | 0 {ext => ext2}/libpasteurize/fixes/fix_division.py | 0 {ext => ext2}/libpasteurize/fixes/fix_features.py | 0 {ext => ext2}/libpasteurize/fixes/fix_fullargspec.py | 0 {ext => ext2}/libpasteurize/fixes/fix_future_builtins.py | 0 {ext => ext2}/libpasteurize/fixes/fix_getcwd.py | 0 {ext => ext2}/libpasteurize/fixes/fix_imports.py | 0 {ext => ext2}/libpasteurize/fixes/fix_imports2.py | 0 {ext => ext2}/libpasteurize/fixes/fix_kwargs.py | 0 {ext => ext2}/libpasteurize/fixes/fix_memoryview.py | 0 {ext => ext2}/libpasteurize/fixes/fix_metaclass.py | 0 {ext => ext2}/libpasteurize/fixes/fix_newstyle.py | 0 {ext => ext2}/libpasteurize/fixes/fix_next.py | 0 {ext => ext2}/libpasteurize/fixes/fix_printfunction.py | 0 {ext => ext2}/libpasteurize/fixes/fix_raise.py | 0 {ext => ext2}/libpasteurize/fixes/fix_raise_.py | 0 {ext => ext2}/libpasteurize/fixes/fix_throw.py | 0 
{ext => ext2}/libpasteurize/fixes/fix_unpacking.py | 0 {ext => ext2}/libpasteurize/main.py | 0 {ext => ext2}/past/__init__.py | 0 {ext => ext2}/past/builtins/__init__.py | 0 {ext => ext2}/past/builtins/misc.py | 0 {ext => ext2}/past/builtins/noniterators.py | 0 {ext => ext2}/past/tests/__init__.py | 0 {ext => ext2}/past/translation/__init__.py | 0 {ext => ext2}/past/types/__init__.py | 0 {ext => ext2}/past/types/basestring.py | 0 {ext => ext2}/past/types/olddict.py | 0 {ext => ext2}/past/types/oldstr.py | 0 {ext => ext2}/past/utils/__init__.py | 0 {ext => ext2}/queue/__init__.py | 0 {ext => ext2}/reprlib/__init__.py | 0 {ext => ext2}/socketserver/__init__.py | 0 {ext => ext2}/tkinter/__init__.py | 0 {ext => ext2}/tkinter/colorchooser.py | 0 {ext => ext2}/tkinter/commondialog.py | 0 {ext => ext2}/tkinter/constants.py | 0 {ext => ext2}/tkinter/dialog.py | 0 {ext => ext2}/tkinter/dnd.py | 0 {ext => ext2}/tkinter/filedialog.py | 0 {ext => ext2}/tkinter/font.py | 0 {ext => ext2}/tkinter/messagebox.py | 0 {ext => ext2}/tkinter/scrolledtext.py | 0 {ext => ext2}/tkinter/simpledialog.py | 0 {ext => ext2}/tkinter/tix.py | 0 {ext => ext2}/tkinter/ttk.py | 0 {ext => ext2}/winreg/__init__.py | 0 {ext => ext2}/xmlrpc/__init__.py | 0 {ext => ext2}/xmlrpc/client.py | 0 {ext => ext2}/xmlrpc/server.py | 0 medusa/server/api/v1/core.py | 7 ++++--- 248 files changed, 4 insertions(+), 3 deletions(-) rename {ext => ext2}/_dummy_thread/__init__.py (100%) rename {ext => ext2}/_markupbase/__init__.py (100%) rename {ext => ext2}/_thread/__init__.py (100%) rename {ext => ext2}/builtins/__init__.py (100%) rename {ext => ext2}/concurrent/__init__.py (100%) rename {ext => ext2}/concurrent/futures/__init__.py (100%) rename {ext => ext2}/concurrent/futures/_base.py (100%) rename {ext => ext2}/concurrent/futures/process.py (100%) rename {ext => ext2}/concurrent/futures/thread.py (100%) rename {ext => ext2}/copyreg/__init__.py (100%) rename {ext => ext2}/future/__init__.py (100%) rename {ext => 
ext2}/future/backports/__init__.py (100%) rename {ext => ext2}/future/backports/_markupbase.py (100%) rename {ext => ext2}/future/backports/datetime.py (100%) rename {ext => ext2}/future/backports/email/__init__.py (100%) rename {ext => ext2}/future/backports/email/_encoded_words.py (100%) rename {ext => ext2}/future/backports/email/_header_value_parser.py (100%) rename {ext => ext2}/future/backports/email/_parseaddr.py (100%) rename {ext => ext2}/future/backports/email/_policybase.py (100%) rename {ext => ext2}/future/backports/email/base64mime.py (100%) rename {ext => ext2}/future/backports/email/charset.py (100%) rename {ext => ext2}/future/backports/email/encoders.py (100%) rename {ext => ext2}/future/backports/email/errors.py (100%) rename {ext => ext2}/future/backports/email/feedparser.py (100%) rename {ext => ext2}/future/backports/email/generator.py (100%) rename {ext => ext2}/future/backports/email/header.py (100%) rename {ext => ext2}/future/backports/email/headerregistry.py (100%) rename {ext => ext2}/future/backports/email/iterators.py (100%) rename {ext => ext2}/future/backports/email/message.py (100%) rename {ext => ext2}/future/backports/email/mime/__init__.py (100%) rename {ext => ext2}/future/backports/email/mime/application.py (100%) rename {ext => ext2}/future/backports/email/mime/audio.py (100%) rename {ext => ext2}/future/backports/email/mime/base.py (100%) rename {ext => ext2}/future/backports/email/mime/image.py (100%) rename {ext => ext2}/future/backports/email/mime/message.py (100%) rename {ext => ext2}/future/backports/email/mime/multipart.py (100%) rename {ext => ext2}/future/backports/email/mime/nonmultipart.py (100%) rename {ext => ext2}/future/backports/email/mime/text.py (100%) rename {ext => ext2}/future/backports/email/parser.py (100%) rename {ext => ext2}/future/backports/email/policy.py (100%) rename {ext => ext2}/future/backports/email/quoprimime.py (100%) rename {ext => ext2}/future/backports/email/utils.py (100%) rename {ext => 
ext2}/future/backports/html/__init__.py (100%) rename {ext => ext2}/future/backports/html/entities.py (100%) rename {ext => ext2}/future/backports/html/parser.py (100%) rename {ext => ext2}/future/backports/http/__init__.py (100%) rename {ext => ext2}/future/backports/http/client.py (100%) rename {ext => ext2}/future/backports/http/cookiejar.py (100%) rename {ext => ext2}/future/backports/http/cookies.py (100%) rename {ext => ext2}/future/backports/http/server.py (100%) rename {ext => ext2}/future/backports/misc.py (100%) rename {ext => ext2}/future/backports/socket.py (100%) rename {ext => ext2}/future/backports/socketserver.py (100%) rename {ext => ext2}/future/backports/test/__init__.py (100%) rename {ext => ext2}/future/backports/test/badcert.pem (100%) rename {ext => ext2}/future/backports/test/badkey.pem (100%) rename {ext => ext2}/future/backports/test/dh512.pem (100%) rename {ext => ext2}/future/backports/test/https_svn_python_org_root.pem (100%) rename {ext => ext2}/future/backports/test/keycert.passwd.pem (100%) rename {ext => ext2}/future/backports/test/keycert.pem (100%) rename {ext => ext2}/future/backports/test/keycert2.pem (100%) rename {ext => ext2}/future/backports/test/nokia.pem (100%) rename {ext => ext2}/future/backports/test/nullbytecert.pem (100%) rename {ext => ext2}/future/backports/test/nullcert.pem (100%) rename {ext => ext2}/future/backports/test/pystone.py (100%) rename {ext => ext2}/future/backports/test/sha256.pem (100%) rename {ext => ext2}/future/backports/test/ssl_cert.pem (100%) rename {ext => ext2}/future/backports/test/ssl_key.passwd.pem (100%) rename {ext => ext2}/future/backports/test/ssl_key.pem (100%) rename {ext => ext2}/future/backports/test/ssl_servers.py (100%) rename {ext => ext2}/future/backports/test/support.py (100%) rename {ext => ext2}/future/backports/total_ordering.py (100%) rename {ext => ext2}/future/backports/urllib/__init__.py (100%) rename {ext => ext2}/future/backports/urllib/error.py (100%) rename {ext => 
ext2}/future/backports/urllib/parse.py (100%) rename {ext => ext2}/future/backports/urllib/request.py (100%) rename {ext => ext2}/future/backports/urllib/response.py (100%) rename {ext => ext2}/future/backports/urllib/robotparser.py (100%) rename {ext => ext2}/future/backports/xmlrpc/__init__.py (100%) rename {ext => ext2}/future/backports/xmlrpc/client.py (100%) rename {ext => ext2}/future/backports/xmlrpc/server.py (100%) rename {ext => ext2}/future/builtins/__init__.py (100%) rename {ext => ext2}/future/builtins/disabled.py (100%) rename {ext => ext2}/future/builtins/iterators.py (100%) rename {ext => ext2}/future/builtins/misc.py (100%) rename {ext => ext2}/future/builtins/newnext.py (100%) rename {ext => ext2}/future/builtins/newround.py (100%) rename {ext => ext2}/future/builtins/newsuper.py (100%) rename {ext => ext2}/future/moves/__init__.py (100%) rename {ext => ext2}/future/moves/_dummy_thread.py (100%) rename {ext => ext2}/future/moves/_markupbase.py (100%) rename {ext => ext2}/future/moves/_thread.py (100%) rename {ext => ext2}/future/moves/builtins.py (100%) rename {ext => ext2}/future/moves/collections.py (100%) rename {ext => ext2}/future/moves/configparser.py (100%) rename {ext => ext2}/future/moves/copyreg.py (100%) rename {ext => ext2}/future/moves/dbm/__init__.py (100%) rename {ext => ext2}/future/moves/dbm/dumb.py (100%) rename {ext => ext2}/future/moves/dbm/gnu.py (100%) rename {ext => ext2}/future/moves/dbm/ndbm.py (100%) rename {ext => ext2}/future/moves/html/__init__.py (100%) rename {ext => ext2}/future/moves/html/entities.py (100%) rename {ext => ext2}/future/moves/html/parser.py (100%) rename {ext => ext2}/future/moves/http/__init__.py (100%) rename {ext => ext2}/future/moves/http/client.py (100%) rename {ext => ext2}/future/moves/http/cookiejar.py (100%) rename {ext => ext2}/future/moves/http/cookies.py (100%) rename {ext => ext2}/future/moves/http/server.py (100%) rename {ext => ext2}/future/moves/itertools.py (100%) rename {ext => 
ext2}/future/moves/pickle.py (100%) rename {ext => ext2}/future/moves/queue.py (100%) rename {ext => ext2}/future/moves/reprlib.py (100%) rename {ext => ext2}/future/moves/socketserver.py (100%) rename {ext => ext2}/future/moves/subprocess.py (100%) rename {ext => ext2}/future/moves/sys.py (100%) rename {ext => ext2}/future/moves/test/__init__.py (100%) rename {ext => ext2}/future/moves/test/support.py (100%) rename {ext => ext2}/future/moves/tkinter/__init__.py (100%) rename {ext => ext2}/future/moves/tkinter/colorchooser.py (100%) rename {ext => ext2}/future/moves/tkinter/commondialog.py (100%) rename {ext => ext2}/future/moves/tkinter/constants.py (100%) rename {ext => ext2}/future/moves/tkinter/dialog.py (100%) rename {ext => ext2}/future/moves/tkinter/dnd.py (100%) rename {ext => ext2}/future/moves/tkinter/filedialog.py (100%) rename {ext => ext2}/future/moves/tkinter/font.py (100%) rename {ext => ext2}/future/moves/tkinter/messagebox.py (100%) rename {ext => ext2}/future/moves/tkinter/scrolledtext.py (100%) rename {ext => ext2}/future/moves/tkinter/simpledialog.py (100%) rename {ext => ext2}/future/moves/tkinter/tix.py (100%) rename {ext => ext2}/future/moves/tkinter/ttk.py (100%) rename {ext => ext2}/future/moves/urllib/__init__.py (100%) rename {ext => ext2}/future/moves/urllib/error.py (100%) rename {ext => ext2}/future/moves/urllib/parse.py (100%) rename {ext => ext2}/future/moves/urllib/request.py (100%) rename {ext => ext2}/future/moves/urllib/response.py (100%) rename {ext => ext2}/future/moves/urllib/robotparser.py (100%) rename {ext => ext2}/future/moves/winreg.py (100%) rename {ext => ext2}/future/moves/xmlrpc/__init__.py (100%) rename {ext => ext2}/future/moves/xmlrpc/client.py (100%) rename {ext => ext2}/future/moves/xmlrpc/server.py (100%) rename {ext => ext2}/future/standard_library/__init__.py (100%) rename {ext => ext2}/future/tests/__init__.py (100%) rename {ext => ext2}/future/tests/base.py (100%) rename {ext => 
ext2}/future/types/__init__.py (100%) rename {ext => ext2}/future/types/newbytes.py (100%) rename {ext => ext2}/future/types/newdict.py (100%) rename {ext => ext2}/future/types/newint.py (100%) rename {ext => ext2}/future/types/newlist.py (100%) rename {ext => ext2}/future/types/newmemoryview.py (100%) rename {ext => ext2}/future/types/newobject.py (100%) rename {ext => ext2}/future/types/newopen.py (100%) rename {ext => ext2}/future/types/newrange.py (100%) rename {ext => ext2}/future/types/newstr.py (100%) rename {ext => ext2}/future/utils/__init__.py (100%) rename {ext => ext2}/future/utils/surrogateescape.py (100%) rename {ext => ext2}/html/__init__.py (100%) rename {ext => ext2}/html/entities.py (100%) rename {ext => ext2}/html/parser.py (100%) rename {ext => ext2}/http/__init__.py (100%) rename {ext => ext2}/http/client.py (100%) rename {ext => ext2}/http/cookiejar.py (100%) rename {ext => ext2}/http/cookies.py (100%) rename {ext => ext2}/http/server.py (100%) rename {ext => ext2}/libfuturize/__init__.py (100%) rename {ext => ext2}/libfuturize/fixer_util.py (100%) rename {ext => ext2}/libfuturize/fixes/__init__.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_UserDict.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_absolute_import.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_add__future__imports_except_unicode_literals.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_basestring.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_bytes.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_cmp.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_division.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_division_safe.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_execfile.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_future_builtins.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_future_standard_library.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_future_standard_library_urllib.py (100%) rename {ext => 
ext2}/libfuturize/fixes/fix_metaclass.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_next_call.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_object.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_oldstr_wrap.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_order___future__imports.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_print.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_print_with_import.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_raise.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_remove_old__future__imports.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_unicode_keep_u.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_unicode_literals_import.py (100%) rename {ext => ext2}/libfuturize/fixes/fix_xrange_with_import.py (100%) rename {ext => ext2}/libfuturize/main.py (100%) rename {ext => ext2}/libpasteurize/__init__.py (100%) rename {ext => ext2}/libpasteurize/fixes/__init__.py (100%) rename {ext => ext2}/libpasteurize/fixes/feature_base.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_add_all__future__imports.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_add_all_future_builtins.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_add_future_standard_library_import.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_annotations.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_division.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_features.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_fullargspec.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_future_builtins.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_getcwd.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_imports.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_imports2.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_kwargs.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_memoryview.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_metaclass.py (100%) rename 
{ext => ext2}/libpasteurize/fixes/fix_newstyle.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_next.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_printfunction.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_raise.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_raise_.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_throw.py (100%) rename {ext => ext2}/libpasteurize/fixes/fix_unpacking.py (100%) rename {ext => ext2}/libpasteurize/main.py (100%) rename {ext => ext2}/past/__init__.py (100%) rename {ext => ext2}/past/builtins/__init__.py (100%) rename {ext => ext2}/past/builtins/misc.py (100%) rename {ext => ext2}/past/builtins/noniterators.py (100%) rename {ext => ext2}/past/tests/__init__.py (100%) rename {ext => ext2}/past/translation/__init__.py (100%) rename {ext => ext2}/past/types/__init__.py (100%) rename {ext => ext2}/past/types/basestring.py (100%) rename {ext => ext2}/past/types/olddict.py (100%) rename {ext => ext2}/past/types/oldstr.py (100%) rename {ext => ext2}/past/utils/__init__.py (100%) rename {ext => ext2}/queue/__init__.py (100%) rename {ext => ext2}/reprlib/__init__.py (100%) rename {ext => ext2}/socketserver/__init__.py (100%) rename {ext => ext2}/tkinter/__init__.py (100%) rename {ext => ext2}/tkinter/colorchooser.py (100%) rename {ext => ext2}/tkinter/commondialog.py (100%) rename {ext => ext2}/tkinter/constants.py (100%) rename {ext => ext2}/tkinter/dialog.py (100%) rename {ext => ext2}/tkinter/dnd.py (100%) rename {ext => ext2}/tkinter/filedialog.py (100%) rename {ext => ext2}/tkinter/font.py (100%) rename {ext => ext2}/tkinter/messagebox.py (100%) rename {ext => ext2}/tkinter/scrolledtext.py (100%) rename {ext => ext2}/tkinter/simpledialog.py (100%) rename {ext => ext2}/tkinter/tix.py (100%) rename {ext => ext2}/tkinter/ttk.py (100%) rename {ext => ext2}/winreg/__init__.py (100%) rename {ext => ext2}/xmlrpc/__init__.py (100%) rename {ext => ext2}/xmlrpc/client.py (100%) rename {ext => 
ext2}/xmlrpc/server.py (100%) diff --git a/ext/_dummy_thread/__init__.py b/ext2/_dummy_thread/__init__.py similarity index 100% rename from ext/_dummy_thread/__init__.py rename to ext2/_dummy_thread/__init__.py diff --git a/ext/_markupbase/__init__.py b/ext2/_markupbase/__init__.py similarity index 100% rename from ext/_markupbase/__init__.py rename to ext2/_markupbase/__init__.py diff --git a/ext/_thread/__init__.py b/ext2/_thread/__init__.py similarity index 100% rename from ext/_thread/__init__.py rename to ext2/_thread/__init__.py diff --git a/ext/builtins/__init__.py b/ext2/builtins/__init__.py similarity index 100% rename from ext/builtins/__init__.py rename to ext2/builtins/__init__.py diff --git a/ext/concurrent/__init__.py b/ext2/concurrent/__init__.py similarity index 100% rename from ext/concurrent/__init__.py rename to ext2/concurrent/__init__.py diff --git a/ext/concurrent/futures/__init__.py b/ext2/concurrent/futures/__init__.py similarity index 100% rename from ext/concurrent/futures/__init__.py rename to ext2/concurrent/futures/__init__.py diff --git a/ext/concurrent/futures/_base.py b/ext2/concurrent/futures/_base.py similarity index 100% rename from ext/concurrent/futures/_base.py rename to ext2/concurrent/futures/_base.py diff --git a/ext/concurrent/futures/process.py b/ext2/concurrent/futures/process.py similarity index 100% rename from ext/concurrent/futures/process.py rename to ext2/concurrent/futures/process.py diff --git a/ext/concurrent/futures/thread.py b/ext2/concurrent/futures/thread.py similarity index 100% rename from ext/concurrent/futures/thread.py rename to ext2/concurrent/futures/thread.py diff --git a/ext/copyreg/__init__.py b/ext2/copyreg/__init__.py similarity index 100% rename from ext/copyreg/__init__.py rename to ext2/copyreg/__init__.py diff --git a/ext/future/__init__.py b/ext2/future/__init__.py similarity index 100% rename from ext/future/__init__.py rename to ext2/future/__init__.py diff --git 
a/ext/future/backports/__init__.py b/ext2/future/backports/__init__.py similarity index 100% rename from ext/future/backports/__init__.py rename to ext2/future/backports/__init__.py diff --git a/ext/future/backports/_markupbase.py b/ext2/future/backports/_markupbase.py similarity index 100% rename from ext/future/backports/_markupbase.py rename to ext2/future/backports/_markupbase.py diff --git a/ext/future/backports/datetime.py b/ext2/future/backports/datetime.py similarity index 100% rename from ext/future/backports/datetime.py rename to ext2/future/backports/datetime.py diff --git a/ext/future/backports/email/__init__.py b/ext2/future/backports/email/__init__.py similarity index 100% rename from ext/future/backports/email/__init__.py rename to ext2/future/backports/email/__init__.py diff --git a/ext/future/backports/email/_encoded_words.py b/ext2/future/backports/email/_encoded_words.py similarity index 100% rename from ext/future/backports/email/_encoded_words.py rename to ext2/future/backports/email/_encoded_words.py diff --git a/ext/future/backports/email/_header_value_parser.py b/ext2/future/backports/email/_header_value_parser.py similarity index 100% rename from ext/future/backports/email/_header_value_parser.py rename to ext2/future/backports/email/_header_value_parser.py diff --git a/ext/future/backports/email/_parseaddr.py b/ext2/future/backports/email/_parseaddr.py similarity index 100% rename from ext/future/backports/email/_parseaddr.py rename to ext2/future/backports/email/_parseaddr.py diff --git a/ext/future/backports/email/_policybase.py b/ext2/future/backports/email/_policybase.py similarity index 100% rename from ext/future/backports/email/_policybase.py rename to ext2/future/backports/email/_policybase.py diff --git a/ext/future/backports/email/base64mime.py b/ext2/future/backports/email/base64mime.py similarity index 100% rename from ext/future/backports/email/base64mime.py rename to ext2/future/backports/email/base64mime.py diff --git 
a/ext/future/backports/email/charset.py b/ext2/future/backports/email/charset.py similarity index 100% rename from ext/future/backports/email/charset.py rename to ext2/future/backports/email/charset.py diff --git a/ext/future/backports/email/encoders.py b/ext2/future/backports/email/encoders.py similarity index 100% rename from ext/future/backports/email/encoders.py rename to ext2/future/backports/email/encoders.py diff --git a/ext/future/backports/email/errors.py b/ext2/future/backports/email/errors.py similarity index 100% rename from ext/future/backports/email/errors.py rename to ext2/future/backports/email/errors.py diff --git a/ext/future/backports/email/feedparser.py b/ext2/future/backports/email/feedparser.py similarity index 100% rename from ext/future/backports/email/feedparser.py rename to ext2/future/backports/email/feedparser.py diff --git a/ext/future/backports/email/generator.py b/ext2/future/backports/email/generator.py similarity index 100% rename from ext/future/backports/email/generator.py rename to ext2/future/backports/email/generator.py diff --git a/ext/future/backports/email/header.py b/ext2/future/backports/email/header.py similarity index 100% rename from ext/future/backports/email/header.py rename to ext2/future/backports/email/header.py diff --git a/ext/future/backports/email/headerregistry.py b/ext2/future/backports/email/headerregistry.py similarity index 100% rename from ext/future/backports/email/headerregistry.py rename to ext2/future/backports/email/headerregistry.py diff --git a/ext/future/backports/email/iterators.py b/ext2/future/backports/email/iterators.py similarity index 100% rename from ext/future/backports/email/iterators.py rename to ext2/future/backports/email/iterators.py diff --git a/ext/future/backports/email/message.py b/ext2/future/backports/email/message.py similarity index 100% rename from ext/future/backports/email/message.py rename to ext2/future/backports/email/message.py diff --git 
a/ext/future/backports/email/mime/__init__.py b/ext2/future/backports/email/mime/__init__.py similarity index 100% rename from ext/future/backports/email/mime/__init__.py rename to ext2/future/backports/email/mime/__init__.py diff --git a/ext/future/backports/email/mime/application.py b/ext2/future/backports/email/mime/application.py similarity index 100% rename from ext/future/backports/email/mime/application.py rename to ext2/future/backports/email/mime/application.py diff --git a/ext/future/backports/email/mime/audio.py b/ext2/future/backports/email/mime/audio.py similarity index 100% rename from ext/future/backports/email/mime/audio.py rename to ext2/future/backports/email/mime/audio.py diff --git a/ext/future/backports/email/mime/base.py b/ext2/future/backports/email/mime/base.py similarity index 100% rename from ext/future/backports/email/mime/base.py rename to ext2/future/backports/email/mime/base.py diff --git a/ext/future/backports/email/mime/image.py b/ext2/future/backports/email/mime/image.py similarity index 100% rename from ext/future/backports/email/mime/image.py rename to ext2/future/backports/email/mime/image.py diff --git a/ext/future/backports/email/mime/message.py b/ext2/future/backports/email/mime/message.py similarity index 100% rename from ext/future/backports/email/mime/message.py rename to ext2/future/backports/email/mime/message.py diff --git a/ext/future/backports/email/mime/multipart.py b/ext2/future/backports/email/mime/multipart.py similarity index 100% rename from ext/future/backports/email/mime/multipart.py rename to ext2/future/backports/email/mime/multipart.py diff --git a/ext/future/backports/email/mime/nonmultipart.py b/ext2/future/backports/email/mime/nonmultipart.py similarity index 100% rename from ext/future/backports/email/mime/nonmultipart.py rename to ext2/future/backports/email/mime/nonmultipart.py diff --git a/ext/future/backports/email/mime/text.py b/ext2/future/backports/email/mime/text.py similarity index 100% rename 
from ext/future/backports/email/mime/text.py rename to ext2/future/backports/email/mime/text.py diff --git a/ext/future/backports/email/parser.py b/ext2/future/backports/email/parser.py similarity index 100% rename from ext/future/backports/email/parser.py rename to ext2/future/backports/email/parser.py diff --git a/ext/future/backports/email/policy.py b/ext2/future/backports/email/policy.py similarity index 100% rename from ext/future/backports/email/policy.py rename to ext2/future/backports/email/policy.py diff --git a/ext/future/backports/email/quoprimime.py b/ext2/future/backports/email/quoprimime.py similarity index 100% rename from ext/future/backports/email/quoprimime.py rename to ext2/future/backports/email/quoprimime.py diff --git a/ext/future/backports/email/utils.py b/ext2/future/backports/email/utils.py similarity index 100% rename from ext/future/backports/email/utils.py rename to ext2/future/backports/email/utils.py diff --git a/ext/future/backports/html/__init__.py b/ext2/future/backports/html/__init__.py similarity index 100% rename from ext/future/backports/html/__init__.py rename to ext2/future/backports/html/__init__.py diff --git a/ext/future/backports/html/entities.py b/ext2/future/backports/html/entities.py similarity index 100% rename from ext/future/backports/html/entities.py rename to ext2/future/backports/html/entities.py diff --git a/ext/future/backports/html/parser.py b/ext2/future/backports/html/parser.py similarity index 100% rename from ext/future/backports/html/parser.py rename to ext2/future/backports/html/parser.py diff --git a/ext/future/backports/http/__init__.py b/ext2/future/backports/http/__init__.py similarity index 100% rename from ext/future/backports/http/__init__.py rename to ext2/future/backports/http/__init__.py diff --git a/ext/future/backports/http/client.py b/ext2/future/backports/http/client.py similarity index 100% rename from ext/future/backports/http/client.py rename to ext2/future/backports/http/client.py diff 
--git a/ext/future/backports/http/cookiejar.py b/ext2/future/backports/http/cookiejar.py similarity index 100% rename from ext/future/backports/http/cookiejar.py rename to ext2/future/backports/http/cookiejar.py diff --git a/ext/future/backports/http/cookies.py b/ext2/future/backports/http/cookies.py similarity index 100% rename from ext/future/backports/http/cookies.py rename to ext2/future/backports/http/cookies.py diff --git a/ext/future/backports/http/server.py b/ext2/future/backports/http/server.py similarity index 100% rename from ext/future/backports/http/server.py rename to ext2/future/backports/http/server.py diff --git a/ext/future/backports/misc.py b/ext2/future/backports/misc.py similarity index 100% rename from ext/future/backports/misc.py rename to ext2/future/backports/misc.py diff --git a/ext/future/backports/socket.py b/ext2/future/backports/socket.py similarity index 100% rename from ext/future/backports/socket.py rename to ext2/future/backports/socket.py diff --git a/ext/future/backports/socketserver.py b/ext2/future/backports/socketserver.py similarity index 100% rename from ext/future/backports/socketserver.py rename to ext2/future/backports/socketserver.py diff --git a/ext/future/backports/test/__init__.py b/ext2/future/backports/test/__init__.py similarity index 100% rename from ext/future/backports/test/__init__.py rename to ext2/future/backports/test/__init__.py diff --git a/ext/future/backports/test/badcert.pem b/ext2/future/backports/test/badcert.pem similarity index 100% rename from ext/future/backports/test/badcert.pem rename to ext2/future/backports/test/badcert.pem diff --git a/ext/future/backports/test/badkey.pem b/ext2/future/backports/test/badkey.pem similarity index 100% rename from ext/future/backports/test/badkey.pem rename to ext2/future/backports/test/badkey.pem diff --git a/ext/future/backports/test/dh512.pem b/ext2/future/backports/test/dh512.pem similarity index 100% rename from ext/future/backports/test/dh512.pem rename to 
ext2/future/backports/test/dh512.pem diff --git a/ext/future/backports/test/https_svn_python_org_root.pem b/ext2/future/backports/test/https_svn_python_org_root.pem similarity index 100% rename from ext/future/backports/test/https_svn_python_org_root.pem rename to ext2/future/backports/test/https_svn_python_org_root.pem diff --git a/ext/future/backports/test/keycert.passwd.pem b/ext2/future/backports/test/keycert.passwd.pem similarity index 100% rename from ext/future/backports/test/keycert.passwd.pem rename to ext2/future/backports/test/keycert.passwd.pem diff --git a/ext/future/backports/test/keycert.pem b/ext2/future/backports/test/keycert.pem similarity index 100% rename from ext/future/backports/test/keycert.pem rename to ext2/future/backports/test/keycert.pem diff --git a/ext/future/backports/test/keycert2.pem b/ext2/future/backports/test/keycert2.pem similarity index 100% rename from ext/future/backports/test/keycert2.pem rename to ext2/future/backports/test/keycert2.pem diff --git a/ext/future/backports/test/nokia.pem b/ext2/future/backports/test/nokia.pem similarity index 100% rename from ext/future/backports/test/nokia.pem rename to ext2/future/backports/test/nokia.pem diff --git a/ext/future/backports/test/nullbytecert.pem b/ext2/future/backports/test/nullbytecert.pem similarity index 100% rename from ext/future/backports/test/nullbytecert.pem rename to ext2/future/backports/test/nullbytecert.pem diff --git a/ext/future/backports/test/nullcert.pem b/ext2/future/backports/test/nullcert.pem similarity index 100% rename from ext/future/backports/test/nullcert.pem rename to ext2/future/backports/test/nullcert.pem diff --git a/ext/future/backports/test/pystone.py b/ext2/future/backports/test/pystone.py similarity index 100% rename from ext/future/backports/test/pystone.py rename to ext2/future/backports/test/pystone.py diff --git a/ext/future/backports/test/sha256.pem b/ext2/future/backports/test/sha256.pem similarity index 100% rename from 
ext/future/backports/test/sha256.pem rename to ext2/future/backports/test/sha256.pem diff --git a/ext/future/backports/test/ssl_cert.pem b/ext2/future/backports/test/ssl_cert.pem similarity index 100% rename from ext/future/backports/test/ssl_cert.pem rename to ext2/future/backports/test/ssl_cert.pem diff --git a/ext/future/backports/test/ssl_key.passwd.pem b/ext2/future/backports/test/ssl_key.passwd.pem similarity index 100% rename from ext/future/backports/test/ssl_key.passwd.pem rename to ext2/future/backports/test/ssl_key.passwd.pem diff --git a/ext/future/backports/test/ssl_key.pem b/ext2/future/backports/test/ssl_key.pem similarity index 100% rename from ext/future/backports/test/ssl_key.pem rename to ext2/future/backports/test/ssl_key.pem diff --git a/ext/future/backports/test/ssl_servers.py b/ext2/future/backports/test/ssl_servers.py similarity index 100% rename from ext/future/backports/test/ssl_servers.py rename to ext2/future/backports/test/ssl_servers.py diff --git a/ext/future/backports/test/support.py b/ext2/future/backports/test/support.py similarity index 100% rename from ext/future/backports/test/support.py rename to ext2/future/backports/test/support.py diff --git a/ext/future/backports/total_ordering.py b/ext2/future/backports/total_ordering.py similarity index 100% rename from ext/future/backports/total_ordering.py rename to ext2/future/backports/total_ordering.py diff --git a/ext/future/backports/urllib/__init__.py b/ext2/future/backports/urllib/__init__.py similarity index 100% rename from ext/future/backports/urllib/__init__.py rename to ext2/future/backports/urllib/__init__.py diff --git a/ext/future/backports/urllib/error.py b/ext2/future/backports/urllib/error.py similarity index 100% rename from ext/future/backports/urllib/error.py rename to ext2/future/backports/urllib/error.py diff --git a/ext/future/backports/urllib/parse.py b/ext2/future/backports/urllib/parse.py similarity index 100% rename from ext/future/backports/urllib/parse.py 
rename to ext2/future/backports/urllib/parse.py diff --git a/ext/future/backports/urllib/request.py b/ext2/future/backports/urllib/request.py similarity index 100% rename from ext/future/backports/urllib/request.py rename to ext2/future/backports/urllib/request.py diff --git a/ext/future/backports/urllib/response.py b/ext2/future/backports/urllib/response.py similarity index 100% rename from ext/future/backports/urllib/response.py rename to ext2/future/backports/urllib/response.py diff --git a/ext/future/backports/urllib/robotparser.py b/ext2/future/backports/urllib/robotparser.py similarity index 100% rename from ext/future/backports/urllib/robotparser.py rename to ext2/future/backports/urllib/robotparser.py diff --git a/ext/future/backports/xmlrpc/__init__.py b/ext2/future/backports/xmlrpc/__init__.py similarity index 100% rename from ext/future/backports/xmlrpc/__init__.py rename to ext2/future/backports/xmlrpc/__init__.py diff --git a/ext/future/backports/xmlrpc/client.py b/ext2/future/backports/xmlrpc/client.py similarity index 100% rename from ext/future/backports/xmlrpc/client.py rename to ext2/future/backports/xmlrpc/client.py diff --git a/ext/future/backports/xmlrpc/server.py b/ext2/future/backports/xmlrpc/server.py similarity index 100% rename from ext/future/backports/xmlrpc/server.py rename to ext2/future/backports/xmlrpc/server.py diff --git a/ext/future/builtins/__init__.py b/ext2/future/builtins/__init__.py similarity index 100% rename from ext/future/builtins/__init__.py rename to ext2/future/builtins/__init__.py diff --git a/ext/future/builtins/disabled.py b/ext2/future/builtins/disabled.py similarity index 100% rename from ext/future/builtins/disabled.py rename to ext2/future/builtins/disabled.py diff --git a/ext/future/builtins/iterators.py b/ext2/future/builtins/iterators.py similarity index 100% rename from ext/future/builtins/iterators.py rename to ext2/future/builtins/iterators.py diff --git a/ext/future/builtins/misc.py 
b/ext2/future/builtins/misc.py similarity index 100% rename from ext/future/builtins/misc.py rename to ext2/future/builtins/misc.py diff --git a/ext/future/builtins/newnext.py b/ext2/future/builtins/newnext.py similarity index 100% rename from ext/future/builtins/newnext.py rename to ext2/future/builtins/newnext.py diff --git a/ext/future/builtins/newround.py b/ext2/future/builtins/newround.py similarity index 100% rename from ext/future/builtins/newround.py rename to ext2/future/builtins/newround.py diff --git a/ext/future/builtins/newsuper.py b/ext2/future/builtins/newsuper.py similarity index 100% rename from ext/future/builtins/newsuper.py rename to ext2/future/builtins/newsuper.py diff --git a/ext/future/moves/__init__.py b/ext2/future/moves/__init__.py similarity index 100% rename from ext/future/moves/__init__.py rename to ext2/future/moves/__init__.py diff --git a/ext/future/moves/_dummy_thread.py b/ext2/future/moves/_dummy_thread.py similarity index 100% rename from ext/future/moves/_dummy_thread.py rename to ext2/future/moves/_dummy_thread.py diff --git a/ext/future/moves/_markupbase.py b/ext2/future/moves/_markupbase.py similarity index 100% rename from ext/future/moves/_markupbase.py rename to ext2/future/moves/_markupbase.py diff --git a/ext/future/moves/_thread.py b/ext2/future/moves/_thread.py similarity index 100% rename from ext/future/moves/_thread.py rename to ext2/future/moves/_thread.py diff --git a/ext/future/moves/builtins.py b/ext2/future/moves/builtins.py similarity index 100% rename from ext/future/moves/builtins.py rename to ext2/future/moves/builtins.py diff --git a/ext/future/moves/collections.py b/ext2/future/moves/collections.py similarity index 100% rename from ext/future/moves/collections.py rename to ext2/future/moves/collections.py diff --git a/ext/future/moves/configparser.py b/ext2/future/moves/configparser.py similarity index 100% rename from ext/future/moves/configparser.py rename to ext2/future/moves/configparser.py diff 
--git a/ext/future/moves/copyreg.py b/ext2/future/moves/copyreg.py similarity index 100% rename from ext/future/moves/copyreg.py rename to ext2/future/moves/copyreg.py diff --git a/ext/future/moves/dbm/__init__.py b/ext2/future/moves/dbm/__init__.py similarity index 100% rename from ext/future/moves/dbm/__init__.py rename to ext2/future/moves/dbm/__init__.py diff --git a/ext/future/moves/dbm/dumb.py b/ext2/future/moves/dbm/dumb.py similarity index 100% rename from ext/future/moves/dbm/dumb.py rename to ext2/future/moves/dbm/dumb.py diff --git a/ext/future/moves/dbm/gnu.py b/ext2/future/moves/dbm/gnu.py similarity index 100% rename from ext/future/moves/dbm/gnu.py rename to ext2/future/moves/dbm/gnu.py diff --git a/ext/future/moves/dbm/ndbm.py b/ext2/future/moves/dbm/ndbm.py similarity index 100% rename from ext/future/moves/dbm/ndbm.py rename to ext2/future/moves/dbm/ndbm.py diff --git a/ext/future/moves/html/__init__.py b/ext2/future/moves/html/__init__.py similarity index 100% rename from ext/future/moves/html/__init__.py rename to ext2/future/moves/html/__init__.py diff --git a/ext/future/moves/html/entities.py b/ext2/future/moves/html/entities.py similarity index 100% rename from ext/future/moves/html/entities.py rename to ext2/future/moves/html/entities.py diff --git a/ext/future/moves/html/parser.py b/ext2/future/moves/html/parser.py similarity index 100% rename from ext/future/moves/html/parser.py rename to ext2/future/moves/html/parser.py diff --git a/ext/future/moves/http/__init__.py b/ext2/future/moves/http/__init__.py similarity index 100% rename from ext/future/moves/http/__init__.py rename to ext2/future/moves/http/__init__.py diff --git a/ext/future/moves/http/client.py b/ext2/future/moves/http/client.py similarity index 100% rename from ext/future/moves/http/client.py rename to ext2/future/moves/http/client.py diff --git a/ext/future/moves/http/cookiejar.py b/ext2/future/moves/http/cookiejar.py similarity index 100% rename from 
ext/future/moves/http/cookiejar.py rename to ext2/future/moves/http/cookiejar.py diff --git a/ext/future/moves/http/cookies.py b/ext2/future/moves/http/cookies.py similarity index 100% rename from ext/future/moves/http/cookies.py rename to ext2/future/moves/http/cookies.py diff --git a/ext/future/moves/http/server.py b/ext2/future/moves/http/server.py similarity index 100% rename from ext/future/moves/http/server.py rename to ext2/future/moves/http/server.py diff --git a/ext/future/moves/itertools.py b/ext2/future/moves/itertools.py similarity index 100% rename from ext/future/moves/itertools.py rename to ext2/future/moves/itertools.py diff --git a/ext/future/moves/pickle.py b/ext2/future/moves/pickle.py similarity index 100% rename from ext/future/moves/pickle.py rename to ext2/future/moves/pickle.py diff --git a/ext/future/moves/queue.py b/ext2/future/moves/queue.py similarity index 100% rename from ext/future/moves/queue.py rename to ext2/future/moves/queue.py diff --git a/ext/future/moves/reprlib.py b/ext2/future/moves/reprlib.py similarity index 100% rename from ext/future/moves/reprlib.py rename to ext2/future/moves/reprlib.py diff --git a/ext/future/moves/socketserver.py b/ext2/future/moves/socketserver.py similarity index 100% rename from ext/future/moves/socketserver.py rename to ext2/future/moves/socketserver.py diff --git a/ext/future/moves/subprocess.py b/ext2/future/moves/subprocess.py similarity index 100% rename from ext/future/moves/subprocess.py rename to ext2/future/moves/subprocess.py diff --git a/ext/future/moves/sys.py b/ext2/future/moves/sys.py similarity index 100% rename from ext/future/moves/sys.py rename to ext2/future/moves/sys.py diff --git a/ext/future/moves/test/__init__.py b/ext2/future/moves/test/__init__.py similarity index 100% rename from ext/future/moves/test/__init__.py rename to ext2/future/moves/test/__init__.py diff --git a/ext/future/moves/test/support.py b/ext2/future/moves/test/support.py similarity index 100% rename from 
ext/future/moves/test/support.py rename to ext2/future/moves/test/support.py diff --git a/ext/future/moves/tkinter/__init__.py b/ext2/future/moves/tkinter/__init__.py similarity index 100% rename from ext/future/moves/tkinter/__init__.py rename to ext2/future/moves/tkinter/__init__.py diff --git a/ext/future/moves/tkinter/colorchooser.py b/ext2/future/moves/tkinter/colorchooser.py similarity index 100% rename from ext/future/moves/tkinter/colorchooser.py rename to ext2/future/moves/tkinter/colorchooser.py diff --git a/ext/future/moves/tkinter/commondialog.py b/ext2/future/moves/tkinter/commondialog.py similarity index 100% rename from ext/future/moves/tkinter/commondialog.py rename to ext2/future/moves/tkinter/commondialog.py diff --git a/ext/future/moves/tkinter/constants.py b/ext2/future/moves/tkinter/constants.py similarity index 100% rename from ext/future/moves/tkinter/constants.py rename to ext2/future/moves/tkinter/constants.py diff --git a/ext/future/moves/tkinter/dialog.py b/ext2/future/moves/tkinter/dialog.py similarity index 100% rename from ext/future/moves/tkinter/dialog.py rename to ext2/future/moves/tkinter/dialog.py diff --git a/ext/future/moves/tkinter/dnd.py b/ext2/future/moves/tkinter/dnd.py similarity index 100% rename from ext/future/moves/tkinter/dnd.py rename to ext2/future/moves/tkinter/dnd.py diff --git a/ext/future/moves/tkinter/filedialog.py b/ext2/future/moves/tkinter/filedialog.py similarity index 100% rename from ext/future/moves/tkinter/filedialog.py rename to ext2/future/moves/tkinter/filedialog.py diff --git a/ext/future/moves/tkinter/font.py b/ext2/future/moves/tkinter/font.py similarity index 100% rename from ext/future/moves/tkinter/font.py rename to ext2/future/moves/tkinter/font.py diff --git a/ext/future/moves/tkinter/messagebox.py b/ext2/future/moves/tkinter/messagebox.py similarity index 100% rename from ext/future/moves/tkinter/messagebox.py rename to ext2/future/moves/tkinter/messagebox.py diff --git 
a/ext/future/moves/tkinter/scrolledtext.py b/ext2/future/moves/tkinter/scrolledtext.py similarity index 100% rename from ext/future/moves/tkinter/scrolledtext.py rename to ext2/future/moves/tkinter/scrolledtext.py diff --git a/ext/future/moves/tkinter/simpledialog.py b/ext2/future/moves/tkinter/simpledialog.py similarity index 100% rename from ext/future/moves/tkinter/simpledialog.py rename to ext2/future/moves/tkinter/simpledialog.py diff --git a/ext/future/moves/tkinter/tix.py b/ext2/future/moves/tkinter/tix.py similarity index 100% rename from ext/future/moves/tkinter/tix.py rename to ext2/future/moves/tkinter/tix.py diff --git a/ext/future/moves/tkinter/ttk.py b/ext2/future/moves/tkinter/ttk.py similarity index 100% rename from ext/future/moves/tkinter/ttk.py rename to ext2/future/moves/tkinter/ttk.py diff --git a/ext/future/moves/urllib/__init__.py b/ext2/future/moves/urllib/__init__.py similarity index 100% rename from ext/future/moves/urllib/__init__.py rename to ext2/future/moves/urllib/__init__.py diff --git a/ext/future/moves/urllib/error.py b/ext2/future/moves/urllib/error.py similarity index 100% rename from ext/future/moves/urllib/error.py rename to ext2/future/moves/urllib/error.py diff --git a/ext/future/moves/urllib/parse.py b/ext2/future/moves/urllib/parse.py similarity index 100% rename from ext/future/moves/urllib/parse.py rename to ext2/future/moves/urllib/parse.py diff --git a/ext/future/moves/urllib/request.py b/ext2/future/moves/urllib/request.py similarity index 100% rename from ext/future/moves/urllib/request.py rename to ext2/future/moves/urllib/request.py diff --git a/ext/future/moves/urllib/response.py b/ext2/future/moves/urllib/response.py similarity index 100% rename from ext/future/moves/urllib/response.py rename to ext2/future/moves/urllib/response.py diff --git a/ext/future/moves/urllib/robotparser.py b/ext2/future/moves/urllib/robotparser.py similarity index 100% rename from ext/future/moves/urllib/robotparser.py rename to 
ext2/future/moves/urllib/robotparser.py diff --git a/ext/future/moves/winreg.py b/ext2/future/moves/winreg.py similarity index 100% rename from ext/future/moves/winreg.py rename to ext2/future/moves/winreg.py diff --git a/ext/future/moves/xmlrpc/__init__.py b/ext2/future/moves/xmlrpc/__init__.py similarity index 100% rename from ext/future/moves/xmlrpc/__init__.py rename to ext2/future/moves/xmlrpc/__init__.py diff --git a/ext/future/moves/xmlrpc/client.py b/ext2/future/moves/xmlrpc/client.py similarity index 100% rename from ext/future/moves/xmlrpc/client.py rename to ext2/future/moves/xmlrpc/client.py diff --git a/ext/future/moves/xmlrpc/server.py b/ext2/future/moves/xmlrpc/server.py similarity index 100% rename from ext/future/moves/xmlrpc/server.py rename to ext2/future/moves/xmlrpc/server.py diff --git a/ext/future/standard_library/__init__.py b/ext2/future/standard_library/__init__.py similarity index 100% rename from ext/future/standard_library/__init__.py rename to ext2/future/standard_library/__init__.py diff --git a/ext/future/tests/__init__.py b/ext2/future/tests/__init__.py similarity index 100% rename from ext/future/tests/__init__.py rename to ext2/future/tests/__init__.py diff --git a/ext/future/tests/base.py b/ext2/future/tests/base.py similarity index 100% rename from ext/future/tests/base.py rename to ext2/future/tests/base.py diff --git a/ext/future/types/__init__.py b/ext2/future/types/__init__.py similarity index 100% rename from ext/future/types/__init__.py rename to ext2/future/types/__init__.py diff --git a/ext/future/types/newbytes.py b/ext2/future/types/newbytes.py similarity index 100% rename from ext/future/types/newbytes.py rename to ext2/future/types/newbytes.py diff --git a/ext/future/types/newdict.py b/ext2/future/types/newdict.py similarity index 100% rename from ext/future/types/newdict.py rename to ext2/future/types/newdict.py diff --git a/ext/future/types/newint.py b/ext2/future/types/newint.py similarity index 100% rename from 
ext/future/types/newint.py rename to ext2/future/types/newint.py diff --git a/ext/future/types/newlist.py b/ext2/future/types/newlist.py similarity index 100% rename from ext/future/types/newlist.py rename to ext2/future/types/newlist.py diff --git a/ext/future/types/newmemoryview.py b/ext2/future/types/newmemoryview.py similarity index 100% rename from ext/future/types/newmemoryview.py rename to ext2/future/types/newmemoryview.py diff --git a/ext/future/types/newobject.py b/ext2/future/types/newobject.py similarity index 100% rename from ext/future/types/newobject.py rename to ext2/future/types/newobject.py diff --git a/ext/future/types/newopen.py b/ext2/future/types/newopen.py similarity index 100% rename from ext/future/types/newopen.py rename to ext2/future/types/newopen.py diff --git a/ext/future/types/newrange.py b/ext2/future/types/newrange.py similarity index 100% rename from ext/future/types/newrange.py rename to ext2/future/types/newrange.py diff --git a/ext/future/types/newstr.py b/ext2/future/types/newstr.py similarity index 100% rename from ext/future/types/newstr.py rename to ext2/future/types/newstr.py diff --git a/ext/future/utils/__init__.py b/ext2/future/utils/__init__.py similarity index 100% rename from ext/future/utils/__init__.py rename to ext2/future/utils/__init__.py diff --git a/ext/future/utils/surrogateescape.py b/ext2/future/utils/surrogateescape.py similarity index 100% rename from ext/future/utils/surrogateescape.py rename to ext2/future/utils/surrogateescape.py diff --git a/ext/html/__init__.py b/ext2/html/__init__.py similarity index 100% rename from ext/html/__init__.py rename to ext2/html/__init__.py diff --git a/ext/html/entities.py b/ext2/html/entities.py similarity index 100% rename from ext/html/entities.py rename to ext2/html/entities.py diff --git a/ext/html/parser.py b/ext2/html/parser.py similarity index 100% rename from ext/html/parser.py rename to ext2/html/parser.py diff --git a/ext/http/__init__.py 
b/ext2/http/__init__.py similarity index 100% rename from ext/http/__init__.py rename to ext2/http/__init__.py diff --git a/ext/http/client.py b/ext2/http/client.py similarity index 100% rename from ext/http/client.py rename to ext2/http/client.py diff --git a/ext/http/cookiejar.py b/ext2/http/cookiejar.py similarity index 100% rename from ext/http/cookiejar.py rename to ext2/http/cookiejar.py diff --git a/ext/http/cookies.py b/ext2/http/cookies.py similarity index 100% rename from ext/http/cookies.py rename to ext2/http/cookies.py diff --git a/ext/http/server.py b/ext2/http/server.py similarity index 100% rename from ext/http/server.py rename to ext2/http/server.py diff --git a/ext/libfuturize/__init__.py b/ext2/libfuturize/__init__.py similarity index 100% rename from ext/libfuturize/__init__.py rename to ext2/libfuturize/__init__.py diff --git a/ext/libfuturize/fixer_util.py b/ext2/libfuturize/fixer_util.py similarity index 100% rename from ext/libfuturize/fixer_util.py rename to ext2/libfuturize/fixer_util.py diff --git a/ext/libfuturize/fixes/__init__.py b/ext2/libfuturize/fixes/__init__.py similarity index 100% rename from ext/libfuturize/fixes/__init__.py rename to ext2/libfuturize/fixes/__init__.py diff --git a/ext/libfuturize/fixes/fix_UserDict.py b/ext2/libfuturize/fixes/fix_UserDict.py similarity index 100% rename from ext/libfuturize/fixes/fix_UserDict.py rename to ext2/libfuturize/fixes/fix_UserDict.py diff --git a/ext/libfuturize/fixes/fix_absolute_import.py b/ext2/libfuturize/fixes/fix_absolute_import.py similarity index 100% rename from ext/libfuturize/fixes/fix_absolute_import.py rename to ext2/libfuturize/fixes/fix_absolute_import.py diff --git a/ext/libfuturize/fixes/fix_add__future__imports_except_unicode_literals.py b/ext2/libfuturize/fixes/fix_add__future__imports_except_unicode_literals.py similarity index 100% rename from ext/libfuturize/fixes/fix_add__future__imports_except_unicode_literals.py rename to 
ext2/libfuturize/fixes/fix_add__future__imports_except_unicode_literals.py diff --git a/ext/libfuturize/fixes/fix_basestring.py b/ext2/libfuturize/fixes/fix_basestring.py similarity index 100% rename from ext/libfuturize/fixes/fix_basestring.py rename to ext2/libfuturize/fixes/fix_basestring.py diff --git a/ext/libfuturize/fixes/fix_bytes.py b/ext2/libfuturize/fixes/fix_bytes.py similarity index 100% rename from ext/libfuturize/fixes/fix_bytes.py rename to ext2/libfuturize/fixes/fix_bytes.py diff --git a/ext/libfuturize/fixes/fix_cmp.py b/ext2/libfuturize/fixes/fix_cmp.py similarity index 100% rename from ext/libfuturize/fixes/fix_cmp.py rename to ext2/libfuturize/fixes/fix_cmp.py diff --git a/ext/libfuturize/fixes/fix_division.py b/ext2/libfuturize/fixes/fix_division.py similarity index 100% rename from ext/libfuturize/fixes/fix_division.py rename to ext2/libfuturize/fixes/fix_division.py diff --git a/ext/libfuturize/fixes/fix_division_safe.py b/ext2/libfuturize/fixes/fix_division_safe.py similarity index 100% rename from ext/libfuturize/fixes/fix_division_safe.py rename to ext2/libfuturize/fixes/fix_division_safe.py diff --git a/ext/libfuturize/fixes/fix_execfile.py b/ext2/libfuturize/fixes/fix_execfile.py similarity index 100% rename from ext/libfuturize/fixes/fix_execfile.py rename to ext2/libfuturize/fixes/fix_execfile.py diff --git a/ext/libfuturize/fixes/fix_future_builtins.py b/ext2/libfuturize/fixes/fix_future_builtins.py similarity index 100% rename from ext/libfuturize/fixes/fix_future_builtins.py rename to ext2/libfuturize/fixes/fix_future_builtins.py diff --git a/ext/libfuturize/fixes/fix_future_standard_library.py b/ext2/libfuturize/fixes/fix_future_standard_library.py similarity index 100% rename from ext/libfuturize/fixes/fix_future_standard_library.py rename to ext2/libfuturize/fixes/fix_future_standard_library.py diff --git a/ext/libfuturize/fixes/fix_future_standard_library_urllib.py b/ext2/libfuturize/fixes/fix_future_standard_library_urllib.py 
similarity index 100% rename from ext/libfuturize/fixes/fix_future_standard_library_urllib.py rename to ext2/libfuturize/fixes/fix_future_standard_library_urllib.py diff --git a/ext/libfuturize/fixes/fix_metaclass.py b/ext2/libfuturize/fixes/fix_metaclass.py similarity index 100% rename from ext/libfuturize/fixes/fix_metaclass.py rename to ext2/libfuturize/fixes/fix_metaclass.py diff --git a/ext/libfuturize/fixes/fix_next_call.py b/ext2/libfuturize/fixes/fix_next_call.py similarity index 100% rename from ext/libfuturize/fixes/fix_next_call.py rename to ext2/libfuturize/fixes/fix_next_call.py diff --git a/ext/libfuturize/fixes/fix_object.py b/ext2/libfuturize/fixes/fix_object.py similarity index 100% rename from ext/libfuturize/fixes/fix_object.py rename to ext2/libfuturize/fixes/fix_object.py diff --git a/ext/libfuturize/fixes/fix_oldstr_wrap.py b/ext2/libfuturize/fixes/fix_oldstr_wrap.py similarity index 100% rename from ext/libfuturize/fixes/fix_oldstr_wrap.py rename to ext2/libfuturize/fixes/fix_oldstr_wrap.py diff --git a/ext/libfuturize/fixes/fix_order___future__imports.py b/ext2/libfuturize/fixes/fix_order___future__imports.py similarity index 100% rename from ext/libfuturize/fixes/fix_order___future__imports.py rename to ext2/libfuturize/fixes/fix_order___future__imports.py diff --git a/ext/libfuturize/fixes/fix_print.py b/ext2/libfuturize/fixes/fix_print.py similarity index 100% rename from ext/libfuturize/fixes/fix_print.py rename to ext2/libfuturize/fixes/fix_print.py diff --git a/ext/libfuturize/fixes/fix_print_with_import.py b/ext2/libfuturize/fixes/fix_print_with_import.py similarity index 100% rename from ext/libfuturize/fixes/fix_print_with_import.py rename to ext2/libfuturize/fixes/fix_print_with_import.py diff --git a/ext/libfuturize/fixes/fix_raise.py b/ext2/libfuturize/fixes/fix_raise.py similarity index 100% rename from ext/libfuturize/fixes/fix_raise.py rename to ext2/libfuturize/fixes/fix_raise.py diff --git 
a/ext/libfuturize/fixes/fix_remove_old__future__imports.py b/ext2/libfuturize/fixes/fix_remove_old__future__imports.py similarity index 100% rename from ext/libfuturize/fixes/fix_remove_old__future__imports.py rename to ext2/libfuturize/fixes/fix_remove_old__future__imports.py diff --git a/ext/libfuturize/fixes/fix_unicode_keep_u.py b/ext2/libfuturize/fixes/fix_unicode_keep_u.py similarity index 100% rename from ext/libfuturize/fixes/fix_unicode_keep_u.py rename to ext2/libfuturize/fixes/fix_unicode_keep_u.py diff --git a/ext/libfuturize/fixes/fix_unicode_literals_import.py b/ext2/libfuturize/fixes/fix_unicode_literals_import.py similarity index 100% rename from ext/libfuturize/fixes/fix_unicode_literals_import.py rename to ext2/libfuturize/fixes/fix_unicode_literals_import.py diff --git a/ext/libfuturize/fixes/fix_xrange_with_import.py b/ext2/libfuturize/fixes/fix_xrange_with_import.py similarity index 100% rename from ext/libfuturize/fixes/fix_xrange_with_import.py rename to ext2/libfuturize/fixes/fix_xrange_with_import.py diff --git a/ext/libfuturize/main.py b/ext2/libfuturize/main.py similarity index 100% rename from ext/libfuturize/main.py rename to ext2/libfuturize/main.py diff --git a/ext/libpasteurize/__init__.py b/ext2/libpasteurize/__init__.py similarity index 100% rename from ext/libpasteurize/__init__.py rename to ext2/libpasteurize/__init__.py diff --git a/ext/libpasteurize/fixes/__init__.py b/ext2/libpasteurize/fixes/__init__.py similarity index 100% rename from ext/libpasteurize/fixes/__init__.py rename to ext2/libpasteurize/fixes/__init__.py diff --git a/ext/libpasteurize/fixes/feature_base.py b/ext2/libpasteurize/fixes/feature_base.py similarity index 100% rename from ext/libpasteurize/fixes/feature_base.py rename to ext2/libpasteurize/fixes/feature_base.py diff --git a/ext/libpasteurize/fixes/fix_add_all__future__imports.py b/ext2/libpasteurize/fixes/fix_add_all__future__imports.py similarity index 100% rename from 
ext/libpasteurize/fixes/fix_add_all__future__imports.py rename to ext2/libpasteurize/fixes/fix_add_all__future__imports.py diff --git a/ext/libpasteurize/fixes/fix_add_all_future_builtins.py b/ext2/libpasteurize/fixes/fix_add_all_future_builtins.py similarity index 100% rename from ext/libpasteurize/fixes/fix_add_all_future_builtins.py rename to ext2/libpasteurize/fixes/fix_add_all_future_builtins.py diff --git a/ext/libpasteurize/fixes/fix_add_future_standard_library_import.py b/ext2/libpasteurize/fixes/fix_add_future_standard_library_import.py similarity index 100% rename from ext/libpasteurize/fixes/fix_add_future_standard_library_import.py rename to ext2/libpasteurize/fixes/fix_add_future_standard_library_import.py diff --git a/ext/libpasteurize/fixes/fix_annotations.py b/ext2/libpasteurize/fixes/fix_annotations.py similarity index 100% rename from ext/libpasteurize/fixes/fix_annotations.py rename to ext2/libpasteurize/fixes/fix_annotations.py diff --git a/ext/libpasteurize/fixes/fix_division.py b/ext2/libpasteurize/fixes/fix_division.py similarity index 100% rename from ext/libpasteurize/fixes/fix_division.py rename to ext2/libpasteurize/fixes/fix_division.py diff --git a/ext/libpasteurize/fixes/fix_features.py b/ext2/libpasteurize/fixes/fix_features.py similarity index 100% rename from ext/libpasteurize/fixes/fix_features.py rename to ext2/libpasteurize/fixes/fix_features.py diff --git a/ext/libpasteurize/fixes/fix_fullargspec.py b/ext2/libpasteurize/fixes/fix_fullargspec.py similarity index 100% rename from ext/libpasteurize/fixes/fix_fullargspec.py rename to ext2/libpasteurize/fixes/fix_fullargspec.py diff --git a/ext/libpasteurize/fixes/fix_future_builtins.py b/ext2/libpasteurize/fixes/fix_future_builtins.py similarity index 100% rename from ext/libpasteurize/fixes/fix_future_builtins.py rename to ext2/libpasteurize/fixes/fix_future_builtins.py diff --git a/ext/libpasteurize/fixes/fix_getcwd.py b/ext2/libpasteurize/fixes/fix_getcwd.py similarity index 100% 
rename from ext/libpasteurize/fixes/fix_getcwd.py rename to ext2/libpasteurize/fixes/fix_getcwd.py diff --git a/ext/libpasteurize/fixes/fix_imports.py b/ext2/libpasteurize/fixes/fix_imports.py similarity index 100% rename from ext/libpasteurize/fixes/fix_imports.py rename to ext2/libpasteurize/fixes/fix_imports.py diff --git a/ext/libpasteurize/fixes/fix_imports2.py b/ext2/libpasteurize/fixes/fix_imports2.py similarity index 100% rename from ext/libpasteurize/fixes/fix_imports2.py rename to ext2/libpasteurize/fixes/fix_imports2.py diff --git a/ext/libpasteurize/fixes/fix_kwargs.py b/ext2/libpasteurize/fixes/fix_kwargs.py similarity index 100% rename from ext/libpasteurize/fixes/fix_kwargs.py rename to ext2/libpasteurize/fixes/fix_kwargs.py diff --git a/ext/libpasteurize/fixes/fix_memoryview.py b/ext2/libpasteurize/fixes/fix_memoryview.py similarity index 100% rename from ext/libpasteurize/fixes/fix_memoryview.py rename to ext2/libpasteurize/fixes/fix_memoryview.py diff --git a/ext/libpasteurize/fixes/fix_metaclass.py b/ext2/libpasteurize/fixes/fix_metaclass.py similarity index 100% rename from ext/libpasteurize/fixes/fix_metaclass.py rename to ext2/libpasteurize/fixes/fix_metaclass.py diff --git a/ext/libpasteurize/fixes/fix_newstyle.py b/ext2/libpasteurize/fixes/fix_newstyle.py similarity index 100% rename from ext/libpasteurize/fixes/fix_newstyle.py rename to ext2/libpasteurize/fixes/fix_newstyle.py diff --git a/ext/libpasteurize/fixes/fix_next.py b/ext2/libpasteurize/fixes/fix_next.py similarity index 100% rename from ext/libpasteurize/fixes/fix_next.py rename to ext2/libpasteurize/fixes/fix_next.py diff --git a/ext/libpasteurize/fixes/fix_printfunction.py b/ext2/libpasteurize/fixes/fix_printfunction.py similarity index 100% rename from ext/libpasteurize/fixes/fix_printfunction.py rename to ext2/libpasteurize/fixes/fix_printfunction.py diff --git a/ext/libpasteurize/fixes/fix_raise.py b/ext2/libpasteurize/fixes/fix_raise.py similarity index 100% rename from 
ext/libpasteurize/fixes/fix_raise.py rename to ext2/libpasteurize/fixes/fix_raise.py diff --git a/ext/libpasteurize/fixes/fix_raise_.py b/ext2/libpasteurize/fixes/fix_raise_.py similarity index 100% rename from ext/libpasteurize/fixes/fix_raise_.py rename to ext2/libpasteurize/fixes/fix_raise_.py diff --git a/ext/libpasteurize/fixes/fix_throw.py b/ext2/libpasteurize/fixes/fix_throw.py similarity index 100% rename from ext/libpasteurize/fixes/fix_throw.py rename to ext2/libpasteurize/fixes/fix_throw.py diff --git a/ext/libpasteurize/fixes/fix_unpacking.py b/ext2/libpasteurize/fixes/fix_unpacking.py similarity index 100% rename from ext/libpasteurize/fixes/fix_unpacking.py rename to ext2/libpasteurize/fixes/fix_unpacking.py diff --git a/ext/libpasteurize/main.py b/ext2/libpasteurize/main.py similarity index 100% rename from ext/libpasteurize/main.py rename to ext2/libpasteurize/main.py diff --git a/ext/past/__init__.py b/ext2/past/__init__.py similarity index 100% rename from ext/past/__init__.py rename to ext2/past/__init__.py diff --git a/ext/past/builtins/__init__.py b/ext2/past/builtins/__init__.py similarity index 100% rename from ext/past/builtins/__init__.py rename to ext2/past/builtins/__init__.py diff --git a/ext/past/builtins/misc.py b/ext2/past/builtins/misc.py similarity index 100% rename from ext/past/builtins/misc.py rename to ext2/past/builtins/misc.py diff --git a/ext/past/builtins/noniterators.py b/ext2/past/builtins/noniterators.py similarity index 100% rename from ext/past/builtins/noniterators.py rename to ext2/past/builtins/noniterators.py diff --git a/ext/past/tests/__init__.py b/ext2/past/tests/__init__.py similarity index 100% rename from ext/past/tests/__init__.py rename to ext2/past/tests/__init__.py diff --git a/ext/past/translation/__init__.py b/ext2/past/translation/__init__.py similarity index 100% rename from ext/past/translation/__init__.py rename to ext2/past/translation/__init__.py diff --git a/ext/past/types/__init__.py 
b/ext2/past/types/__init__.py similarity index 100% rename from ext/past/types/__init__.py rename to ext2/past/types/__init__.py diff --git a/ext/past/types/basestring.py b/ext2/past/types/basestring.py similarity index 100% rename from ext/past/types/basestring.py rename to ext2/past/types/basestring.py diff --git a/ext/past/types/olddict.py b/ext2/past/types/olddict.py similarity index 100% rename from ext/past/types/olddict.py rename to ext2/past/types/olddict.py diff --git a/ext/past/types/oldstr.py b/ext2/past/types/oldstr.py similarity index 100% rename from ext/past/types/oldstr.py rename to ext2/past/types/oldstr.py diff --git a/ext/past/utils/__init__.py b/ext2/past/utils/__init__.py similarity index 100% rename from ext/past/utils/__init__.py rename to ext2/past/utils/__init__.py diff --git a/ext/queue/__init__.py b/ext2/queue/__init__.py similarity index 100% rename from ext/queue/__init__.py rename to ext2/queue/__init__.py diff --git a/ext/reprlib/__init__.py b/ext2/reprlib/__init__.py similarity index 100% rename from ext/reprlib/__init__.py rename to ext2/reprlib/__init__.py diff --git a/ext/socketserver/__init__.py b/ext2/socketserver/__init__.py similarity index 100% rename from ext/socketserver/__init__.py rename to ext2/socketserver/__init__.py diff --git a/ext/tkinter/__init__.py b/ext2/tkinter/__init__.py similarity index 100% rename from ext/tkinter/__init__.py rename to ext2/tkinter/__init__.py diff --git a/ext/tkinter/colorchooser.py b/ext2/tkinter/colorchooser.py similarity index 100% rename from ext/tkinter/colorchooser.py rename to ext2/tkinter/colorchooser.py diff --git a/ext/tkinter/commondialog.py b/ext2/tkinter/commondialog.py similarity index 100% rename from ext/tkinter/commondialog.py rename to ext2/tkinter/commondialog.py diff --git a/ext/tkinter/constants.py b/ext2/tkinter/constants.py similarity index 100% rename from ext/tkinter/constants.py rename to ext2/tkinter/constants.py diff --git a/ext/tkinter/dialog.py 
b/ext2/tkinter/dialog.py similarity index 100% rename from ext/tkinter/dialog.py rename to ext2/tkinter/dialog.py diff --git a/ext/tkinter/dnd.py b/ext2/tkinter/dnd.py similarity index 100% rename from ext/tkinter/dnd.py rename to ext2/tkinter/dnd.py diff --git a/ext/tkinter/filedialog.py b/ext2/tkinter/filedialog.py similarity index 100% rename from ext/tkinter/filedialog.py rename to ext2/tkinter/filedialog.py diff --git a/ext/tkinter/font.py b/ext2/tkinter/font.py similarity index 100% rename from ext/tkinter/font.py rename to ext2/tkinter/font.py diff --git a/ext/tkinter/messagebox.py b/ext2/tkinter/messagebox.py similarity index 100% rename from ext/tkinter/messagebox.py rename to ext2/tkinter/messagebox.py diff --git a/ext/tkinter/scrolledtext.py b/ext2/tkinter/scrolledtext.py similarity index 100% rename from ext/tkinter/scrolledtext.py rename to ext2/tkinter/scrolledtext.py diff --git a/ext/tkinter/simpledialog.py b/ext2/tkinter/simpledialog.py similarity index 100% rename from ext/tkinter/simpledialog.py rename to ext2/tkinter/simpledialog.py diff --git a/ext/tkinter/tix.py b/ext2/tkinter/tix.py similarity index 100% rename from ext/tkinter/tix.py rename to ext2/tkinter/tix.py diff --git a/ext/tkinter/ttk.py b/ext2/tkinter/ttk.py similarity index 100% rename from ext/tkinter/ttk.py rename to ext2/tkinter/ttk.py diff --git a/ext/winreg/__init__.py b/ext2/winreg/__init__.py similarity index 100% rename from ext/winreg/__init__.py rename to ext2/winreg/__init__.py diff --git a/ext/xmlrpc/__init__.py b/ext2/xmlrpc/__init__.py similarity index 100% rename from ext/xmlrpc/__init__.py rename to ext2/xmlrpc/__init__.py diff --git a/ext/xmlrpc/client.py b/ext2/xmlrpc/client.py similarity index 100% rename from ext/xmlrpc/client.py rename to ext2/xmlrpc/client.py diff --git a/ext/xmlrpc/server.py b/ext2/xmlrpc/server.py similarity index 100% rename from ext/xmlrpc/server.py rename to ext2/xmlrpc/server.py diff --git a/medusa/server/api/v1/core.py 
b/medusa/server/api/v1/core.py index 233f587075..65375fa12b 100644 --- a/medusa/server/api/v1/core.py +++ b/medusa/server/api/v1/core.py @@ -25,12 +25,11 @@ import json import logging import os +import sys import time from collections import OrderedDict from datetime import date, datetime -from future import standard_library - from medusa import ( app, classes, db, helpers, image_cache, network_timezones, process_tv, sbdatetime, subtitles, ui, @@ -69,7 +68,9 @@ from tornado.web import RequestHandler -standard_library.install_aliases() +if sys.version_info[0] == 2: + from future import standard_library + standard_library.install_aliases() log = BraceAdapter(logging.getLogger(__name__)) log.logger.addHandler(logging.NullHandler()) From ed0389ff578051b46034d853b1bb0970e46c7692 Mon Sep 17 00:00:00 2001 From: Diego Date: Mon, 27 Aug 2018 18:17:52 +0200 Subject: [PATCH 3/8] [Python3] Add ext3/bs4 package --- {ext => ext2}/bs4/__init__.py | 0 {ext => ext2}/bs4/builder/__init__.py | 0 {ext => ext2}/bs4/builder/_html5lib.py | 0 {ext => ext2}/bs4/builder/_htmlparser.py | 0 {ext => ext2}/bs4/builder/_lxml.py | 0 {ext => ext2}/bs4/dammit.py | 0 {ext => ext2}/bs4/diagnose.py | 0 {ext => ext2}/bs4/element.py | 0 {ext => ext2}/bs4/testing.py | 0 {ext => ext2}/bs4/tests/__init__.py | 0 .../bs4/tests/test_builder_registry.py | 0 {ext => ext2}/bs4/tests/test_docs.py | 0 {ext => ext2}/bs4/tests/test_html5lib.py | 0 {ext => ext2}/bs4/tests/test_htmlparser.py | 0 {ext => ext2}/bs4/tests/test_lxml.py | 0 {ext => ext2}/bs4/tests/test_soup.py | 0 {ext => ext2}/bs4/tests/test_tree.py | 0 ext3/bs4/__init__.py | 584 +++++ ext3/bs4/builder/__init__.py | 339 +++ ext3/bs4/builder/_html5lib.py | 426 ++++ ext3/bs4/builder/_htmlparser.py | 347 +++ ext3/bs4/builder/_lxml.py | 262 +++ ext3/bs4/dammit.py | 842 +++++++ ext3/bs4/diagnose.py | 225 ++ ext3/bs4/element.py | 1885 +++++++++++++++ ext3/bs4/testing.py | 810 +++++++ ext3/bs4/tests/__init__.py | 1 + ext3/bs4/tests/test_builder_registry.py | 147 
++ ext3/bs4/tests/test_docs.py | 36 + ext3/bs4/tests/test_html5lib.py | 130 + ext3/bs4/tests/test_htmlparser.py | 49 + ext3/bs4/tests/test_lxml.py | 82 + ext3/bs4/tests/test_soup.py | 501 ++++ ext3/bs4/tests/test_tree.py | 2090 +++++++++++++++++ 34 files changed, 8756 insertions(+) rename {ext => ext2}/bs4/__init__.py (100%) rename {ext => ext2}/bs4/builder/__init__.py (100%) rename {ext => ext2}/bs4/builder/_html5lib.py (100%) rename {ext => ext2}/bs4/builder/_htmlparser.py (100%) rename {ext => ext2}/bs4/builder/_lxml.py (100%) rename {ext => ext2}/bs4/dammit.py (100%) rename {ext => ext2}/bs4/diagnose.py (100%) rename {ext => ext2}/bs4/element.py (100%) rename {ext => ext2}/bs4/testing.py (100%) rename {ext => ext2}/bs4/tests/__init__.py (100%) rename {ext => ext2}/bs4/tests/test_builder_registry.py (100%) rename {ext => ext2}/bs4/tests/test_docs.py (100%) rename {ext => ext2}/bs4/tests/test_html5lib.py (100%) rename {ext => ext2}/bs4/tests/test_htmlparser.py (100%) rename {ext => ext2}/bs4/tests/test_lxml.py (100%) rename {ext => ext2}/bs4/tests/test_soup.py (100%) rename {ext => ext2}/bs4/tests/test_tree.py (100%) create mode 100644 ext3/bs4/__init__.py create mode 100644 ext3/bs4/builder/__init__.py create mode 100644 ext3/bs4/builder/_html5lib.py create mode 100644 ext3/bs4/builder/_htmlparser.py create mode 100644 ext3/bs4/builder/_lxml.py create mode 100644 ext3/bs4/dammit.py create mode 100644 ext3/bs4/diagnose.py create mode 100644 ext3/bs4/element.py create mode 100644 ext3/bs4/testing.py create mode 100644 ext3/bs4/tests/__init__.py create mode 100644 ext3/bs4/tests/test_builder_registry.py create mode 100644 ext3/bs4/tests/test_docs.py create mode 100644 ext3/bs4/tests/test_html5lib.py create mode 100644 ext3/bs4/tests/test_htmlparser.py create mode 100644 ext3/bs4/tests/test_lxml.py create mode 100644 ext3/bs4/tests/test_soup.py create mode 100644 ext3/bs4/tests/test_tree.py diff --git a/ext/bs4/__init__.py b/ext2/bs4/__init__.py similarity index 
100% rename from ext/bs4/__init__.py rename to ext2/bs4/__init__.py diff --git a/ext/bs4/builder/__init__.py b/ext2/bs4/builder/__init__.py similarity index 100% rename from ext/bs4/builder/__init__.py rename to ext2/bs4/builder/__init__.py diff --git a/ext/bs4/builder/_html5lib.py b/ext2/bs4/builder/_html5lib.py similarity index 100% rename from ext/bs4/builder/_html5lib.py rename to ext2/bs4/builder/_html5lib.py diff --git a/ext/bs4/builder/_htmlparser.py b/ext2/bs4/builder/_htmlparser.py similarity index 100% rename from ext/bs4/builder/_htmlparser.py rename to ext2/bs4/builder/_htmlparser.py diff --git a/ext/bs4/builder/_lxml.py b/ext2/bs4/builder/_lxml.py similarity index 100% rename from ext/bs4/builder/_lxml.py rename to ext2/bs4/builder/_lxml.py diff --git a/ext/bs4/dammit.py b/ext2/bs4/dammit.py similarity index 100% rename from ext/bs4/dammit.py rename to ext2/bs4/dammit.py diff --git a/ext/bs4/diagnose.py b/ext2/bs4/diagnose.py similarity index 100% rename from ext/bs4/diagnose.py rename to ext2/bs4/diagnose.py diff --git a/ext/bs4/element.py b/ext2/bs4/element.py similarity index 100% rename from ext/bs4/element.py rename to ext2/bs4/element.py diff --git a/ext/bs4/testing.py b/ext2/bs4/testing.py similarity index 100% rename from ext/bs4/testing.py rename to ext2/bs4/testing.py diff --git a/ext/bs4/tests/__init__.py b/ext2/bs4/tests/__init__.py similarity index 100% rename from ext/bs4/tests/__init__.py rename to ext2/bs4/tests/__init__.py diff --git a/ext/bs4/tests/test_builder_registry.py b/ext2/bs4/tests/test_builder_registry.py similarity index 100% rename from ext/bs4/tests/test_builder_registry.py rename to ext2/bs4/tests/test_builder_registry.py diff --git a/ext/bs4/tests/test_docs.py b/ext2/bs4/tests/test_docs.py similarity index 100% rename from ext/bs4/tests/test_docs.py rename to ext2/bs4/tests/test_docs.py diff --git a/ext/bs4/tests/test_html5lib.py b/ext2/bs4/tests/test_html5lib.py similarity index 100% rename from 
ext/bs4/tests/test_html5lib.py rename to ext2/bs4/tests/test_html5lib.py diff --git a/ext/bs4/tests/test_htmlparser.py b/ext2/bs4/tests/test_htmlparser.py similarity index 100% rename from ext/bs4/tests/test_htmlparser.py rename to ext2/bs4/tests/test_htmlparser.py diff --git a/ext/bs4/tests/test_lxml.py b/ext2/bs4/tests/test_lxml.py similarity index 100% rename from ext/bs4/tests/test_lxml.py rename to ext2/bs4/tests/test_lxml.py diff --git a/ext/bs4/tests/test_soup.py b/ext2/bs4/tests/test_soup.py similarity index 100% rename from ext/bs4/tests/test_soup.py rename to ext2/bs4/tests/test_soup.py diff --git a/ext/bs4/tests/test_tree.py b/ext2/bs4/tests/test_tree.py similarity index 100% rename from ext/bs4/tests/test_tree.py rename to ext2/bs4/tests/test_tree.py diff --git a/ext3/bs4/__init__.py b/ext3/bs4/__init__.py new file mode 100644 index 0000000000..797a6826eb --- /dev/null +++ b/ext3/bs4/__init__.py @@ -0,0 +1,584 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup uses a pluggable XML or HTML parser to parse a +(possibly invalid) document into a tree representation. Beautiful Soup +provides methods and Pythonic idioms that make it easy to navigate, +search, and modify the parse tree. + +Beautiful Soup works with Python 2.7 and up. It works better if lxml +and/or html5lib is installed. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/bs4/doc/ + +""" + +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "4.6.3" +__copyright__ = "Copyright (c) 2004-2018 Leonard Richardson" +__license__ = "MIT" + +__all__ = ['BeautifulSoup'] + +import os +import re +import sys +import traceback +import warnings + +from .builder import builder_registry, ParserRejectedMarkup +from .dammit import UnicodeDammit +from .element import ( + CData, + Comment, + DEFAULT_OUTPUT_ENCODING, + Declaration, + Doctype, + NavigableString, + PageElement, + ProcessingInstruction, + ResultSet, + SoupStrainer, + Tag, + ) + +# The very first thing we do is give a useful error if someone is +# running this code under Python 3 without converting it. +'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' + +class BeautifulSoup(Tag): + """ + This class defines the basic interface called by the tree builders. + + These methods will be called by the parser: + reset() + feed(markup) + + The tree builder may call these methods from its feed() implementation: + handle_starttag(name, attrs) # See note about return value + handle_endtag(name) + handle_data(data) # Appends to the current data node + endData(containerClass=NavigableString) # Ends the current data node + + No matter how complicated the underlying parser is, you should be + able to build a tree using 'start tag' events, 'end tag' events, + 'data' events, and "done with data" events. + + If you encounter an empty-element tag (aka a self-closing tag, + like HTML's
tag), call handle_starttag and then + handle_endtag. + """ + ROOT_TAG_NAME = '[document]' + + # If the end-user gives no indication which tree builder they + # want, look for one with these features. + DEFAULT_BUILDER_FEATURES = ['html', 'fast'] + + ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' + + NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n" + + def __init__(self, markup="", features=None, builder=None, + parse_only=None, from_encoding=None, exclude_encodings=None, + **kwargs): + """Constructor. + + :param markup: A string or a file-like object representing + markup to be parsed. + + :param features: Desirable features of the parser to be used. This + may be the name of a specific parser ("lxml", "lxml-xml", + "html.parser", or "html5lib") or it may be the type of markup + to be used ("html", "html5", "xml"). It's recommended that you + name a specific parser, so that Beautiful Soup gives you the + same results across platforms and virtual environments. + + :param builder: A specific TreeBuilder to use instead of looking one + up based on `features`. You shouldn't need to use this. + + :param parse_only: A SoupStrainer. Only parts of the document + matching the SoupStrainer will be considered. This is useful + when parsing part of a document that would otherwise be too + large to fit into memory. + + :param from_encoding: A string indicating the encoding of the + document to be parsed. Pass this in if Beautiful Soup is + guessing wrongly about the document's encoding. 
+ + :param exclude_encodings: A list of strings indicating + encodings known to be wrong. Pass this in if you don't know + the document's encoding but you know Beautiful Soup's guess is + wrong. + + :param kwargs: For backwards compatibility purposes, the + constructor accepts certain keyword arguments used in + Beautiful Soup 3. None of these arguments do anything in + Beautiful Soup 4 and there's no need to actually pass keyword + arguments into the constructor. + """ + + if 'convertEntities' in kwargs: + warnings.warn( + "BS4 does not respect the convertEntities argument to the " + "BeautifulSoup constructor. Entities are always converted " + "to Unicode characters.") + + if 'markupMassage' in kwargs: + del kwargs['markupMassage'] + warnings.warn( + "BS4 does not respect the markupMassage argument to the " + "BeautifulSoup constructor. The tree builder is responsible " + "for any necessary markup massage.") + + if 'smartQuotesTo' in kwargs: + del kwargs['smartQuotesTo'] + warnings.warn( + "BS4 does not respect the smartQuotesTo argument to the " + "BeautifulSoup constructor. Smart quotes are always converted " + "to Unicode characters.") + + if 'selfClosingTags' in kwargs: + del kwargs['selfClosingTags'] + warnings.warn( + "BS4 does not respect the selfClosingTags argument to the " + "BeautifulSoup constructor. The tree builder is responsible " + "for understanding self-closing tags.") + + if 'isHTML' in kwargs: + del kwargs['isHTML'] + warnings.warn( + "BS4 does not respect the isHTML argument to the " + "BeautifulSoup constructor. 
Suggest you use " + "features='lxml' for HTML and features='lxml-xml' for " + "XML.") + + def deprecated_argument(old_name, new_name): + if old_name in kwargs: + warnings.warn( + 'The "%s" argument to the BeautifulSoup constructor ' + 'has been renamed to "%s."' % (old_name, new_name)) + value = kwargs[old_name] + del kwargs[old_name] + return value + return None + + parse_only = parse_only or deprecated_argument( + "parseOnlyThese", "parse_only") + + from_encoding = from_encoding or deprecated_argument( + "fromEncoding", "from_encoding") + + if from_encoding and isinstance(markup, str): + warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") + from_encoding = None + + if len(kwargs) > 0: + arg = list(kwargs.keys()).pop() + raise TypeError( + "__init__() got an unexpected keyword argument '%s'" % arg) + + if builder is None: + original_features = features + if isinstance(features, str): + features = [features] + if features is None or len(features) == 0: + features = self.DEFAULT_BUILDER_FEATURES + builder_class = builder_registry.lookup(*features) + if builder_class is None: + raise FeatureNotFound( + "Couldn't find a tree builder with the features you " + "requested: %s. Do you need to install a parser library?" + % ",".join(features)) + builder = builder_class() + if not (original_features == builder.NAME or + original_features in builder.ALTERNATE_NAMES): + if builder.is_xml: + markup_type = "XML" + else: + markup_type = "HTML" + + # This code adapted from warnings.py so that we get the same line + # of code as our warnings.warn() call gets, even if the answer is wrong + # (as it may be in a multithreading situation). 
+ caller = None + try: + caller = sys._getframe(1) + except ValueError: + pass + if caller: + globals = caller.f_globals + line_number = caller.f_lineno + else: + globals = sys.__dict__ + line_number= 1 + filename = globals.get('__file__') + if filename: + fnl = filename.lower() + if fnl.endswith((".pyc", ".pyo")): + filename = filename[:-1] + if filename: + # If there is no filename at all, the user is most likely in a REPL, + # and the warning is not necessary. + values = dict( + filename=filename, + line_number=line_number, + parser=builder.NAME, + markup_type=markup_type + ) + warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2) + + self.builder = builder + self.is_xml = builder.is_xml + self.known_xml = self.is_xml + self.builder.soup = self + + self.parse_only = parse_only + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + elif len(markup) <= 256 and ( + (isinstance(markup, bytes) and not b'<' in markup) + or (isinstance(markup, str) and not '<' in markup) + ): + # Print out warnings for a couple beginner problems + # involving passing non-markup to Beautiful Soup. + # Beautiful Soup will still parse the input as markup, + # just in case that's what the user really wants. + if (isinstance(markup, str) + and not os.path.supports_unicode_filenames): + possible_filename = markup.encode("utf8") + else: + possible_filename = markup + is_file = False + try: + is_file = os.path.exists(possible_filename) + except Exception as e: + # This is almost certainly a problem involving + # characters not valid in filenames on this + # system. Just let it go. + pass + if is_file: + if isinstance(markup, str): + markup = markup.encode("utf8") + warnings.warn( + '"%s" looks like a filename, not markup. You should' + ' probably open this file and pass the filehandle into' + ' Beautiful Soup.' 
% markup) + self._check_markup_is_url(markup) + + for (self.markup, self.original_encoding, self.declared_html_encoding, + self.contains_replacement_characters) in ( + self.builder.prepare_markup( + markup, from_encoding, exclude_encodings=exclude_encodings)): + self.reset() + try: + self._feed() + break + except ParserRejectedMarkup: + pass + + # Clear out the markup and remove the builder's circular + # reference to this object. + self.markup = None + self.builder.soup = None + + def __copy__(self): + copy = type(self)( + self.encode('utf-8'), builder=self.builder, from_encoding='utf-8' + ) + + # Although we encoded the tree to UTF-8, that may not have + # been the encoding of the original markup. Set the copy's + # .original_encoding to reflect the original object's + # .original_encoding. + copy.original_encoding = self.original_encoding + return copy + + def __getstate__(self): + # Frequently a tree builder can't be pickled. + d = dict(self.__dict__) + if 'builder' in d and not self.builder.picklable: + d['builder'] = None + return d + + @staticmethod + def _check_markup_is_url(markup): + """ + Check if markup looks like it's actually a url and raise a warning + if so. Markup can be unicode or str (py2) / bytes (py3). + """ + if isinstance(markup, bytes): + space = b' ' + cant_start_with = (b"http:", b"https:") + elif isinstance(markup, str): + space = ' ' + cant_start_with = ("http:", "https:") + else: + return + + if any(markup.startswith(prefix) for prefix in cant_start_with): + if not space in markup: + if isinstance(markup, bytes): + decoded_markup = markup.decode('utf-8', 'replace') + else: + decoded_markup = markup + warnings.warn( + '"%s" looks like a URL. Beautiful Soup is not an' + ' HTTP client. You should probably use an HTTP client like' + ' requests to get the document behind the URL, and feed' + ' that document to Beautiful Soup.' % decoded_markup + ) + + def _feed(self): + # Convert the document to Unicode. 
+ self.builder.reset() + + self.builder.feed(self.markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def reset(self): + Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME) + self.hidden = 1 + self.builder.reset() + self.current_data = [] + self.currentTag = None + self.tagStack = [] + self.preserve_whitespace_tag_stack = [] + self.pushTag(self) + + def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, **kwattrs): + """Create a new tag associated with this soup.""" + kwattrs.update(attrs) + return Tag(None, self.builder, name, namespace, nsprefix, kwattrs) + + def new_string(self, s, subclass=NavigableString): + """Create a new NavigableString associated with this soup.""" + return subclass(s) + + def insert_before(self, successor): + raise NotImplementedError("BeautifulSoup objects don't support insert_before().") + + def insert_after(self, successor): + raise NotImplementedError("BeautifulSoup objects don't support insert_after().") + + def popTag(self): + tag = self.tagStack.pop() + if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]: + self.preserve_whitespace_tag_stack.pop() + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + if tag.name in self.builder.preserve_whitespace_tags: + self.preserve_whitespace_tag_stack.append(tag) + + def endData(self, containerClass=NavigableString): + if self.current_data: + current_data = ''.join(self.current_data) + # If whitespace is not preserved, and this string contains + # nothing but ASCII spaces, replace it with a single space + # or newline. 
+ if not self.preserve_whitespace_tag_stack: + strippable = True + for i in current_data: + if i not in self.ASCII_SPACES: + strippable = False + break + if strippable: + if '\n' in current_data: + current_data = '\n' + else: + current_data = ' ' + + # Reset the data collector. + self.current_data = [] + + # Should we add this string to the tree at all? + if self.parse_only and len(self.tagStack) <= 1 and \ + (not self.parse_only.text or \ + not self.parse_only.search(current_data)): + return + + o = containerClass(current_data) + self.object_was_parsed(o) + + def object_was_parsed(self, o, parent=None, most_recent_element=None): + """Add an object to the parse tree.""" + parent = parent or self.currentTag + previous_element = most_recent_element or self._most_recent_element + + next_element = previous_sibling = next_sibling = None + if isinstance(o, Tag): + next_element = o.next_element + next_sibling = o.next_sibling + previous_sibling = o.previous_sibling + if not previous_element: + previous_element = o.previous_element + + o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) + + self._most_recent_element = o + parent.contents.append(o) + + if parent.next_sibling: + # This node is being inserted into an element that has + # already been parsed. Deal with any dangling references. + index = len(parent.contents)-1 + while index >= 0: + if parent.contents[index] is o: + break + index -= 1 + else: + raise ValueError( + "Error building tree: supposedly %r was inserted " + "into %r after the fact, but I don't see it!" 
% ( + o, parent + ) + ) + if index == 0: + previous_element = parent + previous_sibling = None + else: + previous_element = previous_sibling = parent.contents[index-1] + if index == len(parent.contents)-1: + next_element = parent.next_sibling + next_sibling = None + else: + next_element = next_sibling = parent.contents[index+1] + + o.previous_element = previous_element + if previous_element: + previous_element.next_element = o + o.next_element = next_element + if next_element: + next_element.previous_element = o + o.next_sibling = next_sibling + if next_sibling: + next_sibling.previous_sibling = o + o.previous_sibling = previous_sibling + if previous_sibling: + previous_sibling.next_sibling = o + + def _popToTag(self, name, nsprefix=None, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + # The BeautifulSoup object itself can never be popped. + return + + most_recently_popped = None + + stack_size = len(self.tagStack) + for i in range(stack_size - 1, 0, -1): + t = self.tagStack[i] + if (name == t.name and nsprefix == t.prefix): + if inclusivePop: + most_recently_popped = self.popTag() + break + most_recently_popped = self.popTag() + + return most_recently_popped + + def handle_starttag(self, name, namespace, nsprefix, attrs): + """Push a start tag on to the stack. + + If this method returns None, the tag was rejected by the + SoupStrainer. You should proceed as if the tag had not occurred + in the document. For instance, if this was a self-closing tag, + don't call handle_endtag. 
+ """ + + # print "Start tag %s: %s" % (name, attrs) + self.endData() + + if (self.parse_only and len(self.tagStack) <= 1 + and (self.parse_only.text + or not self.parse_only.search_tag(name, attrs))): + return None + + tag = Tag(self, self.builder, name, namespace, nsprefix, attrs, + self.currentTag, self._most_recent_element) + if tag is None: + return tag + if self._most_recent_element: + self._most_recent_element.next_element = tag + self._most_recent_element = tag + self.pushTag(tag) + return tag + + def handle_endtag(self, name, nsprefix=None): + #print "End tag: " + name + self.endData() + self._popToTag(name, nsprefix) + + def handle_data(self, data): + self.current_data.append(data) + + def decode(self, pretty_print=False, + eventual_encoding=DEFAULT_OUTPUT_ENCODING, + formatter="minimal"): + """Returns a string or Unicode representation of this document. + To get Unicode, pass None for encoding.""" + + if self.is_xml: + # Print the XML declaration + encoding_part = '' + if eventual_encoding != None: + encoding_part = ' encoding="%s"' % eventual_encoding + prefix = '\n' % encoding_part + else: + prefix = '' + if not pretty_print: + indent_level = None + else: + indent_level = 0 + return prefix + super(BeautifulSoup, self).decode( + indent_level, eventual_encoding, formatter) + +# Alias to make it easier to type import: 'from bs4 import _soup' +_s = BeautifulSoup +_soup = BeautifulSoup + +class BeautifulStoneSoup(BeautifulSoup): + """Deprecated interface to an XML parser.""" + + def __init__(self, *args, **kwargs): + kwargs['features'] = 'xml' + warnings.warn( + 'The BeautifulStoneSoup class is deprecated. Instead of using ' + 'it, pass features="xml" into the BeautifulSoup constructor.') + super(BeautifulStoneSoup, self).__init__(*args, **kwargs) + + +class StopParsing(Exception): + pass + +class FeatureNotFound(ValueError): + pass + + +#By default, act as an HTML pretty-printer. 
+if __name__ == '__main__': + import sys + soup = BeautifulSoup(sys.stdin) + print(soup.prettify()) diff --git a/ext3/bs4/builder/__init__.py b/ext3/bs4/builder/__init__.py new file mode 100644 index 0000000000..b80ad6845c --- /dev/null +++ b/ext3/bs4/builder/__init__.py @@ -0,0 +1,339 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +from collections import defaultdict +import itertools +import sys +from bs4.element import ( + CharsetMetaAttributeValue, + ContentMetaAttributeValue, + HTMLAwareEntitySubstitution, + whitespace_re + ) + +__all__ = [ + 'HTMLTreeBuilder', + 'SAXTreeBuilder', + 'TreeBuilder', + 'TreeBuilderRegistry', + ] + +# Some useful features for a TreeBuilder to have. +FAST = 'fast' +PERMISSIVE = 'permissive' +STRICT = 'strict' +XML = 'xml' +HTML = 'html' +HTML_5 = 'html5' + + +class TreeBuilderRegistry(object): + + def __init__(self): + self.builders_for_feature = defaultdict(list) + self.builders = [] + + def register(self, treebuilder_class): + """Register a treebuilder based on its advertised features.""" + for feature in treebuilder_class.features: + self.builders_for_feature[feature].insert(0, treebuilder_class) + self.builders.insert(0, treebuilder_class) + + def lookup(self, *features): + if len(self.builders) == 0: + # There are no builders at all. + return None + + if len(features) == 0: + # They didn't ask for any features. Give them the most + # recently registered builder. + return self.builders[0] + + # Go down the list of features in order, and eliminate any builders + # that don't match every feature. 
+ features = list(features) + features.reverse() + candidates = None + candidate_set = None + while len(features) > 0: + feature = features.pop() + we_have_the_feature = self.builders_for_feature.get(feature, []) + if len(we_have_the_feature) > 0: + if candidates is None: + candidates = we_have_the_feature + candidate_set = set(candidates) + else: + # Eliminate any candidates that don't have this feature. + candidate_set = candidate_set.intersection( + set(we_have_the_feature)) + + # The only valid candidates are the ones in candidate_set. + # Go through the original list of candidates and pick the first one + # that's in candidate_set. + if candidate_set is None: + return None + for candidate in candidates: + if candidate in candidate_set: + return candidate + return None + +# The BeautifulSoup class will take feature lists from developers and use them +# to look up builders in this registry. +builder_registry = TreeBuilderRegistry() + +class TreeBuilder(object): + """Turn a document into a Beautiful Soup object tree.""" + + NAME = "[Unknown tree builder]" + ALTERNATE_NAMES = [] + features = [] + + is_xml = False + picklable = False + preserve_whitespace_tags = set() + empty_element_tags = None # A tag will be considered an empty-element + # tag when and only when it has no contents. + + # A value for these tag/attribute combinations is a space- or + # comma-separated list of CDATA, rather than a single CDATA. + cdata_list_attributes = {} + + + def __init__(self): + self.soup = None + + def reset(self): + pass + + def can_be_empty_element(self, tag_name): + """Might a tag with this name be an empty-element tag? + + The final markup may or may not actually present this tag as + self-closing. + + For instance: an HTMLBuilder does not consider a
<p> tag to be
        an empty-element tag (it's not in
        HTMLBuilder.empty_element_tags). This means an empty <p> tag
        will be presented as "<p></p>", not "<p/>".

        The default implementation has no opinion about which tags are
        empty-element tags, so a tag will be presented as an
        empty-element tag if and only if it has no contents.
        "<foo></foo>" will become "<foo/>", and "<foo>bar</foo>" will
        be left alone.
        """
        if self.empty_element_tags is None:
            return True
        return tag_name in self.empty_element_tags

    def feed(self, markup):
        raise NotImplementedError()

    def prepare_markup(self, markup, user_specified_encoding=None,
                       document_declared_encoding=None):
        return markup, None, None, False

    def test_fragment_to_document(self, fragment):
        """Wrap an HTML fragment to make it look like a document.

        Different parsers do this differently. For instance, lxml
        introduces an empty <head> tag, and html5lib
        doesn't. Abstracting this away lets us write simple tests
        which run HTML fragments through the parser and compare the
        results against other HTML fragments.

        This method should not be used outside of tests.
        """
        return fragment

    def set_up_substitutions(self, tag):
        return False

    def _replace_cdata_list_attribute_values(self, tag_name, attrs):
        """Replaces class="foo bar" with class=["foo", "bar"]

        Modifies its input in place.
        """
        if not attrs:
            return attrs
        if self.cdata_list_attributes:
            universal = self.cdata_list_attributes.get('*', [])
            tag_specific = self.cdata_list_attributes.get(
                tag_name.lower(), None)
            for attr in list(attrs.keys()):
                if attr in universal or (tag_specific and attr in tag_specific):
                    # We have a "class"-type attribute whose string
                    # value is a whitespace-separated list of
                    # values. Split it into a list.
                    value = attrs[attr]
                    if isinstance(value, str):
                        values = whitespace_re.split(value)
                    else:
                        # html5lib sometimes calls setAttributes twice
                        # for the same tag when rearranging the parse
                        # tree. On the second call the attribute value
                        # here is already a list. If this happens,
                        # leave the value alone rather than trying to
                        # split it again.
+ values = value + attrs[attr] = values + return attrs + +class SAXTreeBuilder(TreeBuilder): + """A Beautiful Soup treebuilder that listens for SAX events.""" + + def feed(self, markup): + raise NotImplementedError() + + def close(self): + pass + + def startElement(self, name, attrs): + attrs = dict((key[1], value) for key, value in list(attrs.items())) + #print "Start %s, %r" % (name, attrs) + self.soup.handle_starttag(name, attrs) + + def endElement(self, name): + #print "End %s" % name + self.soup.handle_endtag(name) + + def startElementNS(self, nsTuple, nodeName, attrs): + # Throw away (ns, nodeName) for now. + self.startElement(nodeName, attrs) + + def endElementNS(self, nsTuple, nodeName): + # Throw away (ns, nodeName) for now. + self.endElement(nodeName) + #handler.endElementNS((ns, node.nodeName), node.nodeName) + + def startPrefixMapping(self, prefix, nodeValue): + # Ignore the prefix for now. + pass + + def endPrefixMapping(self, prefix): + # Ignore the prefix for now. + # handler.endPrefixMapping(prefix) + pass + + def characters(self, content): + self.soup.handle_data(content) + + def startDocument(self): + pass + + def endDocument(self): + pass + + +class HTMLTreeBuilder(TreeBuilder): + """This TreeBuilder knows facts about HTML. + + Such as which tags are empty-element tags. + """ + + preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags + empty_element_tags = set([ + # These are from HTML5. + 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr', + + # These are from earlier versions of HTML and are removed in HTML5. + 'basefont', 'bgsound', 'command', 'frame', 'image', 'isindex', 'nextid', 'spacer' + ]) + + # The HTML standard defines these as block-level elements. Beautiful + # Soup does not treat these elements differently from other elements, + # but it may do so eventually, and this information is available if + # you need to use it. 
+ block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"]) + + # The HTML standard defines these attributes as containing a + # space-separated list of values, not a single value. That is, + # class="foo bar" means that the 'class' attribute has two values, + # 'foo' and 'bar', not the single value 'foo bar'. When we + # encounter one of these attributes, we will parse its value into + # a list of values if possible. Upon output, the list will be + # converted back into a string. + cdata_list_attributes = { + "*" : ['class', 'accesskey', 'dropzone'], + "a" : ['rel', 'rev'], + "link" : ['rel', 'rev'], + "td" : ["headers"], + "th" : ["headers"], + "td" : ["headers"], + "form" : ["accept-charset"], + "object" : ["archive"], + + # These are HTML5 specific, as are *.accesskey and *.dropzone above. + "area" : ["rel"], + "icon" : ["sizes"], + "iframe" : ["sandbox"], + "output" : ["for"], + } + + def set_up_substitutions(self, tag): + # We are only interested in tags + if tag.name != 'meta': + return False + + http_equiv = tag.get('http-equiv') + content = tag.get('content') + charset = tag.get('charset') + + # We are interested in tags that say what encoding the + # document was originally in. This means HTML 5-style + # tags that provide the "charset" attribute. It also means + # HTML 4-style tags that provide the "content" + # attribute and have "http-equiv" set to "content-type". + # + # In both cases we will replace the value of the appropriate + # attribute with a standin object that can take on any + # encoding. 
+ meta_encoding = None + if charset is not None: + # HTML 5 style: + # + meta_encoding = charset + tag['charset'] = CharsetMetaAttributeValue(charset) + + elif (content is not None and http_equiv is not None + and http_equiv.lower() == 'content-type'): + # HTML 4 style: + # + tag['content'] = ContentMetaAttributeValue(content) + + return (meta_encoding is not None) + +def register_treebuilders_from(module): + """Copy TreeBuilders from the given module into this module.""" + # I'm fairly sure this is not the best way to do this. + this_module = sys.modules['bs4.builder'] + for name in module.__all__: + obj = getattr(module, name) + + if issubclass(obj, TreeBuilder): + setattr(this_module, name, obj) + this_module.__all__.append(name) + # Register the builder while we're at it. + this_module.builder_registry.register(obj) + +class ParserRejectedMarkup(Exception): + pass + +# Builders are registered in reverse order of priority, so that custom +# builder registrations will take precedence. In general, we want lxml +# to take precedence over html5lib, because it's faster. And we only +# want to use HTMLParser as a last result. +from . import _htmlparser +register_treebuilders_from(_htmlparser) +try: + from . import _html5lib + register_treebuilders_from(_html5lib) +except ImportError: + # They don't have html5lib installed. + pass +try: + from . import _lxml + register_treebuilders_from(_lxml) +except ImportError: + # They don't have lxml installed. + pass diff --git a/ext3/bs4/builder/_html5lib.py b/ext3/bs4/builder/_html5lib.py new file mode 100644 index 0000000000..d9d468f5bb --- /dev/null +++ b/ext3/bs4/builder/_html5lib.py @@ -0,0 +1,426 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +__all__ = [ + 'HTML5TreeBuilder', + ] + +import warnings +import re +from bs4.builder import ( + PERMISSIVE, + HTML, + HTML_5, + HTMLTreeBuilder, + ) +from bs4.element import ( + NamespacedAttribute, + whitespace_re, +) +import html5lib +from html5lib.constants import ( + namespaces, + prefixes, + ) +from bs4.element import ( + Comment, + Doctype, + NavigableString, + Tag, + ) + +try: + # Pre-0.99999999 + from html5lib.treebuilders import _base as treebuilder_base + new_html5lib = False +except ImportError as e: + # 0.99999999 and up + from html5lib.treebuilders import base as treebuilder_base + new_html5lib = True + +class HTML5TreeBuilder(HTMLTreeBuilder): + """Use html5lib to build a tree.""" + + NAME = "html5lib" + + features = [NAME, PERMISSIVE, HTML_5, HTML] + + def prepare_markup(self, markup, user_specified_encoding, + document_declared_encoding=None, exclude_encodings=None): + # Store the user-specified encoding for use later on. + self.user_specified_encoding = user_specified_encoding + + # document_declared_encoding and exclude_encodings aren't used + # ATM because the html5lib TreeBuilder doesn't use + # UnicodeDammit. + if exclude_encodings: + warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.") + yield (markup, None, None, False) + + # These methods are defined by Beautiful Soup. + def feed(self, markup): + if self.soup.parse_only is not None: + warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.") + parser = html5lib.HTMLParser(tree=self.create_treebuilder) + + extra_kwargs = dict() + if not isinstance(markup, str): + if new_html5lib: + extra_kwargs['override_encoding'] = self.user_specified_encoding + else: + extra_kwargs['encoding'] = self.user_specified_encoding + doc = parser.parse(markup, **extra_kwargs) + + # Set the character encoding detected by the tokenizer. 
+ if isinstance(markup, str): + # We need to special-case this because html5lib sets + # charEncoding to UTF-8 if it gets Unicode input. + doc.original_encoding = None + else: + original_encoding = parser.tokenizer.stream.charEncoding[0] + if not isinstance(original_encoding, str): + # In 0.99999999 and up, the encoding is an html5lib + # Encoding object. We want to use a string for compatibility + # with other tree builders. + original_encoding = original_encoding.name + doc.original_encoding = original_encoding + + def create_treebuilder(self, namespaceHTMLElements): + self.underlying_builder = TreeBuilderForHtml5lib( + namespaceHTMLElements, self.soup) + return self.underlying_builder + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" + return '%s' % fragment + + +class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): + + def __init__(self, namespaceHTMLElements, soup=None): + if soup: + self.soup = soup + else: + from bs4 import BeautifulSoup + self.soup = BeautifulSoup("", "html.parser") + super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements) + + def documentClass(self): + self.soup.reset() + return Element(self.soup, self.soup, None) + + def insertDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + + doctype = Doctype.for_name_and_ids(name, publicId, systemId) + self.soup.object_was_parsed(doctype) + + def elementClass(self, name, namespace): + tag = self.soup.new_tag(name, namespace) + return Element(tag, self.soup, namespace) + + def commentClass(self, data): + return TextNode(Comment(data), self.soup) + + def fragmentClass(self): + from bs4 import BeautifulSoup + self.soup = BeautifulSoup("", "html.parser") + self.soup.name = "[document_fragment]" + return Element(self.soup, self.soup, None) + + def appendChild(self, node): + # XXX This code is not covered by the BS4 tests. 
+ self.soup.append(node.element) + + def getDocument(self): + return self.soup + + def getFragment(self): + return treebuilder_base.TreeBuilder.getFragment(self).element + + def testSerializer(self, element): + from bs4 import BeautifulSoup + rv = [] + doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$') + + def serializeElement(element, indent=0): + if isinstance(element, BeautifulSoup): + pass + if isinstance(element, Doctype): + m = doctype_re.match(element) + if m: + name = m.group(1) + if m.lastindex > 1: + publicId = m.group(2) or "" + systemId = m.group(3) or m.group(4) or "" + rv.append("""|%s""" % + (' ' * indent, name, publicId, systemId)) + else: + rv.append("|%s" % (' ' * indent, name)) + else: + rv.append("|%s" % (' ' * indent,)) + elif isinstance(element, Comment): + rv.append("|%s" % (' ' * indent, element)) + elif isinstance(element, NavigableString): + rv.append("|%s\"%s\"" % (' ' * indent, element)) + else: + if element.namespace: + name = "%s %s" % (prefixes[element.namespace], + element.name) + else: + name = element.name + rv.append("|%s<%s>" % (' ' * indent, name)) + if element.attrs: + attributes = [] + for name, value in list(element.attrs.items()): + if isinstance(name, NamespacedAttribute): + name = "%s %s" % (prefixes[name.namespace], name.name) + if isinstance(value, list): + value = " ".join(value) + attributes.append((name, value)) + + for name, value in sorted(attributes): + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + indent += 2 + for child in element.children: + serializeElement(child, indent) + serializeElement(element, 0) + + return "\n".join(rv) + +class AttrList(object): + def __init__(self, element): + self.element = element + self.attrs = dict(self.element.attrs) + def __iter__(self): + return list(self.attrs.items()).__iter__() + def __setitem__(self, name, value): + # If this attribute is a multi-valued attribute for this element, + # turn its value into a list. 
+ list_attr = HTML5TreeBuilder.cdata_list_attributes + if (name in list_attr['*'] + or (self.element.name in list_attr + and name in list_attr[self.element.name])): + # A node that is being cloned may have already undergone + # this procedure. + if not isinstance(value, list): + value = whitespace_re.split(value) + self.element[name] = value + def items(self): + return list(self.attrs.items()) + def keys(self): + return list(self.attrs.keys()) + def __len__(self): + return len(self.attrs) + def __getitem__(self, name): + return self.attrs[name] + def __contains__(self, name): + return name in list(self.attrs.keys()) + + +class Element(treebuilder_base.Node): + def __init__(self, element, soup, namespace): + treebuilder_base.Node.__init__(self, element.name) + self.element = element + self.soup = soup + self.namespace = namespace + + def appendChild(self, node): + string_child = child = None + if isinstance(node, str): + # Some other piece of code decided to pass in a string + # instead of creating a TextElement object to contain the + # string. + string_child = child = node + elif isinstance(node, Tag): + # Some other piece of code decided to pass in a Tag + # instead of creating an Element object to contain the + # Tag. + child = node + elif node.element.__class__ == NavigableString: + string_child = child = node.element + node.parent = self + else: + child = node.element + node.parent = self + + if not isinstance(child, str) and child.parent is not None: + node.element.extract() + + if (string_child and self.element.contents + and self.element.contents[-1].__class__ == NavigableString): + # We are appending a string onto another string. + # TODO This has O(n^2) performance, for input like + # "aaa..." 
+ old_element = self.element.contents[-1] + new_element = self.soup.new_string(old_element + string_child) + old_element.replace_with(new_element) + self.soup._most_recent_element = new_element + else: + if isinstance(node, str): + # Create a brand new NavigableString from this string. + child = self.soup.new_string(node) + + # Tell Beautiful Soup to act as if it parsed this element + # immediately after the parent's last descendant. (Or + # immediately after the parent, if it has no children.) + if self.element.contents: + most_recent_element = self.element._last_descendant(False) + elif self.element.next_element is not None: + # Something from further ahead in the parse tree is + # being inserted into this earlier element. This is + # very annoying because it means an expensive search + # for the last element in the tree. + most_recent_element = self.soup._last_descendant() + else: + most_recent_element = self.element + + self.soup.object_was_parsed( + child, parent=self.element, + most_recent_element=most_recent_element) + + def getAttributes(self): + if isinstance(self.element, Comment): + return {} + return AttrList(self.element) + + def setAttributes(self, attributes): + + if attributes is not None and len(attributes) > 0: + + converted_attributes = [] + for name, value in list(attributes.items()): + if isinstance(name, tuple): + new_name = NamespacedAttribute(*name) + del attributes[name] + attributes[new_name] = value + + self.soup.builder._replace_cdata_list_attribute_values( + self.name, attributes) + for name, value in list(attributes.items()): + self.element[name] = value + + # The attributes may contain variables that need substitution. + # Call set_up_substitutions manually. + # + # The Tag constructor called this method when the Tag was created, + # but we just set/changed the attributes, so call it again. 
+ self.soup.builder.set_up_substitutions(self.element) + attributes = property(getAttributes, setAttributes) + + def insertText(self, data, insertBefore=None): + text = TextNode(self.soup.new_string(data), self.soup) + if insertBefore: + self.insertBefore(text, insertBefore) + else: + self.appendChild(text) + + def insertBefore(self, node, refNode): + index = self.element.index(refNode.element) + if (node.element.__class__ == NavigableString and self.element.contents + and self.element.contents[index-1].__class__ == NavigableString): + # (See comments in appendChild) + old_node = self.element.contents[index-1] + new_str = self.soup.new_string(old_node + node.element) + old_node.replace_with(new_str) + else: + self.element.insert(index, node.element) + node.parent = self + + def removeChild(self, node): + node.element.extract() + + def reparentChildren(self, new_parent): + """Move all of this tag's children into another tag.""" + # print "MOVE", self.element.contents + # print "FROM", self.element + # print "TO", new_parent.element + + element = self.element + new_parent_element = new_parent.element + # Determine what this tag's next_element will be once all the children + # are removed. + final_next_element = element.next_sibling + + new_parents_last_descendant = new_parent_element._last_descendant(False, False) + if len(new_parent_element.contents) > 0: + # The new parent already contains children. We will be + # appending this tag's children to the end. + new_parents_last_child = new_parent_element.contents[-1] + new_parents_last_descendant_next_element = new_parents_last_descendant.next_element + else: + # The new parent contains no children. 
+ new_parents_last_child = None + new_parents_last_descendant_next_element = new_parent_element.next_element + + to_append = element.contents + if len(to_append) > 0: + # Set the first child's previous_element and previous_sibling + # to elements within the new parent + first_child = to_append[0] + if new_parents_last_descendant: + first_child.previous_element = new_parents_last_descendant + else: + first_child.previous_element = new_parent_element + first_child.previous_sibling = new_parents_last_child + if new_parents_last_descendant: + new_parents_last_descendant.next_element = first_child + else: + new_parent_element.next_element = first_child + if new_parents_last_child: + new_parents_last_child.next_sibling = first_child + + # Find the very last element being moved. It is now the + # parent's last descendant. It has no .next_sibling and + # its .next_element is whatever the previous last + # descendant had. + last_childs_last_descendant = to_append[-1]._last_descendant(False, True) + + last_childs_last_descendant.next_element = new_parents_last_descendant_next_element + if new_parents_last_descendant_next_element: + # TODO: This code has no test coverage and I'm not sure + # how to get html5lib to go through this path, but it's + # just the other side of the previous line. + new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant + last_childs_last_descendant.next_sibling = None + + for child in to_append: + child.parent = new_parent_element + new_parent_element.contents.append(child) + + # Now that this element has no children, change its .next_element. 
+ element.contents = [] + element.next_element = final_next_element + + # print "DONE WITH MOVE" + # print "FROM", self.element + # print "TO", new_parent_element + + def cloneNode(self): + tag = self.soup.new_tag(self.element.name, self.namespace) + node = Element(tag, self.soup, self.namespace) + for key,value in self.attributes: + node.attributes[key] = value + return node + + def hasContent(self): + return self.element.contents + + def getNameTuple(self): + if self.namespace == None: + return namespaces["html"], self.name + else: + return self.namespace, self.name + + nameTuple = property(getNameTuple) + +class TextNode(Element): + def __init__(self, element, soup): + treebuilder_base.Node.__init__(self, None) + self.element = element + self.soup = soup + + def cloneNode(self): + raise NotImplementedError diff --git a/ext3/bs4/builder/_htmlparser.py b/ext3/bs4/builder/_htmlparser.py new file mode 100644 index 0000000000..7ae60272dd --- /dev/null +++ b/ext3/bs4/builder/_htmlparser.py @@ -0,0 +1,347 @@ +# encoding: utf-8 +"""Use the HTMLParser library to parse HTML files that aren't too bad.""" + +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +__all__ = [ + 'HTMLParserTreeBuilder', + ] + +from html.parser import HTMLParser + +try: + from html.parser import HTMLParseError +except ImportError as e: + # HTMLParseError is removed in Python 3.5. Since it can never be + # thrown in 3.5, we can just define our own class as a placeholder. + class HTMLParseError(Exception): + pass + +import sys +import warnings + +# Starting in Python 3.2, the HTMLParser constructor takes a 'strict' +# argument, which we'd like to set to False. Unfortunately, +# http://bugs.python.org/issue13273 makes strict=True a better bet +# before Python 3.2.3. +# +# At the end of this file, we monkeypatch HTMLParser so that +# strict=True works well on Python 3.2.2. 
+major, minor, release = sys.version_info[:3] +CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3 +CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3 +CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4 + + +from bs4.element import ( + CData, + Comment, + Declaration, + Doctype, + ProcessingInstruction, + ) +from bs4.dammit import EntitySubstitution, UnicodeDammit + +from bs4.builder import ( + HTML, + HTMLTreeBuilder, + STRICT, + ) + + +HTMLPARSER = 'html.parser' + +class BeautifulSoupHTMLParser(HTMLParser): + + def __init__(self, *args, **kwargs): + HTMLParser.__init__(self, *args, **kwargs) + + # Keep a list of empty-element tags that were encountered + # without an explicit closing tag. If we encounter a closing tag + # of this type, we'll associate it with one of those entries. + # + # This isn't a stack because we don't care about the + # order. It's a list of closing tags we've already handled and + # will ignore, assuming they ever show up. + self.already_closed_empty_element = [] + + def error(self, msg): + """In Python 3, HTMLParser subclasses must implement error(), although this + requirement doesn't appear to be documented. + + In Python 2, HTMLParser implements error() as raising an exception. + + In any event, this method is called only on very strange markup and our best strategy + is to pretend it didn't happen and keep going. + """ + warnings.warn(msg) + + def handle_startendtag(self, name, attrs): + # This is only called when the markup looks like + # . + + # is_startend() tells handle_starttag not to close the tag + # just because its name matches a known empty-element tag. We + # know that this is an empty-element tag and we want to call + # handle_endtag ourselves. 
+ tag = self.handle_starttag(name, attrs, handle_empty_element=False) + self.handle_endtag(name) + + def handle_starttag(self, name, attrs, handle_empty_element=True): + # XXX namespace + attr_dict = {} + for key, value in attrs: + # Change None attribute values to the empty string + # for consistency with the other tree builders. + if value is None: + value = '' + attr_dict[key] = value + attrvalue = '""' + #print "START", name + tag = self.soup.handle_starttag(name, None, None, attr_dict) + if tag and tag.is_empty_element and handle_empty_element: + # Unlike other parsers, html.parser doesn't send separate end tag + # events for empty-element tags. (It's handled in + # handle_startendtag, but only if the original markup looked like + # .) + # + # So we need to call handle_endtag() ourselves. Since we + # know the start event is identical to the end event, we + # don't want handle_endtag() to cross off any previous end + # events for tags of this name. + self.handle_endtag(name, check_already_closed=False) + + # But we might encounter an explicit closing tag for this tag + # later on. If so, we want to ignore it. + self.already_closed_empty_element.append(name) + + def handle_endtag(self, name, check_already_closed=True): + #print "END", name + if check_already_closed and name in self.already_closed_empty_element: + # This is a redundant end tag for an empty-element tag. + # We've already called handle_endtag() for it, so just + # check it off the list. + # print "ALREADY CLOSED", name + self.already_closed_empty_element.remove(name) + else: + self.soup.handle_endtag(name) + + def handle_data(self, data): + self.soup.handle_data(data) + + def handle_charref(self, name): + # XXX workaround for a bug in HTMLParser. Remove this once + # it's fixed in all supported versions. 
+ # http://bugs.python.org/issue13633 + if name.startswith('x'): + real_name = int(name.lstrip('x'), 16) + elif name.startswith('X'): + real_name = int(name.lstrip('X'), 16) + else: + real_name = int(name) + + data = None + if real_name < 256: + # HTML numeric entities are supposed to reference Unicode + # code points, but sometimes they reference code points in + # some other encoding (ahem, Windows-1252). E.g. “ + # instead of É for LEFT DOUBLE QUOTATION MARK. This + # code tries to detect this situation and compensate. + for encoding in (self.soup.original_encoding, 'windows-1252'): + if not encoding: + continue + try: + data = bytearray([real_name]).decode(encoding) + except UnicodeDecodeError as e: + pass + if not data: + try: + data = chr(real_name) + except (ValueError, OverflowError) as e: + pass + data = data or "\N{REPLACEMENT CHARACTER}" + self.handle_data(data) + + def handle_entityref(self, name): + character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name) + if character is not None: + data = character + else: + # If this were XML, it would be ambiguous whether "&foo" + # was an character entity reference with a missing + # semicolon or the literal string "&foo". Since this is + # HTML, we have a complete list of all character entity references, + # and this one wasn't found, so assume it's the literal string "&foo". + data = "&%s" % name + self.handle_data(data) + + def handle_comment(self, data): + self.soup.endData() + self.soup.handle_data(data) + self.soup.endData(Comment) + + def handle_decl(self, data): + self.soup.endData() + if data.startswith("DOCTYPE "): + data = data[len("DOCTYPE "):] + elif data == 'DOCTYPE': + # i.e. 
"" + data = '' + self.soup.handle_data(data) + self.soup.endData(Doctype) + + def unknown_decl(self, data): + if data.upper().startswith('CDATA['): + cls = CData + data = data[len('CDATA['):] + else: + cls = Declaration + self.soup.endData() + self.soup.handle_data(data) + self.soup.endData(cls) + + def handle_pi(self, data): + self.soup.endData() + self.soup.handle_data(data) + self.soup.endData(ProcessingInstruction) + + +class HTMLParserTreeBuilder(HTMLTreeBuilder): + + is_xml = False + picklable = True + NAME = HTMLPARSER + features = [NAME, HTML, STRICT] + + def __init__(self, *args, **kwargs): + if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED: + kwargs['strict'] = False + if CONSTRUCTOR_TAKES_CONVERT_CHARREFS: + kwargs['convert_charrefs'] = False + self.parser_args = (args, kwargs) + + def prepare_markup(self, markup, user_specified_encoding=None, + document_declared_encoding=None, exclude_encodings=None): + """ + :return: A 4-tuple (markup, original encoding, encoding + declared within markup, whether any characters had to be + replaced with REPLACEMENT CHARACTER). + """ + if isinstance(markup, str): + yield (markup, None, None, False) + return + + try_encodings = [user_specified_encoding, document_declared_encoding] + dammit = UnicodeDammit(markup, try_encodings, is_html=True, + exclude_encodings=exclude_encodings) + yield (dammit.markup, dammit.original_encoding, + dammit.declared_html_encoding, + dammit.contains_replacement_characters) + + def feed(self, markup): + args, kwargs = self.parser_args + parser = BeautifulSoupHTMLParser(*args, **kwargs) + parser.soup = self.soup + try: + parser.feed(markup) + parser.close() + except HTMLParseError as e: + warnings.warn(RuntimeWarning( + "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. 
See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) + raise e + parser.already_closed_empty_element = [] + +# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some +# 3.2.3 code. This ensures they don't treat markup like

as a +# string. +# +# XXX This code can be removed once most Python 3 users are on 3.2.3. +if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT: + import re + attrfind_tolerant = re.compile( + r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*' + r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?') + HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant + + locatestarttagend = re.compile(r""" + <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name + (?:\s+ # whitespace before attribute name + (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name + (?:\s*=\s* # value indicator + (?:'[^']*' # LITA-enclosed value + |\"[^\"]*\" # LIT-enclosed value + |[^'\">\s]+ # bare value + ) + )? + ) + )* + \s* # trailing whitespace +""", re.VERBOSE) + BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend + + from html.parser import tagfind, attrfind + + def parse_starttag(self, i): + self.__starttag_text = None + endpos = self.check_for_whole_start_tag(i) + if endpos < 0: + return endpos + rawdata = self.rawdata + self.__starttag_text = rawdata[i:endpos] + + # Now parse the data between i+1 and j into a tag and attrs + attrs = [] + match = tagfind.match(rawdata, i+1) + assert match, 'unexpected call to parse_starttag()' + k = match.end() + self.lasttag = tag = rawdata[i+1:k].lower() + while k < endpos: + if self.strict: + m = attrfind.match(rawdata, k) + else: + m = attrfind_tolerant.match(rawdata, k) + if not m: + break + attrname, rest, attrvalue = m.group(1, 2, 3) + if not rest: + attrvalue = None + elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ + attrvalue[:1] == '"' == attrvalue[-1:]: + attrvalue = attrvalue[1:-1] + if attrvalue: + attrvalue = self.unescape(attrvalue) + attrs.append((attrname.lower(), attrvalue)) + k = m.end() + + end = rawdata[k:endpos].strip() + if end not in (">", "/>"): + lineno, offset = self.getpos() + if "\n" in self.__starttag_text: + lineno = lineno + self.__starttag_text.count("\n") + offset = len(self.__starttag_text) \ + - self.__starttag_text.rfind("\n") + 
else: + offset = offset + len(self.__starttag_text) + if self.strict: + self.error("junk characters in start tag: %r" + % (rawdata[k:endpos][:20],)) + self.handle_data(rawdata[i:endpos]) + return endpos + if end.endswith('/>'): + # XHTML-style empty tag: + self.handle_startendtag(tag, attrs) + else: + self.handle_starttag(tag, attrs) + if tag in self.CDATA_CONTENT_ELEMENTS: + self.set_cdata_mode(tag) + return endpos + + def set_cdata_mode(self, elem): + self.cdata_elem = elem.lower() + self.interesting = re.compile(r'' % self.cdata_elem, re.I) + + BeautifulSoupHTMLParser.parse_starttag = parse_starttag + BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode + + CONSTRUCTOR_TAKES_STRICT = True diff --git a/ext3/bs4/builder/_lxml.py b/ext3/bs4/builder/_lxml.py new file mode 100644 index 0000000000..0dc9affede --- /dev/null +++ b/ext3/bs4/builder/_lxml.py @@ -0,0 +1,262 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +__all__ = [ + 'LXMLTreeBuilderForXML', + 'LXMLTreeBuilder', + ] + +try: + from collections.abc import Callable # Python 3.6 +except ImportError as e: + from collections import Callable + +from io import BytesIO +from io import StringIO +from lxml import etree +from bs4.element import ( + Comment, + Doctype, + NamespacedAttribute, + ProcessingInstruction, + XMLProcessingInstruction, +) +from bs4.builder import ( + FAST, + HTML, + HTMLTreeBuilder, + PERMISSIVE, + ParserRejectedMarkup, + TreeBuilder, + XML) +from bs4.dammit import EncodingDetector + +LXML = 'lxml' + +class LXMLTreeBuilderForXML(TreeBuilder): + DEFAULT_PARSER_CLASS = etree.XMLParser + + is_xml = True + processing_instruction_class = XMLProcessingInstruction + + NAME = "lxml-xml" + ALTERNATE_NAMES = ["xml"] + + # Well, it's permissive by XML parser standards. + features = [NAME, LXML, XML, FAST, PERMISSIVE] + + CHUNK_SIZE = 512 + + # This namespace mapping is specified in the XML Namespace + # standard. 
+ DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"} + + def default_parser(self, encoding): + # This can either return a parser object or a class, which + # will be instantiated with default arguments. + if self._default_parser is not None: + return self._default_parser + return etree.XMLParser( + target=self, strip_cdata=False, recover=True, encoding=encoding) + + def parser_for(self, encoding): + # Use the default parser. + parser = self.default_parser(encoding) + + if isinstance(parser, Callable): + # Instantiate the parser with default arguments + parser = parser(target=self, strip_cdata=False, encoding=encoding) + return parser + + def __init__(self, parser=None, empty_element_tags=None): + # TODO: Issue a warning if parser is present but not a + # callable, since that means there's no way to create new + # parsers for different encodings. + self._default_parser = parser + if empty_element_tags is not None: + self.empty_element_tags = set(empty_element_tags) + self.soup = None + self.nsmaps = [self.DEFAULT_NSMAPS] + + def _getNsTag(self, tag): + # Split the namespace URL out of a fully-qualified lxml tag + # name. Copied from lxml's src/lxml/sax.py. + if tag[0] == '{': + return tuple(tag[1:].split('}', 1)) + else: + return (None, tag) + + def prepare_markup(self, markup, user_specified_encoding=None, + exclude_encodings=None, + document_declared_encoding=None): + """ + :yield: A series of 4-tuples. + (markup, encoding, declared encoding, + has undergone character replacement) + + Each 4-tuple represents a strategy for parsing the document. + """ + # Instead of using UnicodeDammit to convert the bytestring to + # Unicode using different encodings, use EncodingDetector to + # iterate over the encodings, and tell lxml to try to parse + # the document as each one in turn. 
+ is_html = not self.is_xml + if is_html: + self.processing_instruction_class = ProcessingInstruction + else: + self.processing_instruction_class = XMLProcessingInstruction + + if isinstance(markup, str): + # We were given Unicode. Maybe lxml can parse Unicode on + # this system? + yield markup, None, document_declared_encoding, False + + if isinstance(markup, str): + # No, apparently not. Convert the Unicode to UTF-8 and + # tell lxml to parse it as UTF-8. + yield (markup.encode("utf8"), "utf8", + document_declared_encoding, False) + + try_encodings = [user_specified_encoding, document_declared_encoding] + detector = EncodingDetector( + markup, try_encodings, is_html, exclude_encodings) + for encoding in detector.encodings: + yield (detector.markup, encoding, document_declared_encoding, False) + + def feed(self, markup): + if isinstance(markup, bytes): + markup = BytesIO(markup) + elif isinstance(markup, str): + markup = StringIO(markup) + + # Call feed() at least once, even if the markup is empty, + # or the parser won't be initialized. + data = markup.read(self.CHUNK_SIZE) + try: + self.parser = self.parser_for(self.soup.original_encoding) + self.parser.feed(data) + while len(data) != 0: + # Now call feed() on the rest of the data, chunk by chunk. + data = markup.read(self.CHUNK_SIZE) + if len(data) != 0: + self.parser.feed(data) + self.parser.close() + except (UnicodeDecodeError, LookupError, etree.ParserError) as e: + raise ParserRejectedMarkup(str(e)) + + def close(self): + self.nsmaps = [self.DEFAULT_NSMAPS] + + def start(self, name, attrs, nsmap={}): + # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy. + attrs = dict(attrs) + nsprefix = None + # Invert each namespace map as it comes in. + if len(nsmap) == 0 and len(self.nsmaps) > 1: + # There are no new namespaces for this tag, but + # non-default namespaces are in play, so we need a + # separate tag stack to know when they end. 
+ self.nsmaps.append(None) + elif len(nsmap) > 0: + # A new namespace mapping has come into play. + inverted_nsmap = dict((value, key) for key, value in list(nsmap.items())) + self.nsmaps.append(inverted_nsmap) + # Also treat the namespace mapping as a set of attributes on the + # tag, so we can recreate it later. + attrs = attrs.copy() + for prefix, namespace in list(nsmap.items()): + attribute = NamespacedAttribute( + "xmlns", prefix, "http://www.w3.org/2000/xmlns/") + attrs[attribute] = namespace + + # Namespaces are in play. Find any attributes that came in + # from lxml with namespaces attached to their names, and + # turn then into NamespacedAttribute objects. + new_attrs = {} + for attr, value in list(attrs.items()): + namespace, attr = self._getNsTag(attr) + if namespace is None: + new_attrs[attr] = value + else: + nsprefix = self._prefix_for_namespace(namespace) + attr = NamespacedAttribute(nsprefix, attr, namespace) + new_attrs[attr] = value + attrs = new_attrs + + namespace, name = self._getNsTag(name) + nsprefix = self._prefix_for_namespace(namespace) + self.soup.handle_starttag(name, namespace, nsprefix, attrs) + + def _prefix_for_namespace(self, namespace): + """Find the currently active prefix for the given namespace.""" + if namespace is None: + return None + for inverted_nsmap in reversed(self.nsmaps): + if inverted_nsmap is not None and namespace in inverted_nsmap: + return inverted_nsmap[namespace] + return None + + def end(self, name): + self.soup.endData() + completed_tag = self.soup.tagStack[-1] + namespace, name = self._getNsTag(name) + nsprefix = None + if namespace is not None: + for inverted_nsmap in reversed(self.nsmaps): + if inverted_nsmap is not None and namespace in inverted_nsmap: + nsprefix = inverted_nsmap[namespace] + break + self.soup.handle_endtag(name, nsprefix) + if len(self.nsmaps) > 1: + # This tag, or one of its parents, introduced a namespace + # mapping, so pop it off the stack. 
+ self.nsmaps.pop() + + def pi(self, target, data): + self.soup.endData() + self.soup.handle_data(target + ' ' + data) + self.soup.endData(self.processing_instruction_class) + + def data(self, content): + self.soup.handle_data(content) + + def doctype(self, name, pubid, system): + self.soup.endData() + doctype = Doctype.for_name_and_ids(name, pubid, system) + self.soup.object_was_parsed(doctype) + + def comment(self, content): + "Handle comments as Comment objects." + self.soup.endData() + self.soup.handle_data(content) + self.soup.endData(Comment) + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" + return '\n%s' % fragment + + +class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): + + NAME = LXML + ALTERNATE_NAMES = ["lxml-html"] + + features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE] + is_xml = False + processing_instruction_class = ProcessingInstruction + + def default_parser(self, encoding): + return etree.HTMLParser + + def feed(self, markup): + encoding = self.soup.original_encoding + try: + self.parser = self.parser_for(encoding) + self.parser.feed(markup) + self.parser.close() + except (UnicodeDecodeError, LookupError, etree.ParserError) as e: + raise ParserRejectedMarkup(str(e)) + + + def test_fragment_to_document(self, fragment): + """See `TreeBuilder`.""" + return '%s' % fragment diff --git a/ext3/bs4/dammit.py b/ext3/bs4/dammit.py new file mode 100644 index 0000000000..ae6d4ad81d --- /dev/null +++ b/ext3/bs4/dammit.py @@ -0,0 +1,842 @@ +# -*- coding: utf-8 -*- +"""Beautiful Soup bonus library: Unicode, Dammit + +This library converts a bytestream to Unicode through any means +necessary. It is heavily based on code from Mark Pilgrim's Universal +Feed Parser. It works best on XML and HTML, but it does not rewrite the +XML or HTML to reflect a new encoding; that's the tree builder's job. +""" +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+__license__ = "MIT" + +import codecs +from html.entities import codepoint2name +import re +import logging +import string + +# Import a library to autodetect character encodings. +chardet_type = None +try: + # First try the fast C implementation. + # PyPI package: cchardet + import cchardet + def chardet_dammit(s): + return cchardet.detect(s)['encoding'] +except ImportError: + try: + # Fall back to the pure Python implementation + # Debian package: python-chardet + # PyPI package: chardet + import chardet + def chardet_dammit(s): + return chardet.detect(s)['encoding'] + #import chardet.constants + #chardet.constants._debug = 1 + except ImportError: + # No chardet available. + def chardet_dammit(s): + return None + +# Available from http://cjkpython.i18n.org/. +try: + import iconv_codec +except ImportError: + pass + +xml_encoding_re = re.compile( + '^<\\?.*encoding=[\'"](.*?)[\'"].*\\?>'.encode(), re.I) +html_meta_re = re.compile( + '<\\s*meta[^>]+charset\\s*=\\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I) + +class EntitySubstitution(object): + + """Substitute XML or HTML entities for the corresponding characters.""" + + def _populate_class_variables(): + lookup = {} + reverse_lookup = {} + characters_for_re = [] + for codepoint, name in list(codepoint2name.items()): + character = chr(codepoint) + if codepoint != 34: + # There's no point in turning the quotation mark into + # ", unless it happens within an attribute value, which + # is handled elsewhere. + characters_for_re.append(character) + lookup[character] = name + # But we do want to turn " into the quotation mark. 
+ reverse_lookup[name] = character + re_definition = "[%s]" % "".join(characters_for_re) + return lookup, reverse_lookup, re.compile(re_definition) + (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER, + CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables() + + CHARACTER_TO_XML_ENTITY = { + "'": "apos", + '"': "quot", + "&": "amp", + "<": "lt", + ">": "gt", + } + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + "&(?!#\\d+;|#x[0-9a-fA-F]+;|\\w+;)" + ")") + + AMPERSAND_OR_BRACKET = re.compile("([<>&])") + + @classmethod + def _substitute_html_entity(cls, matchobj): + entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0)) + return "&%s;" % entity + + @classmethod + def _substitute_xml_entity(cls, matchobj): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)] + return "&%s;" % entity + + @classmethod + def quoted_attribute_value(self, value): + """Make a value into a quoted XML attribute, possibly escaping it. + + Most strings will be quoted using double quotes. + + Bob's Bar -> "Bob's Bar" + + If a string contains double quotes, it will be quoted using + single quotes. + + Welcome to "my bar" -> 'Welcome to "my bar"' + + If a string contains both single and double quotes, the + double quotes will be escaped, and the string will be quoted + using double quotes. + + Welcome to "Bob's Bar" -> "Welcome to "Bob's bar" + """ + quote_with = '"' + if '"' in value: + if "'" in value: + # The string contains both single and double + # quotes. Turn the double quotes into + # entities. We quote the double quotes rather than + # the single quotes because the entity name is + # """ whether this is HTML or XML. If we + # quoted the single quotes, we'd have to decide + # between ' and &squot;. + replace_with = """ + value = value.replace('"', replace_with) + else: + # There are double quotes but no single quotes. 
+ # We can use single quotes to quote the attribute. + quote_with = "'" + return quote_with + value + quote_with + + @classmethod + def substitute_xml(cls, value, make_quoted_attribute=False): + """Substitute XML entities for special XML characters. + + :param value: A string to be substituted. The less-than sign + will become <, the greater-than sign will become >, + and any ampersands will become &. If you want ampersands + that appear to be part of an entity definition to be left + alone, use substitute_xml_containing_entities() instead. + + :param make_quoted_attribute: If True, then the string will be + quoted, as befits an attribute value. + """ + # Escape angle brackets and ampersands. + value = cls.AMPERSAND_OR_BRACKET.sub( + cls._substitute_xml_entity, value) + + if make_quoted_attribute: + value = cls.quoted_attribute_value(value) + return value + + @classmethod + def substitute_xml_containing_entities( + cls, value, make_quoted_attribute=False): + """Substitute XML entities for special XML characters. + + :param value: A string to be substituted. The less-than sign will + become <, the greater-than sign will become >, and any + ampersands that are not part of an entity defition will + become &. + + :param make_quoted_attribute: If True, then the string will be + quoted, as befits an attribute value. + """ + # Escape angle brackets, and ampersands that aren't part of + # entities. + value = cls.BARE_AMPERSAND_OR_BRACKET.sub( + cls._substitute_xml_entity, value) + + if make_quoted_attribute: + value = cls.quoted_attribute_value(value) + return value + + @classmethod + def substitute_html(cls, s): + """Replace certain Unicode characters with named HTML entities. + + This differs from data.encode(encoding, 'xmlcharrefreplace') + in that the goal is to make the result more readable (to those + with ASCII displays) rather than to recover from + errors. 
There's absolutely nothing wrong with a UTF-8 string + containg a LATIN SMALL LETTER E WITH ACUTE, but replacing that + character with "é" will make it more readable to some + people. + """ + return cls.CHARACTER_TO_HTML_ENTITY_RE.sub( + cls._substitute_html_entity, s) + + +class EncodingDetector: + """Suggests a number of possible encodings for a bytestring. + + Order of precedence: + + 1. Encodings you specifically tell EncodingDetector to try first + (the override_encodings argument to the constructor). + + 2. An encoding declared within the bytestring itself, either in an + XML declaration (if the bytestring is to be interpreted as an XML + document), or in a tag (if the bytestring is to be + interpreted as an HTML document.) + + 3. An encoding detected through textual analysis by chardet, + cchardet, or a similar external library. + + 4. UTF-8. + + 5. Windows-1252. + """ + def __init__(self, markup, override_encodings=None, is_html=False, + exclude_encodings=None): + self.override_encodings = override_encodings or [] + exclude_encodings = exclude_encodings or [] + self.exclude_encodings = set([x.lower() for x in exclude_encodings]) + self.chardet_encoding = None + self.is_html = is_html + self.declared_encoding = None + + # First order of business: strip a byte-order mark. + self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup) + + def _usable(self, encoding, tried): + if encoding is not None: + encoding = encoding.lower() + if encoding in self.exclude_encodings: + return False + if encoding not in tried: + tried.add(encoding) + return True + return False + + @property + def encodings(self): + """Yield a number of encodings that might work for this markup.""" + tried = set() + for e in self.override_encodings: + if self._usable(e, tried): + yield e + + # Did the document originally start with a byte-order mark + # that indicated its encoding? 
+ if self._usable(self.sniffed_encoding, tried): + yield self.sniffed_encoding + + # Look within the document for an XML or HTML encoding + # declaration. + if self.declared_encoding is None: + self.declared_encoding = self.find_declared_encoding( + self.markup, self.is_html) + if self._usable(self.declared_encoding, tried): + yield self.declared_encoding + + # Use third-party character set detection to guess at the + # encoding. + if self.chardet_encoding is None: + self.chardet_encoding = chardet_dammit(self.markup) + if self._usable(self.chardet_encoding, tried): + yield self.chardet_encoding + + # As a last-ditch effort, try utf-8 and windows-1252. + for e in ('utf-8', 'windows-1252'): + if self._usable(e, tried): + yield e + + @classmethod + def strip_byte_order_mark(cls, data): + """If a byte-order mark is present, strip it and return the encoding it implies.""" + encoding = None + if isinstance(data, str): + # Unicode data cannot have a byte-order mark. + return data, encoding + if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == b'\xef\xbb\xbf': + encoding = 'utf-8' + data = data[3:] + elif data[:4] == b'\x00\x00\xfe\xff': + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == b'\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] + return data, encoding + + @classmethod + def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False): + """Given a document, tries to find its declared encoding. + + An XML encoding is declared at the beginning of the document. + + An HTML encoding is declared in a tag, hopefully near the + beginning of the document. 
+ """ + if search_entire_document: + xml_endpos = html_endpos = len(markup) + else: + xml_endpos = 1024 + html_endpos = max(2048, int(len(markup) * 0.05)) + + declared_encoding = None + declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos) + if not declared_encoding_match and is_html: + declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos) + if declared_encoding_match is not None: + declared_encoding = declared_encoding_match.groups()[0].decode( + 'ascii', 'replace') + if declared_encoding: + return declared_encoding.lower() + return None + +class UnicodeDammit: + """A class for detecting the encoding of a *ML document and + converting it to a Unicode string. If the source encoding is + windows-1252, can replace MS smart quotes with their HTML or XML + equivalents.""" + + # This dictionary maps commonly seen values for "charset" in HTML + # meta tags to the corresponding Python codec names. It only covers + # values that aren't in Python's aliases and can't be determined + # by the heuristics in find_codec. + CHARSET_ALIASES = {"macintosh": "mac-roman", + "x-sjis": "shift-jis"} + + ENCODINGS_WITH_SMART_QUOTES = [ + "windows-1252", + "iso-8859-1", + "iso-8859-2", + ] + + def __init__(self, markup, override_encodings=[], + smart_quotes_to=None, is_html=False, exclude_encodings=[]): + self.smart_quotes_to = smart_quotes_to + self.tried_encodings = [] + self.contains_replacement_characters = False + self.is_html = is_html + self.log = logging.getLogger(__name__) + self.detector = EncodingDetector( + markup, override_encodings, is_html, exclude_encodings) + + # Short-circuit if the data is in Unicode to begin with. + if isinstance(markup, str) or markup == '': + self.markup = markup + self.unicode_markup = str(markup) + self.original_encoding = None + return + + # The encoding detector may have stripped a byte-order mark. + # Use the stripped markup from this point on. 
+ self.markup = self.detector.markup + + u = None + for encoding in self.detector.encodings: + markup = self.detector.markup + u = self._convert_from(encoding) + if u is not None: + break + + if not u: + # None of the encodings worked. As an absolute last resort, + # try them again with character replacement. + + for encoding in self.detector.encodings: + if encoding != "ascii": + u = self._convert_from(encoding, "replace") + if u is not None: + self.log.warning( + "Some characters could not be decoded, and were " + "replaced with REPLACEMENT CHARACTER." + ) + self.contains_replacement_characters = True + break + + # If none of that worked, we could at this point force it to + # ASCII, but that would destroy so much data that I think + # giving up is better. + self.unicode_markup = u + if not u: + self.original_encoding = None + + def _sub_ms_char(self, match): + """Changes a MS smart quote character to an XML or HTML + entity, or an ASCII character.""" + orig = match.group(1) + if self.smart_quotes_to == 'ascii': + sub = self.MS_CHARS_TO_ASCII.get(orig).encode() + else: + sub = self.MS_CHARS.get(orig) + if type(sub) == tuple: + if self.smart_quotes_to == 'xml': + sub = '&#x'.encode() + sub[1].encode() + ';'.encode() + else: + sub = '&'.encode() + sub[0].encode() + ';'.encode() + else: + sub = sub.encode() + return sub + + def _convert_from(self, proposed, errors="strict"): + proposed = self.find_codec(proposed) + if not proposed or (proposed, errors) in self.tried_encodings: + return None + self.tried_encodings.append((proposed, errors)) + markup = self.markup + # Convert smart quotes to HTML if coming from an encoding + # that might have them. 
+ if (self.smart_quotes_to is not None + and proposed in self.ENCODINGS_WITH_SMART_QUOTES): + smart_quotes_re = b"([\x80-\x9f])" + smart_quotes_compiled = re.compile(smart_quotes_re) + markup = smart_quotes_compiled.sub(self._sub_ms_char, markup) + + try: + #print "Trying to convert document to %s (errors=%s)" % ( + # proposed, errors) + u = self._to_unicode(markup, proposed, errors) + self.markup = u + self.original_encoding = proposed + except Exception as e: + #print "That didn't work!" + #print e + return None + #print "Correct encoding: %s" % proposed + return self.markup + + def _to_unicode(self, data, encoding, errors="strict"): + '''Given a string and its encoding, decodes the string into Unicode. + %encoding is a string recognized by encodings.aliases''' + return str(data, encoding, errors) + + @property + def declared_html_encoding(self): + if not self.is_html: + return None + return self.detector.declared_encoding + + def find_codec(self, charset): + value = (self._codec(self.CHARSET_ALIASES.get(charset, charset)) + or (charset and self._codec(charset.replace("-", ""))) + or (charset and self._codec(charset.replace("-", "_"))) + or (charset and charset.lower()) + or charset + ) + if value: + return value.lower() + return None + + def _codec(self, charset): + if not charset: + return charset + codec = None + try: + codecs.lookup(charset) + codec = charset + except (LookupError, ValueError): + pass + return codec + + + # A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities. 
+ MS_CHARS = {b'\x80': ('euro', '20AC'), + b'\x81': ' ', + b'\x82': ('sbquo', '201A'), + b'\x83': ('fnof', '192'), + b'\x84': ('bdquo', '201E'), + b'\x85': ('hellip', '2026'), + b'\x86': ('dagger', '2020'), + b'\x87': ('Dagger', '2021'), + b'\x88': ('circ', '2C6'), + b'\x89': ('permil', '2030'), + b'\x8A': ('Scaron', '160'), + b'\x8B': ('lsaquo', '2039'), + b'\x8C': ('OElig', '152'), + b'\x8D': '?', + b'\x8E': ('#x17D', '17D'), + b'\x8F': '?', + b'\x90': '?', + b'\x91': ('lsquo', '2018'), + b'\x92': ('rsquo', '2019'), + b'\x93': ('ldquo', '201C'), + b'\x94': ('rdquo', '201D'), + b'\x95': ('bull', '2022'), + b'\x96': ('ndash', '2013'), + b'\x97': ('mdash', '2014'), + b'\x98': ('tilde', '2DC'), + b'\x99': ('trade', '2122'), + b'\x9a': ('scaron', '161'), + b'\x9b': ('rsaquo', '203A'), + b'\x9c': ('oelig', '153'), + b'\x9d': '?', + b'\x9e': ('#x17E', '17E'), + b'\x9f': ('Yuml', ''),} + + # A parochial partial mapping of ISO-Latin-1 to ASCII. Contains + # horrors like stripping diacritical marks to turn á into a, but also + # contains non-horrors like turning “ into ". + MS_CHARS_TO_ASCII = { + b'\x80' : 'EUR', + b'\x81' : ' ', + b'\x82' : ',', + b'\x83' : 'f', + b'\x84' : ',,', + b'\x85' : '...', + b'\x86' : '+', + b'\x87' : '++', + b'\x88' : '^', + b'\x89' : '%', + b'\x8a' : 'S', + b'\x8b' : '<', + b'\x8c' : 'OE', + b'\x8d' : '?', + b'\x8e' : 'Z', + b'\x8f' : '?', + b'\x90' : '?', + b'\x91' : "'", + b'\x92' : "'", + b'\x93' : '"', + b'\x94' : '"', + b'\x95' : '*', + b'\x96' : '-', + b'\x97' : '--', + b'\x98' : '~', + b'\x99' : '(TM)', + b'\x9a' : 's', + b'\x9b' : '>', + b'\x9c' : 'oe', + b'\x9d' : '?', + b'\x9e' : 'z', + b'\x9f' : 'Y', + b'\xa0' : ' ', + b'\xa1' : '!', + b'\xa2' : 'c', + b'\xa3' : 'GBP', + b'\xa4' : '$', #This approximation is especially parochial--this is the + #generic currency symbol. 
+ b'\xa5' : 'YEN', + b'\xa6' : '|', + b'\xa7' : 'S', + b'\xa8' : '..', + b'\xa9' : '', + b'\xaa' : '(th)', + b'\xab' : '<<', + b'\xac' : '!', + b'\xad' : ' ', + b'\xae' : '(R)', + b'\xaf' : '-', + b'\xb0' : 'o', + b'\xb1' : '+-', + b'\xb2' : '2', + b'\xb3' : '3', + b'\xb4' : ("'", 'acute'), + b'\xb5' : 'u', + b'\xb6' : 'P', + b'\xb7' : '*', + b'\xb8' : ',', + b'\xb9' : '1', + b'\xba' : '(th)', + b'\xbb' : '>>', + b'\xbc' : '1/4', + b'\xbd' : '1/2', + b'\xbe' : '3/4', + b'\xbf' : '?', + b'\xc0' : 'A', + b'\xc1' : 'A', + b'\xc2' : 'A', + b'\xc3' : 'A', + b'\xc4' : 'A', + b'\xc5' : 'A', + b'\xc6' : 'AE', + b'\xc7' : 'C', + b'\xc8' : 'E', + b'\xc9' : 'E', + b'\xca' : 'E', + b'\xcb' : 'E', + b'\xcc' : 'I', + b'\xcd' : 'I', + b'\xce' : 'I', + b'\xcf' : 'I', + b'\xd0' : 'D', + b'\xd1' : 'N', + b'\xd2' : 'O', + b'\xd3' : 'O', + b'\xd4' : 'O', + b'\xd5' : 'O', + b'\xd6' : 'O', + b'\xd7' : '*', + b'\xd8' : 'O', + b'\xd9' : 'U', + b'\xda' : 'U', + b'\xdb' : 'U', + b'\xdc' : 'U', + b'\xdd' : 'Y', + b'\xde' : 'b', + b'\xdf' : 'B', + b'\xe0' : 'a', + b'\xe1' : 'a', + b'\xe2' : 'a', + b'\xe3' : 'a', + b'\xe4' : 'a', + b'\xe5' : 'a', + b'\xe6' : 'ae', + b'\xe7' : 'c', + b'\xe8' : 'e', + b'\xe9' : 'e', + b'\xea' : 'e', + b'\xeb' : 'e', + b'\xec' : 'i', + b'\xed' : 'i', + b'\xee' : 'i', + b'\xef' : 'i', + b'\xf0' : 'o', + b'\xf1' : 'n', + b'\xf2' : 'o', + b'\xf3' : 'o', + b'\xf4' : 'o', + b'\xf5' : 'o', + b'\xf6' : 'o', + b'\xf7' : '/', + b'\xf8' : 'o', + b'\xf9' : 'u', + b'\xfa' : 'u', + b'\xfb' : 'u', + b'\xfc' : 'u', + b'\xfd' : 'y', + b'\xfe' : 'b', + b'\xff' : 'y', + } + + # A map used when removing rogue Windows-1252/ISO-8859-1 + # characters in otherwise UTF-8 documents. + # + # Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in + # Windows-1252. 
+ WINDOWS_1252_TO_UTF8 = { + 0x80 : b'\xe2\x82\xac', # € + 0x82 : b'\xe2\x80\x9a', # ‚ + 0x83 : b'\xc6\x92', # ƒ + 0x84 : b'\xe2\x80\x9e', # „ + 0x85 : b'\xe2\x80\xa6', # … + 0x86 : b'\xe2\x80\xa0', # † + 0x87 : b'\xe2\x80\xa1', # ‡ + 0x88 : b'\xcb\x86', # ˆ + 0x89 : b'\xe2\x80\xb0', # ‰ + 0x8a : b'\xc5\xa0', # Š + 0x8b : b'\xe2\x80\xb9', # ‹ + 0x8c : b'\xc5\x92', # Œ + 0x8e : b'\xc5\xbd', # Ž + 0x91 : b'\xe2\x80\x98', # ‘ + 0x92 : b'\xe2\x80\x99', # ’ + 0x93 : b'\xe2\x80\x9c', # “ + 0x94 : b'\xe2\x80\x9d', # ” + 0x95 : b'\xe2\x80\xa2', # • + 0x96 : b'\xe2\x80\x93', # – + 0x97 : b'\xe2\x80\x94', # — + 0x98 : b'\xcb\x9c', # ˜ + 0x99 : b'\xe2\x84\xa2', # ™ + 0x9a : b'\xc5\xa1', # š + 0x9b : b'\xe2\x80\xba', # › + 0x9c : b'\xc5\x93', # œ + 0x9e : b'\xc5\xbe', # ž + 0x9f : b'\xc5\xb8', # Ÿ + 0xa0 : b'\xc2\xa0', #   + 0xa1 : b'\xc2\xa1', # ¡ + 0xa2 : b'\xc2\xa2', # ¢ + 0xa3 : b'\xc2\xa3', # £ + 0xa4 : b'\xc2\xa4', # ¤ + 0xa5 : b'\xc2\xa5', # ¥ + 0xa6 : b'\xc2\xa6', # ¦ + 0xa7 : b'\xc2\xa7', # § + 0xa8 : b'\xc2\xa8', # ¨ + 0xa9 : b'\xc2\xa9', # © + 0xaa : b'\xc2\xaa', # ª + 0xab : b'\xc2\xab', # « + 0xac : b'\xc2\xac', # ¬ + 0xad : b'\xc2\xad', # ­ + 0xae : b'\xc2\xae', # ® + 0xaf : b'\xc2\xaf', # ¯ + 0xb0 : b'\xc2\xb0', # ° + 0xb1 : b'\xc2\xb1', # ± + 0xb2 : b'\xc2\xb2', # ² + 0xb3 : b'\xc2\xb3', # ³ + 0xb4 : b'\xc2\xb4', # ´ + 0xb5 : b'\xc2\xb5', # µ + 0xb6 : b'\xc2\xb6', # ¶ + 0xb7 : b'\xc2\xb7', # · + 0xb8 : b'\xc2\xb8', # ¸ + 0xb9 : b'\xc2\xb9', # ¹ + 0xba : b'\xc2\xba', # º + 0xbb : b'\xc2\xbb', # » + 0xbc : b'\xc2\xbc', # ¼ + 0xbd : b'\xc2\xbd', # ½ + 0xbe : b'\xc2\xbe', # ¾ + 0xbf : b'\xc2\xbf', # ¿ + 0xc0 : b'\xc3\x80', # À + 0xc1 : b'\xc3\x81', # Á + 0xc2 : b'\xc3\x82', #  + 0xc3 : b'\xc3\x83', # à + 0xc4 : b'\xc3\x84', # Ä + 0xc5 : b'\xc3\x85', # Å + 0xc6 : b'\xc3\x86', # Æ + 0xc7 : b'\xc3\x87', # Ç + 0xc8 : b'\xc3\x88', # È + 0xc9 : b'\xc3\x89', # É + 0xca : b'\xc3\x8a', # Ê + 0xcb : b'\xc3\x8b', # Ë + 0xcc : b'\xc3\x8c', # Ì + 0xcd : b'\xc3\x8d', # Í + 0xce 
: b'\xc3\x8e', # Î + 0xcf : b'\xc3\x8f', # Ï + 0xd0 : b'\xc3\x90', # Ð + 0xd1 : b'\xc3\x91', # Ñ + 0xd2 : b'\xc3\x92', # Ò + 0xd3 : b'\xc3\x93', # Ó + 0xd4 : b'\xc3\x94', # Ô + 0xd5 : b'\xc3\x95', # Õ + 0xd6 : b'\xc3\x96', # Ö + 0xd7 : b'\xc3\x97', # × + 0xd8 : b'\xc3\x98', # Ø + 0xd9 : b'\xc3\x99', # Ù + 0xda : b'\xc3\x9a', # Ú + 0xdb : b'\xc3\x9b', # Û + 0xdc : b'\xc3\x9c', # Ü + 0xdd : b'\xc3\x9d', # Ý + 0xde : b'\xc3\x9e', # Þ + 0xdf : b'\xc3\x9f', # ß + 0xe0 : b'\xc3\xa0', # à + 0xe1 : b'\xa1', # á + 0xe2 : b'\xc3\xa2', # â + 0xe3 : b'\xc3\xa3', # ã + 0xe4 : b'\xc3\xa4', # ä + 0xe5 : b'\xc3\xa5', # å + 0xe6 : b'\xc3\xa6', # æ + 0xe7 : b'\xc3\xa7', # ç + 0xe8 : b'\xc3\xa8', # è + 0xe9 : b'\xc3\xa9', # é + 0xea : b'\xc3\xaa', # ê + 0xeb : b'\xc3\xab', # ë + 0xec : b'\xc3\xac', # ì + 0xed : b'\xc3\xad', # í + 0xee : b'\xc3\xae', # î + 0xef : b'\xc3\xaf', # ï + 0xf0 : b'\xc3\xb0', # ð + 0xf1 : b'\xc3\xb1', # ñ + 0xf2 : b'\xc3\xb2', # ò + 0xf3 : b'\xc3\xb3', # ó + 0xf4 : b'\xc3\xb4', # ô + 0xf5 : b'\xc3\xb5', # õ + 0xf6 : b'\xc3\xb6', # ö + 0xf7 : b'\xc3\xb7', # ÷ + 0xf8 : b'\xc3\xb8', # ø + 0xf9 : b'\xc3\xb9', # ù + 0xfa : b'\xc3\xba', # ú + 0xfb : b'\xc3\xbb', # û + 0xfc : b'\xc3\xbc', # ü + 0xfd : b'\xc3\xbd', # ý + 0xfe : b'\xc3\xbe', # þ + } + + MULTIBYTE_MARKERS_AND_SIZES = [ + (0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF + (0xe0, 0xef, 3), # 3-byte characters start with E0-EF + (0xf0, 0xf4, 4), # 4-byte characters start with F0-F4 + ] + + FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0] + LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1] + + @classmethod + def detwingle(cls, in_bytes, main_encoding="utf8", + embedded_encoding="windows-1252"): + """Fix characters from one encoding embedded in some other encoding. + + Currently the only situation supported is Windows-1252 (or its + subset ISO-8859-1), embedded in UTF-8. + + The input must be a bytestring. If you've already converted + the document to Unicode, you're too late. 
+ + The output is a bytestring in which `embedded_encoding` + characters have been converted to their `main_encoding` + equivalents. + """ + if embedded_encoding.replace('_', '-').lower() not in ( + 'windows-1252', 'windows_1252'): + raise NotImplementedError( + "Windows-1252 and ISO-8859-1 are the only currently supported " + "embedded encodings.") + + if main_encoding.lower() not in ('utf8', 'utf-8'): + raise NotImplementedError( + "UTF-8 is the only currently supported main encoding.") + + byte_chunks = [] + + chunk_start = 0 + pos = 0 + while pos < len(in_bytes): + byte = in_bytes[pos] + if not isinstance(byte, int): + # Python 2.x + byte = ord(byte) + if (byte >= cls.FIRST_MULTIBYTE_MARKER + and byte <= cls.LAST_MULTIBYTE_MARKER): + # This is the start of a UTF-8 multibyte character. Skip + # to the end. + for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES: + if byte >= start and byte <= end: + pos += size + break + elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8: + # We found a Windows-1252 character! + # Save the string up to this point as a chunk. + byte_chunks.append(in_bytes[chunk_start:pos]) + + # Now translate the Windows-1252 character into UTF-8 + # and add it as another, one-byte chunk. + byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte]) + pos += 1 + chunk_start = pos + else: + # Go on to the next character. + pos += 1 + if chunk_start == 0: + # The string is unchanged. + return in_bytes + else: + # Store the final chunk. + byte_chunks.append(in_bytes[chunk_start:]) + return b''.join(byte_chunks) + diff --git a/ext3/bs4/diagnose.py b/ext3/bs4/diagnose.py new file mode 100644 index 0000000000..f97c77992c --- /dev/null +++ b/ext3/bs4/diagnose.py @@ -0,0 +1,225 @@ +"""Diagnostic functions, mainly for use when doing tech support.""" + +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+__license__ = "MIT" + +import cProfile +from io import StringIO +from html.parser import HTMLParser +import bs4 +from bs4 import BeautifulSoup, __version__ +from bs4.builder import builder_registry + +import os +import pstats +import random +import tempfile +import time +import traceback +import sys +import cProfile + +def diagnose(data): + """Diagnostic suite for isolating common problems.""" + print("Diagnostic running on Beautiful Soup %s" % __version__) + print("Python version %s" % sys.version) + + basic_parsers = ["html.parser", "html5lib", "lxml"] + for name in basic_parsers: + for builder in builder_registry.builders: + if name in builder.features: + break + else: + basic_parsers.remove(name) + print(( + "I noticed that %s is not installed. Installing it may help." % + name)) + + if 'lxml' in basic_parsers: + basic_parsers.append("lxml-xml") + try: + from lxml import etree + print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))) + except ImportError as e: + print ( + "lxml is not installed or couldn't be imported.") + + + if 'html5lib' in basic_parsers: + try: + import html5lib + print("Found html5lib version %s" % html5lib.__version__) + except ImportError as e: + print ( + "html5lib is not installed or couldn't be imported.") + + if hasattr(data, 'read'): + data = data.read() + elif data.startswith("http:") or data.startswith("https:"): + print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data) + print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.") + return + else: + try: + if os.path.exists(data): + print('"%s" looks like a filename. Reading data from the file.' % data) + with open(data) as fp: + data = fp.read() + except ValueError: + # This can happen on some platforms when the 'filename' is + # too long. Assume it's data and not a filename. 
+ pass + print() + + for parser in basic_parsers: + print("Trying to parse your markup with %s" % parser) + success = False + try: + soup = BeautifulSoup(data, features=parser) + success = True + except Exception as e: + print("%s could not parse the markup." % parser) + traceback.print_exc() + if success: + print("Here's what %s did with the markup:" % parser) + print(soup.prettify()) + + print("-" * 80) + +def lxml_trace(data, html=True, **kwargs): + """Print out the lxml events that occur during parsing. + + This lets you see how lxml parses a document when no Beautiful + Soup code is running. + """ + from lxml import etree + for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): + print(("%s, %4s, %s" % (event, element.tag, element.text))) + +class AnnouncingParser(HTMLParser): + """Announces HTMLParser parse events, without doing anything else.""" + + def _p(self, s): + print(s) + + def handle_starttag(self, name, attrs): + self._p("%s START" % name) + + def handle_endtag(self, name): + self._p("%s END" % name) + + def handle_data(self, data): + self._p("%s DATA" % data) + + def handle_charref(self, name): + self._p("%s CHARREF" % name) + + def handle_entityref(self, name): + self._p("%s ENTITYREF" % name) + + def handle_comment(self, data): + self._p("%s COMMENT" % data) + + def handle_decl(self, data): + self._p("%s DECL" % data) + + def unknown_decl(self, data): + self._p("%s UNKNOWN-DECL" % data) + + def handle_pi(self, data): + self._p("%s PI" % data) + +def htmlparser_trace(data): + """Print out the HTMLParser events that occur during parsing. + + This lets you see how HTMLParser parses a document when no + Beautiful Soup code is running. + """ + parser = AnnouncingParser() + parser.feed(data) + +_vowels = "aeiou" +_consonants = "bcdfghjklmnpqrstvwxyz" + +def rword(length=5): + "Generate a random word-like string." 
+ s = '' + for i in range(length): + if i % 2 == 0: + t = _consonants + else: + t = _vowels + s += random.choice(t) + return s + +def rsentence(length=4): + "Generate a random sentence-like string." + return " ".join(rword(random.randint(4,9)) for i in range(length)) + +def rdoc(num_elements=1000): + """Randomly generate an invalid HTML document.""" + tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table'] + elements = [] + for i in range(num_elements): + choice = random.randint(0,3) + if choice == 0: + # New tag. + tag_name = random.choice(tag_names) + elements.append("<%s>" % tag_name) + elif choice == 1: + elements.append(rsentence(random.randint(1,4))) + elif choice == 2: + # Close a tag. + tag_name = random.choice(tag_names) + elements.append("" % tag_name) + return "" + "\n".join(elements) + "" + +def benchmark_parsers(num_elements=100000): + """Very basic head-to-head performance benchmark.""" + print("Comparative parser benchmark on Beautiful Soup %s" % __version__) + data = rdoc(num_elements) + print("Generated a large invalid HTML document (%d bytes)." % len(data)) + + for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: + success = False + try: + a = time.time() + soup = BeautifulSoup(data, parser) + b = time.time() + success = True + except Exception as e: + print("%s could not parse the markup." % parser) + traceback.print_exc() + if success: + print("BS4+%s parsed the markup in %.2fs." % (parser, b-a)) + + from lxml import etree + a = time.time() + etree.HTML(data) + b = time.time() + print("Raw lxml parsed the markup in %.2fs." % (b-a)) + + import html5lib + parser = html5lib.HTMLParser() + a = time.time() + parser.parse(data) + b = time.time() + print("Raw html5lib parsed the markup in %.2fs." 
% (b-a)) + +def profile(num_elements=100000, parser="lxml"): + + filehandle = tempfile.NamedTemporaryFile() + filename = filehandle.name + + data = rdoc(num_elements) + vars = dict(bs4=bs4, data=data, parser=parser) + cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename) + + stats = pstats.Stats(filename) + # stats.strip_dirs() + stats.sort_stats("cumulative") + stats.print_stats('_html5lib|bs4', 50) + +if __name__ == '__main__': + diagnose(sys.stdin.read()) diff --git a/ext3/bs4/element.py b/ext3/bs4/element.py new file mode 100644 index 0000000000..d938905294 --- /dev/null +++ b/ext3/bs4/element.py @@ -0,0 +1,1885 @@ +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. +__license__ = "MIT" + +try: + from collections.abc import Callable # Python 3.6 +except ImportError as e: + from collections import Callable +import re +import shlex +import sys +import warnings +from bs4.dammit import EntitySubstitution + +DEFAULT_OUTPUT_ENCODING = "utf-8" +PY3K = (sys.version_info[0] > 2) + +whitespace_re = re.compile(r"\s+") + +def _alias(attr): + """Alias one attribute name to another for backward compatibility""" + @property + def alias(self): + return getattr(self, attr) + + @alias.setter + def alias(self): + return setattr(self, attr) + return alias + + +class NamespacedAttribute(str): + + def __new__(cls, prefix, name, namespace=None): + if name is None: + obj = str.__new__(cls, prefix) + elif prefix is None: + # Not really namespaced. + obj = str.__new__(cls, name) + else: + obj = str.__new__(cls, prefix + ":" + name) + obj.prefix = prefix + obj.name = name + obj.namespace = namespace + return obj + +class AttributeValueWithCharsetSubstitution(str): + """A stand-in object for a character encoding specified in HTML.""" + +class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): + """A generic stand-in for the value of a meta tag's 'charset' attribute. 
+ + When Beautiful Soup parses the markup '', the + value of the 'charset' attribute will be one of these objects. + """ + + def __new__(cls, original_value): + obj = str.__new__(cls, original_value) + obj.original_value = original_value + return obj + + def encode(self, encoding): + return encoding + + +class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): + """A generic stand-in for the value of a meta tag's 'content' attribute. + + When Beautiful Soup parses the markup: + + + The value of the 'content' attribute will be one of these objects. + """ + + CHARSET_RE = re.compile(r"((^|;)\s*charset=)([^;]*)", re.M) + + def __new__(cls, original_value): + match = cls.CHARSET_RE.search(original_value) + if match is None: + # No substitution necessary. + return str.__new__(str, original_value) + + obj = str.__new__(cls, original_value) + obj.original_value = original_value + return obj + + def encode(self, encoding): + def rewrite(match): + return match.group(1) + encoding + return self.CHARSET_RE.sub(rewrite, self.original_value) + +class HTMLAwareEntitySubstitution(EntitySubstitution): + + """Entity substitution rules that are aware of some HTML quirks. + + Specifically, the contents of + +Hello, world! + + +''' + soup = self.soup(html) + self.assertEqual("text/javascript", soup.find('script')['type']) + + def test_comment(self): + # Comments are represented as Comment objects. + markup = "

foobaz

" + self.assertSoupEquals(markup) + + soup = self.soup(markup) + comment = soup.find(text="foobar") + self.assertEqual(comment.__class__, Comment) + + # The comment is properly integrated into the tree. + foo = soup.find(text="foo") + self.assertEqual(comment, foo.next_element) + baz = soup.find(text="baz") + self.assertEqual(comment, baz.previous_element) + + def test_preserved_whitespace_in_pre_and_textarea(self): + """Whitespace must be preserved in
 and "
+        self.assertSoupEquals(pre_markup)
+        self.assertSoupEquals(textarea_markup)
+
+        soup = self.soup(pre_markup)
+        self.assertEqual(soup.pre.prettify(), pre_markup)
+
+        soup = self.soup(textarea_markup)
+        self.assertEqual(soup.textarea.prettify(), textarea_markup)
+
+        soup = self.soup("")
+        self.assertEqual(soup.textarea.prettify(), "")
+
+    def test_nested_inline_elements(self):
+        """Inline elements can be nested indefinitely."""
+        b_tag = "Inside a B tag"
+        self.assertSoupEquals(b_tag)
+
+        nested_b_tag = "

A nested tag

" + self.assertSoupEquals(nested_b_tag) + + double_nested_b_tag = "

A doubly nested tag

" + self.assertSoupEquals(nested_b_tag) + + def test_nested_block_level_elements(self): + """Block elements can be nested.""" + soup = self.soup('

Foo

') + blockquote = soup.blockquote + self.assertEqual(blockquote.p.b.string, 'Foo') + self.assertEqual(blockquote.b.string, 'Foo') + + def test_correctly_nested_tables(self): + """One table can go inside another one.""" + markup = ('' + '' + "') + + self.assertSoupEquals( + markup, + '
Here's another table:" + '' + '' + '
foo
Here\'s another table:' + '
foo
' + '
') + + self.assertSoupEquals( + "" + "" + "
Foo
Bar
Baz
") + + def test_deeply_nested_multivalued_attribute(self): + # html5lib can set the attributes of the same tag many times + # as it rearranges the tree. This has caused problems with + # multivalued attributes. + markup = '
' + soup = self.soup(markup) + self.assertEqual(["css"], soup.div.div['class']) + + def test_multivalued_attribute_on_html(self): + # html5lib uses a different API to set the attributes ot the + # tag. This has caused problems with multivalued + # attributes. + markup = '' + soup = self.soup(markup) + self.assertEqual(["a", "b"], soup.html['class']) + + def test_angle_brackets_in_attribute_values_are_escaped(self): + self.assertSoupEquals('', '') + + def test_strings_resembling_character_entity_references(self): + # "&T" and "&p" look like incomplete character entities, but they are + # not. + self.assertSoupEquals( + "

• AT&T is in the s&p 500

", + "

\u2022 AT&T is in the s&p 500

" + ) + + def test_entities_in_foreign_document_encoding(self): + # “ and ” are invalid numeric entities referencing + # Windows-1252 characters. - references a character common + # to Windows-1252 and Unicode, and ☃ references a + # character only found in Unicode. + # + # All of these entities should be converted to Unicode + # characters. + markup = "

“Hello” -☃

" + soup = self.soup(markup) + self.assertEqual("“Hello” -☃", soup.p.string) + + def test_entities_in_attributes_converted_to_unicode(self): + expect = '

' + self.assertSoupEquals('

', expect) + self.assertSoupEquals('

', expect) + self.assertSoupEquals('

', expect) + self.assertSoupEquals('

', expect) + + def test_entities_in_text_converted_to_unicode(self): + expect = '

pi\N{LATIN SMALL LETTER N WITH TILDE}ata

' + self.assertSoupEquals("

piñata

", expect) + self.assertSoupEquals("

piñata

", expect) + self.assertSoupEquals("

piñata

", expect) + self.assertSoupEquals("

piñata

", expect) + + def test_quot_entity_converted_to_quotation_mark(self): + self.assertSoupEquals("

I said "good day!"

", + '

I said "good day!"

') + + def test_out_of_range_entity(self): + expect = "\N{REPLACEMENT CHARACTER}" + self.assertSoupEquals("�", expect) + self.assertSoupEquals("�", expect) + self.assertSoupEquals("�", expect) + + def test_multipart_strings(self): + "Mostly to prevent a recurrence of a bug in the html5lib treebuilder." + soup = self.soup("

\nfoo

") + self.assertEqual("p", soup.h2.string.next_element.name) + self.assertEqual("p", soup.p.name) + self.assertConnectedness(soup) + + def test_empty_element_tags(self): + """Verify consistent handling of empty-element tags, + no matter how they come in through the markup. + """ + self.assertSoupEquals('


', "


") + self.assertSoupEquals('


', "


") + + def test_head_tag_between_head_and_body(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """ + + foo + +""" + soup = self.soup(content) + self.assertNotEqual(None, soup.html.body) + self.assertConnectedness(soup) + + def test_multiple_copies_of_a_tag(self): + "Prevent recurrence of a bug in the html5lib treebuilder." + content = """ + + + + + +""" + soup = self.soup(content) + self.assertConnectedness(soup.article) + + def test_basic_namespaces(self): + """Parsers don't need to *understand* namespaces, but at the + very least they should not choke on namespaces or lose + data.""" + + markup = b'4' + soup = self.soup(markup) + self.assertEqual(markup, soup.encode()) + html = soup.html + self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns']) + self.assertEqual( + 'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml']) + self.assertEqual( + 'http://www.w3.org/2000/svg', soup.html['xmlns:svg']) + + def test_multivalued_attribute_value_becomes_list(self): + markup = b'' + soup = self.soup(markup) + self.assertEqual(['foo', 'bar'], soup.a['class']) + + # + # Generally speaking, tests below this point are more tests of + # Beautiful Soup than tests of the tree builders. But parsers are + # weird, so we run these tests separately for every tree builder + # to detect any differences between them. + # + + def test_can_parse_unicode_document(self): + # A seemingly innocuous document... but it's in Unicode! And + # it contains characters that can't be represented in the + # encoding found in the declaration! The horror! + markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' 
+ soup = self.soup(markup) + self.assertEqual('Sacr\xe9 bleu!', soup.body.string) + + def test_soupstrainer(self): + """Parsers should be able to work with SoupStrainers.""" + strainer = SoupStrainer("b") + soup = self.soup("A bold statement", + parse_only=strainer) + self.assertEqual(soup.decode(), "bold") + + def test_single_quote_attribute_values_become_double_quotes(self): + self.assertSoupEquals("", + '') + + def test_attribute_values_with_nested_quotes_are_left_alone(self): + text = """a""" + self.assertSoupEquals(text) + + def test_attribute_values_with_double_nested_quotes_get_quoted(self): + text = """a""" + soup = self.soup(text) + soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' + self.assertSoupEquals( + soup.foo.decode(), + """a""") + + def test_ampersand_in_attribute_value_gets_escaped(self): + self.assertSoupEquals('', + '') + + self.assertSoupEquals( + 'foo', + 'foo') + + def test_escaped_ampersand_in_attribute_value_is_left_alone(self): + self.assertSoupEquals('') + + def test_entities_in_strings_converted_during_parsing(self): + # Both XML and HTML entities are converted to Unicode characters + # during parsing. + text = "

<<sacré bleu!>>

" + expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

" + self.assertSoupEquals(text, expected) + + def test_smart_quotes_converted_on_the_way_in(self): + # Microsoft smart quotes are converted to Unicode characters during + # parsing. + quote = b"

\x91Foo\x92

" + soup = self.soup(quote) + self.assertEqual( + soup.p.string, + "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") + + def test_non_breaking_spaces_converted_on_the_way_in(self): + soup = self.soup("  ") + self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2) + + def test_entities_converted_on_the_way_out(self): + text = "

<<sacré bleu!>>

" + expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

".encode("utf-8") + soup = self.soup(text) + self.assertEqual(soup.p.encode("utf-8"), expected) + + def test_real_iso_latin_document(self): + # Smoke test of interrelated functionality, using an + # easy-to-understand document. + + # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. + unicode_html = '

Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!

' + + # That's because we're going to encode it into ISO-Latin-1, and use + # that to test. + iso_latin_html = unicode_html.encode("iso-8859-1") + + # Parse the ISO-Latin-1 HTML. + soup = self.soup(iso_latin_html) + # Encode it to UTF-8. + result = soup.encode("utf-8") + + # What do we expect the result to look like? Well, it would + # look like unicode_html, except that the META tag would say + # UTF-8 instead of ISO-Latin-1. + expected = unicode_html.replace("ISO-Latin-1", "utf-8") + + # And, of course, it would be in UTF-8, not Unicode. + expected = expected.encode("utf-8") + + # Ta-da! + self.assertEqual(result, expected) + + def test_real_shift_jis_document(self): + # Smoke test to make sure the parser can handle a document in + # Shift-JIS encoding, without choking. + shift_jis_html = ( + b'
'
+            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
+            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
+            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
+            b'
') + unicode_html = shift_jis_html.decode("shift-jis") + soup = self.soup(unicode_html) + + # Make sure the parse tree is correctly encoded to various + # encodings. + self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8")) + self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp")) + + def test_real_hebrew_document(self): + # A real-world test to make sure we can convert ISO-8859-9 (a + # Hebrew encoding) to UTF-8. + hebrew_document = b'Hebrew (ISO 8859-8) in Visual Directionality

Hebrew (ISO 8859-8) in Visual Directionality

\xed\xe5\xec\xf9' + soup = self.soup( + hebrew_document, from_encoding="iso8859-8") + # Some tree builders call it iso8859-8, others call it iso-8859-9. + # That's not a difference we really care about. + assert soup.original_encoding in ('iso8859-8', 'iso-8859-8') + self.assertEqual( + soup.encode('utf-8'), + hebrew_document.decode("iso8859-8").encode("utf-8")) + + def test_meta_tag_reflects_current_encoding(self): + # Here's the tag saying that a document is + # encoded in Shift-JIS. + meta_tag = ('') + + # Here's a document incorporating that meta tag. + shift_jis_html = ( + '\n%s\n' + '' + 'Shift-JIS markup goes here.') % meta_tag + soup = self.soup(shift_jis_html) + + # Parse the document, and the charset is seemingly unaffected. + parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'}) + content = parsed_meta['content'] + self.assertEqual('text/html; charset=x-sjis', content) + + # But that value is actually a ContentMetaAttributeValue object. + self.assertTrue(isinstance(content, ContentMetaAttributeValue)) + + # And it will take on a value that reflects its current + # encoding. + self.assertEqual('text/html; charset=utf8', content.encode("utf8")) + + # For the rest of the story, see TestSubstitutions in + # test_tree.py. + + def test_html5_style_meta_tag_reflects_current_encoding(self): + # Here's the tag saying that a document is + # encoded in Shift-JIS. + meta_tag = ('') + + # Here's a document incorporating that meta tag. + shift_jis_html = ( + '\n%s\n' + '' + 'Shift-JIS markup goes here.') % meta_tag + soup = self.soup(shift_jis_html) + + # Parse the document, and the charset is seemingly unaffected. + parsed_meta = soup.find('meta', id="encoding") + charset = parsed_meta['charset'] + self.assertEqual('x-sjis', charset) + + # But that value is actually a CharsetMetaAttributeValue object. + self.assertTrue(isinstance(charset, CharsetMetaAttributeValue)) + + # And it will take on a value that reflects its current + # encoding. 
+ self.assertEqual('utf8', charset.encode("utf8")) + + def test_tag_with_no_attributes_can_have_attributes_added(self): + data = self.soup("text") + data.a['foo'] = 'bar' + self.assertEqual('text', data.a.decode()) + +class XMLTreeBuilderSmokeTest(object): + + def test_pickle_and_unpickle_identity(self): + # Pickling a tree, then unpickling it, yields a tree identical + # to the original. + tree = self.soup("foo") + dumped = pickle.dumps(tree, 2) + loaded = pickle.loads(dumped) + self.assertEqual(loaded.__class__, BeautifulSoup) + self.assertEqual(loaded.decode(), tree.decode()) + + def test_docstring_generated(self): + soup = self.soup("") + self.assertEqual( + soup.encode(), b'\n') + + def test_xml_declaration(self): + markup = b"""\n""" + soup = self.soup(markup) + self.assertEqual(markup, soup.encode("utf8")) + + def test_processing_instruction(self): + markup = b"""\n""" + soup = self.soup(markup) + self.assertEqual(markup, soup.encode("utf8")) + + def test_real_xhtml_document(self): + """A real XHTML document should come out *exactly* the same as it went in.""" + markup = b""" + + +Hello. +Goodbye. +""" + soup = self.soup(markup) + self.assertEqual( + soup.encode("utf-8"), markup) + + def test_nested_namespaces(self): + doc = b""" + + + + + +""" + soup = self.soup(doc) + self.assertEqual(doc, soup.encode()) + + def test_formatter_processes_script_tag_for_xml_documents(self): + doc = """ + +""" + soup = BeautifulSoup(doc, "lxml-xml") + # lxml would have stripped this while parsing, but we can add + # it later. + soup.script.string = 'console.log("< < hey > > ");' + encoded = soup.encode() + self.assertTrue(b"< < hey > >" in encoded) + + def test_can_parse_unicode_document(self): + markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' 
+ soup = self.soup(markup) + self.assertEqual('Sacr\xe9 bleu!', soup.root.string) + + def test_popping_namespaced_tag(self): + markup = 'b2012-07-02T20:33:42Zcd' + soup = self.soup(markup) + self.assertEqual( + str(soup.rss), markup) + + def test_docstring_includes_correct_encoding(self): + soup = self.soup("") + self.assertEqual( + soup.encode("latin1"), + b'\n') + + def test_large_xml_document(self): + """A large XML document should come out the same as it went in.""" + markup = (b'\n' + + b'0' * (2**12) + + b'') + soup = self.soup(markup) + self.assertEqual(soup.encode("utf-8"), markup) + + + def test_tags_are_empty_element_if_and_only_if_they_are_empty(self): + self.assertSoupEquals("

", "

") + self.assertSoupEquals("

foo

") + + def test_namespaces_are_preserved(self): + markup = 'This tag is in the a namespaceThis tag is in the b namespace' + soup = self.soup(markup) + root = soup.root + self.assertEqual("http://example.com/", root['xmlns:a']) + self.assertEqual("http://example.net/", root['xmlns:b']) + + def test_closing_namespaced_tag(self): + markup = '

20010504

' + soup = self.soup(markup) + self.assertEqual(str(soup.p), markup) + + def test_namespaced_attributes(self): + markup = '' + soup = self.soup(markup) + self.assertEqual(str(soup.foo), markup) + + def test_namespaced_attributes_xml_namespace(self): + markup = 'bar' + soup = self.soup(markup) + self.assertEqual(str(soup.foo), markup) + + def test_find_by_prefixed_name(self): + doc = """ +foo + bar + baz + +""" + soup = self.soup(doc) + + # There are three tags. + self.assertEqual(3, len(soup.find_all('tag'))) + + # But two of them are ns1:tag and one of them is ns2:tag. + self.assertEqual(2, len(soup.find_all('ns1:tag'))) + self.assertEqual(1, len(soup.find_all('ns2:tag'))) + + self.assertEqual(1, len(soup.find_all('ns2:tag', key='value'))) + self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag']))) + + def test_copy_tag_preserves_namespace(self): + xml = """ +""" + + soup = self.soup(xml) + tag = soup.document + duplicate = copy.copy(tag) + + # The two tags have the same namespace prefix. + self.assertEqual(tag.prefix, duplicate.prefix) + + +class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): + """Smoke test for a tree builder that supports HTML5.""" + + def test_real_xhtml_document(self): + # Since XHTML is not HTML5, HTML5 parsers are not tested to handle + # XHTML documents in any particular way. 
+ pass + + def test_html_tags_have_namespace(self): + markup = "" + soup = self.soup(markup) + self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace) + + def test_svg_tags_have_namespace(self): + markup = '' + soup = self.soup(markup) + namespace = "http://www.w3.org/2000/svg" + self.assertEqual(namespace, soup.svg.namespace) + self.assertEqual(namespace, soup.circle.namespace) + + + def test_mathml_tags_have_namespace(self): + markup = '5' + soup = self.soup(markup) + namespace = 'http://www.w3.org/1998/Math/MathML' + self.assertEqual(namespace, soup.math.namespace) + self.assertEqual(namespace, soup.msqrt.namespace) + + def test_xml_declaration_becomes_comment(self): + markup = '' + soup = self.soup(markup) + self.assertTrue(isinstance(soup.contents[0], Comment)) + self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?') + self.assertEqual("html", soup.contents[0].next_element.name) + +def skipIf(condition, reason): + def nothing(test, *args, **kwargs): + return None + + def decorator(test_item): + if condition: + return nothing + else: + return test_item + + return decorator diff --git a/ext3/bs4/tests/__init__.py b/ext3/bs4/tests/__init__.py new file mode 100644 index 0000000000..142c8cc3f1 --- /dev/null +++ b/ext3/bs4/tests/__init__.py @@ -0,0 +1 @@ +"The beautifulsoup tests." 
diff --git a/ext3/bs4/tests/test_builder_registry.py b/ext3/bs4/tests/test_builder_registry.py new file mode 100644 index 0000000000..90cad82933 --- /dev/null +++ b/ext3/bs4/tests/test_builder_registry.py @@ -0,0 +1,147 @@ +"""Tests of the builder registry.""" + +import unittest +import warnings + +from bs4 import BeautifulSoup +from bs4.builder import ( + builder_registry as registry, + HTMLParserTreeBuilder, + TreeBuilderRegistry, +) + +try: + from bs4.builder import HTML5TreeBuilder + HTML5LIB_PRESENT = True +except ImportError: + HTML5LIB_PRESENT = False + +try: + from bs4.builder import ( + LXMLTreeBuilderForXML, + LXMLTreeBuilder, + ) + LXML_PRESENT = True +except ImportError: + LXML_PRESENT = False + + +class BuiltInRegistryTest(unittest.TestCase): + """Test the built-in registry with the default builders registered.""" + + def test_combination(self): + if LXML_PRESENT: + self.assertEqual(registry.lookup('fast', 'html'), + LXMLTreeBuilder) + + if LXML_PRESENT: + self.assertEqual(registry.lookup('permissive', 'xml'), + LXMLTreeBuilderForXML) + self.assertEqual(registry.lookup('strict', 'html'), + HTMLParserTreeBuilder) + if HTML5LIB_PRESENT: + self.assertEqual(registry.lookup('html5lib', 'html'), + HTML5TreeBuilder) + + def test_lookup_by_markup_type(self): + if LXML_PRESENT: + self.assertEqual(registry.lookup('html'), LXMLTreeBuilder) + self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML) + else: + self.assertEqual(registry.lookup('xml'), None) + if HTML5LIB_PRESENT: + self.assertEqual(registry.lookup('html'), HTML5TreeBuilder) + else: + self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder) + + def test_named_library(self): + if LXML_PRESENT: + self.assertEqual(registry.lookup('lxml', 'xml'), + LXMLTreeBuilderForXML) + self.assertEqual(registry.lookup('lxml', 'html'), + LXMLTreeBuilder) + if HTML5LIB_PRESENT: + self.assertEqual(registry.lookup('html5lib'), + HTML5TreeBuilder) + + self.assertEqual(registry.lookup('html.parser'), + 
HTMLParserTreeBuilder) + + def test_beautifulsoup_constructor_does_lookup(self): + + with warnings.catch_warnings(record=True) as w: + # This will create a warning about not explicitly + # specifying a parser, but we'll ignore it. + + # You can pass in a string. + BeautifulSoup("", features="html") + # Or a list of strings. + BeautifulSoup("", features=["html", "fast"]) + + # You'll get an exception if BS can't find an appropriate + # builder. + self.assertRaises(ValueError, BeautifulSoup, + "", features="no-such-feature") + +class RegistryTest(unittest.TestCase): + """Test the TreeBuilderRegistry class in general.""" + + def setUp(self): + self.registry = TreeBuilderRegistry() + + def builder_for_features(self, *feature_list): + cls = type('Builder_' + '_'.join(feature_list), + (object,), {'features' : feature_list}) + + self.registry.register(cls) + return cls + + def test_register_with_no_features(self): + builder = self.builder_for_features() + + # Since the builder advertises no features, you can't find it + # by looking up features. + self.assertEqual(self.registry.lookup('foo'), None) + + # But you can find it by doing a lookup with no features, if + # this happens to be the only registered builder. 
+ self.assertEqual(self.registry.lookup(), builder) + + def test_register_with_features_makes_lookup_succeed(self): + builder = self.builder_for_features('foo', 'bar') + self.assertEqual(self.registry.lookup('foo'), builder) + self.assertEqual(self.registry.lookup('bar'), builder) + + def test_lookup_fails_when_no_builder_implements_feature(self): + builder = self.builder_for_features('foo', 'bar') + self.assertEqual(self.registry.lookup('baz'), None) + + def test_lookup_gets_most_recent_registration_when_no_feature_specified(self): + builder1 = self.builder_for_features('foo') + builder2 = self.builder_for_features('bar') + self.assertEqual(self.registry.lookup(), builder2) + + def test_lookup_fails_when_no_tree_builders_registered(self): + self.assertEqual(self.registry.lookup(), None) + + def test_lookup_gets_most_recent_builder_supporting_all_features(self): + has_one = self.builder_for_features('foo') + has_the_other = self.builder_for_features('bar') + has_both_early = self.builder_for_features('foo', 'bar', 'baz') + has_both_late = self.builder_for_features('foo', 'bar', 'quux') + lacks_one = self.builder_for_features('bar') + has_the_other = self.builder_for_features('foo') + + # There are two builders featuring 'foo' and 'bar', but + # the one that also features 'quux' was registered later. + self.assertEqual(self.registry.lookup('foo', 'bar'), + has_both_late) + + # There is only one builder featuring 'foo', 'bar', and 'baz'. + self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'), + has_both_early) + + def test_lookup_fails_when_cannot_reconcile_requested_features(self): + builder1 = self.builder_for_features('foo', 'bar') + builder2 = self.builder_for_features('foo', 'baz') + self.assertEqual(self.registry.lookup('bar', 'baz'), None) diff --git a/ext3/bs4/tests/test_docs.py b/ext3/bs4/tests/test_docs.py new file mode 100644 index 0000000000..5b9f677093 --- /dev/null +++ b/ext3/bs4/tests/test_docs.py @@ -0,0 +1,36 @@ +"Test harness for doctests." 
+ +# pylint: disable-msg=E0611,W0142 + +__metaclass__ = type +__all__ = [ + 'additional_tests', + ] + +import atexit +import doctest +import os +#from pkg_resources import ( +# resource_filename, resource_exists, resource_listdir, cleanup_resources) +import unittest + +DOCTEST_FLAGS = ( + doctest.ELLIPSIS | + doctest.NORMALIZE_WHITESPACE | + doctest.REPORT_NDIFF) + + +# def additional_tests(): +# "Run the doc tests (README.txt and docs/*, if any exist)" +# doctest_files = [ +# os.path.abspath(resource_filename('bs4', 'README.txt'))] +# if resource_exists('bs4', 'docs'): +# for name in resource_listdir('bs4', 'docs'): +# if name.endswith('.txt'): +# doctest_files.append( +# os.path.abspath( +# resource_filename('bs4', 'docs/%s' % name))) +# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) +# atexit.register(cleanup_resources) +# return unittest.TestSuite(( +# doctest.DocFileSuite(*doctest_files, **kwargs))) diff --git a/ext3/bs4/tests/test_html5lib.py b/ext3/bs4/tests/test_html5lib.py new file mode 100644 index 0000000000..81fb7d3b28 --- /dev/null +++ b/ext3/bs4/tests/test_html5lib.py @@ -0,0 +1,130 @@ +"""Tests to ensure that the html5lib tree builder generates good trees.""" + +import warnings + +try: + from bs4.builder import HTML5TreeBuilder + HTML5LIB_PRESENT = True +except ImportError as e: + HTML5LIB_PRESENT = False +from bs4.element import SoupStrainer +from bs4.testing import ( + HTML5TreeBuilderSmokeTest, + SoupTest, + skipIf, +) + +@skipIf( + not HTML5LIB_PRESENT, + "html5lib seems not to be present, not testing its tree builder.") +class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): + """See ``HTML5TreeBuilderSmokeTest``.""" + + @property + def default_builder(self): + return HTML5TreeBuilder() + + def test_soupstrainer(self): + # The html5lib tree builder does not support SoupStrainers. + strainer = SoupStrainer("b") + markup = "

A bold statement.

" + with warnings.catch_warnings(record=True) as w: + soup = self.soup(markup, parse_only=strainer) + self.assertEqual( + soup.decode(), self.document_for(markup)) + + self.assertTrue( + "the html5lib tree builder doesn't support parse_only" in + str(w[0].message)) + + def test_correctly_nested_tables(self): + """html5lib inserts tags where other parsers don't.""" + markup = ('' + '' + "') + + self.assertSoupEquals( + markup, + '
Here's another table:" + '' + '' + '
foo
Here\'s another table:' + '
foo
' + '
') + + self.assertSoupEquals( + "" + "" + "
Foo
Bar
Baz
") + + def test_xml_declaration_followed_by_doctype(self): + markup = ''' + + + + + +

foo

+ +''' + soup = self.soup(markup) + # Verify that we can reach the

tag; this means the tree is connected. + self.assertEqual(b"

foo

", soup.p.encode()) + + def test_reparented_markup(self): + markup = '

foo

\n

bar

' + soup = self.soup(markup) + self.assertEqual("

foo

\n

bar

", soup.body.decode()) + self.assertEqual(2, len(soup.find_all('p'))) + + + def test_reparented_markup_ends_with_whitespace(self): + markup = '

foo

\n

bar

\n' + soup = self.soup(markup) + self.assertEqual("

foo

\n

bar

\n", soup.body.decode()) + self.assertEqual(2, len(soup.find_all('p'))) + + def test_reparented_markup_containing_identical_whitespace_nodes(self): + """Verify that we keep the two whitespace nodes in this + document distinct when reparenting the adjacent tags. + """ + markup = '
' + soup = self.soup(markup) + space1, space2 = soup.find_all(string=' ') + tbody1, tbody2 = soup.find_all('tbody') + assert space1.next_element is tbody1 + assert tbody2.next_element is space2 + + def test_reparented_markup_containing_children(self): + markup = '' + soup = self.soup(markup) + noscript = soup.noscript + self.assertEqual("target", noscript.next_element) + target = soup.find(string='target') + + # The 'aftermath' string was duplicated; we want the second one. + final_aftermath = soup.find_all(string='aftermath')[-1] + + # The