diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index f1de048ea4..6b9fab5d7c 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -29,10 +29,10 @@ jobs: # we don't know what commit the last tag was it's safer to get entire repo so previousStableVersion resolves fetch-depth: 0 - - name: Set up python 2 - uses: actions/setup-python@v2 + - name: Set up python 3 + uses: actions/setup-python@v3 with: - python-version: '2.x' + python-version: '3.x' architecture: 'x64' - name: Set up JDK ${{ matrix.jdk }} @@ -79,4 +79,4 @@ jobs: name: play-${{ github.sha }} if-no-files-found: error path: | - ./framework/dist/* \ No newline at end of file + ./framework/dist/* diff --git a/framework/pym/play/application.py b/framework/pym/play/application.py index cb79af335f..644d041708 100644 --- a/framework/pym/play/application.py +++ b/framework/pym/play/application.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from builtins import input +from builtins import object import sys import os import os.path @@ -24,23 +27,23 @@ def __init__(self, application_path, env, ignoreMissingModules = False): # only parse conf it is exists - if it should be there, it will be caught later # (depends on command) confExists = os.path.exists(os.path.join(self.path, 'conf', 'application.conf')); - if application_path is not None and confExists: + if application_path != None and confExists: confFolder = os.path.join(application_path, 'conf/') try: self.conf = PlayConfParser(confFolder, env) except Exception as err: - print "~ Failed to parse application configuration", err + print("~ Failed to parse application configuration", err) self.conf = None # No app / Invalid app else: self.conf = None self.play_env = env - if env.has_key('jpda.port'): + if 'jpda.port' in env: self.jpda_port = env['jpda.port'] else: self.jpda_port = self.readConf('jpda.port') - if env.has_key('jpda.address'): + if 'jpda.address' in env: self.jpda_address = 
env['jpda.address'] else: self.jpda_address = self.readConf('jpda.address') @@ -54,9 +57,9 @@ def check(self): assert os.path.exists(os.path.join(self.path, 'conf', 'routes')) assert os.path.exists(os.path.join(self.path, 'conf', 'application.conf')) except AssertionError: - print "~ Oops. conf/routes or conf/application.conf missing." - print "~ %s does not seem to host a valid application." % os.path.normpath(self.path) - print "~" + print("~ Oops. conf/routes or conf/application.conf missing.") + print("~ %s does not seem to host a valid application." % os.path.normpath(self.path)) + print("~") sys.exit(-1) def readConf(self, key): @@ -78,20 +81,20 @@ def modules(self): application_mode = "dev" if application_mode == 'dev': #Load docviewer module - modules.append(os.path.normpath(os.path.join(self.play_env["basedir"], 'modules/docviewer'))) + modules.append(os.path.normpath(os.path.join(self.play_env["basedir"], 'modules/docviewer'))) for m in self.readConfs('module.'): if '${play.path}' in m: m = m.replace('${play.path}', self.play_env["basedir"]) - if m[0] is not '/': + if m[0] != '/': m = os.path.normpath(os.path.join(self.path, m)) if not os.path.exists(m) and not self.ignoreMissingModules: - print "~ Oops," - print "~ Module not found: %s" % (m) - print "~" + print("~ Oops,") + print("~ Module not found: %s" % (m)) + print("~") if m.startswith('${play.path}/modules'): - print "~ You can try to install the missing module using 'play install %s'" % (m[21:]) - print "~" + print("~ You can try to install the missing module using 'play install %s'" % (m[21:])) + print("~") sys.exit(-1) modules.append(m) if self.path and os.path.exists(os.path.join(self.path, 'modules')): @@ -108,7 +111,7 @@ def modules(self): return set(modules) # Ensure we don't have duplicates def module_names(self): - return map(lambda x: x[7:],self.conf.getAllKeys("module.")) + return [x[7:] for x in self.conf.getAllKeys("module.")] def override(self, f, t): fromFile = None @@ -116,18 
+119,18 @@ def override(self, f, t): pc = os.path.join(module, f) if os.path.exists(pc): fromFile = pc if not fromFile: - print "~ %s not found in any module" % f - print "~ " + print("~ %s not found in any module" % f) + print("~ ") sys.exit(-1) toFile = os.path.join(self.path, t) if os.path.exists(toFile): - response = raw_input("~ Warning! %s already exists and will be overridden (y/n)? " % toFile) + response = input("~ Warning! %s already exists and will be overridden (y/n)? " % toFile) if not response == 'y': return if not os.path.exists(os.path.dirname(toFile)): os.makedirs(os.path.dirname(toFile)) shutil.copyfile(fromFile, toFile) - print "~ Copied %s to %s " % (fromFile, toFile) + print("~ Copied %s to %s " % (fromFile, toFile)) def name(self): return self.readConf("application.name") @@ -207,15 +210,15 @@ def fw_cp_args(self): return cp_args def pid_path(self): - if self.play_env.has_key('pid_file'): + if 'pid_file' in self.play_env: return os.path.join(self.path, self.play_env['pid_file']) - elif os.environ.has_key('PLAY_PID_PATH'): + elif 'PLAY_PID_PATH' in os.environ: return os.environ['PLAY_PID_PATH'] else: return os.path.join(self.path, 'server.pid') def log_path(self): - if not os.environ.has_key('PLAY_LOG_PATH'): + if 'PLAY_LOG_PATH' not in os.environ: log_path = os.path.join(self.path, 'logs') else: log_path = os.environ['PLAY_LOG_PATH'] @@ -231,12 +234,12 @@ def check_jpda(self): else: s.bind((self.jpda_address, int(self.jpda_port))) s.close() - except socket.error, e: + except socket.error as e: if "disable_random_jpda" in self.play_env and self.play_env["disable_random_jpda"]: - print 'JPDA port %s is already used, and command line option "-f" was specified. Cannot start server\n' % self.jpda_port + print('JPDA port %s is already used, and command line option "-f" was specified. Cannot start server\n' % self.jpda_port) sys.exit(-1) else: - print 'JPDA port %s is already used. 
Will try to use any free port for debugging' % self.jpda_port + print('JPDA port %s is already used. Will try to use any free port for debugging' % self.jpda_port) self.jpda_port = 0 def java_args_memory(self, java_args): @@ -272,29 +275,29 @@ def java_cmd(self, java_args, cp_args=None, className='play.server.Server', args if cp_args is None: cp_args = self.cp_args() - if self.play_env.has_key('jpda.port'): + if 'jpda.port' in self.play_env: self.jpda_port = self.play_env['jpda.port'] - if self.play_env.has_key('jpda.address'): + if 'jpda.address' in self.play_env: self.jpda_address = self.play_env['jpda.address'] application_mode = self.readConf('application.mode').lower() if not application_mode: - print "~ Warning: no application.mode defined in you conf/application.conf. Using DEV mode." + print("~ Warning: no application.mode defined in you conf/application.conf. Using DEV mode.") application_mode = "dev" if application_mode == 'prod': java_args.append('-server') - if self.play_env.has_key('jvm_version'): + if 'jvm_version' in self.play_env: javaVersion = self.play_env['jvm_version'] else: javaVersion = getJavaVersion() - print "~ using java version \"%s\"" % javaVersion + print("~ using java version \"%s\"" % javaVersion) if javaVersion.startswith("1.5") or javaVersion.startswith("1.6") or javaVersion.startswith("1.7"): - print "~ ERROR: java version prior to 1.8 are no longer supported: current version \"%s\" : please update" % javaVersion + print("~ ERROR: java version prior to 1.8 are no longer supported: current version \"%s\" : please update" % javaVersion) java_args.append('-noverify') @@ -302,13 +305,13 @@ def java_cmd(self, java_args, cp_args=None, className='play.server.Server', args if java_policy != '': policyFile = os.path.join(self.path, 'conf', java_policy) if os.path.exists(policyFile): - print "~ using policy file \"%s\"" % policyFile + print("~ using policy file \"%s\"" % policyFile) java_args.append('-Djava.security.manager') 
java_args.append('-Djava.security.policy==%s' % policyFile) - if self.play_env.has_key('http.port'): + if 'http.port' in self.play_env: args += ["--http.port=%s" % self.play_env['http.port']] - if self.play_env.has_key('https.port'): + if 'https.port' in self.play_env: args += ["--https.port=%s" % self.play_env['https.port']] java_args.append('-Dfile.encoding=utf-8') @@ -345,7 +348,7 @@ def _absoluteToRelative(path, start): return os.path.curdir return os.path.join(*rel_list) -class PlayConfParser: +class PlayConfParser(object): DEFAULTS = { 'http.port': '9000', @@ -355,15 +358,15 @@ class PlayConfParser: def __init__(self, confFolder, env): self.id = env["id"] self.entries = self.readFile(confFolder, "application.conf") - if env.has_key('jpda.port'): + if 'jpda.port' in env: self.entries['jpda.port'] = env['jpda.port'] - if env.has_key('http.port'): + if 'http.port' in env: self.entries['http.port'] = env['http.port'] - if env.has_key('jvm_version'): + if 'jvm_version' in env: self.entries['jvm_version'] = env['jvm_version'] def readFile(self, confFolder, filename): - f = file(confFolder + filename) + f = open(confFolder + filename, 'r') result = dict() for line in f: linedef = line.strip() @@ -382,12 +385,12 @@ def readFile(self, confFolder, filename): washedResult = dict() # first get all keys with correct framework id - for (key, value) in result.items(): + for (key, value) in list(result.items()): if key.startswith('%' + self.id + '.'): stripedKey = key[(len(self.id)+2):] washedResult[stripedKey]=value # now get all without framework id if we don't already have it - for (key, value) in result.items(): + for (key, value) in list(result.items()): if not key.startswith('%'): # check if we already have it if not (key in washedResult): @@ -396,7 +399,7 @@ def readFile(self, confFolder, filename): # find all @include includeFiles = [] - for (key, value) in washedResult.items(): + for (key, value) in list(washedResult.items()): if key.startswith('@include.'): 
includeFiles.append(value) @@ -407,10 +410,10 @@ def readFile(self, confFolder, filename): fromIncludeFile = self.readFile(confFolder, self._expandValue(includeFile)) # add everything from include file - for (key, value) in fromIncludeFile.items(): + for (key, value) in list(fromIncludeFile.items()): washedResult[key]=value except Exception as err: - print "~ Failed to load included configuration %s: %s" % (includeFile, err) + print("~ Failed to load included configuration %s: %s" % (includeFile, err)) return washedResult @@ -423,7 +426,7 @@ def get(self, key): def getAllKeys(self, query): result = [] - for (key, value) in self.entries.items(): + for (key, value) in list(self.entries.items()): if key.startswith(query): result.append(key) return result diff --git a/framework/pym/play/cmdloader.py b/framework/pym/play/cmdloader.py index fa4406cecd..459f6850fc 100644 --- a/framework/pym/play/cmdloader.py +++ b/framework/pym/play/cmdloader.py @@ -1,6 +1,8 @@ +from __future__ import print_function import imp import os import warnings +import traceback def play_formatwarning(msg, *a): # ignore everything except the message @@ -9,7 +11,7 @@ def play_formatwarning(msg, *a): warnings.formatwarning = play_formatwarning -class CommandLoader: +class CommandLoader(object): def __init__(self, play_path): self.path = os.path.join(play_path, 'framework', 'pym', 'play', 'commands') self.commands = {} @@ -23,7 +25,9 @@ def load_core(self): name = filename.replace(".py", "") mod = load_python_module(name, self.path) self._load_cmd_from(mod) - except: + except Exception as e: + print (e) + traceback.print_exc() warnings.warn("!! Warning: could not load core command file " + filename, RuntimeWarning) def load_play_module(self, modname): @@ -33,10 +37,10 @@ def load_play_module(self, modname): leafname = os.path.basename(modname).split('.')[0] mod = imp.load_source(leafname, os.path.join(modname, "commands.py")) self._load_cmd_from(mod) - except Exception, e: - print '~' - print '~ !! 
Error while loading %s: %s' % (commands, e) - print '~' + except Exception as e: + print('~') + print('~ !! Error while loading %s: %s' % (commands, e)) + print('~') pass # No command to load in this module def _load_cmd_from(self, mod): @@ -57,6 +61,6 @@ def load_python_module(name, location): try: return imp.load_module(name, mod_desc[0], mod_desc[1], mod_desc[2]) finally: - if mod_file is not None and not mod_file.closed: + if mod_file != None and not mod_file.closed: mod_file.close() diff --git a/framework/pym/play/commands/ant.py b/framework/pym/play/commands/ant.py index 366415701c..5cba882638 100644 --- a/framework/pym/play/commands/ant.py +++ b/framework/pym/play/commands/ant.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil import time @@ -20,6 +21,6 @@ def execute(**kargs): shutil.copyfile(os.path.join(play_env["basedir"], 'resources/build.xml'), os.path.join(app.path, 'build.xml')) - print "~ OK, a build.xml file has been created" - print "~ Define the PLAY_PATH env property, and use it with ant run|start|stop" - print "~" + print("~ OK, a build.xml file has been created") + print("~ Define the PLAY_PATH env property, and use it with ant run|start|stop") + print("~") diff --git a/framework/pym/play/commands/autotest.py b/framework/pym/play/commands/autotest.py index 3b082bd8fa..22702c8e36 100644 --- a/framework/pym/play/commands/autotest.py +++ b/framework/pym/play/commands/autotest.py @@ -1,9 +1,10 @@ +from __future__ import print_function # Command related to execution: auto-test import sys import os, os.path import shutil -import urllib, urllib2 +import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse import subprocess import webbrowser import time @@ -28,14 +29,14 @@ def execute(**kargs): def autotest(app, args): app.check() - print "~ Running in test mode" - print "~ Ctrl+C to stop" - print "~ " + print("~ Running in test mode") + print("~ Ctrl+C to stop") + print("~ ") - 
print "~ Deleting %s" % os.path.normpath(os.path.join(app.path, 'tmp')) + print("~ Deleting %s" % os.path.normpath(os.path.join(app.path, 'tmp'))) if os.path.exists(os.path.join(app.path, 'tmp')): shutil.rmtree(os.path.join(app.path, 'tmp')) - print "~" + print("~") # Kill if exists http_port = 9000 @@ -46,16 +47,16 @@ def autotest(app, args): else: http_port = app.readConf('http.port') try: - proxy_handler = urllib2.ProxyHandler({}) - opener = urllib2.build_opener(proxy_handler) + proxy_handler = urllib.request.ProxyHandler({}) + opener = urllib.request.build_opener(proxy_handler) opener.open('http://localhost:%s/@kill' % http_port) - except Exception, e: + except Exception as e: pass # Do not run the app if SSL is configured and no cert store is configured keystore = app.readConf('keystore.file') if protocol == 'https' and not keystore: - print "https without keystore configured. play auto-test will fail. Exiting now." + print("https without keystore configured. play auto-test will fail. Exiting now.") sys.exit(-1) # read parameters @@ -94,25 +95,25 @@ def autotest(app, args): try: play_process = subprocess.Popen(java_cmd, env=os.environ, stdout=sout) except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) soutint = open(os.path.join(app.log_path(), 'system.out'), 'r') while True: if play_process.poll(): - print "~" - print "~ Oops, application has not started?" 
- print "~" + print("~") + print("~ Oops, application has not started?") + print("~") sys.exit(-1) line = soutint.readline().strip() if line: - print line + print(line) if line.find('Server is up and running') > -1: # This line is written out by Server.java to system.out and is not log file dependent soutint.close() break # Run FirePhoque - print "~" - print "~ Starting FirePhoque..." + print("~") + print("~ Starting FirePhoque...") headless_browser = '' if app.readConf('headlessBrowser'): @@ -134,25 +135,25 @@ def autotest(app, args): try: subprocess.call(java_cmd, env=os.environ) except OSError: - print "Could not execute the headless browser. " + print("Could not execute the headless browser. ") sys.exit(-1) - print "~" + print("~") time.sleep(1) # Kill if exists try: - proxy_handler = urllib2.ProxyHandler({}) - opener = urllib2.build_opener(proxy_handler) + proxy_handler = urllib.request.ProxyHandler({}) + opener = urllib.request.build_opener(proxy_handler) opener.open('%s://localhost:%s/@kill' % (protocol, http_port)) - except Exception, e: + except Exception as e: pass if os.path.exists(os.path.join(app.path, 'test-result/result.passed')): - print "~ All tests passed" - print "~" + print("~ All tests passed") + print("~") testspassed = True if os.path.exists(os.path.join(app.path, 'test-result/result.failed')): - print "~ Some tests have failed. See file://%s for results" % test_result - print "~" + print("~ Some tests have failed. 
See file://%s for results" % test_result) + print("~") sys.exit(1) diff --git a/framework/pym/play/commands/base.py b/framework/pym/play/commands/base.py index d427f3b30d..7d21047f86 100644 --- a/framework/pym/play/commands/base.py +++ b/framework/pym/play/commands/base.py @@ -1,11 +1,14 @@ +from __future__ import print_function # Command related to creation and execution: run, new, clean +from builtins import input +from builtins import str import sys import os import subprocess import shutil import getopt -import urllib2 +import urllib.request, urllib.error, urllib.parse import webbrowser import time import signal @@ -54,14 +57,14 @@ def new(app, args, env, cmdloader=None): withModules = a.split(',') if o in ('--name'): application_name = a - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ Sorry, unrecognized option" - print "~ " + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ Sorry, unrecognized option") + print("~ ") sys.exit(-1) if os.path.exists(app.path): - print "~ Oops. %s already exists" % app.path - print "~" + print("~ Oops. %s already exists" % app.path) + print("~") sys.exit(-1) md = [] @@ -76,16 +79,16 @@ def new(app, args, env, cmdloader=None): break if not dirname: - print "~ Oops. No module %s found" % m - print "~ Try to install it using 'play install %s'" % m - print "~" + print("~ Oops. No module %s found" % m) + print("~ Try to install it using 'play install %s'" % m) + print("~") sys.exit(-1) md.append(dirname) - print "~ The new application will be created in %s" % os.path.normpath(app.path) + print("~ The new application will be created in %s" % os.path.normpath(app.path)) if application_name is None: - application_name = raw_input("~ What is the application name? [%s] " % os.path.basename(app.path)) + application_name = input("~ What is the application name? 
[%s] " % os.path.basename(app.path)) if application_name == "": application_name = os.path.basename(app.path) copy_directory(os.path.join(env["basedir"], 'resources/application-skel'), app.path) @@ -94,7 +97,7 @@ def new(app, args, env, cmdloader=None): app.check() replaceAll(os.path.join(app.path, 'conf/application.conf'), r'%APPLICATION_NAME%', application_name) replaceAll(os.path.join(app.path, 'conf/application.conf'), r'%SECRET_KEY%', secretKey()) - print "~" + print("~") # Configure modules for m in md: @@ -110,10 +113,10 @@ def new(app, args, env, cmdloader=None): cmdloader.commands['dependencies'].execute(command='dependencies', app=app, args=['--sync'], env=env, cmdloader=cmdloader) - print "~ OK, the application is created." - print "~ Start it with : play run %s" % sys.argv[2] - print "~ Have fun!" - print "~" + print("~ OK, the application is created.") + print("~ Start it with : play run %s" % sys.argv[2]) + print("~ Have fun!") + print("~") process = None @@ -131,19 +134,19 @@ def handle_sigint(signum, frame): if 'process' in globals(): if first_sigint: # Prefix with new line because ^C usually appears on the terminal - print "\nTerminating Java process" + print("\nTerminating Java process") process.terminate() first_sigint = False else: - print "\nKilling Java process" + print("\nKilling Java process") process.kill() def run(app, args): global process app.check() - print "~ Ctrl+C to stop" - print "~ " + print("~ Ctrl+C to stop") + print("~ ") java_cmd = app.java_cmd(args) try: process = subprocess.Popen (java_cmd, env=os.environ) @@ -153,46 +156,46 @@ def run(app, args): if 0 != return_code: sys.exit(return_code) except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). 
" + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) - print + print() def clean(app): app.check() tmp = app.readConf('play.tmp') if tmp is None or not tmp.strip(): tmp = 'tmp' - print "~ Deleting %s" % os.path.normpath(os.path.join(app.path, tmp)) + print("~ Deleting %s" % os.path.normpath(os.path.join(app.path, tmp))) if os.path.exists(os.path.join(app.path, tmp)): shutil.rmtree(os.path.join(app.path, tmp)) - print "~" + print("~") def show_modules(app, args): app.check() modules = app.modules() if len(modules): - print "~ Application modules are:" - print "~ " + print("~ Application modules are:") + print("~ ") for module in modules: - print "~ %s" % module + print("~ %s" % module) else: - print "~ No modules installed in this application" - print "~ " + print("~ No modules installed in this application") + print("~ ") sys.exit(0) def id(play_env): if not play_env["id"]: - print "~ framework ID is not set" - new_id = raw_input("~ What is the new framework ID (or blank to unset)? ") + print("~ framework ID is not set") + new_id = input("~ What is the new framework ID (or blank to unset)? 
") if new_id: - print "~" - print "~ OK, the framework ID is now %s" % new_id - print "~" + print("~") + print("~ OK, the framework ID is now %s" % new_id) + print("~") open(play_env["id_file"], 'w').write(new_id) else: - print "~" - print "~ OK, the framework ID is unset" - print "~" + print("~") + print("~ OK, the framework ID is unset") + print("~") if os.path.exists(play_env["id_file"]): os.remove(play_env["id_file"]) @@ -203,13 +206,13 @@ def kill(pid): import ctypes handle = ctypes.windll.kernel32.OpenProcess(1, False, int(pid)) if not ctypes.windll.kernel32.TerminateProcess(handle, 0): - print "~ Cannot kill the process with pid %s (ERROR %s)" % (pid, ctypes.windll.kernel32.GetLastError()) - print "~ " + print("~ Cannot kill the process with pid %s (ERROR %s)" % (pid, ctypes.windll.kernel32.GetLastError())) + print("~ ") sys.exit(-1) else: try: os.kill(int(pid), 15) except OSError: - print "~ Play was not running (Process id %s not found)" % pid - print "~" + print("~ Play was not running (Process id %s not found)" % pid) + print("~") sys.exit(-1) diff --git a/framework/pym/play/commands/check.py b/framework/pym/play/commands/check.py index 5ec55595a2..e176cd2379 100644 --- a/framework/pym/play/commands/check.py +++ b/framework/pym/play/commands/check.py @@ -1,6 +1,10 @@ +from __future__ import print_function +from builtins import str +from builtins import range +from builtins import object import os, os.path import shutil -import urllib, urllib2 +import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse import simplejson as json from play.utils import * @@ -27,23 +31,23 @@ def execute(**kargs): releases = allreleases() if len(releases) == 0: - print "~ No release found." + print("~ No release found.") elif current == max(releases): - print "~ You are using the latest version." 
+ print("~ You are using the latest version.") else: - print "~ \tLatest release: " + str(max(releases)) - print "~ \tYour version : " + str(current) - print "~" - print "~ Latest release download: " + max(releases).url() + print("~ \tLatest release: " + str(max(releases))) + print("~ \tYour version : " + str(current)) + print("~") + print("~ Latest release download: " + max(releases).url()) - print "~" + print("~") def allreleases(): try: - req = urllib2.Request(TAGS_URL) + req = urllib.request.Request(TAGS_URL) req.add_header('Accept', 'application/json') - opener = urllib2.build_opener() + opener = urllib.request.build_opener() result = opener.open(req) jsonObject = json.loads(result.read()) releases = [] @@ -51,18 +55,18 @@ def allreleases(): releases.append(Release(tagObj["name"])) return releases - except urllib2.HTTPError, e: - print "~ Oops," - print "~ Cannot contact github..." - print "~" + except urllib.error.HTTPError as e: + print("~ Oops,") + print("~ Cannot contact github...") + print("~") sys.exit(-1) - except urllib2.URLError, e: - print "~ Oops," - print "~ Cannot contact github..." 
- print "~" + except urllib.error.URLError as e: + print("~ Oops,") + print("~ Cannot contact github...") + print("~") sys.exit(-1) -class Release: +class Release(object): # TODO: Be smarter at analysing the rest (ex: RC1 vs RC2) def __init__(self, strversion): @@ -73,7 +77,7 @@ def __init__(self, strversion): self.numpart = '' self.rest = strversion.replace(self.numpart, "") try: - self.versions = map(lambda x: int(x), self.numpart.split(".")) + self.versions = [int(x) for x in self.numpart.split(".")] except: self.versions = [0,0] if not self.rest: self.rest = "Z" diff --git a/framework/pym/play/commands/classpath.py b/framework/pym/play/commands/classpath.py index a887c9a4de..b249fe2dd8 100644 --- a/framework/pym/play/commands/classpath.py +++ b/framework/pym/play/commands/classpath.py @@ -1,3 +1,4 @@ +from __future__ import print_function # Show the computed classpath for the application COMMANDS = ['cp', 'classpath'] @@ -10,7 +11,7 @@ def execute(**kargs): command = kargs.get("command") app = kargs.get("app") args = kargs.get("args") - print "~ Computed classpath is:" - print "~ " - print app.getClasspath() - print "~ " + print("~ Computed classpath is:") + print("~ ") + print(app.getClasspath()) + print("~ ") diff --git a/framework/pym/play/commands/daemon.py b/framework/pym/play/commands/daemon.py index 9f66612af3..f6b2512790 100644 --- a/framework/pym/play/commands/daemon.py +++ b/framework/pym/play/commands/daemon.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from builtins import str +from builtins import range import errno import os import os.path @@ -44,12 +47,12 @@ def start(app, args): if os.path.exists(app.pid_path()): pid = open(app.pid_path()).readline().strip() if process_running(pid): - print "~ Oops. %s is already started (pid:%s)! (or delete %s)" % ( - os.path.normpath(app.path), pid, os.path.normpath(app.pid_path())) - print "~" + print("~ Oops. %s is already started (pid:%s)! 
(or delete %s)" % ( + os.path.normpath(app.path), pid, os.path.normpath(app.pid_path()))) + print("~") sys.exit(1) else: - print "~ removing pid file %s for not running pid %s" % (os.path.normpath(app.pid_path()), pid) + print("~ removing pid file %s for not running pid %s" % (os.path.normpath(app.pid_path()), pid)) os.remove(app.pid_path()) sysout = app.readConf('application.log.system.out') @@ -61,35 +64,35 @@ def start(app, args): try: pid = subprocess.Popen(app.java_cmd(args), stdout=sout, env=os.environ).pid except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) - print "~ OK, %s is started" % os.path.normpath(app.path) + print("~ OK, %s is started" % os.path.normpath(app.path)) if sysout: - print "~ output is redirected to %s" % os.path.normpath(os.path.join(app.log_path(), 'system.out')) + print("~ output is redirected to %s" % os.path.normpath(os.path.join(app.log_path(), 'system.out'))) pid_file = open(app.pid_path(), 'w') pid_file.write(str(pid)) - print "~ pid is %s" % pid - print "~" + print("~ pid is %s" % pid) + print("~") def stop(app): app.check() if not os.path.exists(app.pid_path()): - print "~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path) - print "~" + print("~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path)) + print("~") sys.exit(-1) pid = open(app.pid_path()).readline().strip() kill(pid) os.remove(app.pid_path()) - print "~ OK, %s is stopped" % app.path - print "~" + print("~ OK, %s is stopped" % app.path) + print("~") def restart(app, args): app.check() if not os.path.exists(app.pid_path()): - print "~ Oops! 
%s is not started (server.pid not found)" % os.path.normpath(app.path) - print "~" + print("~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path)) + print("~") else: pid = open(app.pid_path()).readline().strip() os.remove(app.pid_path()) @@ -105,34 +108,34 @@ def restart(app, args): try: pid = subprocess.Popen(java_cmd, stdout=sout, env=os.environ).pid except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) - print "~ OK, %s is restarted" % os.path.normpath(app.path) + print("~ OK, %s is restarted" % os.path.normpath(app.path)) if sysout: - print "~ output is redirected to %s" % os.path.normpath(os.path.join(app.log_path(), 'system.out')) + print("~ output is redirected to %s" % os.path.normpath(os.path.join(app.log_path(), 'system.out'))) pid_file = open(app.pid_path(), 'w') pid_file.write(str(pid)) - print "~ New pid is %s" % pid - print "~" + print("~ New pid is %s" % pid) + print("~") sys.exit(0) def pid(app): app.check() if not os.path.exists(app.pid_path()): - print "~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path) - print "~" + print("~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path)) + print("~") sys.exit(-1) pid = open(app.pid_path()).readline().strip() - print "~ PID of the running applications is %s" % pid - print "~ " + print("~ PID of the running applications is %s" % pid) + print("~ ") def out(app): app.check() if not os.path.exists(os.path.join(app.log_path(), 'system.out')): - print "~ Oops! %s not found" % os.path.normpath(os.path.join(app.log_path(), 'system.out')) - print "~" + print("~ Oops! 
%s not found" % os.path.normpath(os.path.join(app.log_path(), 'system.out'))) + print("~") sys.exit(-1) sout = open(os.path.join(app.log_path(), 'system.out'), 'r') try: @@ -146,7 +149,7 @@ def out(app): time.sleep(1) sout.seek(where) else: - print line + print(line) def kill(pid): @@ -156,16 +159,16 @@ def kill(pid): process = ctypes.windll.kernel32.TerminateProcess(handle, 0) ctypes.windll.kernel32.CloseHandle(handle) if not process: - print "~ Cannot kill the process with pid %s (ERROR %s)" % (pid, ctypes.windll.kernel32.GetLastError()) - print "~ " + print("~ Cannot kill the process with pid %s (ERROR %s)" % (pid, ctypes.windll.kernel32.GetLastError())) + print("~ ") sys.exit(-1) - print "~ Process with PID %s terminated" % pid + print("~ Process with PID %s terminated" % pid) else: try: _terminate_unix_process_if_exists(int(pid)) except OSError: - print "~ Play was not running (Process id %s not found)" % pid - print "~" + print("~ Play was not running (Process id %s not found)" % pid) + print("~") sys.exit(-1) @@ -211,8 +214,8 @@ def process_list_nt(): else: proc_dict[instance] = 0 idProcessLocalizedName = win32pdhutil.find_pdh_counter_localized_name("ID Process") - for instance, max_instances in proc_dict.items(): - for inum in xrange(max_instances + 1): + for instance, max_instances in list(proc_dict.items()): + for inum in range(max_instances + 1): hq = win32pdh.OpenQuery() # initializes the query handle path = win32pdh.MakeCounterPath((None, processLocalizedName, instance, None, inum, idProcessLocalizedName)) counter_handle = win32pdh.AddCounter(hq, path) diff --git a/framework/pym/play/commands/deps.py b/framework/pym/play/commands/deps.py index d82da2b62f..2d2b91d046 100644 --- a/framework/pym/play/commands/deps.py +++ b/framework/pym/play/commands/deps.py @@ -1,6 +1,7 @@ +from __future__ import print_function import os, os.path import shutil -import urllib, urllib2 +import urllib.request, urllib.parse, urllib.error
import subprocess import simplejson as json @@ -57,14 +58,14 @@ def execute(**kargs): add_options.append('-Dclearcache') if args.count('--jpda'): args.remove('--jpda') - print "~ Waiting for JPDA client to continue" + print("~ Waiting for JPDA client to continue") add_options.append('-Xdebug') add_options.append('-Xrunjdwp:transport=dt_socket,address=%s,server=y,suspend=y' % app.jpda_port) for arg in args: if arg.startswith("-D"): add_options.append(arg) elif not arg.startswith('-Xm'): - print "~ WARNING: " + arg + " argument will be skipped" + print("~ WARNING: " + arg + " argument will be skipped") java_cmd = [java_path()] + add_options + args_memory + ['-classpath', app.fw_cp_args(), 'play.deps.DependenciesManager'] try: @@ -72,5 +73,5 @@ def execute(**kargs): if 0 != return_code: sys.exit(return_code); except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). 
") sys.exit(-1) diff --git a/framework/pym/play/commands/eclipse.py b/framework/pym/play/commands/eclipse.py index 93211c16f2..01a6eb1b82 100644 --- a/framework/pym/play/commands/eclipse.py +++ b/framework/pym/play/commands/eclipse.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from builtins import str import os, os.path import shutil import time @@ -30,9 +32,9 @@ def execute(**kargs): javaVersion = getJavaVersion() - print "~ using java version \"%s\"" % javaVersion + print("~ using java version \"%s\"" % javaVersion) if javaVersion.startswith("1.5") or javaVersion.startswith("1.6") or javaVersion.startswith("1.7"): - print "~ ERROR: java version prior to 1.8 are no longer supported: current version \"%s\" : please update" % javaVersion + print("~ ERROR: java version prior to 1.8 are no longer supported: current version \"%s\" : please update" % javaVersion) vm_arguments = vm_arguments +' -noverify' @@ -77,7 +79,7 @@ def execute(**kargs): # pointers to source jars produced by 'play deps' src_file = os.path.join(lib_src, os.path.basename(el) + '.src') if os.path.exists(src_file): - f = file(src_file) + f = open(src_file, 'r') cpJarToSource[el] = f.readline().rstrip() f.close() @@ -93,10 +95,10 @@ def execute(**kargs): if el == playJarPath: cpXML += '\n\t' % (os.path.normpath(el) , playSourcePath) else: - if cpJarToSource.has_key(el): + if el in cpJarToSource: cpXML += '\n\t' % (os.path.normpath(el), cpJarToSource[el]) else: - if javadocLocation.has_key(el): + if el in javadocLocation: cpXML += '\n\t\t' % os.path.normpath(el) cpXML += '\n\t\t\t' f = file(javadocLocation[el]) @@ -156,11 +158,11 @@ def execute(**kargs): os.rename(os.path.join(app.path, 'eclipse/debug.launch'), os.path.join(app.path, 'eclipse/%s.launch' % application_name)) if is_application: - print "~ OK, the application \"%s\" is ready for eclipse" % application_name + print("~ OK, the application \"%s\" is ready for eclipse" % application_name) else: - print "~ OK, the module \"%s\" is 
ready for eclipse" % application_name - print "~ Use File/Import/General/Existing project to import %s into eclipse" % os.path.normpath(app.path) - print "~" - print "~ Use eclipsify again when you want to update eclipse configuration files." - print "~ However, it's often better to delete and re-import the project into your workspace since eclipse keeps dirty caches..." - print "~" + print("~ OK, the module \"%s\" is ready for eclipse" % application_name) + print("~ Use File/Import/General/Existing project to import %s into eclipse" % os.path.normpath(app.path)) + print("~") + print("~ Use eclipsify again when you want to update eclipse configuration files.") + print("~ However, it's often better to delete and re-import the project into your workspace since eclipse keeps dirty caches...") + print("~") diff --git a/framework/pym/play/commands/evolutions.py b/framework/pym/play/commands/evolutions.py index acf5eb26bc..27449296ab 100644 --- a/framework/pym/play/commands/evolutions.py +++ b/framework/pym/play/commands/evolutions.py @@ -1,6 +1,7 @@ +from __future__ import print_function import os, os.path import shutil -import urllib, urllib2 +import urllib.request, urllib.parse, urllib.error import subprocess import simplejson as json @@ -36,7 +37,7 @@ def execute(**kargs): add_options = ['-Dapplication.path=%s' % (app.path), '-Dframework.path=%s' % (play_env['basedir']), '-Dplay.id=%s' % play_env['id'], '-Dplay.version=%s' % play_env['version']] if args.count('--jpda'): - print "~ Waiting for JPDA client to continue" + print("~ Waiting for JPDA client to continue") args.remove('--jpda') add_options.append('-Xdebug') add_options.append('-Xrunjdwp:transport=dt_socket,address=%s,server=y,suspend=y' % app.jpda_port) @@ -52,5 +53,5 @@ def execute(**kargs): if 0 != return_code: sys.exit(return_code); except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly 
(the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) diff --git a/framework/pym/play/commands/help.py b/framework/pym/play/commands/help.py index 821428240b..2f4e4efa73 100644 --- a/framework/pym/play/commands/help.py +++ b/framework/pym/play/commands/help.py @@ -1,3 +1,4 @@ +from __future__ import print_function # Display help import sys, os, re @@ -19,7 +20,7 @@ def execute(**kargs): cmd = args[0] help_file = os.path.join(play_env["basedir"], 'documentation', 'commands', 'cmd-%s.txt' % cmd) if os.path.exists(help_file): - print open(help_file, 'r').read() + print(open(help_file, 'r').read()) else: exists = False slugCmd = re.sub('[-\s]+', '-', re.sub('[^\w\s-]', '', cmd.encode('ascii', 'ignore')).strip().lower()) @@ -30,38 +31,38 @@ def execute(**kargs): % slugCmd) exists = os.path.exists(help_file) if exists: - print open(help_file, 'r').read() + print(open(help_file, 'r').read()) break if not exists: - print '~ Oops, command \'%s\' not found. Try just \'play help\' to list all commands.' % cmd - print '~' + print('~ Oops, command \'%s\' not found. Try just \'play help\' to list all commands.' 
% cmd) + print('~') sys.exit(-1) else: main_help(cmdloader.commands, play_env) def main_help(commands, play_env): modules_commands = [] - print "~ For all commands, if the application is not specified, the current directory is used" - print "~ Use 'play help cmd' to get more help on a specific command" - print "~" - print "~ Core commands:" - print "~ ~~~~~~~~~~~~~~" + print("~ For all commands, if the application is not specified, the current directory is used") + print("~ Use 'play help cmd' to get more help on a specific command") + print("~") + print("~ Core commands:") + print("~ ~~~~~~~~~~~~~~") for cmd in sorted(commands): if not isCore(commands[cmd], play_env): modules_commands.append(cmd) continue if 'HELP' in dir(commands[cmd]) and cmd in commands[cmd].HELP: - print "~ " + cmd + (' ' * (16 - len(cmd))) + commands[cmd].HELP[cmd] + print("~ " + cmd + (' ' * (16 - len(cmd))) + commands[cmd].HELP[cmd]) if len(modules_commands) > 0: - print "~" - print "~ Modules commands:" - print "~ ~~~~~~~~~~~~~~~~~" + print("~") + print("~ Modules commands:") + print("~ ~~~~~~~~~~~~~~~~~") for cmd in modules_commands: if 'HELP' in dir(commands[cmd]) and cmd in commands[cmd].HELP: - print "~ " + cmd + (' ' * (20 - len(cmd))) + commands[cmd].HELP[cmd] - print "~" - print "~ Also refer to documentation at https://www.playframework.com/documentation" - print "~" + print("~ " + cmd + (' ' * (20 - len(cmd))) + commands[cmd].HELP[cmd]) + print("~") + print("~ Also refer to documentation at https://www.playframework.com/documentation") + print("~") def isCore(mod, play_env): path = os.path.realpath(mod.__file__) diff --git a/framework/pym/play/commands/intellij.py b/framework/pym/play/commands/intellij.py index a5f1e675dc..4c61e9c7d6 100644 --- a/framework/pym/play/commands/intellij.py +++ b/framework/pym/play/commands/intellij.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil @@ -66,7 +67,7 @@ def execute(**kargs): replaceAll(iprFile, 
r'%PROJECT_NAME%', application_name) - print "~ OK, the application is ready for Intellij Idea" - print "~ Use File, Open Project... to open \"" + application_name + ".ipr\"" - print "~" + print("~ OK, the application is ready for Intellij Idea") + print("~ Use File, Open Project... to open \"" + application_name + ".ipr\"") + print("~") diff --git a/framework/pym/play/commands/javadoc.py b/framework/pym/play/commands/javadoc.py index dcde0319cc..b366c6dbbf 100644 --- a/framework/pym/play/commands/javadoc.py +++ b/framework/pym/play/commands/javadoc.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil import subprocess @@ -19,7 +20,7 @@ def execute(**kargs): args = kargs.get("args") play_env = kargs.get("env") - if not os.environ.has_key('JAVA_HOME'): + if 'JAVA_HOME' not in os.environ: javadoc_path = "javadoc" else: javadoc_path = os.path.normpath("%s/bin/javadoc" % os.environ['JAVA_HOME']) @@ -38,7 +39,7 @@ def execute(**kargs): defineJavadocFiles(app, outdir) javadoc_cmd = [javadoc_path, '@'+os.path.join(outdir,'javadocOptions'), '@'+os.path.join(outdir,'javadocFiles')] - print "Generating Javadoc in " + outdir + "..." + print("Generating Javadoc in " + outdir + "...") return_code = subprocess.call(javadoc_cmd, env=os.environ, stdout=sout, stderr=serr) # Remove configuration file @@ -47,10 +48,10 @@ def execute(**kargs): # Display the status if return_code != 0: - print "Unable to create Javadocs. See " + os.path.join(app.log_path(), 'javadoc.err') + " for errors." + print("Unable to create Javadocs. See " + os.path.join(app.log_path(), 'javadoc.err') + " for errors.") sys.exit(return_code) - print "Done! You can open " + os.path.join(outdir, 'overview-tree.html') + " in your browser." + print("Done! 
You can open " + os.path.join(outdir, 'overview-tree.html') + " in your browser.") @@ -69,21 +70,21 @@ def defineJavadocOptions(app, outdir, args): f.write(' -footer "' + app.readConf('application.name') + '"') if args.count('--links'): - print "~ Build project Javadoc with links to :" + print("~ Build project Javadoc with links to :") args.remove('--links') # Add link to JavaDoc of JAVA javaVersion = getJavaVersion() - print "~ using java version \"%s\"" % javaVersion + print("~ using java version \"%s\"" % javaVersion) if javaVersion.startswith("1.5"): - print "~ Java(TM) Platform, Platform Standard Edition 5.0" - print "~ Java(TM) EE 5 Specification APIs" + print("~ Java(TM) Platform, Platform Standard Edition 5.0") + print("~ Java(TM) EE 5 Specification APIs") f.write(' -link http://docs.oracle.com/javase/1.5.0/docs/api/') f.write(' -link http://docs.oracle.com/javaee/5/api/') else: urlVersion = javaVersion[2:3] - print "~ Java(TM) Platform, Standard Edition " + urlVersion + " API Specification" - print "~ Java(TM) EE " + urlVersion + " Specification APIs" + print("~ Java(TM) Platform, Standard Edition " + urlVersion + " API Specification") + print("~ Java(TM) EE " + urlVersion + " Specification APIs") f.write(' -link http://docs.oracle.com/javase/' + urlVersion + '/docs/api/') f.write(' -link http://docs.oracle.com/javaee/' + urlVersion + '/api/') @@ -91,10 +92,10 @@ def defineJavadocOptions(app, outdir, args): # Add link to JavaDoc of Play Framework playVersion = app.play_env['version'] if "localbuild" in playVersion: - print "~ API documentation to Play! Framework V" + playVersion + " doesn't exist => link to V" + DEFAULT_API_VERSION + print("~ API documentation to Play! 
Framework V" + playVersion + " doesn't exist => link to V" + DEFAULT_API_VERSION) playVersion = DEFAULT_API_VERSION - print "~ Play Framework V" + playVersion + " API documentation" + print("~ Play Framework V" + playVersion + " API documentation") f.write(' -link https://www.playframework.com/documentation/' + playVersion + '/api/') diff --git a/framework/pym/play/commands/modulesrepo.py b/framework/pym/play/commands/modulesrepo.py index d8de6670fe..c05dbd125d 100644 --- a/framework/pym/play/commands/modulesrepo.py +++ b/framework/pym/play/commands/modulesrepo.py @@ -1,14 +1,20 @@ +from __future__ import print_function +from __future__ import division +from builtins import str +from builtins import input +from builtins import range +from builtins import object import os import subprocess import sys import re import zipfile -import urllib2 +import urllib.request, urllib.error, urllib.parse import shutil import string import imp import time -import urllib +import urllib.request, urllib.parse, urllib.error import yaml from play.utils import * @@ -78,12 +84,12 @@ class Downloader(object): before = .0 history = [] cycles = 0 - average = lambda self: sum(self.history) / (len(self.history) or 1) + average = lambda self: sum(self.history) // (len(self.history) or 1) def __init__(self, width=55): self.width = width - self.kibi = lambda bits: bits / 2 ** 10 - self.proc = lambda a, b: a / (b * 0.01) + self.kibi = lambda bits: bits // (2 ** 10) + self.proc = lambda a, b: a // (b * 0.01) def retrieve(self, url, destination, callback=None): self.size = 0 @@ -92,12 +98,12 @@ def retrieve(self, url, destination, callback=None): headers={'User-Agent':DEFAULT_USER_AGENT, 'Accept': 'application/json' } - req = urllib2.Request(url, headers=headers) - result = urllib2.urlopen(req) + req = urllib.request.Request(url, headers=headers) + result = urllib.request.urlopen(req) self.chunk_read(result, destination, report_hook=self.chunk_report) except KeyboardInterrupt: - print '\n~ Download 
cancelled' - print '~' + print('\n~ Download cancelled') + print('~') for i in range(5): try: os.remove(destination) @@ -107,7 +113,7 @@ def retrieve(self, url, destination, callback=None): else: raise if callback: callback() sys.exit() - print '' + print('') return self.size def chunk_read(self, response, destination, chunk_size=8192, report_hook=None): @@ -151,13 +157,13 @@ def progress(self, bytes_so_far, blocksize, filesize): now = time.clock() elapsed = now-self.before if elapsed: - speed = self.kibi(blocksize * 3 / elapsed) + speed = self.kibi(blocksize * 3 // elapsed) self.history.append(speed) self.history = self.history[-4:] self.before = now - average = round(sum(self.history[-4:]) / 4, 1) + average = round(sum(self.history[-4:]) // 4, 1) self.size = self.kibi(bits) - print '\r~ [%s] %s KiB/s ' % (bar, str(average)), + print('\r~ [%s] %s KiB/s ' % (bar, str(average)), end=' ') def bar(self, bytes_so_far, filesize, done): span = self.width * done * 0.01 @@ -165,7 +171,7 @@ def bar(self, bytes_so_far, filesize, done): result = ('%s of %s KiB (%d%%)' % (self.kibi(bytes_so_far), self.kibi(filesize), done,)).center(self.width) return result.replace(' ', '-', int(span - offset)) -class Unzip: +class Unzip(object): def __init__(self, verbose = False, percent = 10): self.verbose = verbose self.percent = percent @@ -178,12 +184,12 @@ def extract(self, file, dir): self._createstructure(file, dir) num_files = len(zf.namelist()) percent = self.percent - divisions = 100 / percent - perc = int(num_files / divisions) + divisions = 100 // percent + perc = int( num_files / divisions) # extract files to directory structure for i, name in enumerate(zf.namelist()): if self.verbose == True: - print "Extracting %s" % name + print("Extracting %s" % name) elif perc > 0 and (i % perc) == 0 and i > 0: complete = int (i / perc) * percent if not name.endswith('/'): @@ -219,12 +225,12 @@ def _listdirs(self, file): def new(app, args, play_env): if os.path.exists(app.path): - print "~ 
Oops. %s already exists" % app.path - print "~" + print("~ Oops. %s already exists" % app.path) + print("~") sys.exit(-1) - print "~ The new module will be created in %s" % os.path.normpath(app.path) - print "~" + print("~ The new module will be created in %s" % os.path.normpath(app.path)) + print("~") application_name = os.path.basename(app.path) copy_directory(os.path.join(play_env["basedir"], 'resources/module-skel'), app.path) # check_application() @@ -247,25 +253,25 @@ def new(app, args, play_env): os.mkdir(os.path.join(app.path, 'src/play/modules')) os.mkdir(os.path.join(app.path, 'src/play/modules/%s' % application_name)) - print "~ OK, the module is created." - print "~ Start using it by adding it to the dependencies.yml of your project, as decribed in the documentation." - print "~" - print "~ Have fun!" - print "~" + print("~ OK, the module is created.") + print("~ Start using it by adding it to the dependencies.yml of your project, as decribed in the documentation.") + print("~") + print("~ Have fun!") + print("~") def list(app, args): - print "~ You can also browse this list online at:" + print("~ You can also browse this list online at:") for repo in repositories: - print "~ %s/modules" % repo - print "~" + print("~ %s/modules" % repo) + print("~") modules_list = load_module_list() for mod in modules_list: - print "~ [%s]" % mod['name'] - print "~ %s" % mod['fullname'] - print "~ %s/modules/%s" % (mod['server'], mod['name']) + print("~ [%s]" % mod['name']) + print("~ %s" % mod['fullname']) + print("~ %s/modules/%s" % (mod['server'], mod['name'])) vl = '' i = 0 @@ -276,17 +282,17 @@ def list(app, args): vl += ', ' if vl: - print "~ Versions: %s" % vl + print("~ Versions: %s" % vl) else: - print "~ (No versions released yet)" - print "~" + print("~ (No versions released yet)") + print("~") - print "~ To install one of these modules use:" - print "~ play install module-version (eg: play install scala-1.0)" - print "~" - print "~ Or you can just install 
the default release of a module using:" - print "~ play install module (eg: play install scala)" - print "~" + print("~ To install one of these modules use:") + print("~ play install module-version (eg: play install scala-1.0)") + print("~") + print("~ Or you can just install the default release of a module using:") + print("~ play install module (eg: play install scala)") + print("~") def build(app, args, env): @@ -305,9 +311,9 @@ def build(app, args, env): version = a if o in ('--require'): fwkMatch = a - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ " + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ ") sys.exit(-1) deps_file = os.path.join(app.path, 'conf', 'dependencies.yml') @@ -324,7 +330,7 @@ def build(app, args, env): version = splitted.pop() name = splitted.pop() for dep in deps["require"]: - if isinstance(dep, basestring): + if isinstance(dep, str): splitted = dep.split(" ") if len(splitted) == 2 and splitted[0] == "play": fwkMatch = splitted[1] @@ -334,9 +340,9 @@ def build(app, args, env): if name is None: name = os.path.basename(app.path) if version is None: - version = raw_input("~ What is the module version number? ") + version = input("~ What is the module version number? ") if fwkMatch is None: - fwkMatch = raw_input("~ What are the playframework versions required? ") + fwkMatch = input("~ What are the playframework versions required? ") if os.path.exists(deps_file): f = open(deps_file) @@ -358,11 +364,11 @@ def build(app, args, env): build_file = os.path.join(app.path, 'build.xml') if os.path.exists(build_file): - print "~" - print "~ Building..." - print "~" + print("~") + print("~ Building...") + print("~") status = subprocess.call('ant -f %s -Dplay.path=%s' % (build_file, ftb), shell=True) - print "~" + print("~") if status: sys.exit(status) @@ -404,16 +410,16 @@ def build(app, args, env): except: pass - print "~" - print "~ Done!" 
- print "~ Package is available at %s" % os.path.join(dist_dir, '%s.zip' % mv) - print "~" + print("~") + print("~ Done!") + print("~ Package is available at %s" % os.path.join(dist_dir, '%s.zip' % mv)) + print("~") def install(app, args, env): if len(sys.argv) < 3: help_file = os.path.join(env["basedir"], 'documentation/commands/cmd-install.txt') - print open(help_file, 'r').read() + print(open(help_file, 'r').read()) sys.exit(0) name = cmd = sys.argv[2] @@ -422,7 +428,7 @@ def install(app, args, env): version = groups.group(3) server = None - if args is not None: + if args != None: for param in args: if param.startswith("--force-server="): server = param[15:] @@ -433,48 +439,48 @@ def install(app, args, env): if mod['name'] == module: for v in mod['versions']: if version is None and v['isDefault']: - print '~ Will install %s-%s' % (module, v['version']) - print '~ This module is compatible with: %s' % v['matches'] - ok = raw_input('~ Do you want to install this version (y/n)? ') + print('~ Will install %s-%s' % (module, v['version'])) + print('~ This module is compatible with: %s' % v['matches']) + ok = input('~ Do you want to install this version (y/n)? ') if not ok == 'y': - print '~' + print('~') sys.exit(-1) - print '~ Installing module %s-%s...' % (module, v['version']) + print('~ Installing module %s-%s...' % (module, v['version'])) fetch = '%s/modules/%s-%s.zip' % (mod['server'], module, v['version']) break if version == v['version']: - print '~ Will install %s-%s' % (module, v['version']) - print '~ This module is compatible with: %s' % v['matches'] - ok = raw_input('~ Do you want to install this version (y/n)? ') + print('~ Will install %s-%s' % (module, v['version'])) + print('~ This module is compatible with: %s' % v['matches']) + ok = input('~ Do you want to install this version (y/n)? ') if not ok == 'y': - print '~' + print('~') sys.exit(-1) - print '~ Installing module %s-%s...' % (module, v['version']) + print('~ Installing module %s-%s...' 
% (module, v['version'])) fetch = '%s/modules/%s-%s.zip' % (mod['server'], module, v['version']) break if fetch is None: - print '~ No module found \'%s\'' % name - print '~ Try play list-modules to get the modules list' - print '~' + print('~ No module found \'%s\'' % name) + print('~ Try play list-modules to get the modules list') + print('~') sys.exit(-1) archive = os.path.join(env["basedir"], 'modules/%s-%s.zip' % (module, v['version'])) if os.path.exists(archive): os.remove(archive) - print '~' - print '~ Fetching %s' % fetch + print('~') + print('~ Fetching %s' % fetch) Downloader().retrieve(fetch, archive) if not os.path.exists(archive): - print '~ Oops, file does not exist' - print '~' + print('~ Oops, file does not exist') + print('~') sys.exist(-1) - print '~ Unzipping...' + print('~ Unzipping...') if os.path.exists(os.path.join(env["basedir"], 'modules/%s-%s' % (module, v['version']))): shutil.rmtree(os.path.join(env["basedir"], 'modules/%s-%s' % (module, v['version']))) @@ -482,13 +488,13 @@ def install(app, args, env): Unzip().extract(archive, os.path.join(env["basedir"], 'modules/%s-%s' % (module, v['version']))) os.remove(archive) - print '~' - print '~ Module %s-%s is installed!' % (module, v['version']) - print '~ You can now use it by adding it to the dependencies.yml file:' - print '~' - print '~ require:' - print '~ play -> %s %s' % (module, v['version']) - print '~' + print('~') + print('~ Module %s-%s is installed!' 
% (module, v['version'])) + print('~ You can now use it by adding it to the dependencies.yml file:') + print('~') + print('~ require:') + print('~ play -> %s %s' % (module, v['version'])) + print('~') sys.exit(0) @@ -501,20 +507,20 @@ def add(app, args, env): for o, a in optlist: if o in ('--module'): m = a - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ " + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ ") sys.exit(-1) if m is None: - print "~ Usage: play add --module=" - print "~ " + print("~ Usage: play add --module=") + print("~ ") sys.exit(-1) appConf = os.path.join(app.path, 'conf/application.conf') if not fileHas(appConf, '# ---- MODULES ----'): - print "~ Line '---- MODULES ----' missing in your application.conf. Add it to use this command." - print "~ " + print("~ Line '---- MODULES ----' missing in your application.conf. Add it to use this command.") + print("~ ") sys.exit(-1) mn = m @@ -522,13 +528,13 @@ def add(app, args, env): mn = mn[:mn.find('-')] if mn in app.module_names(): - print "~ Module %s already declared in application.conf, not doing anything." % mn - print "~ " + print("~ Module %s already declared in application.conf, not doing anything." % mn) + print("~ ") sys.exit(-1) replaceAll(appConf, r'# ---- MODULES ----', '# ---- MODULES ----\nmodule.%s=${play.path}/modules/%s' % (mn, m) ) - print "~ Module %s add to application %s." % (mn, app.name()) - print "~ " + print("~ Module %s add to application %s." 
% (mn, app.name())) + print("~ ") def load_module_list(custom_server=None): @@ -543,7 +549,7 @@ def any(arr, func): return False modules = None - if custom_server is not None: + if custom_server != None: rev = [custom_server] else: rev = repositories[:] # clone @@ -552,7 +558,7 @@ def any(arr, func): for repo in rev: result = load_modules_from(repo) if modules is None: - modules = map(lambda m: addServer(m, repo), result['modules']) + modules = [addServer(m, repo) for m in result['modules']] else: for module in result['modules']: if not any(modules, lambda m: m['name'] == module['name']): @@ -566,17 +572,17 @@ def load_modules_from(modules_server): headers={'User-Agent':DEFAULT_USER_AGENT, 'Accept': 'application/json' } - req = urllib2.Request(url, headers=headers) - result = urllib2.urlopen(req) + req = urllib.request.Request(url, headers=headers) + result = urllib.request.urlopen(req) return json.loads(result.read()) - except urllib2.HTTPError, e: - print "~ Oops," - print "~ Cannot fetch the modules list from %s (%s)..." % (url, e.code) - print e.reason - print "~" + except urllib.error.HTTPError as e: + print("~ Oops,") + print("~ Cannot fetch the modules list from %s (%s)..." % (url, e.code)) + print(e.reason) + print("~") sys.exit(-1) - except urllib2.URLError, e: - print "~ Oops," - print "~ Cannot fetch the modules list from %s ..." % (url) - print "~" + except urllib.error.URLError as e: + print("~ Oops,") + print("~ Cannot fetch the modules list from %s ..." 
% (url)) + print("~") sys.exit(-1) diff --git a/framework/pym/play/commands/netbeans.py b/framework/pym/play/commands/netbeans.py index f0ce397ed7..cca700f087 100644 --- a/framework/pym/play/commands/netbeans.py +++ b/framework/pym/play/commands/netbeans.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil import time @@ -43,8 +44,8 @@ def execute(**kargs): if not re.search("\.[svn|git|hg|scc|vssscc]", dir): mr = '%s' % (dir, dir) replaceAll(os.path.join(nbproject, 'project.xml'), r'%MORE%', mr) - print "~ OK, the application is ready for netbeans" - print "~ Just open %s as a netbeans project" % os.path.normpath(app.path) - print "~" - print "~ Use netbeansify again when you want to update netbeans configuration files, then close and open you project again." - print "~" + print("~ OK, the application is ready for netbeans") + print("~ Just open %s as a netbeans project" % os.path.normpath(app.path)) + print("~") + print("~ Use netbeansify again when you want to update netbeans configuration files, then close and open you project again.") + print("~") diff --git a/framework/pym/play/commands/precompile.py b/framework/pym/play/commands/precompile.py index 9b3c1e1784..9064d79af3 100644 --- a/framework/pym/play/commands/precompile.py +++ b/framework/pym/play/commands/precompile.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil import subprocess @@ -26,6 +27,6 @@ def execute(**kargs): try: return subprocess.call(java_cmd, env=os.environ) except OSError: - print "~ Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " - print "~ " + print("~ Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). 
") + print("~ ") sys.exit(-1) diff --git a/framework/pym/play/commands/secret.py b/framework/pym/play/commands/secret.py index a5237d24bd..a67fe733fe 100644 --- a/framework/pym/play/commands/secret.py +++ b/framework/pym/play/commands/secret.py @@ -1,3 +1,4 @@ +from __future__ import print_function from play.utils import * COMMANDS = ['secret'] @@ -10,8 +11,8 @@ def execute(**kargs): app = kargs.get("app") app.check() - print "~ Generating the secret key..." + print("~ Generating the secret key...") sk = secretKey() replaceAll(os.path.join(app.path, 'conf', 'application.conf'), r'application.secret=.*', 'application.secret=%s' % sk, True) - print "~ Keep the secret : %s" % sk - print "~" + print("~ Keep the secret : %s" % sk) + print("~") diff --git a/framework/pym/play/commands/status.py b/framework/pym/play/commands/status.py index a053d8ba6a..538b190d6c 100644 --- a/framework/pym/play/commands/status.py +++ b/framework/pym/play/commands/status.py @@ -1,7 +1,9 @@ +from __future__ import print_function +from builtins import str import os, os.path import shutil import getopt -import urllib2 +import urllib.request, urllib.error, urllib.parse from play.utils import * @@ -29,9 +31,9 @@ def execute(**kargs): url = a + '/@status' if o in ('--secret'): secret_key = a - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ " + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ ") sys.exit(-1) if not url or not secret_key: @@ -43,21 +45,21 @@ def execute(**kargs): secret_key = app.readConf('application.statusKey') try: - proxy_handler = urllib2.ProxyHandler({}) - req = urllib2.Request(url) + proxy_handler = urllib.request.ProxyHandler({}) + req = urllib.request.Request(url) req.add_header('Authorization', secret_key) - opener = urllib2.build_opener(proxy_handler) + opener = urllib.request.build_opener(proxy_handler) status = opener.open(req) - print '~ Status from %s,' % url - print '~' - print status.read() - print '~' - except 
urllib2.HTTPError, e: - print "~ Cannot retrieve the application status... (%s)" % (e.code) - print "~" + print('~ Status from %s,' % url) + print('~') + print(status.read()) + print('~') + except urllib.error.HTTPError as e: + print("~ Cannot retrieve the application status... (%s)" % (e.code)) + print("~") sys.exit(-1) - except urllib2.URLError, e: - print "~ Cannot contact the application..." - print "~" + except urllib.error.URLError as e: + print("~ Cannot contact the application...") + print("~") sys.exit(-1) - print + print() diff --git a/framework/pym/play/commands/test.py b/framework/pym/play/commands/test.py index f956c11072..2288e2819d 100644 --- a/framework/pym/play/commands/test.py +++ b/framework/pym/play/commands/test.py @@ -1,3 +1,4 @@ +from __future__ import print_function import sys import subprocess @@ -21,17 +22,17 @@ def execute(**kargs): def test(app, args): app.check() java_cmd = app.java_cmd(args) - print "~ Running in test mode" - print "~ Ctrl+C to stop" - print "~ " + print("~ Running in test mode") + print("~ Ctrl+C to stop") + print("~ ") try: return_code = subprocess.call(java_cmd, env=os.environ) if 0 != return_code: sys.exit(return_code) except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). 
") sys.exit(-1) - print "~ " + print("~ ") diff --git a/framework/pym/play/commands/version.py b/framework/pym/play/commands/version.py index 309d6e187c..83512cb01f 100644 --- a/framework/pym/play/commands/version.py +++ b/framework/pym/play/commands/version.py @@ -1,3 +1,4 @@ +from __future__ import print_function COMMANDS = ['version'] @@ -11,4 +12,4 @@ def execute(**kargs): # If we've shown the logo, then the version has already been printed if not showLogo: - print env["version"] + print(env["version"]) diff --git a/framework/pym/play/commands/war.py b/framework/pym/play/commands/war.py index cf69f33087..35eac30f7c 100644 --- a/framework/pym/play/commands/war.py +++ b/framework/pym/play/commands/war.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from builtins import str import sys import os import getopt @@ -32,50 +34,50 @@ def execute(**kargs): war_zip_path = war_path + '.war' if o in ('--exclude'): war_exclusion_list = a.split(':') - print "~ Excluding these directories :" + print("~ Excluding these directories :") for excluded in war_exclusion_list: - print "~ %s" %excluded - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ Please specify a path where to generate the WAR, using the -o or --output option." - print "~ To exclude some directories, use the --exclude option and ':'-separator (eg: --exclude .svn:target:logs:tmp)." - print "~ " + print("~ %s" %excluded) + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ Please specify a path where to generate the WAR, using the -o or --output option.") + print("~ To exclude some directories, use the --exclude option and ':'-separator (eg: --exclude .svn:target:logs:tmp).") + print("~ ") sys.exit(-1) if not war_path: - print "~ Oops. Please specify a path where to generate the WAR, using the -o or --output option" - print "~ To exclude some directories, use the --exclude option and ':'-separator (eg: --exclude .svn:target:logs:tmp)." - print "~" + print("~ Oops. 
Please specify a path where to generate the WAR, using the -o or --output option") + print("~ To exclude some directories, use the --exclude option and ':'-separator (eg: --exclude .svn:target:logs:tmp).") + print("~") sys.exit(-1) if os.path.exists(war_path) and not os.path.exists(os.path.join(war_path, 'WEB-INF')): - print "~ Oops. The destination path already exists but does not seem to host a valid WAR structure" - print "~" + print("~ Oops. The destination path already exists but does not seem to host a valid WAR structure") + print("~") sys.exit(-1) if isParentOf(app.path, war_path) and not isExcluded(war_path, war_exclusion_list): - print "~ Oops. Please specify a destination directory outside of the application" - print "~ or exclude war destination directory using the --exclude option and ':'-separator " - print "~ (eg: --exclude .svn:target:logs:tmp)." - print "~" + print("~ Oops. Please specify a destination directory outside of the application") + print("~ or exclude war destination directory using the --exclude option and ':'-separator ") + print("~ (eg: --exclude .svn:target:logs:tmp).") + print("~") sys.exit(-1) # Precompile first precompilation_result = play.commands.precompile.execute(command=command, app=app, args=args, env=env) if precompilation_result != 0: - print "~ Please fix compilation errors before packaging WAR" - print "~" + print("~ Please fix compilation errors before packaging WAR") + print("~") sys.exit(precompilation_result) # Package package_as_war(app, env, war_path, war_zip_path, war_exclusion_list) - print "~ Done !" - print "~" - print "~ You can now load %s as a standard WAR into your servlet container" % (os.path.normpath(war_path)) - print "~ You can't use play standard commands to run/stop/debug the WAR application..." - print "~ ... just use your servlet container commands instead" - print "~" - print "~ Have fun!" 
- print "~" + print("~ Done !") + print("~") + print("~ You can now load %s as a standard WAR into your servlet container" % (os.path.normpath(war_path))) + print("~ You can't use play standard commands to run/stop/debug the WAR application...") + print("~ ... just use your servlet container commands instead") + print("~") + print("~ Have fun!") + print("~") diff --git a/framework/pym/play/utils.py b/framework/pym/play/utils.py index 88866ba1f7..d695c9b225 100644 --- a/framework/pym/play/utils.py +++ b/framework/pym/play/utils.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from builtins import str +from builtins import range import sys import os, os.path import re @@ -75,8 +78,8 @@ def getWithModules(args, env): dirname = os.path.join(env["basedir"], 'modules/%s' % f) break if not dirname: - print "~ Oops. Module " + m + " not found (try running `play install " + m + "`)" - print "~" + print("~ Oops. Module " + m + " not found (try running `play install " + m + "`)") + print("~") sys.exit(-1) md.append(dirname) @@ -91,23 +94,23 @@ def package_as_war(app, env, war_path, war_zip_path, war_exclusion_list = None): classpath = app.getClasspath() if not war_path: - print "~ Oops. Please specify a path where to generate the WAR, using the -o or --output option" - print "~" + print("~ Oops. Please specify a path where to generate the WAR, using the -o or --output option") + print("~") sys.exit(-1) if os.path.exists(war_path) and not os.path.exists(os.path.join(war_path, 'WEB-INF')): - print "~ Oops. The destination path already exists but does not seem to host a valid WAR structure" - print "~" + print("~ Oops. The destination path already exists but does not seem to host a valid WAR structure") + print("~") sys.exit(-1) if isParentOf(app.path, war_path) and not isExcluded(war_path, war_exclusion_list): - print "~ Oops. 
Please specify a destination directory outside of the application" - print "~ or exclude war destination directory using the --exclude option and ':'-separator " - print "~ (eg: --exclude .svn:target:logs:tmp)." - print "~" + print("~ Oops. Please specify a destination directory outside of the application") + print("~ or exclude war destination directory using the --exclude option and ':'-separator ") + print("~ (eg: --exclude .svn:target:logs:tmp).") + print("~") sys.exit(-1) - print "~ Packaging current version of the framework and the application to %s ..." % (os.path.normpath(war_path)) + print("~ Packaging current version of the framework and the application to %s ..." % (os.path.normpath(war_path))) if os.path.exists(war_path): shutil.rmtree(war_path) if os.path.exists(os.path.join(app.path, 'war')): copy_directory(os.path.join(app.path, 'war'), war_path) @@ -118,7 +121,7 @@ def package_as_war(app, env, war_path, war_zip_path, war_exclusion_list = None): shutil.copyfile(os.path.join(env["basedir"], 'resources/war/web.xml'), os.path.join(war_path, 'WEB-INF/web.xml')) application_name = app.readConf('application.name') replaceAll(os.path.join(war_path, 'WEB-INF/web.xml'), r'%APPLICATION_NAME%', application_name) - if env["id"] is not "": + if env["id"] != "": replaceAll(os.path.join(war_path, 'WEB-INF/web.xml'), r'%PLAY_ID%', env["id"]) else: replaceAll(os.path.join(war_path, 'WEB-INF/web.xml'), r'%PLAY_ID%', 'war') @@ -167,7 +170,7 @@ def package_as_war(app, env, war_path, war_zip_path, war_exclusion_list = None): shutil.copyfile(os.path.join(env["basedir"], 'resources/messages'), os.path.join(war_path, 'WEB-INF/resources/messages')) if war_zip_path: - print "~ Creating zipped archive to %s ..." % (os.path.normpath(war_zip_path)) + print("~ Creating zipped archive to %s ..." 
% (os.path.normpath(war_zip_path))) if os.path.exists(war_zip_path): os.remove(war_zip_path) zip = zipfile.ZipFile(war_zip_path, 'w', zipfile.ZIP_STORED) @@ -242,7 +245,7 @@ def isTestFrameworkId( framework_id ): return (framework_id == 'test' or (framework_id.startswith('test-') and framework_id.__len__() >= 6 )) def java_path(): - if not os.environ.has_key('JAVA_HOME'): + if 'JAVA_HOME' not in os.environ: return "java" else: return os.path.normpath("%s/bin/java" % os.environ['JAVA_HOME']) @@ -257,5 +260,5 @@ def getJavaVersion(): if result: return result.group(1) else: - print "Unable to retrieve java version from " + javaVersion + print("Unable to retrieve java version from " + javaVersion) return "" diff --git a/framework/pym/simplejson/__init__.py b/framework/pym/simplejson/__init__.py index d5b4d39913..2e3b838ee8 100644 --- a/framework/pym/simplejson/__init__.py +++ b/framework/pym/simplejson/__init__.py @@ -5,24 +5,23 @@ :mod:`simplejson` exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. It is the externally maintained version of the :mod:`json` library contained in Python 2.6, but maintains -compatibility with Python 2.4 and Python 2.5 and (currently) has -significant performance advantages, even without using the optional C -extension for speedups. +compatibility back to Python 2.5 and (currently) has significant performance +advantages, even without using the optional C extension for speedups. 
Encoding basic Python object hierarchies:: >>> import simplejson as json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print json.dumps("\"foo\bar") + >>> print(json.dumps("\"foo\bar")) "\"foo\bar" - >>> print json.dumps(u'\u1234') + >>> print(json.dumps(u'\u1234')) "\u1234" - >>> print json.dumps('\\') + >>> print(json.dumps('\\')) "\\" - >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() @@ -31,14 +30,14 @@ Compact encoding:: >>> import simplejson as json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + >>> obj = [1,2,3,{'4': 5, '6': 7}] + >>> json.dumps(obj, separators=(',',':'), sort_keys=True) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import simplejson as json - >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) - >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ')) { "4": 5, "6": 7 @@ -52,7 +51,7 @@ True >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' True - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True @@ -68,8 +67,8 @@ >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', ... object_hook=as_complex) (1+2j) - >>> import decimal - >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + >>> from decimal import Decimal + >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') True Specializing JSON object encoding:: @@ -78,7 +77,8 @@ >>> def encode_complex(obj): ... if isinstance(obj, complex): ... return [obj.real, obj.imag] - ... raise TypeError(repr(o) + " is not JSON serializable") + ... 
raise TypeError('Object of type %s is not JSON serializable' % + ... obj.__class__.__name__) ... >>> json.dumps(2 + 1j, default=encode_complex) '[2.0, 1.0]' @@ -87,7 +87,6 @@ >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) '[2.0, 1.0]' - Using simplejson.tool from the shell to validate and pretty-print:: $ echo '{"json":"obj"}' | python -m simplejson.tool @@ -95,18 +94,60 @@ "json": "obj" } $ echo '{ 1.2:3.4}' | python -m simplejson.tool - Expecting property name: line 1 column 2 (char 2) + Expecting property name: line 1 column 3 (char 2) + +Parsing multiple documents serialized as JSON lines (newline-delimited JSON):: + + >>> import simplejson as json + >>> def loads_lines(docs): + ... for doc in docs.splitlines(): + ... yield json.loads(doc) + ... + >>> sum(doc["count"] for doc in loads_lines('{"count":1}\n{"count":2}\n{"count":3}\n')) + 6 + +Serializing multiple objects to JSON lines (newline-delimited JSON):: + + >>> import simplejson as json + >>> def dumps_lines(objs): + ... for obj in objs: + ... yield json.dumps(obj, separators=(',',':')) + '\n' + ... + >>> ''.join(dumps_lines([{'count': 1}, {'count': 2}, {'count': 3}])) + '{"count":1}\n{"count":2}\n{"count":3}\n' + """ -__version__ = '2.0.9' +from __future__ import absolute_import +__version__ = '3.17.2' __all__ = [ 'dump', 'dumps', 'load', 'loads', - 'JSONDecoder', 'JSONEncoder', + 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', + 'OrderedDict', 'simple_first', 'RawJSON' ] __author__ = 'Bob Ippolito ' -from decoder import JSONDecoder -from encoder import JSONEncoder +from decimal import Decimal + +from .errors import JSONDecodeError +from .raw_json import RawJSON +from .decoder import JSONDecoder +from .encoder import JSONEncoder, JSONEncoderForHTML +def _import_OrderedDict(): + import collections + try: + return collections.OrderedDict + except AttributeError: + from . 
import ordered_dict + return ordered_dict.OrderedDict +OrderedDict = _import_OrderedDict() + +def _import_c_make_encoder(): + try: + from ._speedups import make_encoder + return make_encoder + except ImportError: + return None _default_encoder = JSONEncoder( skipkeys=False, @@ -117,56 +158,115 @@ separators=None, encoding='utf-8', default=None, + use_decimal=True, + namedtuple_as_object=True, + tuple_as_array=True, + iterable_as_array=False, + bigint_as_string=False, + item_sort_key=None, + for_json=False, + ignore_nan=False, + int_as_string_bitcount=None, ) def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, + iterable_as_array=False, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). - If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + If *skipkeys* is true then ``dict`` keys that are not basic types + (``str``, ``int``, ``long``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is false, then the some chunks written to ``fp`` - may be ``unicode`` instances, subject to normal Python ``str`` to - ``unicode`` coercion rules. Unless ``fp.write()`` explicitly - understands ``unicode`` (as in ``codecs.getwriter()``) this is likely - to cause an error. - - If ``check_circular`` is false, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). 
+ If *ensure_ascii* is false (default: ``True``), then the output may + contain non-ASCII characters, so long as they do not need to be escaped + by JSON. When it is true, all non-ASCII characters are escaped. - If ``allow_nan`` is false, then it will be a ``ValueError`` to + If *allow_nan* is false, then it will be a ``ValueError`` to serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + in strict compliance of the original JSON specification, instead of using + the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See + *ignore_nan* for ECMA-262 compliant behavior. - If ``indent`` is a non-negative integer, then JSON array elements and object - members will be pretty-printed with that indent level. An indent level - of 0 will only insert newlines. ``None`` is the most compact representation. + If *indent* is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If specified, *separators* should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. - ``encoding`` is the character encoding for str instances, default is UTF-8. + *encoding* is the character encoding for str instances, default is UTF-8. - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. 
The default simply raises TypeError. + *default(obj)* is a function that should return a serializable version + of obj or raise ``TypeError``. The default simply raises ``TypeError``. + + If *use_decimal* is true (default: ``True``) then decimal.Decimal + will be natively serialized to JSON with full precision. + + If *namedtuple_as_object* is true (default: ``True``), + :class:`tuple` subclasses with ``_asdict()`` methods will be encoded + as JSON objects. + + If *tuple_as_array* is true (default: ``True``), + :class:`tuple` (and subclasses) will be encoded as JSON arrays. + + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + + If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. Note that this is still a + lossy operation that will not round-trip correctly and should be used + sparingly. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precedence over + *sort_keys*. + + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. 
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead + of subclassing whenever possible. """ # cached encoder if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array and not iterable_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and int_as_string_bitcount is None + and not kw + ): iterable = _default_encoder.iterencode(obj) else: if cls is None: @@ -174,7 +274,17 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, - default=default, **kw).iterencode(obj) + default=default, use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + iterable_as_array=iterable_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, + **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost for chunk in iterable: @@ -182,17 +292,21 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + 
namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, + iterable_as_array=False, **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is false then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + (``str``, ``int``, ``long``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is false, then the return value will be a - ``unicode`` instance subject to normal Python ``str`` to ``unicode`` - coercion rules instead of being escaped to an ASCII ``str``. + If *ensure_ascii* is false (default: ``True``), then the output may + contain non-ASCII characters, so long as they do not need to be escaped + by JSON. When it is true, all non-ASCII characters are escaped. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will @@ -203,30 +317,81 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, strict compliance of the JSON specification, instead of using the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. + If ``indent`` is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. 
- If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If specified, ``separators`` should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. - ``encoding`` is the character encoding for str instances, default is UTF-8. + ``encoding`` is the character encoding for bytes instances, default is + UTF-8. ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. + If *use_decimal* is true (default: ``True``) then decimal.Decimal + will be natively serialized to JSON with full precision. + + If *namedtuple_as_object* is true (default: ``True``), + :class:`tuple` subclasses with ``_asdict()`` methods will be encoded + as JSON objects. + + If *tuple_as_array* is true (default: ``True``), + :class:`tuple` (and subclasses) will be encoded as JSON arrays. + + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + + If *bigint_as_string* is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precendence over + *sort_keys*. 
+ + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing + whenever possible. """ # cached encoder if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array and not iterable_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and int_as_string_bitcount is None + and not kw + ): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder @@ -234,85 +399,186 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, default=default, + use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + iterable_as_array=iterable_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, **kw).encode(obj) -_default_decoder = 
JSONDecoder(encoding=None, object_hook=None) +_default_decoder = JSONDecoder(encoding=None, object_hook=None, + object_pairs_hook=None) def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, namedtuple_as_object=True, tuple_as_array=True, + **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing - a JSON document) to a Python object. - - If the contents of ``fp`` is encoded with an ASCII based encoding other - than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must - be specified. Encodings that are not ASCII based (such as UCS-2) are - not allowed, and should be wrapped with - ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` - object and passed to ``loads()`` - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). + a JSON document as `str` or `bytes`) to a Python object. + + *encoding* determines the encoding used to interpret any + `bytes` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding `str` objects. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. 
This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. """ return loads(fp.read(), encoding=encoding, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, **kw) + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, + use_decimal=use_decimal, **kw) def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, **kw): """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) to a Python object. 
- If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding - other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name - must be specified. Encodings that are not ASCII based (such as UCS-2) - are not allowed and should be decoded to ``unicode`` first. - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN, null, true, false. - This can be used to raise an exception if invalid JSON numbers - are encountered. + *encoding* determines the encoding used to interpret any + :class:`bytes` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. 
This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. 
""" if (cls is None and encoding is None and object_hook is None and parse_int is None and parse_float is None and - parse_constant is None and not kw): + parse_constant is None and object_pairs_hook is None + and not use_decimal and not kw): return _default_decoder.decode(s) if cls is None: cls = JSONDecoder if object_hook is not None: kw['object_hook'] = object_hook + if object_pairs_hook is not None: + kw['object_pairs_hook'] = object_pairs_hook if parse_float is not None: kw['parse_float'] = parse_float if parse_int is not None: kw['parse_int'] = parse_int if parse_constant is not None: kw['parse_constant'] = parse_constant + if use_decimal: + if parse_float is not None: + raise TypeError("use_decimal=True implies parse_float=Decimal") + kw['parse_float'] = Decimal return cls(encoding=encoding, **kw).decode(s) + + +def _toggle_speedups(enabled): + from . import decoder as dec + from . import encoder as enc + from . import scanner as scan + c_make_encoder = _import_c_make_encoder() + if enabled: + dec.scanstring = dec.c_scanstring or dec.py_scanstring + enc.c_make_encoder = c_make_encoder + enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or + enc.py_encode_basestring_ascii) + scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner + else: + dec.scanstring = dec.py_scanstring + enc.c_make_encoder = None + enc.encode_basestring_ascii = enc.py_encode_basestring_ascii + scan.make_scanner = scan.py_make_scanner + dec.make_scanner = scan.make_scanner + global _default_decoder + _default_decoder = JSONDecoder( + encoding=None, + object_hook=None, + object_pairs_hook=None, + ) + global _default_encoder + _default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, + ) + +def simple_first(kv): + """Helper function to pass to item_sort_key to sort simple + elements to the top, then container elements. 
+ """ + return (isinstance(kv[1], (list, dict, tuple)), kv[0]) diff --git a/framework/pym/simplejson/_speedups.c b/framework/pym/simplejson/_speedups.c deleted file mode 100644 index 23b5f4a6e6..0000000000 --- a/framework/pym/simplejson/_speedups.c +++ /dev/null @@ -1,2329 +0,0 @@ -#include "Python.h" -#include "structmember.h" -#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) -#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) -#endif -#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) -typedef int Py_ssize_t; -#define PY_SSIZE_T_MAX INT_MAX -#define PY_SSIZE_T_MIN INT_MIN -#define PyInt_FromSsize_t PyInt_FromLong -#define PyInt_AsSsize_t PyInt_AsLong -#endif -#ifndef Py_IS_FINITE -#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) -#endif - -#ifdef __GNUC__ -#define UNUSED __attribute__((__unused__)) -#else -#define UNUSED -#endif - -#define DEFAULT_ENCODING "utf-8" - -#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) -#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) -#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) -#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) - -static PyTypeObject PyScannerType; -static PyTypeObject PyEncoderType; - -typedef struct _PyScannerObject { - PyObject_HEAD - PyObject *encoding; - PyObject *strict; - PyObject *object_hook; - PyObject *parse_float; - PyObject *parse_int; - PyObject *parse_constant; -} PyScannerObject; - -static PyMemberDef scanner_members[] = { - {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, - {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, - {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, - {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, - {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, - {"parse_constant", T_OBJECT, 
offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, - {NULL} -}; - -typedef struct _PyEncoderObject { - PyObject_HEAD - PyObject *markers; - PyObject *defaultfn; - PyObject *encoder; - PyObject *indent; - PyObject *key_separator; - PyObject *item_separator; - PyObject *sort_keys; - PyObject *skipkeys; - int fast_encode; - int allow_nan; -} PyEncoderObject; - -static PyMemberDef encoder_members[] = { - {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, - {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, - {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, - {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, - {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, - {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, - {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, - {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, - {NULL} -}; - -static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); -static PyObject * -ascii_escape_unicode(PyObject *pystr); -static PyObject * -ascii_escape_str(PyObject *pystr); -static PyObject * -py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); -void init_speedups(void); -static PyObject * -scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); -static PyObject * -scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); -static PyObject * -_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); -static PyObject * -scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); -static int -scanner_init(PyObject *self, PyObject *args, PyObject *kwds); -static void -scanner_dealloc(PyObject *self); -static int 
-scanner_clear(PyObject *self); -static PyObject * -encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); -static int -encoder_init(PyObject *self, PyObject *args, PyObject *kwds); -static void -encoder_dealloc(PyObject *self); -static int -encoder_clear(PyObject *self); -static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); -static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); -static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); -static PyObject * -_encoded_const(PyObject *const); -static void -raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj); -static int -_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); -static PyObject * -_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); -static PyObject * -encoder_encode_float(PyEncoderObject *s, PyObject *obj); - -#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') -#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) - -#define MIN_EXPANSION 6 -#ifdef Py_UNICODE_WIDE -#define MAX_EXPANSION (2 * MIN_EXPANSION) -#else -#define MAX_EXPANSION MIN_EXPANSION -#endif - -static int -_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) -{ - /* PyObject to Py_ssize_t converter */ - *size_ptr = PyInt_AsSsize_t(o); - if (*size_ptr == -1 && PyErr_Occurred()); - return 1; - return 0; -} - -static PyObject * -_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) -{ - /* Py_ssize_t to PyObject converter */ - return PyInt_FromSsize_t(*size_ptr); -} - -static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) -{ - /* Escape unicode code point c to ASCII escape sequences - in char *output. 
output must have at least 12 bytes unused to - accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ - output[chars++] = '\\'; - switch (c) { - case '\\': output[chars++] = (char)c; break; - case '"': output[chars++] = (char)c; break; - case '\b': output[chars++] = 'b'; break; - case '\f': output[chars++] = 'f'; break; - case '\n': output[chars++] = 'n'; break; - case '\r': output[chars++] = 'r'; break; - case '\t': output[chars++] = 't'; break; - default: -#ifdef Py_UNICODE_WIDE - if (c >= 0x10000) { - /* UTF-16 surrogate pair */ - Py_UNICODE v = c - 0x10000; - c = 0xd800 | ((v >> 10) & 0x3ff); - output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; - c = 0xdc00 | (v & 0x3ff); - output[chars++] = '\\'; - } -#endif - output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; - } - return chars; -} - -static PyObject * -ascii_escape_unicode(PyObject *pystr) -{ - /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ - Py_ssize_t i; - Py_ssize_t input_chars; - Py_ssize_t output_size; - Py_ssize_t max_output_size; - Py_ssize_t chars; - PyObject *rval; - char *output; - Py_UNICODE *input_unicode; - - input_chars = PyUnicode_GET_SIZE(pystr); - input_unicode = PyUnicode_AS_UNICODE(pystr); - - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - max_output_size = 2 + (input_chars * MAX_EXPANSION); - rval = PyString_FromStringAndSize(NULL, output_size); - if (rval == NULL) { - return NULL; - } - output = PyString_AS_STRING(rval); - chars = 0; - output[chars++] = '"'; - for (i = 0; i < input_chars; i++) { - 
Py_UNICODE c = input_unicode[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - if (output_size - chars < (1 + MAX_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - Py_ssize_t new_output_size = output_size * 2; - /* This is an upper bound */ - if (new_output_size > max_output_size) { - new_output_size = max_output_size; - } - /* Make sure that the output size changed before resizing */ - if (new_output_size != output_size) { - output_size = new_output_size; - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } - } - } - output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } - return rval; -} - -static PyObject * -ascii_escape_str(PyObject *pystr) -{ - /* Take a PyString pystr and return a new ASCII-only escaped PyString */ - Py_ssize_t i; - Py_ssize_t input_chars; - Py_ssize_t output_size; - Py_ssize_t chars; - PyObject *rval; - char *output; - char *input_str; - - input_chars = PyString_GET_SIZE(pystr); - input_str = PyString_AS_STRING(pystr); - - /* Fast path for a string that's already ASCII */ - for (i = 0; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (!S_CHAR(c)) { - /* If we have to escape something, scan the string for unicode */ - Py_ssize_t j; - for (j = i; j < input_chars; j++) { - c = (Py_UNICODE)(unsigned char)input_str[j]; - if (c > 0x7f) { - /* We hit a non-ASCII character, bail to unicode mode */ - PyObject *uni; - uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); - if (uni == NULL) { - return NULL; - } - rval = ascii_escape_unicode(uni); - Py_DECREF(uni); - return rval; - } - } - break; - } - } - - if (i == input_chars) { - /* Input is already ASCII */ - output_size = 2 + input_chars; - } - else { - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + 
input_chars; - } - rval = PyString_FromStringAndSize(NULL, output_size); - if (rval == NULL) { - return NULL; - } - output = PyString_AS_STRING(rval); - output[0] = '"'; - - /* We know that everything up to i is ASCII already */ - chars = i + 1; - memcpy(&output[1], input_str, i); - - for (; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - /* An ASCII char can't possibly expand to a surrogate! */ - if (output_size - chars < (1 + MIN_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - output_size *= 2; - if (output_size > 2 + (input_chars * MIN_EXPANSION)) { - output_size = 2 + (input_chars * MIN_EXPANSION); - } - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } - } - output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } - return rval; -} - -static void -raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) -{ - /* Use the Python function simplejson.decoder.errmsg to raise a nice - looking ValueError exception */ - static PyObject *errmsg_fn = NULL; - PyObject *pymsg; - if (errmsg_fn == NULL) { - PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); - if (decoder == NULL) - return; - errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); - Py_DECREF(decoder); - if (errmsg_fn == NULL) - return; - } - pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); - if (pymsg) { - PyErr_SetObject(PyExc_ValueError, pymsg); - Py_DECREF(pymsg); - } -} - -static PyObject * -join_list_unicode(PyObject *lst) -{ - /* return u''.join(lst) */ - static PyObject *joinfn = NULL; - if (joinfn == NULL) { - PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); - if (ustr == NULL) - return NULL; - - joinfn = PyObject_GetAttrString(ustr, "join"); - Py_DECREF(ustr); - if (joinfn == NULL) - 
return NULL; - } - return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); -} - -static PyObject * -join_list_string(PyObject *lst) -{ - /* return ''.join(lst) */ - static PyObject *joinfn = NULL; - if (joinfn == NULL) { - PyObject *ustr = PyString_FromStringAndSize(NULL, 0); - if (ustr == NULL) - return NULL; - - joinfn = PyObject_GetAttrString(ustr, "join"); - Py_DECREF(ustr); - if (joinfn == NULL) - return NULL; - } - return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); -} - -static PyObject * -_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { - /* return (rval, idx) tuple, stealing reference to rval */ - PyObject *tpl; - PyObject *pyidx; - /* - steal a reference to rval, returns (rval, idx) - */ - if (rval == NULL) { - return NULL; - } - pyidx = PyInt_FromSsize_t(idx); - if (pyidx == NULL) { - Py_DECREF(rval); - return NULL; - } - tpl = PyTuple_New(2); - if (tpl == NULL) { - Py_DECREF(pyidx); - Py_DECREF(rval); - return NULL; - } - PyTuple_SET_ITEM(tpl, 0, rval); - PyTuple_SET_ITEM(tpl, 1, pyidx); - return tpl; -} - -static PyObject * -scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) -{ - /* Read the JSON string from PyString pystr. - end is the index of the first character after the quote. 
- encoding is the encoding of pystr (must be an ASCII superset) - if strict is zero then literal control characters are allowed - *next_end_ptr is a return-by-reference index of the character - after the end quote - - Return value is a new PyString (if ASCII-only) or PyUnicode - */ - PyObject *rval; - Py_ssize_t len = PyString_GET_SIZE(pystr); - Py_ssize_t begin = end - 1; - Py_ssize_t next = begin; - int has_unicode = 0; - char *buf = PyString_AS_STRING(pystr); - PyObject *chunks = PyList_New(0); - if (chunks == NULL) { - goto bail; - } - if (end < 0 || len <= end) { - PyErr_SetString(PyExc_ValueError, "end is out of bounds"); - goto bail; - } - while (1) { - /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; - PyObject *chunk = NULL; - for (next = end; next < len; next++) { - c = (unsigned char)buf[next]; - if (c == '"' || c == '\\') { - break; - } - else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); - goto bail; - } - else if (c > 0x7f) { - has_unicode = 1; - } - } - if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - /* Pick up this chunk if it's not zero length */ - if (next != end) { - PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); - if (strchunk == NULL) { - goto bail; - } - if (has_unicode) { - chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); - Py_DECREF(strchunk); - if (chunk == NULL) { - goto bail; - } - } - else { - chunk = strchunk; - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - next++; - if (c == '"') { - end = next; - break; - } - if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - c = buf[next]; - if (c != 'u') { - /* Non-unicode backslash escapes */ - end = next + 1; - switch (c) { - case '"': break; - case '\\': break; - case '/': break; - case 'b': c = '\b'; break; - case 'f': 
c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - default: c = 0; - } - if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); - goto bail; - } - } - else { - c = 0; - next++; - end = next + 4; - if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); - goto bail; - } - /* Decode 4 hex digits */ - for (; next < end; next++) { - Py_UNICODE digit = buf[next]; - c <<= 4; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } -#ifdef Py_UNICODE_WIDE - /* Surrogate pair */ - if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - c2 <<= 4; - Py_UNICODE digit = buf[next]; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c2 |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c2 |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c2 |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); - } - else if ((c & 0xfc00) == 0xdc00) { - 
raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; - } -#endif - } - if (c > 0x7f) { - has_unicode = 1; - } - if (has_unicode) { - chunk = PyUnicode_FromUnicode(&c, 1); - if (chunk == NULL) { - goto bail; - } - } - else { - char c_char = Py_CHARMASK(c); - chunk = PyString_FromStringAndSize(&c_char, 1); - if (chunk == NULL) { - goto bail; - } - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - - rval = join_list_string(chunks); - if (rval == NULL) { - goto bail; - } - Py_CLEAR(chunks); - *next_end_ptr = end; - return rval; -bail: - *next_end_ptr = -1; - Py_XDECREF(chunks); - return NULL; -} - - -static PyObject * -scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) -{ - /* Read the JSON string from PyUnicode pystr. - end is the index of the first character after the quote. - if strict is zero then literal control characters are allowed - *next_end_ptr is a return-by-reference index of the character - after the end quote - - Return value is a new PyUnicode - */ - PyObject *rval; - Py_ssize_t len = PyUnicode_GET_SIZE(pystr); - Py_ssize_t begin = end - 1; - Py_ssize_t next = begin; - const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); - PyObject *chunks = PyList_New(0); - if (chunks == NULL) { - goto bail; - } - if (end < 0 || len <= end) { - PyErr_SetString(PyExc_ValueError, "end is out of bounds"); - goto bail; - } - while (1) { - /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; - PyObject *chunk = NULL; - for (next = end; next < len; next++) { - c = buf[next]; - if (c == '"' || c == '\\') { - break; - } - else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); - goto bail; - } - } - if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - /* Pick up this chunk if it's not zero length */ - if (next != end) { - chunk = 
PyUnicode_FromUnicode(&buf[end], next - end); - if (chunk == NULL) { - goto bail; - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - next++; - if (c == '"') { - end = next; - break; - } - if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - c = buf[next]; - if (c != 'u') { - /* Non-unicode backslash escapes */ - end = next + 1; - switch (c) { - case '"': break; - case '\\': break; - case '/': break; - case 'b': c = '\b'; break; - case 'f': c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - default: c = 0; - } - if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); - goto bail; - } - } - else { - c = 0; - next++; - end = next + 4; - if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); - goto bail; - } - /* Decode 4 hex digits */ - for (; next < end; next++) { - Py_UNICODE digit = buf[next]; - c <<= 4; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } -#ifdef Py_UNICODE_WIDE - /* Surrogate pair */ - if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - c2 <<= 4; - Py_UNICODE digit = buf[next]; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': 
case '9': - c2 |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c2 |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c2 |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); - } - else if ((c & 0xfc00) == 0xdc00) { - raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; - } -#endif - } - chunk = PyUnicode_FromUnicode(&c, 1); - if (chunk == NULL) { - goto bail; - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - - rval = join_list_unicode(chunks); - if (rval == NULL) { - goto bail; - } - Py_DECREF(chunks); - *next_end_ptr = end; - return rval; -bail: - *next_end_ptr = -1; - Py_XDECREF(chunks); - return NULL; -} - -PyDoc_STRVAR(pydoc_scanstring, - "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" - "\n" - "Scan the string s for a JSON string. End is the index of the\n" - "character in s after the quote that started the JSON string.\n" - "Unescapes all valid JSON string escape sequences and raises ValueError\n" - "on attempt to decode an invalid string. If strict is False then literal\n" - "control characters are allowed in the string.\n" - "\n" - "Returns a tuple of the decoded string and the index of the character in s\n" - "after the end quote." 
-); - -static PyObject * -py_scanstring(PyObject* self UNUSED, PyObject *args) -{ - PyObject *pystr; - PyObject *rval; - Py_ssize_t end; - Py_ssize_t next_end = -1; - char *encoding = NULL; - int strict = 1; - if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { - return NULL; - } - if (encoding == NULL) { - encoding = DEFAULT_ENCODING; - } - if (PyString_Check(pystr)) { - rval = scanstring_str(pystr, end, encoding, strict, &next_end); - } - else if (PyUnicode_Check(pystr)) { - rval = scanstring_unicode(pystr, end, strict, &next_end); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; - } - return _build_rval_index_tuple(rval, next_end); -} - -PyDoc_STRVAR(pydoc_encode_basestring_ascii, - "encode_basestring_ascii(basestring) -> str\n" - "\n" - "Return an ASCII-only JSON representation of a Python string" -); - -static PyObject * -py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) -{ - /* Return an ASCII-only JSON representation of a Python string */ - /* METH_O */ - if (PyString_Check(pystr)) { - return ascii_escape_str(pystr); - } - else if (PyUnicode_Check(pystr)) { - return ascii_escape_unicode(pystr); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; - } -} - -static void -scanner_dealloc(PyObject *self) -{ - /* Deallocate scanner object */ - scanner_clear(self); - Py_TYPE(self)->tp_free(self); -} - -static int -scanner_traverse(PyObject *self, visitproc visit, void *arg) -{ - PyScannerObject *s; - assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - Py_VISIT(s->encoding); - Py_VISIT(s->strict); - Py_VISIT(s->object_hook); - Py_VISIT(s->parse_float); - Py_VISIT(s->parse_int); - Py_VISIT(s->parse_constant); - return 0; -} - -static int -scanner_clear(PyObject *self) -{ - PyScannerObject *s; - 
assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - Py_CLEAR(s->encoding); - Py_CLEAR(s->strict); - Py_CLEAR(s->object_hook); - Py_CLEAR(s->parse_float); - Py_CLEAR(s->parse_int); - Py_CLEAR(s->parse_constant); - return 0; -} - -static PyObject * -_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON object from PyString pystr. - idx is the index of the first character after the opening curly brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing curly brace. - - Returns a new PyObject (usually a dict, but object_hook can change that) - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *rval = PyDict_New(); - PyObject *key = NULL; - PyObject *val = NULL; - char *encoding = PyString_AS_STRING(s->encoding); - int strict = PyObject_IsTrue(s->strict); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { - while (idx <= end_idx) { - /* read key */ - if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); - goto bail; - } - key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); - if (key == NULL) - goto bail; - idx = next_idx; - - /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); - goto bail; - } - idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* read any JSON data type */ - val = scan_once_str(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyDict_SetItem(rval, key, val) == -1) - goto bail; - - Py_CLEAR(key); - Py_CLEAR(val); - idx = next_idx; - - /* skip 
whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the object is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == '}') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - /* if object_hook is not None: rval = object_hook(rval) */ - if (s->object_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); - if (val == NULL) - goto bail; - Py_DECREF(rval); - rval = val; - val = NULL; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(key); - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON object from PyUnicode pystr. - idx is the index of the first character after the opening curly brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing curly brace. 
- - Returns a new PyObject (usually a dict, but object_hook can change that) - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyDict_New(); - PyObject *key = NULL; - int strict = PyObject_IsTrue(s->strict); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { - while (idx <= end_idx) { - /* read key */ - if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); - goto bail; - } - key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); - if (key == NULL) - goto bail; - idx = next_idx; - - /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); - goto bail; - } - idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* read any JSON term */ - val = scan_once_unicode(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyDict_SetItem(rval, key, val) == -1) - goto bail; - - Py_CLEAR(key); - Py_CLEAR(val); - idx = next_idx; - - /* skip whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the object is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == '}') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - - /* if object_hook 
is not None: rval = object_hook(rval) */ - if (s->object_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); - if (val == NULL) - goto bail; - Py_DECREF(rval); - rval = val; - val = NULL; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(key); - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON array from PyString pystr. - idx is the index of the first character after the opening brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing brace. - - Returns a new PyList - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyList_New(0); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { - while (idx <= end_idx) { - - /* read any JSON term and de-tuplefy the (rval, idx) */ - val = scan_once_str(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyList_Append(rval, val) == -1) - goto bail; - - Py_CLEAR(val); - idx = next_idx; - - /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the array is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == ']') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - *next_idx_ptr = 
idx + 1; - return rval; -bail: - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON array from PyString pystr. - idx is the index of the first character after the opening brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing brace. - - Returns a new PyList - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyList_New(0); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { - while (idx <= end_idx) { - - /* read any JSON term */ - val = scan_once_unicode(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyList_Append(rval, val) == -1) - goto bail; - - Py_CLEAR(val); - idx = next_idx; - - /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the array is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == ']') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON constant from PyString pystr. 
- constant is the constant string that was found - ("NaN", "Infinity", "-Infinity"). - idx is the index of the first character of the constant - *next_idx_ptr is a return-by-reference index to the first character after - the constant. - - Returns the result of parse_constant - */ - PyObject *cstr; - PyObject *rval; - /* constant is "NaN", "Infinity", or "-Infinity" */ - cstr = PyString_InternFromString(constant); - if (cstr == NULL) - return NULL; - - /* rval = parse_constant(constant) */ - rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); - idx += PyString_GET_SIZE(cstr); - Py_DECREF(cstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * -_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { - /* Read a JSON number from PyString pystr. - idx is the index of the first character of the number - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of that number: - PyInt, PyLong, or PyFloat. - May return other types if parse_int or parse_float are set - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - Py_ssize_t idx = start; - int is_float = 0; - PyObject *rval; - PyObject *numstr; - - /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - } - - /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { - idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { - idx++; - } - /* no integer digits, error */ - else { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - /* if the next char is '.' 
followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { - is_float = 1; - idx += 2; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - - /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { - - /* save the index of the 'e' or 'E' just in case we need to backtrack */ - Py_ssize_t e_start = idx; - idx++; - - /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; - - /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - - /* if we got a digit, then parse as float. if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { - is_float = 1; - } - else { - idx = e_start; - } - } - - /* copy the section we determined to be a number */ - numstr = PyString_FromStringAndSize(&str[start], idx - start); - if (numstr == NULL) - return NULL; - if (is_float) { - /* parse as a float using a fast path if available, otherwise call user defined method */ - if (s->parse_float != (PyObject *)&PyFloat_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); - } - else { - rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); - } - } - else { - /* parse as an int using a fast path if available, otherwise call user defined method */ - if (s->parse_int != (PyObject *)&PyInt_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); - } - else { - rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); - } - } - Py_DECREF(numstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * -_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { - /* Read a JSON number from PyUnicode pystr. 
- idx is the index of the first character of the number - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of that number: - PyInt, PyLong, or PyFloat. - May return other types if parse_int or parse_float are set - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - Py_ssize_t idx = start; - int is_float = 0; - PyObject *rval; - PyObject *numstr; - - /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - } - - /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { - idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { - idx++; - } - /* no integer digits, error */ - else { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - /* if the next char is '.' followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { - is_float = 1; - idx += 2; - while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - - /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { - Py_ssize_t e_start = idx; - idx++; - - /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; - - /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - - /* if we got a digit, then parse as float. 
if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { - is_float = 1; - } - else { - idx = e_start; - } - } - - /* copy the section we determined to be a number */ - numstr = PyUnicode_FromUnicode(&str[start], idx - start); - if (numstr == NULL) - return NULL; - if (is_float) { - /* parse as a float using a fast path if available, otherwise call user defined method */ - if (s->parse_float != (PyObject *)&PyFloat_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); - } - else { - rval = PyFloat_FromString(numstr, NULL); - } - } - else { - /* no fast path for unicode -> int, just call */ - rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); - } - Py_DECREF(numstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * -scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) -{ - /* Read one JSON term (of any kind) from PyString pystr. - idx is the index of the first character of the term - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of the term. 
- */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t length = PyString_GET_SIZE(pystr); - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - switch (str[idx]) { - case '"': - /* string */ - return scanstring_str(pystr, idx + 1, - PyString_AS_STRING(s->encoding), - PyObject_IsTrue(s->strict), - next_idx_ptr); - case '{': - /* object */ - return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); - case '[': - /* array */ - return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); - case 'n': - /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { - Py_INCREF(Py_None); - *next_idx_ptr = idx + 4; - return Py_None; - } - break; - case 't': - /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { - Py_INCREF(Py_True); - *next_idx_ptr = idx + 4; - return Py_True; - } - break; - case 'f': - /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { - Py_INCREF(Py_False); - *next_idx_ptr = idx + 5; - return Py_False; - } - break; - case 'N': - /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); - } - break; - case 'I': - /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); - } - break; - case '-': - /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); - } - break; - } - /* Didn't find a string, object, array, or named constant. 
Look for a number. */ - return _match_number_str(s, pystr, idx, next_idx_ptr); -} - -static PyObject * -scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) -{ - /* Read one JSON term (of any kind) from PyUnicode pystr. - idx is the index of the first character of the term - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of the term. - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t length = PyUnicode_GET_SIZE(pystr); - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - switch (str[idx]) { - case '"': - /* string */ - return scanstring_unicode(pystr, idx + 1, - PyObject_IsTrue(s->strict), - next_idx_ptr); - case '{': - /* object */ - return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); - case '[': - /* array */ - return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); - case 'n': - /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { - Py_INCREF(Py_None); - *next_idx_ptr = idx + 4; - return Py_None; - } - break; - case 't': - /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { - Py_INCREF(Py_True); - *next_idx_ptr = idx + 4; - return Py_True; - } - break; - case 'f': - /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { - Py_INCREF(Py_False); - *next_idx_ptr = idx + 5; - return Py_False; - } - break; - case 'N': - /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); - } - break; - case 'I': - /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return 
_parse_constant(s, "Infinity", idx, next_idx_ptr); - } - break; - case '-': - /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); - } - break; - } - /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_unicode(s, pystr, idx, next_idx_ptr); -} - -static PyObject * -scanner_call(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* Python callable interface to scan_once_{str,unicode} */ - PyObject *pystr; - PyObject *rval; - Py_ssize_t idx; - Py_ssize_t next_idx = -1; - static char *kwlist[] = {"string", "idx", NULL}; - PyScannerObject *s; - assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) - return NULL; - - if (PyString_Check(pystr)) { - rval = scan_once_str(s, pystr, idx, &next_idx); - } - else if (PyUnicode_Check(pystr)) { - rval = scan_once_unicode(s, pystr, idx, &next_idx); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; - } - return _build_rval_index_tuple(rval, next_idx); -} - -static PyObject * -scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyScannerObject *s; - s = (PyScannerObject *)type->tp_alloc(type, 0); - if (s != NULL) { - s->encoding = NULL; - s->strict = NULL; - s->object_hook = NULL; - s->parse_float = NULL; - s->parse_int = NULL; - s->parse_constant = NULL; - } - return (PyObject *)s; -} - -static int -scanner_init(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* Initialize Scanner object */ - PyObject *ctx; - static char *kwlist[] = {"context", NULL}; - PyScannerObject *s; - - assert(PyScanner_Check(self)); - s = 
(PyScannerObject *)self; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) - return -1; - - /* PyString_AS_STRING is used on encoding */ - s->encoding = PyObject_GetAttrString(ctx, "encoding"); - if (s->encoding == Py_None) { - Py_DECREF(Py_None); - s->encoding = PyString_InternFromString(DEFAULT_ENCODING); - } - else if (PyUnicode_Check(s->encoding)) { - PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); - Py_DECREF(s->encoding); - s->encoding = tmp; - } - if (s->encoding == NULL || !PyString_Check(s->encoding)) - goto bail; - - /* All of these will fail "gracefully" so we don't need to verify them */ - s->strict = PyObject_GetAttrString(ctx, "strict"); - if (s->strict == NULL) - goto bail; - s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); - if (s->object_hook == NULL) - goto bail; - s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); - if (s->parse_float == NULL) - goto bail; - s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); - if (s->parse_int == NULL) - goto bail; - s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); - if (s->parse_constant == NULL) - goto bail; - - return 0; - -bail: - Py_CLEAR(s->encoding); - Py_CLEAR(s->strict); - Py_CLEAR(s->object_hook); - Py_CLEAR(s->parse_float); - Py_CLEAR(s->parse_int); - Py_CLEAR(s->parse_constant); - return -1; -} - -PyDoc_STRVAR(scanner_doc, "JSON scanner object"); - -static -PyTypeObject PyScannerType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ - "simplejson._speedups.Scanner", /* tp_name */ - sizeof(PyScannerObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - scanner_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - scanner_call, /* tp_call */ - 0, /* tp_str */ - 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ - 0,/* 
PyObject_GenericSetAttr, */ /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ - scanner_doc, /* tp_doc */ - scanner_traverse, /* tp_traverse */ - scanner_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - scanner_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - scanner_init, /* tp_init */ - 0,/* PyType_GenericAlloc, */ /* tp_alloc */ - scanner_new, /* tp_new */ - 0,/* PyObject_GC_Del, */ /* tp_free */ -}; - -static PyObject * -encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyEncoderObject *s; - s = (PyEncoderObject *)type->tp_alloc(type, 0); - if (s != NULL) { - s->markers = NULL; - s->defaultfn = NULL; - s->encoder = NULL; - s->indent = NULL; - s->key_separator = NULL; - s->item_separator = NULL; - s->sort_keys = NULL; - s->skipkeys = NULL; - } - return (PyObject *)s; -} - -static int -encoder_init(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* initialize Encoder object */ - static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; - - PyEncoderObject *s; - PyObject *allow_nan; - - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, - &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) - return -1; - - Py_INCREF(s->markers); - Py_INCREF(s->defaultfn); - Py_INCREF(s->encoder); - Py_INCREF(s->indent); - Py_INCREF(s->key_separator); - Py_INCREF(s->item_separator); - Py_INCREF(s->sort_keys); - Py_INCREF(s->skipkeys); - s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == 
(PyCFunction)py_encode_basestring_ascii); - s->allow_nan = PyObject_IsTrue(allow_nan); - return 0; -} - -static PyObject * -encoder_call(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* Python callable interface to encode_listencode_obj */ - static char *kwlist[] = {"obj", "_current_indent_level", NULL}; - PyObject *obj; - PyObject *rval; - Py_ssize_t indent_level; - PyEncoderObject *s; - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, - &obj, _convertPyInt_AsSsize_t, &indent_level)) - return NULL; - rval = PyList_New(0); - if (rval == NULL) - return NULL; - if (encoder_listencode_obj(s, rval, obj, indent_level)) { - Py_DECREF(rval); - return NULL; - } - return rval; -} - -static PyObject * -_encoded_const(PyObject *obj) -{ - /* Return the JSON string representation of None, True, False */ - if (obj == Py_None) { - static PyObject *s_null = NULL; - if (s_null == NULL) { - s_null = PyString_InternFromString("null"); - } - Py_INCREF(s_null); - return s_null; - } - else if (obj == Py_True) { - static PyObject *s_true = NULL; - if (s_true == NULL) { - s_true = PyString_InternFromString("true"); - } - Py_INCREF(s_true); - return s_true; - } - else if (obj == Py_False) { - static PyObject *s_false = NULL; - if (s_false == NULL) { - s_false = PyString_InternFromString("false"); - } - Py_INCREF(s_false); - return s_false; - } - else { - PyErr_SetString(PyExc_ValueError, "not a const"); - return NULL; - } -} - -static PyObject * -encoder_encode_float(PyEncoderObject *s, PyObject *obj) -{ - /* Return the JSON representation of a PyFloat */ - double i = PyFloat_AS_DOUBLE(obj); - if (!Py_IS_FINITE(i)) { - if (!s->allow_nan) { - PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); - return NULL; - } - if (i > 0) { - return PyString_FromString("Infinity"); - } - else if (i < 0) { - return PyString_FromString("-Infinity"); - } - else { - return 
PyString_FromString("NaN"); - } - } - /* Use a better float format here? */ - return PyObject_Repr(obj); -} - -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj) -{ - /* Return the JSON representation of a string */ - if (s->fast_encode) - return py_encode_basestring_ascii(NULL, obj); - else - return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); -} - -static int -_steal_list_append(PyObject *lst, PyObject *stolen) -{ - /* Append stolen and then decrement its reference count */ - int rval = PyList_Append(lst, stolen); - Py_DECREF(stolen); - return rval; -} - -static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) -{ - /* Encode Python object obj to a JSON term, rval is a PyList */ - PyObject *newobj; - int rv; - - if (obj == Py_None || obj == Py_True || obj == Py_False) { - PyObject *cstr = _encoded_const(obj); - if (cstr == NULL) - return -1; - return _steal_list_append(rval, cstr); - } - else if (PyString_Check(obj) || PyUnicode_Check(obj)) - { - PyObject *encoded = encoder_encode_string(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyInt_Check(obj) || PyLong_Check(obj)) { - PyObject *encoded = PyObject_Str(obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyFloat_Check(obj)) { - PyObject *encoded = encoder_encode_float(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyList_Check(obj) || PyTuple_Check(obj)) { - return encoder_listencode_list(s, rval, obj, indent_level); - } - else if (PyDict_Check(obj)) { - return encoder_listencode_dict(s, rval, obj, indent_level); - } - else { - PyObject *ident = NULL; - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(obj); - if (ident == NULL) - return -1; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - 
PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - Py_DECREF(ident); - return -1; - } - if (PyDict_SetItem(s->markers, ident, obj)) { - Py_DECREF(ident); - return -1; - } - } - newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); - if (newobj == NULL) { - Py_XDECREF(ident); - return -1; - } - rv = encoder_listencode_obj(s, rval, newobj, indent_level); - Py_DECREF(newobj); - if (rv) { - Py_XDECREF(ident); - return -1; - } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) { - Py_XDECREF(ident); - return -1; - } - Py_XDECREF(ident); - } - return rv; - } -} - -static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) -{ - /* Encode Python dict dct a JSON term, rval is a PyList */ - static PyObject *open_dict = NULL; - static PyObject *close_dict = NULL; - static PyObject *empty_dict = NULL; - PyObject *kstr = NULL; - PyObject *ident = NULL; - PyObject *key, *value; - Py_ssize_t pos; - int skipkeys; - Py_ssize_t idx; - - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { - open_dict = PyString_InternFromString("{"); - close_dict = PyString_InternFromString("}"); - empty_dict = PyString_InternFromString("{}"); - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) - return -1; - } - if (PyDict_Size(dct) == 0) - return PyList_Append(rval, empty_dict); - - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(dct); - if (ident == NULL) - goto bail; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - goto bail; - } - if (PyDict_SetItem(s->markers, ident, dct)) { - goto bail; - } - } - - if (PyList_Append(rval, open_dict)) - goto bail; - - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level += 1; - /* - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) - separator = _item_separator + 
newline_indent - buf += newline_indent - */ - } - - /* TODO: C speedup not implemented for sort_keys */ - - pos = 0; - skipkeys = PyObject_IsTrue(s->skipkeys); - idx = 0; - while (PyDict_Next(dct, &pos, &key, &value)) { - PyObject *encoded; - - if (PyString_Check(key) || PyUnicode_Check(key)) { - Py_INCREF(key); - kstr = key; - } - else if (PyFloat_Check(key)) { - kstr = encoder_encode_float(s, key); - if (kstr == NULL) - goto bail; - } - else if (PyInt_Check(key) || PyLong_Check(key)) { - kstr = PyObject_Str(key); - if (kstr == NULL) - goto bail; - } - else if (key == Py_True || key == Py_False || key == Py_None) { - kstr = _encoded_const(key); - if (kstr == NULL) - goto bail; - } - else if (skipkeys) { - continue; - } - else { - /* TODO: include repr of key */ - PyErr_SetString(PyExc_ValueError, "keys must be a string"); - goto bail; - } - - if (idx) { - if (PyList_Append(rval, s->item_separator)) - goto bail; - } - - encoded = encoder_encode_string(s, kstr); - Py_CLEAR(kstr); - if (encoded == NULL) - goto bail; - if (PyList_Append(rval, encoded)) { - Py_DECREF(encoded); - goto bail; - } - Py_DECREF(encoded); - if (PyList_Append(rval, s->key_separator)) - goto bail; - if (encoder_listencode_obj(s, rval, value, indent_level)) - goto bail; - idx += 1; - } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) - goto bail; - Py_CLEAR(ident); - } - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level -= 1; - /* - yield '\n' + (' ' * (_indent * _current_indent_level)) - */ - } - if (PyList_Append(rval, close_dict)) - goto bail; - return 0; - -bail: - Py_XDECREF(kstr); - Py_XDECREF(ident); - return -1; -} - - -static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) -{ - /* Encode Python list seq to a JSON term, rval is a PyList */ - static PyObject *open_array = NULL; - static PyObject *close_array = NULL; - static PyObject *empty_array = NULL; - PyObject *ident = NULL; - PyObject *s_fast 
= NULL; - Py_ssize_t num_items; - PyObject **seq_items; - Py_ssize_t i; - - if (open_array == NULL || close_array == NULL || empty_array == NULL) { - open_array = PyString_InternFromString("["); - close_array = PyString_InternFromString("]"); - empty_array = PyString_InternFromString("[]"); - if (open_array == NULL || close_array == NULL || empty_array == NULL) - return -1; - } - ident = NULL; - s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); - if (s_fast == NULL) - return -1; - num_items = PySequence_Fast_GET_SIZE(s_fast); - if (num_items == 0) { - Py_DECREF(s_fast); - return PyList_Append(rval, empty_array); - } - - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(seq); - if (ident == NULL) - goto bail; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - goto bail; - } - if (PyDict_SetItem(s->markers, ident, seq)) { - goto bail; - } - } - - seq_items = PySequence_Fast_ITEMS(s_fast); - if (PyList_Append(rval, open_array)) - goto bail; - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level += 1; - /* - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) - separator = _item_separator + newline_indent - buf += newline_indent - */ - } - for (i = 0; i < num_items; i++) { - PyObject *obj = seq_items[i]; - if (i) { - if (PyList_Append(rval, s->item_separator)) - goto bail; - } - if (encoder_listencode_obj(s, rval, obj, indent_level)) - goto bail; - } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) - goto bail; - Py_CLEAR(ident); - } - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level -= 1; - /* - yield '\n' + (' ' * (_indent * _current_indent_level)) - */ - } - if (PyList_Append(rval, close_array)) - goto bail; - Py_DECREF(s_fast); - return 0; - -bail: - Py_XDECREF(ident); - Py_DECREF(s_fast); - return -1; -} - -static void -encoder_dealloc(PyObject *self) -{ 
- /* Deallocate Encoder */ - encoder_clear(self); - Py_TYPE(self)->tp_free(self); -} - -static int -encoder_traverse(PyObject *self, visitproc visit, void *arg) -{ - PyEncoderObject *s; - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - Py_VISIT(s->markers); - Py_VISIT(s->defaultfn); - Py_VISIT(s->encoder); - Py_VISIT(s->indent); - Py_VISIT(s->key_separator); - Py_VISIT(s->item_separator); - Py_VISIT(s->sort_keys); - Py_VISIT(s->skipkeys); - return 0; -} - -static int -encoder_clear(PyObject *self) -{ - /* Deallocate Encoder */ - PyEncoderObject *s; - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - Py_CLEAR(s->markers); - Py_CLEAR(s->defaultfn); - Py_CLEAR(s->encoder); - Py_CLEAR(s->indent); - Py_CLEAR(s->key_separator); - Py_CLEAR(s->item_separator); - Py_CLEAR(s->sort_keys); - Py_CLEAR(s->skipkeys); - return 0; -} - -PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); - -static -PyTypeObject PyEncoderType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ - "simplejson._speedups.Encoder", /* tp_name */ - sizeof(PyEncoderObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - encoder_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - encoder_call, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ - encoder_doc, /* tp_doc */ - encoder_traverse, /* tp_traverse */ - encoder_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - encoder_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - encoder_init, /* tp_init */ - 0, /* tp_alloc */ 
- encoder_new, /* tp_new */ - 0, /* tp_free */ -}; - -static PyMethodDef speedups_methods[] = { - {"encode_basestring_ascii", - (PyCFunction)py_encode_basestring_ascii, - METH_O, - pydoc_encode_basestring_ascii}, - {"scanstring", - (PyCFunction)py_scanstring, - METH_VARARGS, - pydoc_scanstring}, - {NULL, NULL, 0, NULL} -}; - -PyDoc_STRVAR(module_doc, -"simplejson speedups\n"); - -void -init_speedups(void) -{ - PyObject *m; - PyScannerType.tp_new = PyType_GenericNew; - if (PyType_Ready(&PyScannerType) < 0) - return; - PyEncoderType.tp_new = PyType_GenericNew; - if (PyType_Ready(&PyEncoderType) < 0) - return; - m = Py_InitModule3("_speedups", speedups_methods, module_doc); - Py_INCREF((PyObject*)&PyScannerType); - PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); - Py_INCREF((PyObject*)&PyEncoderType); - PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); -} diff --git a/framework/pym/simplejson/compat.py b/framework/pym/simplejson/compat.py new file mode 100644 index 0000000000..5fc1412844 --- /dev/null +++ b/framework/pym/simplejson/compat.py @@ -0,0 +1,34 @@ +"""Python 3 compatibility shims +""" +import sys +if sys.version_info[0] < 3: + PY3 = False + def b(s): + return s + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + BytesIO = StringIO + text_type = unicode + binary_type = str + string_types = (basestring,) + integer_types = (int, long) + unichr = unichr + reload_module = reload +else: + PY3 = True + if sys.version_info[:2] >= (3, 4): + from importlib import reload as reload_module + else: + from imp import reload as reload_module + def b(s): + return bytes(s, 'latin1') + from io import StringIO, BytesIO + text_type = str + binary_type = bytes + string_types = (str,) + integer_types = (int,) + unichr = chr + +long_type = integer_types[-1] diff --git a/framework/pym/simplejson/decoder.py b/framework/pym/simplejson/decoder.py index b769ea486c..7f0b0568fd 100644 --- 
a/framework/pym/simplejson/decoder.py +++ b/framework/pym/simplejson/decoder.py @@ -1,53 +1,37 @@ """Implementation of JSONDecoder """ +from __future__ import absolute_import import re import sys import struct - -from simplejson.scanner import make_scanner -try: - from simplejson._speedups import scanstring as c_scanstring -except ImportError: - c_scanstring = None - +from .compat import PY3, unichr +from .scanner import make_scanner, JSONDecodeError + +def _import_c_scanstring(): + try: + from ._speedups import scanstring + return scanstring + except ImportError: + return None +c_scanstring = _import_c_scanstring() + +# NOTE (3.1.0): JSONDecodeError may still be imported from this module for +# compatibility, but it was never in the __all__ __all__ = ['JSONDecoder'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL def _floatconstants(): - _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') - if sys.byteorder != 'big': - _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] - nan, inf = struct.unpack('dd', _BYTES) + if sys.version_info < (2, 6): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + nan, inf = struct.unpack('>dd', _BYTES) + else: + nan = float('nan') + inf = float('inf') return nan, inf, -inf NaN, PosInf, NegInf = _floatconstants() - -def linecol(doc, pos): - lineno = doc.count('\n', 0, pos) + 1 - if lineno == 1: - colno = pos - else: - colno = pos - doc.rindex('\n', 0, pos) - return lineno, colno - - -def errmsg(msg, doc, pos, end=None): - # Note that this function is called from _speedups - lineno, colno = linecol(doc, pos) - if end is None: - #fmt = '{0}: line {1} column {2} (char {3})' - #return fmt.format(msg, lineno, colno, pos) - fmt = '%s: line %d column %d (char %d)' - return fmt % (msg, lineno, colno, pos) - endlineno, endcolno = linecol(doc, end) - #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' - #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) - fmt = '%s: line %d column %d - line %d 
column %d (char %d - %d)' - return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) - - _CONSTANTS = { '-Infinity': NegInf, 'Infinity': PosInf, @@ -62,13 +46,15 @@ def errmsg(msg, doc, pos, end=None): DEFAULT_ENCODING = "utf-8" -def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): +def py_scanstring(s, end, encoding=None, strict=True, + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join, + _PY3=PY3, _maxunicode=sys.maxunicode): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. Unescapes all valid JSON string escape sequences and raises ValueError on attempt to decode an invalid string. If strict is False then literal control characters are allowed in the string. - + Returns a tuple of the decoded string and the index of the character in s after the end quote.""" if encoding is None: @@ -79,13 +65,13 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU while 1: chunk = _m(s, end) if chunk is None: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) + raise JSONDecodeError( + "Unterminated string starting at", s, begin) end = chunk.end() content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: - if not isinstance(content, unicode): + if not _PY3 and not isinstance(content, unicode): content = unicode(content, encoding) _append(content) # Terminator is the end of string, a literal control character, @@ -94,49 +80,57 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU break elif terminator != '\\': if strict: - msg = "Invalid control character %r at" % (terminator,) - #msg = "Invalid control character {0!r} at".format(terminator) - raise ValueError(errmsg(msg, s, end)) + msg = "Invalid control character %r at" + raise JSONDecodeError(msg, s, end) else: _append(terminator) continue try: esc = s[end] except 
IndexError: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) + raise JSONDecodeError( + "Unterminated string starting at", s, begin) # If not a unicode escape sequence, must be in the lookup table if esc != 'u': try: char = _b[esc] except KeyError: - msg = "Invalid \\escape: " + repr(esc) - raise ValueError(errmsg(msg, s, end)) + msg = "Invalid \\X escape sequence %r" + raise JSONDecodeError(msg, s, end) end += 1 else: # Unicode escape sequence + msg = "Invalid \\uXXXX escape sequence" esc = s[end + 1:end + 5] - next_end = end + 5 - if len(esc) != 4: - msg = "Invalid \\uXXXX escape" - raise ValueError(errmsg(msg, s, end)) - uni = int(esc, 16) + escX = esc[1:2] + if len(esc) != 4 or escX == 'x' or escX == 'X': + raise JSONDecodeError(msg, s, end - 1) + try: + uni = int(esc, 16) + except ValueError: + raise JSONDecodeError(msg, s, end - 1) + end += 5 # Check for surrogate pair on UCS-4 systems - if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: - msg = "Invalid \\uXXXX\\uXXXX surrogate pair" - if not s[end + 5:end + 7] == '\\u': - raise ValueError(errmsg(msg, s, end)) - esc2 = s[end + 7:end + 11] - if len(esc2) != 4: - raise ValueError(errmsg(msg, s, end)) - uni2 = int(esc2, 16) - uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) - next_end += 6 + # Note that this will join high/low surrogate pairs + # but will also pass unpaired surrogates through + if (_maxunicode > 65535 and + uni & 0xfc00 == 0xd800 and + s[end:end + 2] == '\\u'): + esc2 = s[end + 2:end + 6] + escX = esc2[1:2] + if len(esc2) == 4 and not (escX == 'x' or escX == 'X'): + try: + uni2 = int(esc2, 16) + except ValueError: + raise JSONDecodeError(msg, s, end) + if uni2 & 0xfc00 == 0xdc00: + uni = 0x10000 + (((uni - 0xd800) << 10) | + (uni2 - 0xdc00)) + end += 6 char = unichr(uni) - end = next_end # Append the unescaped character _append(char) - return u''.join(chunks), end + return _join(chunks), end # Use speedup if available @@ -145,8 +139,15 @@ def 
py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE_STR = ' \t\n\r' -def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): - pairs = {} +def JSONObject(state, encoding, strict, scan_once, object_hook, + object_pairs_hook, memo=None, + _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state + # Backwards compatibility + if memo is None: + memo = {} + memo_get = memo.setdefault + pairs = [] # Use a slice to prevent IndexError from being raised, the following # check will raise a more specific ValueError if the string is empty nextchar = s[end:end + 1] @@ -157,19 +158,28 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE nextchar = s[end:end + 1] # Trivial empty object if nextchar == '}': + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + 1 + pairs = {} + if object_hook is not None: + pairs = object_hook(pairs) return pairs, end + 1 elif nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end)) + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end) end += 1 while True: key, end = scanstring(s, end, encoding, strict) + key = memo_get(key, key) # To skip some function call overhead we optimize the fast paths where # the JSON key separator is ": " or just ":". 
if s[end:end + 1] != ':': end = _w(s, end).end() if s[end:end + 1] != ':': - raise ValueError(errmsg("Expecting : delimiter", s, end)) + raise JSONDecodeError("Expecting ':' delimiter", s, end) end += 1 @@ -181,11 +191,8 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE except IndexError: pass - try: - value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) - pairs[key] = value + value, end = scan_once(s, end) + pairs.append((key, value)) try: nextchar = s[end] @@ -199,7 +206,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE if nextchar == '}': break elif nextchar != ',': - raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1) try: nextchar = s[end] @@ -214,13 +221,20 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE end += 1 if nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end - 1)) - + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end - 1) + + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + pairs = dict(pairs) if object_hook is not None: pairs = object_hook(pairs) return pairs, end -def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): +def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state values = [] nextchar = s[end:end + 1] if nextchar in _ws: @@ -229,12 +243,11 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): # Look-ahead for trivial empty array if nextchar == ']': return values, end + 1 + elif nextchar == '': + raise JSONDecodeError("Expecting value or ']'", s, end) _append = values.append while True: - try: - value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) + value, 
end = scan_once(s, end) _append(value) nextchar = s[end:end + 1] if nextchar in _ws: @@ -244,7 +257,7 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): if nextchar == ']': break elif nextchar != ',': - raise ValueError(errmsg("Expecting , delimiter", s, end)) + raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1) try: if s[end] in _ws: @@ -268,7 +281,7 @@ class JSONDecoder(object): +---------------+-------------------+ | array | list | +---------------+-------------------+ - | string | unicode | + | string | str, unicode | +---------------+-------------------+ | number (int) | int, long | +---------------+-------------------+ @@ -287,37 +300,56 @@ class JSONDecoder(object): """ def __init__(self, encoding=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, strict=True): - """``encoding`` determines the encoding used to interpret any ``str`` - objects decoded by this instance (utf-8 by default). It has no - effect when decoding ``unicode`` objects. + parse_int=None, parse_constant=None, strict=True, + object_pairs_hook=None): + """ + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. Note that currently only encodings that are a superset of ASCII work, - strings of other encodings should be passed in as ``unicode``. + strings of other encodings should be passed in as :class:`unicode`. - ``object_hook``, if specified, will be called with the result - of every JSON object decoded and its return value will be used in - place of the given ``dict``. This can be used to provide custom + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom deserializations (e.g. to support JSON-RPC class hinting). 
- ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN. - This can be used to raise an exception if invalid JSON numbers - are encountered. + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. 
+ + *strict* controls the parser's behavior when it encounters an + invalid control character in a string. The default setting of + ``True`` means that unescaped control characters are parse errors, if + ``False`` then control characters will be allowed in strings. """ + if encoding is None: + encoding = DEFAULT_ENCODING self.encoding = encoding self.object_hook = object_hook + self.object_pairs_hook = object_pairs_hook self.parse_float = parse_float or float self.parse_int = parse_int or int self.parse_constant = parse_constant or _CONSTANTS.__getitem__ @@ -325,30 +357,44 @@ def __init__(self, encoding=None, object_hook=None, parse_float=None, self.parse_object = JSONObject self.parse_array = JSONArray self.parse_string = scanstring + self.memo = {} self.scan_once = make_scanner(self) - def decode(self, s, _w=WHITESPACE.match): + def decode(self, s, _w=WHITESPACE.match, _PY3=PY3): """Return the Python representation of ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) """ - obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + if _PY3 and isinstance(s, bytes): + s = str(s, self.encoding) + obj, end = self.raw_decode(s) end = _w(s, end).end() if end != len(s): - raise ValueError(errmsg("Extra data", s, end, len(s))) + raise JSONDecodeError("Extra data", s, end, len(s)) return obj - def raw_decode(self, s, idx=0): - """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning - with a JSON document) and return a 2-tuple of the Python + def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` + beginning with a JSON document) and return a 2-tuple of the Python representation and the index in ``s`` where the document ended. + Optionally, ``idx`` can be used to specify an offset in ``s`` where + the JSON document begins. This can be used to decode a JSON document from a string that may have extraneous data at the end. 
""" - try: - obj, end = self.scan_once(s, idx) - except StopIteration: - raise ValueError("No JSON object could be decoded") - return obj, end + if idx < 0: + # Ensure that raw_decode bails on negative indexes, the regex + # would otherwise mask this behavior. #98 + raise JSONDecodeError('Expecting value', s, idx) + if _PY3 and not isinstance(s, str): + raise TypeError("Input string must be text, not bytes") + # strip UTF-8 bom + if len(s) > idx: + ord0 = ord(s[idx]) + if ord0 == 0xfeff: + idx += 1 + elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf': + idx += 3 + return self.scan_once(s, idx=_w(s, idx).end()) diff --git a/framework/pym/simplejson/encoder.py b/framework/pym/simplejson/encoder.py index cf58290366..7ea172e7d2 100644 --- a/framework/pym/simplejson/encoder.py +++ b/framework/pym/simplejson/encoder.py @@ -1,17 +1,23 @@ """Implementation of JSONEncoder """ +from __future__ import absolute_import import re - -try: - from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii -except ImportError: - c_encode_basestring_ascii = None -try: - from simplejson._speedups import make_encoder as c_make_encoder -except ImportError: - c_make_encoder = None - -ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +from operator import itemgetter +# Do not import Decimal directly to avoid reload issues +import decimal +from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3 +def _import_speedups(): + try: + from . 
import _speedups + return _speedups.encode_basestring_ascii, _speedups.make_encoder + except ImportError: + return None, None +c_encode_basestring_ascii, c_make_encoder = _import_speedups() + +from .decoder import PosInf +from .raw_json import RawJSON + +ESCAPE = re.compile(r'[\x00-\x1f\\"]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -27,25 +33,57 @@ #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) -# Assume this produces an infinity on all machines (probably not guaranteed) -INFINITY = float('1e66666') FLOAT_REPR = repr -def encode_basestring(s): +def encode_basestring(s, _PY3=PY3, _q=u'"'): """Return a JSON representation of a Python string """ + if _PY3: + if isinstance(s, bytes): + s = str(s, 'utf-8') + elif type(s) is not str: + # convert an str subclass instance to exact str + # raise a TypeError otherwise + s = str.__str__(s) + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = unicode(s, 'utf-8') + elif type(s) not in (str, unicode): + # convert an str subclass instance to exact str + # convert a unicode subclass instance to exact unicode + # raise a TypeError otherwise + if isinstance(s, str): + s = str.__str__(s) + else: + s = unicode.__getnewargs__(s)[0] def replace(match): return ESCAPE_DCT[match.group(0)] - return '"' + ESCAPE.sub(replace, s) + '"' + return _q + ESCAPE.sub(replace, s) + _q -def py_encode_basestring_ascii(s): +def py_encode_basestring_ascii(s, _PY3=PY3): """Return an ASCII-only JSON representation of a Python string """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') + if _PY3: + if isinstance(s, bytes): + s = str(s, 'utf-8') + elif type(s) is not str: + # convert an str subclass instance to exact str + # raise a TypeError otherwise + s = str.__str__(s) + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = unicode(s, 'utf-8') + elif type(s) not in 
(str, unicode): + # convert an str subclass instance to exact str + # convert a unicode subclass instance to exact unicode + # raise a TypeError otherwise + if isinstance(s, str): + s = str.__str__(s) + else: + s = unicode.__getnewargs__(s)[0] def replace(match): s = match.group(0) try: @@ -65,7 +103,8 @@ def replace(match): return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' -encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii +encode_basestring_ascii = ( + c_encode_basestring_ascii or py_encode_basestring_ascii) class JSONEncoder(object): """Extensible JSON encoder for Python data structures. @@ -75,7 +114,7 @@ class JSONEncoder(object): +-------------------+---------------+ | Python | JSON | +===================+===============+ - | dict | object | + | dict, namedtuple | object | +-------------------+---------------+ | list, tuple | array | +-------------------+---------------+ @@ -98,9 +137,14 @@ class JSONEncoder(object): """ item_separator = ', ' key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, - check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, encoding='utf-8', default=None): + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None, + use_decimal=True, namedtuple_as_object=True, + tuple_as_array=True, bigint_as_string=False, + item_sort_key=None, for_json=False, ignore_nan=False, + int_as_string_bitcount=None, iterable_as_array=False): """Constructor for JSONEncoder, with sensible defaults. If skipkeys is false, then it is a TypeError to attempt @@ -125,14 +169,17 @@ def __init__(self, skipkeys=False, ensure_ascii=True, sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis. - If indent is a non-negative integer, then JSON array - elements and object members will be pretty-printed with that - indent level. 
An indent level of 0 will only insert newlines. - None is the most compact representation. + If indent is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. - If specified, separators should be a (item_separator, key_separator) - tuple. The default is (', ', ': '). To get the most compact JSON - representation you should specify (',', ':') to eliminate whitespace. + If specified, separators should be an (item_separator, key_separator) + tuple. The default is (', ', ': ') if *indent* is ``None`` and + (',', ': ') otherwise. To get the most compact JSON representation, + you should specify (',', ':') to eliminate whitespace. If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable @@ -142,6 +189,41 @@ def __init__(self, skipkeys=False, ensure_ascii=True, transformed into unicode using that encoding prior to JSON-encoding. The default is UTF-8. + If use_decimal is true (default: ``True``), ``decimal.Decimal`` will + be supported directly by the encoder. For the inverse, decode JSON + with ``parse_float=decimal.Decimal``. + + If namedtuple_as_object is true (the default), objects with + ``_asdict()`` methods will be encoded as JSON objects. + + If tuple_as_array is true (the default), tuple (and subclasses) will + be encoded as JSON arrays. + + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + + If bigint_as_string is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. 
This is to avoid the + rounding that happens in Javascript otherwise. + + If int_as_string_bitcount is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, item_sort_key is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. + + If for_json is true (not the default), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized + as ``null`` in compliance with the ECMA-262 specification. If true, + this will override *allow_nan*. + """ self.skipkeys = skipkeys @@ -149,9 +231,22 @@ def __init__(self, skipkeys=False, ensure_ascii=True, self.check_circular = check_circular self.allow_nan = allow_nan self.sort_keys = sort_keys + self.use_decimal = use_decimal + self.namedtuple_as_object = namedtuple_as_object + self.tuple_as_array = tuple_as_array + self.iterable_as_array = iterable_as_array + self.bigint_as_string = bigint_as_string + self.item_sort_key = item_sort_key + self.for_json = for_json + self.ignore_nan = ignore_nan + self.int_as_string_bitcount = int_as_string_bitcount + if indent is not None and not isinstance(indent, string_types): + indent = indent * ' ' self.indent = indent if separators is not None: self.item_separator, self.key_separator = separators + elif indent is not None: + self.item_separator = ',' if default is not None: self.default = default self.encoding = encoding @@ -174,22 +269,23 @@ def default(self, o): return JSONEncoder.default(self, o) """ - raise TypeError(repr(o) + " is not JSON serializable") + raise TypeError('Object of type %s is not JSON serializable' % + o.__class__.__name__) def encode(self, o): """Return a JSON string representation of a 
Python data structure. + >>> from simplejson import JSONEncoder >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) '{"foo": ["bar", "baz"]}' """ # This is for extremely simple cases and benchmarks. - if isinstance(o, basestring): - if isinstance(o, str): - _encoding = self.encoding - if (_encoding is not None - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) + if isinstance(o, binary_type): + _encoding = self.encoding + if (_encoding is not None and not (_encoding == 'utf-8')): + o = text_type(o, _encoding) + if isinstance(o, string_types): if self.ensure_ascii: return encode_basestring_ascii(o) else: @@ -200,7 +296,10 @@ def encode(self, o): chunks = self.iterencode(o, _one_shot=True) if not isinstance(chunks, (list, tuple)): chunks = list(chunks) - return ''.join(chunks) + if self.ensure_ascii: + return ''.join(chunks) + else: + return u''.join(chunks) def iterencode(self, o, _one_shot=False): """Encode the given object and yield each string @@ -220,15 +319,17 @@ def iterencode(self, o, _one_shot=False): _encoder = encode_basestring_ascii else: _encoder = encode_basestring - if self.encoding != 'utf-8': + if self.encoding != 'utf-8' and self.encoding is not None: def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): - if isinstance(o, str): - o = o.decode(_encoding) + if isinstance(o, binary_type): + o = text_type(o, _encoding) return _orig_encoder(o) - def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): - # Check for specials. Note that this type of test is processor- and/or - # platform-specific, so do tests which don't depend on the internals. + def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan, + _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): + # Check for specials. Note that this type of test is processor + # and/or platform-specific, so do tests which don't depend on + # the internals. 
if o != o: text = 'NaN' @@ -237,44 +338,135 @@ def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _negi elif o == _neginf: text = '-Infinity' else: + if type(o) != float: + # See #118, do not trust custom str/repr + o = float(o) return _repr(o) - if not allow_nan: + if ignore_nan: + text = 'null' + elif not allow_nan: raise ValueError( "Out of range float values are not JSON compliant: " + repr(o)) return text - - if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + key_memo = {} + int_as_string_bitcount = ( + 53 if self.bigint_as_string else self.int_as_string_bitcount) + if (_one_shot and c_make_encoder is not None + and self.indent is None): _iterencode = c_make_encoder( markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, self.allow_nan) + self.skipkeys, self.allow_nan, key_memo, self.use_decimal, + self.namedtuple_as_object, self.tuple_as_array, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.ignore_nan, decimal.Decimal, self.iterable_as_array) else: _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot) - return _iterencode(o, 0) + self.skipkeys, _one_shot, self.use_decimal, + self.namedtuple_as_object, self.tuple_as_array, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.iterable_as_array, Decimal=decimal.Decimal) + try: + return _iterencode(o, 0) + finally: + key_memo.clear() + + +class JSONEncoderForHTML(JSONEncoder): + """An encoder that produces JSON safe to embed in HTML. -def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + To embed JSON content in, say, a script tag on a web page, the + characters &, < and > should be escaped. 
They cannot be escaped + with the usual entities (e.g. &) because they are not expanded + within ') + # => <script> do_nasty_stuff() </script> + # sanitize_html('Click here for $100') + # => Click here for $100 + def sanitize_token(self, token): + + # accommodate filters which use token_type differently + token_type = token["type"] + if token_type in ("StartTag", "EndTag", "EmptyTag"): + name = token["name"] + namespace = token["namespace"] + if ((namespace, name) in self.allowed_elements or + (namespace is None and + (namespaces["html"], name) in self.allowed_elements)): + return self.allowed_token(token) + else: + return self.disallowed_token(token) + elif token_type == "Comment": + pass + else: + return token + + def allowed_token(self, token): + if "data" in token: + attrs = token["data"] + attr_names = set(attrs.keys()) + + # Remove forbidden attributes + for to_remove in (attr_names - self.allowed_attributes): + del token["data"][to_remove] + attr_names.remove(to_remove) + + # Remove attributes with disallowed URL values + for attr in (attr_names & self.attr_val_is_uri): + assert attr in attrs + # I don't have a clue where this regexp comes from or why it matches those + # characters, nor why we call unescape. I just know it's always been here. + # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all + # this will do is remove *more* than it otherwise would. 
+ val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '', + unescape(attrs[attr])).lower() + # remove replacement characters from unescaped characters + val_unescaped = val_unescaped.replace("\ufffd", "") + try: + uri = urlparse.urlparse(val_unescaped) + except ValueError: + uri = None + del attrs[attr] + if uri and uri.scheme: + if uri.scheme not in self.allowed_protocols: + del attrs[attr] + if uri.scheme == 'data': + m = data_content_type.match(uri.path) + if not m: + del attrs[attr] + elif m.group('content_type') not in self.allowed_content_types: + del attrs[attr] + + for attr in self.svg_attr_val_allows_ref: + if attr in attrs: + attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', + ' ', + unescape(attrs[attr])) + if (token["name"] in self.svg_allow_local_href and + (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*', + attrs[(namespaces['xlink'], 'href')])): + del attrs[(namespaces['xlink'], 'href')] + if (None, 'style') in attrs: + attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')]) + token["data"] = attrs + return token + + def disallowed_token(self, token): + token_type = token["type"] + if token_type == "EndTag": + token["data"] = "" % token["name"] + elif token["data"]: + assert token_type in ("StartTag", "EmptyTag") + attrs = [] + for (ns, name), v in token["data"].items(): + attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) + token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) + else: + token["data"] = "<%s>" % token["name"] + if token.get("selfClosing"): + token["data"] = token["data"][:-1] + "/>" + + token["type"] = "Characters" + + del token["name"] + return token + + def sanitize_css(self, style): + # disallow urls + style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) + + # gauntlet + if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): + return '' + if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", 
style): + return '' + + clean = [] + for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style): + if not value: + continue + if prop.lower() in self.allowed_css_properties: + clean.append(prop + ': ' + value + ';') + elif prop.split('-')[0].lower() in ['background', 'border', 'margin', + 'padding']: + for keyword in value.split(): + if keyword not in self.allowed_css_keywords and \ + not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa + break + else: + clean.append(prop + ': ' + value + ';') + elif prop.lower() in self.allowed_svg_properties: + clean.append(prop + ': ' + value + ';') + + return ' '.join(clean) diff --git a/samples-and-tests/i-am-a-developer/html5lib/filters/whitespace.py b/samples-and-tests/i-am-a-developer/html5lib/filters/whitespace.py new file mode 100644 index 0000000000..0d12584b45 --- /dev/null +++ b/samples-and-tests/i-am-a-developer/html5lib/filters/whitespace.py @@ -0,0 +1,38 @@ +from __future__ import absolute_import, division, unicode_literals + +import re + +from . 
import base +from ..constants import rcdataElements, spaceCharacters +spaceCharacters = "".join(spaceCharacters) + +SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) + + +class Filter(base.Filter): + """Collapses whitespace except in pre, textarea, and script elements""" + spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) + + def __iter__(self): + preserve = 0 + for token in base.Filter.__iter__(self): + type = token["type"] + if type == "StartTag" \ + and (preserve or token["name"] in self.spacePreserveElements): + preserve += 1 + + elif type == "EndTag" and preserve: + preserve -= 1 + + elif not preserve and type == "SpaceCharacters" and token["data"]: + # Test on token["data"] above to not introduce spaces where there were not + token["data"] = " " + + elif not preserve and type == "Characters": + token["data"] = collapse_spaces(token["data"]) + + yield token + + +def collapse_spaces(text): + return SPACES_REGEX.sub(' ', text) diff --git a/samples-and-tests/i-am-a-developer/html5lib/html5parser.py b/samples-and-tests/i-am-a-developer/html5lib/html5parser.py new file mode 100644 index 0000000000..74d829d984 --- /dev/null +++ b/samples-and-tests/i-am-a-developer/html5lib/html5parser.py @@ -0,0 +1,2795 @@ +from __future__ import absolute_import, division, unicode_literals +from six import with_metaclass, viewkeys + +import types + +from . import _inputstream +from . import _tokenizer + +from . import treebuilders +from .treebuilders.base import Marker + +from . 
import _utils +from .constants import ( + spaceCharacters, asciiUpper2Lower, + specialElements, headingElements, cdataElements, rcdataElements, + tokenTypes, tagTokenTypes, + namespaces, + htmlIntegrationPointElements, mathmlTextIntegrationPointElements, + adjustForeignAttributes as adjustForeignAttributesMap, + adjustMathMLAttributes, adjustSVGAttributes, + E, + _ReparseException +) + + +def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs): + """Parse an HTML document as a string or file-like object into a tree + + :arg doc: the document to parse as a string or file-like object + + :arg treebuilder: the treebuilder to use when parsing + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import parse + >>> parse('

This is a doc

') + + + """ + tb = treebuilders.getTreeBuilder(treebuilder) + p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) + return p.parse(doc, **kwargs) + + +def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs): + """Parse an HTML fragment as a string or file-like object into a tree + + :arg doc: the fragment to parse as a string or file-like object + + :arg container: the container context to parse the fragment in + + :arg treebuilder: the treebuilder to use when parsing + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :returns: parsed tree + + Example: + + >>> from html5lib.html5libparser import parseFragment + >>> parseFragment('this is a fragment') + + + """ + tb = treebuilders.getTreeBuilder(treebuilder) + p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) + return p.parseFragment(doc, container=container, **kwargs) + + +def method_decorator_metaclass(function): + class Decorated(type): + def __new__(meta, classname, bases, classDict): + for attributeName, attribute in classDict.items(): + if isinstance(attribute, types.FunctionType): + attribute = function(attribute) + + classDict[attributeName] = attribute + return type.__new__(meta, classname, bases, classDict) + return Decorated + + +class HTMLParser(object): + """HTML parser + + Generates a tree structure from a stream of (possibly malformed) HTML. + + """ + + def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): + """ + :arg tree: a treebuilder class controlling the type of tree that will be + returned. 
Built in treebuilders can be accessed through + html5lib.treebuilders.getTreeBuilder(treeType) + + :arg strict: raise an exception when a parse error is encountered + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :arg debug: whether or not to enable debug mode which logs things + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() # generates parser with etree builder + >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict + + """ + + # Raise an exception on the first error encountered + self.strict = strict + + if tree is None: + tree = treebuilders.getTreeBuilder("etree") + self.tree = tree(namespaceHTMLElements) + self.errors = [] + + self.phases = {name: cls(self, self.tree) for name, cls in + getPhases(debug).items()} + + def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): + + self.innerHTMLMode = innerHTML + self.container = container + self.scripting = scripting + self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs) + self.reset() + + try: + self.mainLoop() + except _ReparseException: + self.reset() + self.mainLoop() + + def reset(self): + self.tree.reset() + self.firstStartTag = False + self.errors = [] + self.log = [] # only used with debug mode + # "quirks" / "limited quirks" / "no quirks" + self.compatMode = "no quirks" + + if self.innerHTMLMode: + self.innerHTML = self.container.lower() + + if self.innerHTML in cdataElements: + self.tokenizer.state = self.tokenizer.rcdataState + elif self.innerHTML in rcdataElements: + self.tokenizer.state = self.tokenizer.rawtextState + elif self.innerHTML == 'plaintext': + self.tokenizer.state = self.tokenizer.plaintextState + else: + # state already is data state + # self.tokenizer.state = self.tokenizer.dataState + pass + self.phase = self.phases["beforeHtml"] + self.phase.insertHtmlElement() + self.resetInsertionMode() + else: + self.innerHTML = 
False # pylint:disable=redefined-variable-type + self.phase = self.phases["initial"] + + self.lastPhase = None + + self.beforeRCDataPhase = None + + self.framesetOK = True + + @property + def documentEncoding(self): + """Name of the character encoding that was used to decode the input stream, or + :obj:`None` if that is not determined yet + + """ + if not hasattr(self, 'tokenizer'): + return None + return self.tokenizer.stream.charEncoding[0].name + + def isHTMLIntegrationPoint(self, element): + if (element.name == "annotation-xml" and + element.namespace == namespaces["mathml"]): + return ("encoding" in element.attributes and + element.attributes["encoding"].translate( + asciiUpper2Lower) in + ("text/html", "application/xhtml+xml")) + else: + return (element.namespace, element.name) in htmlIntegrationPointElements + + def isMathMLTextIntegrationPoint(self, element): + return (element.namespace, element.name) in mathmlTextIntegrationPointElements + + def mainLoop(self): + CharactersToken = tokenTypes["Characters"] + SpaceCharactersToken = tokenTypes["SpaceCharacters"] + StartTagToken = tokenTypes["StartTag"] + EndTagToken = tokenTypes["EndTag"] + CommentToken = tokenTypes["Comment"] + DoctypeToken = tokenTypes["Doctype"] + ParseErrorToken = tokenTypes["ParseError"] + + for token in self.tokenizer: + prev_token = None + new_token = token + while new_token is not None: + prev_token = new_token + currentNode = self.tree.openElements[-1] if self.tree.openElements else None + currentNodeNamespace = currentNode.namespace if currentNode else None + currentNodeName = currentNode.name if currentNode else None + + type = new_token["type"] + + if type == ParseErrorToken: + self.parseError(new_token["data"], new_token.get("datavars", {})) + new_token = None + else: + if (len(self.tree.openElements) == 0 or + currentNodeNamespace == self.tree.defaultNamespace or + (self.isMathMLTextIntegrationPoint(currentNode) and + ((type == StartTagToken and + token["name"] not in 
frozenset(["mglyph", "malignmark"])) or + type in (CharactersToken, SpaceCharactersToken))) or + (currentNodeNamespace == namespaces["mathml"] and + currentNodeName == "annotation-xml" and + type == StartTagToken and + token["name"] == "svg") or + (self.isHTMLIntegrationPoint(currentNode) and + type in (StartTagToken, CharactersToken, SpaceCharactersToken))): + phase = self.phase + else: + phase = self.phases["inForeignContent"] + + if type == CharactersToken: + new_token = phase.processCharacters(new_token) + elif type == SpaceCharactersToken: + new_token = phase.processSpaceCharacters(new_token) + elif type == StartTagToken: + new_token = phase.processStartTag(new_token) + elif type == EndTagToken: + new_token = phase.processEndTag(new_token) + elif type == CommentToken: + new_token = phase.processComment(new_token) + elif type == DoctypeToken: + new_token = phase.processDoctype(new_token) + + if (type == StartTagToken and prev_token["selfClosing"] and + not prev_token["selfClosingAcknowledged"]): + self.parseError("non-void-element-with-trailing-solidus", + {"name": prev_token["name"]}) + + # When the loop finishes it's EOF + reprocess = True + phases = [] + while reprocess: + phases.append(self.phase) + reprocess = self.phase.processEOF() + if reprocess: + assert self.phase not in phases + + def parse(self, stream, *args, **kwargs): + """Parse a HTML document into a well-formed tree + + :arg stream: a file-like object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element). + + :arg scripting: treat noscript elements as if JavaScript was turned on + + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() + >>> parser.parse('

This is a doc

') + + + """ + self._parse(stream, False, None, *args, **kwargs) + return self.tree.getDocument() + + def parseFragment(self, stream, *args, **kwargs): + """Parse a HTML fragment into a well-formed tree fragment + + :arg container: name of the element we're setting the innerHTML + property if set to None, default to 'div' + + :arg stream: a file-like object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + + :arg scripting: treat noscript elements as if JavaScript was turned on + + :returns: parsed tree + + Example: + + >>> from html5lib.html5libparser import HTMLParser + >>> parser = HTMLParser() + >>> parser.parseFragment('this is a fragment') + + + """ + self._parse(stream, True, *args, **kwargs) + return self.tree.getFragment() + + def parseError(self, errorcode="XXX-undefined-error", datavars=None): + # XXX The idea is to make errorcode mandatory. + if datavars is None: + datavars = {} + self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) + if self.strict: + raise ParseError(E[errorcode] % datavars) + + def adjustMathMLAttributes(self, token): + adjust_attributes(token, adjustMathMLAttributes) + + def adjustSVGAttributes(self, token): + adjust_attributes(token, adjustSVGAttributes) + + def adjustForeignAttributes(self, token): + adjust_attributes(token, adjustForeignAttributesMap) + + def reparseTokenNormal(self, token): + # pylint:disable=unused-argument + self.parser.phase() + + def resetInsertionMode(self): + # The name of this method is mostly historical. (It's also used in the + # specification.) 
+ last = False + newModes = { + "select": "inSelect", + "td": "inCell", + "th": "inCell", + "tr": "inRow", + "tbody": "inTableBody", + "thead": "inTableBody", + "tfoot": "inTableBody", + "caption": "inCaption", + "colgroup": "inColumnGroup", + "table": "inTable", + "head": "inBody", + "body": "inBody", + "frameset": "inFrameset", + "html": "beforeHead" + } + for node in self.tree.openElements[::-1]: + nodeName = node.name + new_phase = None + if node == self.tree.openElements[0]: + assert self.innerHTML + last = True + nodeName = self.innerHTML + # Check for conditions that should only happen in the innerHTML + # case + if nodeName in ("select", "colgroup", "head", "html"): + assert self.innerHTML + + if not last and node.namespace != self.tree.defaultNamespace: + continue + + if nodeName in newModes: + new_phase = self.phases[newModes[nodeName]] + break + elif last: + new_phase = self.phases["inBody"] + break + + self.phase = new_phase + + def parseRCDataRawtext(self, token, contentType): + # Generic RCDATA/RAWTEXT Parsing algorithm + assert contentType in ("RAWTEXT", "RCDATA") + + self.tree.insertElement(token) + + if contentType == "RAWTEXT": + self.tokenizer.state = self.tokenizer.rawtextState + else: + self.tokenizer.state = self.tokenizer.rcdataState + + self.originalPhase = self.phase + + self.phase = self.phases["text"] + + +@_utils.memoize +def getPhases(debug): + def log(function): + """Logger that records which phase processes each token""" + type_names = {value: key for key, value in tokenTypes.items()} + + def wrapped(self, *args, **kwargs): + if function.__name__.startswith("process") and len(args) > 0: + token = args[0] + info = {"type": type_names[token['type']]} + if token['type'] in tagTokenTypes: + info["name"] = token['name'] + + self.parser.log.append((self.parser.tokenizer.state.__name__, + self.parser.phase.__class__.__name__, + self.__class__.__name__, + function.__name__, + info)) + return function(self, *args, **kwargs) + else: + return 
function(self, *args, **kwargs) + return wrapped + + def getMetaclass(use_metaclass, metaclass_func): + if use_metaclass: + return method_decorator_metaclass(metaclass_func) + else: + return type + + # pylint:disable=unused-argument + class Phase(with_metaclass(getMetaclass(debug, log))): + """Base class for helper object that implements each phase of processing + """ + __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") + + def __init__(self, parser, tree): + self.parser = parser + self.tree = tree + self.__startTagCache = {} + self.__endTagCache = {} + + def processEOF(self): + raise NotImplementedError + + def processComment(self, token): + # For most phases the following is correct. Where it's not it will be + # overridden. + self.tree.insertComment(token, self.tree.openElements[-1]) + + def processDoctype(self, token): + self.parser.parseError("unexpected-doctype") + + def processCharacters(self, token): + self.tree.insertText(token["data"]) + + def processSpaceCharacters(self, token): + self.tree.insertText(token["data"]) + + def processStartTag(self, token): + # Note the caching is done here rather than BoundMethodDispatcher as doing it there + # requires a circular reference to the Phase, and this ends up with a significant + # (CPython 2.7, 3.8) GC cost when parsing many short inputs + name = token["name"] + # In Py2, using `in` is quicker in general than try/except KeyError + # In Py3, `in` is quicker when there are few cache hits (typically short inputs) + if name in self.__startTagCache: + func = self.__startTagCache[name] + else: + func = self.__startTagCache[name] = self.startTagHandler[name] + # bound the cache size in case we get loads of unknown tags + while len(self.__startTagCache) > len(self.startTagHandler) * 1.1: + # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7 + self.__startTagCache.pop(next(iter(self.__startTagCache))) + return func(token) + + def startTagHtml(self, token): + if not 
self.parser.firstStartTag and token["name"] == "html": + self.parser.parseError("non-html-root") + # XXX Need a check here to see if the first start tag token emitted is + # this token... If it's not, invoke self.parser.parseError(). + for attr, value in token["data"].items(): + if attr not in self.tree.openElements[0].attributes: + self.tree.openElements[0].attributes[attr] = value + self.parser.firstStartTag = False + + def processEndTag(self, token): + # Note the caching is done here rather than BoundMethodDispatcher as doing it there + # requires a circular reference to the Phase, and this ends up with a significant + # (CPython 2.7, 3.8) GC cost when parsing many short inputs + name = token["name"] + # In Py2, using `in` is quicker in general than try/except KeyError + # In Py3, `in` is quicker when there are few cache hits (typically short inputs) + if name in self.__endTagCache: + func = self.__endTagCache[name] + else: + func = self.__endTagCache[name] = self.endTagHandler[name] + # bound the cache size in case we get loads of unknown tags + while len(self.__endTagCache) > len(self.endTagHandler) * 1.1: + # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7 + self.__endTagCache.pop(next(iter(self.__endTagCache))) + return func(token) + + class InitialPhase(Phase): + __slots__ = tuple() + + def processSpaceCharacters(self, token): + pass + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + correct = token["correct"] + + if (name != "html" or publicId is not None or + systemId is not None and systemId != "about:legacy-compat"): + self.parser.parseError("unknown-doctype") + + if publicId is None: + publicId = "" + + self.tree.insertDoctype(token) + + if publicId != "": + publicId = publicId.translate(asciiUpper2Lower) + + if (not correct or token["name"] != "html" or + 
publicId.startswith( + ("+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. 
corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", + "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", + "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", + "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", + "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", + "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//")) or + publicId in ("-//w3o//dtd w3 html strict 3.0//en//", + "-/w3c/dtd html 4.0 transitional/en", + "html") or + publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is None or + systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): + self.parser.compatMode = "quirks" + elif (publicId.startswith( + ("-//w3c//dtd xhtml 1.0 frameset//", + "-//w3c//dtd xhtml 1.0 transitional//")) or + publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is not None): + self.parser.compatMode = "limited quirks" + + self.parser.phase = self.parser.phases["beforeHtml"] + + def anythingElse(self): + self.parser.compatMode = "quirks" + self.parser.phase = self.parser.phases["beforeHtml"] + + def processCharacters(self, token): + self.parser.parseError("expected-doctype-but-got-chars") + self.anythingElse() + 
return token + + def processStartTag(self, token): + self.parser.parseError("expected-doctype-but-got-start-tag", + {"name": token["name"]}) + self.anythingElse() + return token + + def processEndTag(self, token): + self.parser.parseError("expected-doctype-but-got-end-tag", + {"name": token["name"]}) + self.anythingElse() + return token + + def processEOF(self): + self.parser.parseError("expected-doctype-but-got-eof") + self.anythingElse() + return True + + class BeforeHtmlPhase(Phase): + __slots__ = tuple() + + # helper methods + def insertHtmlElement(self): + self.tree.insertRoot(impliedTagToken("html", "StartTag")) + self.parser.phase = self.parser.phases["beforeHead"] + + # other + def processEOF(self): + self.insertHtmlElement() + return True + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processSpaceCharacters(self, token): + pass + + def processCharacters(self, token): + self.insertHtmlElement() + return token + + def processStartTag(self, token): + if token["name"] == "html": + self.parser.firstStartTag = True + self.insertHtmlElement() + return token + + def processEndTag(self, token): + if token["name"] not in ("head", "body", "html", "br"): + self.parser.parseError("unexpected-end-tag-before-html", + {"name": token["name"]}) + else: + self.insertHtmlElement() + return token + + class BeforeHeadPhase(Phase): + __slots__ = tuple() + + def processEOF(self): + self.startTagHead(impliedTagToken("head", "StartTag")) + return True + + def processSpaceCharacters(self, token): + pass + + def processCharacters(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagHead(self, token): + self.tree.insertElement(token) + self.tree.headPointer = self.tree.openElements[-1] + self.parser.phase = self.parser.phases["inHead"] + + def startTagOther(self, token): + 
self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def endTagImplyHead(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def endTagOther(self, token): + self.parser.parseError("end-tag-after-implied-root", + {"name": token["name"]}) + + startTagHandler = _utils.MethodDispatcher([ + ("html", startTagHtml), + ("head", startTagHead) + ]) + startTagHandler.default = startTagOther + + endTagHandler = _utils.MethodDispatcher([ + (("head", "body", "html", "br"), endTagImplyHead) + ]) + endTagHandler.default = endTagOther + + class InHeadPhase(Phase): + __slots__ = tuple() + + # the real thing + def processEOF(self): + self.anythingElse() + return True + + def processCharacters(self, token): + self.anythingElse() + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagHead(self, token): + self.parser.parseError("two-heads-are-not-better-than-one") + + def startTagBaseLinkCommand(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + def startTagMeta(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + attributes = token["data"] + if self.parser.tokenizer.stream.charEncoding[1] == "tentative": + if "charset" in attributes: + self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) + elif ("content" in attributes and + "http-equiv" in attributes and + attributes["http-equiv"].lower() == "content-type"): + # Encoding it as UTF-8 here is a hack, as really we should pass + # the abstract Unicode string, and just use the + # ContentAttrParser on that, but using UTF-8 allows all chars + # to be encoded and as a ASCII-superset works. 
+ data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8")) + parser = _inputstream.ContentAttrParser(data) + codec = parser.parse() + self.parser.tokenizer.stream.changeEncoding(codec) + + def startTagTitle(self, token): + self.parser.parseRCDataRawtext(token, "RCDATA") + + def startTagNoFramesStyle(self, token): + # Need to decide whether to implement the scripting-disabled case + self.parser.parseRCDataRawtext(token, "RAWTEXT") + + def startTagNoscript(self, token): + if self.parser.scripting: + self.parser.parseRCDataRawtext(token, "RAWTEXT") + else: + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inHeadNoscript"] + + def startTagScript(self, token): + self.tree.insertElement(token) + self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState + self.parser.originalPhase = self.parser.phase + self.parser.phase = self.parser.phases["text"] + + def startTagOther(self, token): + self.anythingElse() + return token + + def endTagHead(self, token): + node = self.parser.tree.openElements.pop() + assert node.name == "head", "Expected head got %s" % node.name + self.parser.phase = self.parser.phases["afterHead"] + + def endTagHtmlBodyBr(self, token): + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + self.endTagHead(impliedTagToken("head")) + + startTagHandler = _utils.MethodDispatcher([ + ("html", startTagHtml), + ("title", startTagTitle), + (("noframes", "style"), startTagNoFramesStyle), + ("noscript", startTagNoscript), + ("script", startTagScript), + (("base", "basefont", "bgsound", "command", "link"), + startTagBaseLinkCommand), + ("meta", startTagMeta), + ("head", startTagHead) + ]) + startTagHandler.default = startTagOther + + endTagHandler = _utils.MethodDispatcher([ + ("head", endTagHead), + (("br", "html", "body"), endTagHtmlBodyBr) + ]) + endTagHandler.default = endTagOther + + class 
InHeadNoscriptPhase(Phase): + __slots__ = tuple() + + def processEOF(self): + self.parser.parseError("eof-in-head-noscript") + self.anythingElse() + return True + + def processComment(self, token): + return self.parser.phases["inHead"].processComment(token) + + def processCharacters(self, token): + self.parser.parseError("char-in-head-noscript") + self.anythingElse() + return token + + def processSpaceCharacters(self, token): + return self.parser.phases["inHead"].processSpaceCharacters(token) + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagBaseLinkCommand(self, token): + return self.parser.phases["inHead"].processStartTag(token) + + def startTagHeadNoscript(self, token): + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + + def startTagOther(self, token): + self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) + self.anythingElse() + return token + + def endTagNoscript(self, token): + node = self.parser.tree.openElements.pop() + assert node.name == "noscript", "Expected noscript got %s" % node.name + self.parser.phase = self.parser.phases["inHead"] + + def endTagBr(self, token): + self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + # Caller must raise parse error first! 
+ self.endTagNoscript(impliedTagToken("noscript")) + + startTagHandler = _utils.MethodDispatcher([ + ("html", startTagHtml), + (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand), + (("head", "noscript"), startTagHeadNoscript), + ]) + startTagHandler.default = startTagOther + + endTagHandler = _utils.MethodDispatcher([ + ("noscript", endTagNoscript), + ("br", endTagBr), + ]) + endTagHandler.default = endTagOther + + class AfterHeadPhase(Phase): + __slots__ = tuple() + + def processEOF(self): + self.anythingElse() + return True + + def processCharacters(self, token): + self.anythingElse() + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagBody(self, token): + self.parser.framesetOK = False + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inBody"] + + def startTagFrameset(self, token): + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inFrameset"] + + def startTagFromHead(self, token): + self.parser.parseError("unexpected-start-tag-out-of-my-head", + {"name": token["name"]}) + self.tree.openElements.append(self.tree.headPointer) + self.parser.phases["inHead"].processStartTag(token) + for node in self.tree.openElements[::-1]: + if node.name == "head": + self.tree.openElements.remove(node) + break + + def startTagHead(self, token): + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + + def startTagOther(self, token): + self.anythingElse() + return token + + def endTagHtmlBodyBr(self, token): + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + self.tree.insertElement(impliedTagToken("body", "StartTag")) + self.parser.phase = self.parser.phases["inBody"] + self.parser.framesetOK = True + + startTagHandler = _utils.MethodDispatcher([ + ("html", startTagHtml), + ("body", 
startTagBody), + ("frameset", startTagFrameset), + (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", + "style", "title"), + startTagFromHead), + ("head", startTagHead) + ]) + startTagHandler.default = startTagOther + endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"), + endTagHtmlBodyBr)]) + endTagHandler.default = endTagOther + + class InBodyPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody + # the really-really-really-very crazy mode + __slots__ = ("processSpaceCharacters",) + + def __init__(self, *args, **kwargs): + super(InBodyPhase, self).__init__(*args, **kwargs) + # Set this to the default handler + self.processSpaceCharacters = self.processSpaceCharactersNonPre + + def isMatchingFormattingElement(self, node1, node2): + return (node1.name == node2.name and + node1.namespace == node2.namespace and + node1.attributes == node2.attributes) + + # helper + def addFormattingElement(self, token): + self.tree.insertElement(token) + element = self.tree.openElements[-1] + + matchingElements = [] + for node in self.tree.activeFormattingElements[::-1]: + if node is Marker: + break + elif self.isMatchingFormattingElement(node, element): + matchingElements.append(node) + + assert len(matchingElements) <= 3 + if len(matchingElements) == 3: + self.tree.activeFormattingElements.remove(matchingElements[-1]) + self.tree.activeFormattingElements.append(element) + + # the real deal + def processEOF(self): + allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td", + "tfoot", "th", "thead", "tr", "body", + "html")) + for node in self.tree.openElements[::-1]: + if node.name not in allowed_elements: + self.parser.parseError("expected-closing-tag-but-got-eof") + break + # Stop parsing + + def processSpaceCharactersDropNewline(self, token): + # Sometimes (start of
, , and