diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index f1de048ea4..6b9fab5d7c 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -29,10 +29,10 @@ jobs: # we don't know what commit the last tag was it's safer to get entire repo so previousStableVersion resolves fetch-depth: 0 - - name: Set up python 2 - uses: actions/setup-python@v2 + - name: Set up python 3 + uses: actions/setup-python@v3 with: - python-version: '2.x' + python-version: '3.x' architecture: 'x64' - name: Set up JDK ${{ matrix.jdk }} @@ -79,4 +79,4 @@ jobs: name: play-${{ github.sha }} if-no-files-found: error path: | - ./framework/dist/* \ No newline at end of file + ./framework/dist/* diff --git a/framework/pym/play/application.py b/framework/pym/play/application.py index cb79af335f..644d041708 100644 --- a/framework/pym/play/application.py +++ b/framework/pym/play/application.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from builtins import input +from builtins import object import sys import os import os.path @@ -24,23 +27,23 @@ def __init__(self, application_path, env, ignoreMissingModules = False): # only parse conf it is exists - if it should be there, it will be caught later # (depends on command) confExists = os.path.exists(os.path.join(self.path, 'conf', 'application.conf')); - if application_path is not None and confExists: + if application_path != None and confExists: confFolder = os.path.join(application_path, 'conf/') try: self.conf = PlayConfParser(confFolder, env) except Exception as err: - print "~ Failed to parse application configuration", err + print("~ Failed to parse application configuration", err) self.conf = None # No app / Invalid app else: self.conf = None self.play_env = env - if env.has_key('jpda.port'): + if 'jpda.port' in env: self.jpda_port = env['jpda.port'] else: self.jpda_port = self.readConf('jpda.port') - if env.has_key('jpda.address'): + if 'jpda.address' in env: self.jpda_address = 
env['jpda.address'] else: self.jpda_address = self.readConf('jpda.address') @@ -54,9 +57,9 @@ def check(self): assert os.path.exists(os.path.join(self.path, 'conf', 'routes')) assert os.path.exists(os.path.join(self.path, 'conf', 'application.conf')) except AssertionError: - print "~ Oops. conf/routes or conf/application.conf missing." - print "~ %s does not seem to host a valid application." % os.path.normpath(self.path) - print "~" + print("~ Oops. conf/routes or conf/application.conf missing.") + print("~ %s does not seem to host a valid application." % os.path.normpath(self.path)) + print("~") sys.exit(-1) def readConf(self, key): @@ -78,20 +81,20 @@ def modules(self): application_mode = "dev" if application_mode == 'dev': #Load docviewer module - modules.append(os.path.normpath(os.path.join(self.play_env["basedir"], 'modules/docviewer'))) + modules.append(os.path.normpath(os.path.join(self.play_env["basedir"], 'modules/docviewer'))) for m in self.readConfs('module.'): if '${play.path}' in m: m = m.replace('${play.path}', self.play_env["basedir"]) - if m[0] is not '/': + if m[0] != '/': m = os.path.normpath(os.path.join(self.path, m)) if not os.path.exists(m) and not self.ignoreMissingModules: - print "~ Oops," - print "~ Module not found: %s" % (m) - print "~" + print("~ Oops,") + print("~ Module not found: %s" % (m)) + print("~") if m.startswith('${play.path}/modules'): - print "~ You can try to install the missing module using 'play install %s'" % (m[21:]) - print "~" + print("~ You can try to install the missing module using 'play install %s'" % (m[21:])) + print("~") sys.exit(-1) modules.append(m) if self.path and os.path.exists(os.path.join(self.path, 'modules')): @@ -108,7 +111,7 @@ def modules(self): return set(modules) # Ensure we don't have duplicates def module_names(self): - return map(lambda x: x[7:],self.conf.getAllKeys("module.")) + return [x[7:] for x in self.conf.getAllKeys("module.")] def override(self, f, t): fromFile = None @@ -116,18 
+119,18 @@ def override(self, f, t): pc = os.path.join(module, f) if os.path.exists(pc): fromFile = pc if not fromFile: - print "~ %s not found in any module" % f - print "~ " + print("~ %s not found in any module" % f) + print("~ ") sys.exit(-1) toFile = os.path.join(self.path, t) if os.path.exists(toFile): - response = raw_input("~ Warning! %s already exists and will be overridden (y/n)? " % toFile) + response = input("~ Warning! %s already exists and will be overridden (y/n)? " % toFile) if not response == 'y': return if not os.path.exists(os.path.dirname(toFile)): os.makedirs(os.path.dirname(toFile)) shutil.copyfile(fromFile, toFile) - print "~ Copied %s to %s " % (fromFile, toFile) + print("~ Copied %s to %s " % (fromFile, toFile)) def name(self): return self.readConf("application.name") @@ -207,15 +210,15 @@ def fw_cp_args(self): return cp_args def pid_path(self): - if self.play_env.has_key('pid_file'): + if 'pid_file' in self.play_env: return os.path.join(self.path, self.play_env['pid_file']) - elif os.environ.has_key('PLAY_PID_PATH'): + elif 'PLAY_PID_PATH' in os.environ: return os.environ['PLAY_PID_PATH'] else: return os.path.join(self.path, 'server.pid') def log_path(self): - if not os.environ.has_key('PLAY_LOG_PATH'): + if 'PLAY_LOG_PATH' not in os.environ: log_path = os.path.join(self.path, 'logs') else: log_path = os.environ['PLAY_LOG_PATH'] @@ -231,12 +234,12 @@ def check_jpda(self): else: s.bind((self.jpda_address, int(self.jpda_port))) s.close() - except socket.error, e: + except socket.error as e: if "disable_random_jpda" in self.play_env and self.play_env["disable_random_jpda"]: - print 'JPDA port %s is already used, and command line option "-f" was specified. Cannot start server\n' % self.jpda_port + print('JPDA port %s is already used, and command line option "-f" was specified. Cannot start server\n' % self.jpda_port) sys.exit(-1) else: - print 'JPDA port %s is already used. 
Will try to use any free port for debugging' % self.jpda_port + print('JPDA port %s is already used. Will try to use any free port for debugging' % self.jpda_port) self.jpda_port = 0 def java_args_memory(self, java_args): @@ -272,29 +275,29 @@ def java_cmd(self, java_args, cp_args=None, className='play.server.Server', args if cp_args is None: cp_args = self.cp_args() - if self.play_env.has_key('jpda.port'): + if 'jpda.port' in self.play_env: self.jpda_port = self.play_env['jpda.port'] - if self.play_env.has_key('jpda.address'): + if 'jpda.address' in self.play_env: self.jpda_address = self.play_env['jpda.address'] application_mode = self.readConf('application.mode').lower() if not application_mode: - print "~ Warning: no application.mode defined in you conf/application.conf. Using DEV mode." + print("~ Warning: no application.mode defined in you conf/application.conf. Using DEV mode.") application_mode = "dev" if application_mode == 'prod': java_args.append('-server') - if self.play_env.has_key('jvm_version'): + if 'jvm_version' in self.play_env: javaVersion = self.play_env['jvm_version'] else: javaVersion = getJavaVersion() - print "~ using java version \"%s\"" % javaVersion + print("~ using java version \"%s\"" % javaVersion) if javaVersion.startswith("1.5") or javaVersion.startswith("1.6") or javaVersion.startswith("1.7"): - print "~ ERROR: java version prior to 1.8 are no longer supported: current version \"%s\" : please update" % javaVersion + print("~ ERROR: java version prior to 1.8 are no longer supported: current version \"%s\" : please update" % javaVersion) java_args.append('-noverify') @@ -302,13 +305,13 @@ def java_cmd(self, java_args, cp_args=None, className='play.server.Server', args if java_policy != '': policyFile = os.path.join(self.path, 'conf', java_policy) if os.path.exists(policyFile): - print "~ using policy file \"%s\"" % policyFile + print("~ using policy file \"%s\"" % policyFile) java_args.append('-Djava.security.manager') 
java_args.append('-Djava.security.policy==%s' % policyFile) - if self.play_env.has_key('http.port'): + if 'http.port' in self.play_env: args += ["--http.port=%s" % self.play_env['http.port']] - if self.play_env.has_key('https.port'): + if 'https.port' in self.play_env: args += ["--https.port=%s" % self.play_env['https.port']] java_args.append('-Dfile.encoding=utf-8') @@ -345,7 +348,7 @@ def _absoluteToRelative(path, start): return os.path.curdir return os.path.join(*rel_list) -class PlayConfParser: +class PlayConfParser(object): DEFAULTS = { 'http.port': '9000', @@ -355,15 +358,15 @@ class PlayConfParser: def __init__(self, confFolder, env): self.id = env["id"] self.entries = self.readFile(confFolder, "application.conf") - if env.has_key('jpda.port'): + if 'jpda.port' in env: self.entries['jpda.port'] = env['jpda.port'] - if env.has_key('http.port'): + if 'http.port' in env: self.entries['http.port'] = env['http.port'] - if env.has_key('jvm_version'): + if 'jvm_version' in env: self.entries['jvm_version'] = env['jvm_version'] def readFile(self, confFolder, filename): - f = file(confFolder + filename) + f = open(confFolder + filename, 'r') result = dict() for line in f: linedef = line.strip() @@ -382,12 +385,12 @@ def readFile(self, confFolder, filename): washedResult = dict() # first get all keys with correct framework id - for (key, value) in result.items(): + for (key, value) in list(result.items()): if key.startswith('%' + self.id + '.'): stripedKey = key[(len(self.id)+2):] washedResult[stripedKey]=value # now get all without framework id if we don't already have it - for (key, value) in result.items(): + for (key, value) in list(result.items()): if not key.startswith('%'): # check if we already have it if not (key in washedResult): @@ -396,7 +399,7 @@ def readFile(self, confFolder, filename): # find all @include includeFiles = [] - for (key, value) in washedResult.items(): + for (key, value) in list(washedResult.items()): if key.startswith('@include.'): 
includeFiles.append(value) @@ -407,10 +410,10 @@ def readFile(self, confFolder, filename): fromIncludeFile = self.readFile(confFolder, self._expandValue(includeFile)) # add everything from include file - for (key, value) in fromIncludeFile.items(): + for (key, value) in list(fromIncludeFile.items()): washedResult[key]=value except Exception as err: - print "~ Failed to load included configuration %s: %s" % (includeFile, err) + print("~ Failed to load included configuration %s: %s" % (includeFile, err)) return washedResult @@ -423,7 +426,7 @@ def get(self, key): def getAllKeys(self, query): result = [] - for (key, value) in self.entries.items(): + for (key, value) in list(self.entries.items()): if key.startswith(query): result.append(key) return result diff --git a/framework/pym/play/cmdloader.py b/framework/pym/play/cmdloader.py index fa4406cecd..459f6850fc 100644 --- a/framework/pym/play/cmdloader.py +++ b/framework/pym/play/cmdloader.py @@ -1,6 +1,8 @@ +from __future__ import print_function import imp import os import warnings +import traceback def play_formatwarning(msg, *a): # ignore everything except the message @@ -9,7 +11,7 @@ def play_formatwarning(msg, *a): warnings.formatwarning = play_formatwarning -class CommandLoader: +class CommandLoader(object): def __init__(self, play_path): self.path = os.path.join(play_path, 'framework', 'pym', 'play', 'commands') self.commands = {} @@ -23,7 +25,9 @@ def load_core(self): name = filename.replace(".py", "") mod = load_python_module(name, self.path) self._load_cmd_from(mod) - except: + except Exception as e: + print (e) + traceback.print_exc() warnings.warn("!! Warning: could not load core command file " + filename, RuntimeWarning) def load_play_module(self, modname): @@ -33,10 +37,10 @@ def load_play_module(self, modname): leafname = os.path.basename(modname).split('.')[0] mod = imp.load_source(leafname, os.path.join(modname, "commands.py")) self._load_cmd_from(mod) - except Exception, e: - print '~' - print '~ !! 
Error while loading %s: %s' % (commands, e) - print '~' + except Exception as e: + print('~') + print('~ !! Error while loading %s: %s' % (commands, e)) + print('~') pass # No command to load in this module def _load_cmd_from(self, mod): @@ -57,6 +61,6 @@ def load_python_module(name, location): try: return imp.load_module(name, mod_desc[0], mod_desc[1], mod_desc[2]) finally: - if mod_file is not None and not mod_file.closed: + if mod_file != None and not mod_file.closed: mod_file.close() diff --git a/framework/pym/play/commands/ant.py b/framework/pym/play/commands/ant.py index 366415701c..5cba882638 100644 --- a/framework/pym/play/commands/ant.py +++ b/framework/pym/play/commands/ant.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil import time @@ -20,6 +21,6 @@ def execute(**kargs): shutil.copyfile(os.path.join(play_env["basedir"], 'resources/build.xml'), os.path.join(app.path, 'build.xml')) - print "~ OK, a build.xml file has been created" - print "~ Define the PLAY_PATH env property, and use it with ant run|start|stop" - print "~" + print("~ OK, a build.xml file has been created") + print("~ Define the PLAY_PATH env property, and use it with ant run|start|stop") + print("~") diff --git a/framework/pym/play/commands/autotest.py b/framework/pym/play/commands/autotest.py index 3b082bd8fa..22702c8e36 100644 --- a/framework/pym/play/commands/autotest.py +++ b/framework/pym/play/commands/autotest.py @@ -1,9 +1,10 @@ +from __future__ import print_function # Command related to execution: auto-test import sys import os, os.path import shutil -import urllib, urllib2 +import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse import subprocess import webbrowser import time @@ -28,14 +29,14 @@ def execute(**kargs): def autotest(app, args): app.check() - print "~ Running in test mode" - print "~ Ctrl+C to stop" - print "~ " + print("~ Running in test mode") + print("~ Ctrl+C to stop") + print("~ ") - 
print "~ Deleting %s" % os.path.normpath(os.path.join(app.path, 'tmp')) + print("~ Deleting %s" % os.path.normpath(os.path.join(app.path, 'tmp'))) if os.path.exists(os.path.join(app.path, 'tmp')): shutil.rmtree(os.path.join(app.path, 'tmp')) - print "~" + print("~") # Kill if exists http_port = 9000 @@ -46,16 +47,16 @@ def autotest(app, args): else: http_port = app.readConf('http.port') try: - proxy_handler = urllib2.ProxyHandler({}) - opener = urllib2.build_opener(proxy_handler) + proxy_handler = urllib.request.ProxyHandler({}) + opener = urllib.request.build_opener(proxy_handler) opener.open('http://localhost:%s/@kill' % http_port) - except Exception, e: + except Exception as e: pass # Do not run the app if SSL is configured and no cert store is configured keystore = app.readConf('keystore.file') if protocol == 'https' and not keystore: - print "https without keystore configured. play auto-test will fail. Exiting now." + print("https without keystore configured. play auto-test will fail. Exiting now.") sys.exit(-1) # read parameters @@ -94,25 +95,25 @@ def autotest(app, args): try: play_process = subprocess.Popen(java_cmd, env=os.environ, stdout=sout) except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) soutint = open(os.path.join(app.log_path(), 'system.out'), 'r') while True: if play_process.poll(): - print "~" - print "~ Oops, application has not started?" 
- print "~" + print("~") + print("~ Oops, application has not started?") + print("~") sys.exit(-1) line = soutint.readline().strip() if line: - print line + print(line) if line.find('Server is up and running') > -1: # This line is written out by Server.java to system.out and is not log file dependent soutint.close() break # Run FirePhoque - print "~" - print "~ Starting FirePhoque..." + print("~") + print("~ Starting FirePhoque...") headless_browser = '' if app.readConf('headlessBrowser'): @@ -134,25 +135,25 @@ def autotest(app, args): try: subprocess.call(java_cmd, env=os.environ) except OSError: - print "Could not execute the headless browser. " + print("Could not execute the headless browser. ") sys.exit(-1) - print "~" + print("~") time.sleep(1) # Kill if exists try: - proxy_handler = urllib2.ProxyHandler({}) - opener = urllib2.build_opener(proxy_handler) + proxy_handler = urllib.request.ProxyHandler({}) + opener = urllib.request.build_opener(proxy_handler) opener.open('%s://localhost:%s/@kill' % (protocol, http_port)) - except Exception, e: + except Exception as e: pass if os.path.exists(os.path.join(app.path, 'test-result/result.passed')): - print "~ All tests passed" - print "~" + print("~ All tests passed") + print("~") testspassed = True if os.path.exists(os.path.join(app.path, 'test-result/result.failed')): - print "~ Some tests have failed. See file://%s for results" % test_result - print "~" + print("~ Some tests have failed. 
See file://%s for results" % test_result) + print("~") sys.exit(1) diff --git a/framework/pym/play/commands/base.py b/framework/pym/play/commands/base.py index d427f3b30d..7d21047f86 100644 --- a/framework/pym/play/commands/base.py +++ b/framework/pym/play/commands/base.py @@ -1,11 +1,14 @@ +from __future__ import print_function # Command related to creation and execution: run, new, clean +from builtins import input +from builtins import str import sys import os import subprocess import shutil import getopt -import urllib2 +import urllib.request, urllib.error, urllib.parse import webbrowser import time import signal @@ -54,14 +57,14 @@ def new(app, args, env, cmdloader=None): withModules = a.split(',') if o in ('--name'): application_name = a - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ Sorry, unrecognized option" - print "~ " + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ Sorry, unrecognized option") + print("~ ") sys.exit(-1) if os.path.exists(app.path): - print "~ Oops. %s already exists" % app.path - print "~" + print("~ Oops. %s already exists" % app.path) + print("~") sys.exit(-1) md = [] @@ -76,16 +79,16 @@ def new(app, args, env, cmdloader=None): break if not dirname: - print "~ Oops. No module %s found" % m - print "~ Try to install it using 'play install %s'" % m - print "~" + print("~ Oops. No module %s found" % m) + print("~ Try to install it using 'play install %s'" % m) + print("~") sys.exit(-1) md.append(dirname) - print "~ The new application will be created in %s" % os.path.normpath(app.path) + print("~ The new application will be created in %s" % os.path.normpath(app.path)) if application_name is None: - application_name = raw_input("~ What is the application name? [%s] " % os.path.basename(app.path)) + application_name = input("~ What is the application name? 
[%s] " % os.path.basename(app.path)) if application_name == "": application_name = os.path.basename(app.path) copy_directory(os.path.join(env["basedir"], 'resources/application-skel'), app.path) @@ -94,7 +97,7 @@ def new(app, args, env, cmdloader=None): app.check() replaceAll(os.path.join(app.path, 'conf/application.conf'), r'%APPLICATION_NAME%', application_name) replaceAll(os.path.join(app.path, 'conf/application.conf'), r'%SECRET_KEY%', secretKey()) - print "~" + print("~") # Configure modules for m in md: @@ -110,10 +113,10 @@ def new(app, args, env, cmdloader=None): cmdloader.commands['dependencies'].execute(command='dependencies', app=app, args=['--sync'], env=env, cmdloader=cmdloader) - print "~ OK, the application is created." - print "~ Start it with : play run %s" % sys.argv[2] - print "~ Have fun!" - print "~" + print("~ OK, the application is created.") + print("~ Start it with : play run %s" % sys.argv[2]) + print("~ Have fun!") + print("~") process = None @@ -131,19 +134,19 @@ def handle_sigint(signum, frame): if 'process' in globals(): if first_sigint: # Prefix with new line because ^C usually appears on the terminal - print "\nTerminating Java process" + print("\nTerminating Java process") process.terminate() first_sigint = False else: - print "\nKilling Java process" + print("\nKilling Java process") process.kill() def run(app, args): global process app.check() - print "~ Ctrl+C to stop" - print "~ " + print("~ Ctrl+C to stop") + print("~ ") java_cmd = app.java_cmd(args) try: process = subprocess.Popen (java_cmd, env=os.environ) @@ -153,46 +156,46 @@ def run(app, args): if 0 != return_code: sys.exit(return_code) except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). 
" + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) - print + print() def clean(app): app.check() tmp = app.readConf('play.tmp') if tmp is None or not tmp.strip(): tmp = 'tmp' - print "~ Deleting %s" % os.path.normpath(os.path.join(app.path, tmp)) + print("~ Deleting %s" % os.path.normpath(os.path.join(app.path, tmp))) if os.path.exists(os.path.join(app.path, tmp)): shutil.rmtree(os.path.join(app.path, tmp)) - print "~" + print("~") def show_modules(app, args): app.check() modules = app.modules() if len(modules): - print "~ Application modules are:" - print "~ " + print("~ Application modules are:") + print("~ ") for module in modules: - print "~ %s" % module + print("~ %s" % module) else: - print "~ No modules installed in this application" - print "~ " + print("~ No modules installed in this application") + print("~ ") sys.exit(0) def id(play_env): if not play_env["id"]: - print "~ framework ID is not set" - new_id = raw_input("~ What is the new framework ID (or blank to unset)? ") + print("~ framework ID is not set") + new_id = input("~ What is the new framework ID (or blank to unset)? 
") if new_id: - print "~" - print "~ OK, the framework ID is now %s" % new_id - print "~" + print("~") + print("~ OK, the framework ID is now %s" % new_id) + print("~") open(play_env["id_file"], 'w').write(new_id) else: - print "~" - print "~ OK, the framework ID is unset" - print "~" + print("~") + print("~ OK, the framework ID is unset") + print("~") if os.path.exists(play_env["id_file"]): os.remove(play_env["id_file"]) @@ -203,13 +206,13 @@ def kill(pid): import ctypes handle = ctypes.windll.kernel32.OpenProcess(1, False, int(pid)) if not ctypes.windll.kernel32.TerminateProcess(handle, 0): - print "~ Cannot kill the process with pid %s (ERROR %s)" % (pid, ctypes.windll.kernel32.GetLastError()) - print "~ " + print("~ Cannot kill the process with pid %s (ERROR %s)" % (pid, ctypes.windll.kernel32.GetLastError())) + print("~ ") sys.exit(-1) else: try: os.kill(int(pid), 15) except OSError: - print "~ Play was not running (Process id %s not found)" % pid - print "~" + print("~ Play was not running (Process id %s not found)" % pid) + print("~") sys.exit(-1) diff --git a/framework/pym/play/commands/check.py b/framework/pym/play/commands/check.py index 5ec55595a2..e176cd2379 100644 --- a/framework/pym/play/commands/check.py +++ b/framework/pym/play/commands/check.py @@ -1,6 +1,10 @@ +from __future__ import print_function +from builtins import str +from builtins import range +from builtins import object import os, os.path import shutil -import urllib, urllib2 +import urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse import simplejson as json from play.utils import * @@ -27,23 +31,23 @@ def execute(**kargs): releases = allreleases() if len(releases) == 0: - print "~ No release found." + print("~ No release found.") elif current == max(releases): - print "~ You are using the latest version." 
+ print("~ You are using the latest version.") else: - print "~ \tLatest release: " + str(max(releases)) - print "~ \tYour version : " + str(current) - print "~" - print "~ Latest release download: " + max(releases).url() + print("~ \tLatest release: " + str(max(releases))) + print("~ \tYour version : " + str(current)) + print("~") + print("~ Latest release download: " + max(releases).url()) - print "~" + print("~") def allreleases(): try: - req = urllib2.Request(TAGS_URL) + req = urllib.request.Request(TAGS_URL) req.add_header('Accept', 'application/json') - opener = urllib2.build_opener() + opener = urllib.request.build_opener() result = opener.open(req) jsonObject = json.loads(result.read()) releases = [] @@ -51,18 +55,18 @@ def allreleases(): releases.append(Release(tagObj["name"])) return releases - except urllib2.HTTPError, e: - print "~ Oops," - print "~ Cannot contact github..." - print "~" + except urllib.error.HTTPError as e: + print("~ Oops,") + print("~ Cannot contact github...") + print("~") sys.exit(-1) - except urllib2.URLError, e: - print "~ Oops," - print "~ Cannot contact github..." 
- print "~" + except urllib.error.URLError as e: + print("~ Oops,") + print("~ Cannot contact github...") + print("~") sys.exit(-1) -class Release: +class Release(object): # TODO: Be smarter at analysing the rest (ex: RC1 vs RC2) def __init__(self, strversion): @@ -73,7 +77,7 @@ def __init__(self, strversion): self.numpart = '' self.rest = strversion.replace(self.numpart, "") try: - self.versions = map(lambda x: int(x), self.numpart.split(".")) + self.versions = [int(x) for x in self.numpart.split(".")] except: self.versions = [0,0] if not self.rest: self.rest = "Z" diff --git a/framework/pym/play/commands/classpath.py b/framework/pym/play/commands/classpath.py index a887c9a4de..b249fe2dd8 100644 --- a/framework/pym/play/commands/classpath.py +++ b/framework/pym/play/commands/classpath.py @@ -1,3 +1,4 @@ +from __future__ import print_function # Show the computed classpath for the application COMMANDS = ['cp', 'classpath'] @@ -10,7 +11,7 @@ def execute(**kargs): command = kargs.get("command") app = kargs.get("app") args = kargs.get("args") - print "~ Computed classpath is:" - print "~ " - print app.getClasspath() - print "~ " + print("~ Computed classpath is:") + print("~ ") + print(app.getClasspath()) + print("~ ") diff --git a/framework/pym/play/commands/daemon.py b/framework/pym/play/commands/daemon.py index 9f66612af3..f6b2512790 100644 --- a/framework/pym/play/commands/daemon.py +++ b/framework/pym/play/commands/daemon.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from builtins import str +from builtins import range import errno import os import os.path @@ -44,12 +47,12 @@ def start(app, args): if os.path.exists(app.pid_path()): pid = open(app.pid_path()).readline().strip() if process_running(pid): - print "~ Oops. %s is already started (pid:%s)! (or delete %s)" % ( - os.path.normpath(app.path), pid, os.path.normpath(app.pid_path())) - print "~" + print("~ Oops. %s is already started (pid:%s)! 
(or delete %s)" % ( + os.path.normpath(app.path), pid, os.path.normpath(app.pid_path()))) + print("~") sys.exit(1) else: - print "~ removing pid file %s for not running pid %s" % (os.path.normpath(app.pid_path()), pid) + print("~ removing pid file %s for not running pid %s" % (os.path.normpath(app.pid_path()), pid)) os.remove(app.pid_path()) sysout = app.readConf('application.log.system.out') @@ -61,35 +64,35 @@ def start(app, args): try: pid = subprocess.Popen(app.java_cmd(args), stdout=sout, env=os.environ).pid except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) - print "~ OK, %s is started" % os.path.normpath(app.path) + print("~ OK, %s is started" % os.path.normpath(app.path)) if sysout: - print "~ output is redirected to %s" % os.path.normpath(os.path.join(app.log_path(), 'system.out')) + print("~ output is redirected to %s" % os.path.normpath(os.path.join(app.log_path(), 'system.out'))) pid_file = open(app.pid_path(), 'w') pid_file.write(str(pid)) - print "~ pid is %s" % pid - print "~" + print("~ pid is %s" % pid) + print("~") def stop(app): app.check() if not os.path.exists(app.pid_path()): - print "~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path) - print "~" + print("~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path)) + print("~") sys.exit(-1) pid = open(app.pid_path()).readline().strip() kill(pid) os.remove(app.pid_path()) - print "~ OK, %s is stopped" % app.path - print "~" + print("~ OK, %s is stopped" % app.path) + print("~") def restart(app, args): app.check() if not os.path.exists(app.pid_path()): - print "~ Oops! 
%s is not started (server.pid not found)" % os.path.normpath(app.path) - print "~" + print("~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path)) + print("~") else: pid = open(app.pid_path()).readline().strip() os.remove(app.pid_path()) @@ -105,34 +108,34 @@ def restart(app, args): try: pid = subprocess.Popen(java_cmd, stdout=sout, env=os.environ).pid except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) - print "~ OK, %s is restarted" % os.path.normpath(app.path) + print("~ OK, %s is restarted" % os.path.normpath(app.path)) if sysout: - print "~ output is redirected to %s" % os.path.normpath(os.path.join(app.log_path(), 'system.out')) + print("~ output is redirected to %s" % os.path.normpath(os.path.join(app.log_path(), 'system.out'))) pid_file = open(app.pid_path(), 'w') pid_file.write(str(pid)) - print "~ New pid is %s" % pid - print "~" + print("~ New pid is %s" % pid) + print("~") sys.exit(0) def pid(app): app.check() if not os.path.exists(app.pid_path()): - print "~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path) - print "~" + print("~ Oops! %s is not started (server.pid not found)" % os.path.normpath(app.path)) + print("~") sys.exit(-1) pid = open(app.pid_path()).readline().strip() - print "~ PID of the running applications is %s" % pid - print "~ " + print("~ PID of the running applications is %s" % pid) + print("~ ") def out(app): app.check() if not os.path.exists(os.path.join(app.log_path(), 'system.out')): - print "~ Oops! %s not found" % os.path.normpath(os.path.join(app.log_path(), 'system.out')) - print "~" + print("~ Oops! 
%s not found" % os.path.normpath(os.path.join(app.log_path(), 'system.out'))) + print("~") sys.exit(-1) sout = open(os.path.join(app.log_path(), 'system.out'), 'r') try: @@ -146,7 +149,7 @@ def out(app): time.sleep(1) sout.seek(where) else: - print line + print(line) def kill(pid): @@ -156,16 +159,16 @@ def kill(pid): process = ctypes.windll.kernel32.TerminateProcess(handle, 0) ctypes.windll.kernel32.CloseHandle(handle) if not process: - print "~ Cannot kill the process with pid %s (ERROR %s)" % (pid, ctypes.windll.kernel32.GetLastError()) - print "~ " + print("~ Cannot kill the process with pid %s (ERROR %s)" % (pid, ctypes.windll.kernel32.GetLastError())) + print("~ ") sys.exit(-1) - print "~ Process with PID %s terminated" % pid + print("~ Process with PID %s terminated" % pid) else: try: _terminate_unix_process_if_exists(int(pid)) except OSError: - print "~ Play was not running (Process id %s not found)" % pid - print "~" + print("~ Play was not running (Process id %s not found)" % pid) + print("~") sys.exit(-1) @@ -211,8 +214,8 @@ def process_list_nt(): else: proc_dict[instance] = 0 idProcessLocalizedName = win32pdhutil.find_pdh_counter_localized_name("ID Process") - for instance, max_instances in proc_dict.items(): - for inum in xrange(max_instances + 1): + for instance, max_instances in list(proc_dict.items()): + for inum in range(max_instances + 1): hq = win32pdh.OpenQuery() # initializes the query handle path = win32pdh.MakeCounterPath((None, processLocalizedName, instance, None, inum, idProcessLocalizedName)) counter_handle = win32pdh.AddCounter(hq, path) diff --git a/framework/pym/play/commands/deps.py b/framework/pym/play/commands/deps.py index d82da2b62f..2d2b91d046 100644 --- a/framework/pym/play/commands/deps.py +++ b/framework/pym/play/commands/deps.py @@ -1,6 +1,7 @@ +from __future__ import print_function import os, os.path import shutil -import urllib, urllib2 +import urllib.request, urllib.parse, urllib.error
import subprocess import simplejson as json @@ -57,14 +58,14 @@ def execute(**kargs): add_options.append('-Dclearcache') if args.count('--jpda'): args.remove('--jpda') - print "~ Waiting for JPDA client to continue" + print("~ Waiting for JPDA client to continue") add_options.append('-Xdebug') add_options.append('-Xrunjdwp:transport=dt_socket,address=%s,server=y,suspend=y' % app.jpda_port) for arg in args: if arg.startswith("-D"): add_options.append(arg) elif not arg.startswith('-Xm'): - print "~ WARNING: " + arg + " argument will be skipped" + print("~ WARNING: " + arg + " argument will be skipped") java_cmd = [java_path()] + add_options + args_memory + ['-classpath', app.fw_cp_args(), 'play.deps.DependenciesManager'] try: @@ -72,5 +73,5 @@ def execute(**kargs): if 0 != return_code: sys.exit(return_code); except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). 
") sys.exit(-1) diff --git a/framework/pym/play/commands/eclipse.py b/framework/pym/play/commands/eclipse.py index 93211c16f2..01a6eb1b82 100644 --- a/framework/pym/play/commands/eclipse.py +++ b/framework/pym/play/commands/eclipse.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from builtins import str import os, os.path import shutil import time @@ -30,9 +32,9 @@ def execute(**kargs): javaVersion = getJavaVersion() - print "~ using java version \"%s\"" % javaVersion + print("~ using java version \"%s\"" % javaVersion) if javaVersion.startswith("1.5") or javaVersion.startswith("1.6") or javaVersion.startswith("1.7"): - print "~ ERROR: java version prior to 1.8 are no longer supported: current version \"%s\" : please update" % javaVersion + print("~ ERROR: java version prior to 1.8 are no longer supported: current version \"%s\" : please update" % javaVersion) vm_arguments = vm_arguments +' -noverify' @@ -77,7 +79,7 @@ def execute(**kargs): # pointers to source jars produced by 'play deps' src_file = os.path.join(lib_src, os.path.basename(el) + '.src') if os.path.exists(src_file): - f = file(src_file) + f = open(src_file, 'r') cpJarToSource[el] = f.readline().rstrip() f.close() @@ -93,10 +95,10 @@ def execute(**kargs): if el == playJarPath: cpXML += '\n\t' % (os.path.normpath(el) , playSourcePath) else: - if cpJarToSource.has_key(el): + if el in cpJarToSource: cpXML += '\n\t' % (os.path.normpath(el), cpJarToSource[el]) else: - if javadocLocation.has_key(el): + if el in javadocLocation: cpXML += '\n\t\t' % os.path.normpath(el) cpXML += '\n\t\t\t' f = file(javadocLocation[el]) @@ -156,11 +158,11 @@ def execute(**kargs): os.rename(os.path.join(app.path, 'eclipse/debug.launch'), os.path.join(app.path, 'eclipse/%s.launch' % application_name)) if is_application: - print "~ OK, the application \"%s\" is ready for eclipse" % application_name + print("~ OK, the application \"%s\" is ready for eclipse" % application_name) else: - print "~ OK, the module \"%s\" is 
ready for eclipse" % application_name - print "~ Use File/Import/General/Existing project to import %s into eclipse" % os.path.normpath(app.path) - print "~" - print "~ Use eclipsify again when you want to update eclipse configuration files." - print "~ However, it's often better to delete and re-import the project into your workspace since eclipse keeps dirty caches..." - print "~" + print("~ OK, the module \"%s\" is ready for eclipse" % application_name) + print("~ Use File/Import/General/Existing project to import %s into eclipse" % os.path.normpath(app.path)) + print("~") + print("~ Use eclipsify again when you want to update eclipse configuration files.") + print("~ However, it's often better to delete and re-import the project into your workspace since eclipse keeps dirty caches...") + print("~") diff --git a/framework/pym/play/commands/evolutions.py b/framework/pym/play/commands/evolutions.py index acf5eb26bc..27449296ab 100644 --- a/framework/pym/play/commands/evolutions.py +++ b/framework/pym/play/commands/evolutions.py @@ -1,6 +1,7 @@ +from __future__ import print_function import os, os.path import shutil -import urllib, urllib2 +import urllib.request, urllib.parse, urllib.error import subprocess import simplejson as json @@ -36,7 +37,7 @@ def execute(**kargs): add_options = ['-Dapplication.path=%s' % (app.path), '-Dframework.path=%s' % (play_env['basedir']), '-Dplay.id=%s' % play_env['id'], '-Dplay.version=%s' % play_env['version']] if args.count('--jpda'): - print "~ Waiting for JPDA client to continue" + print("~ Waiting for JPDA client to continue") args.remove('--jpda') add_options.append('-Xdebug') add_options.append('-Xrunjdwp:transport=dt_socket,address=%s,server=y,suspend=y' % app.jpda_port) @@ -52,5 +53,5 @@ def execute(**kargs): if 0 != return_code: sys.exit(return_code); except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly 
(the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). ") sys.exit(-1) diff --git a/framework/pym/play/commands/help.py b/framework/pym/play/commands/help.py index 821428240b..2f4e4efa73 100644 --- a/framework/pym/play/commands/help.py +++ b/framework/pym/play/commands/help.py @@ -1,3 +1,4 @@ +from __future__ import print_function # Display help import sys, os, re @@ -19,7 +20,7 @@ def execute(**kargs): cmd = args[0] help_file = os.path.join(play_env["basedir"], 'documentation', 'commands', 'cmd-%s.txt' % cmd) if os.path.exists(help_file): - print open(help_file, 'r').read() + print(open(help_file, 'r').read()) else: exists = False slugCmd = re.sub('[-\s]+', '-', re.sub('[^\w\s-]', '', cmd.encode('ascii', 'ignore')).strip().lower()) @@ -30,38 +31,38 @@ def execute(**kargs): % slugCmd) exists = os.path.exists(help_file) if exists: - print open(help_file, 'r').read() + print(open(help_file, 'r').read()) break if not exists: - print '~ Oops, command \'%s\' not found. Try just \'play help\' to list all commands.' % cmd - print '~' + print('~ Oops, command \'%s\' not found. Try just \'play help\' to list all commands.' 
% cmd) + print('~') sys.exit(-1) else: main_help(cmdloader.commands, play_env) def main_help(commands, play_env): modules_commands = [] - print "~ For all commands, if the application is not specified, the current directory is used" - print "~ Use 'play help cmd' to get more help on a specific command" - print "~" - print "~ Core commands:" - print "~ ~~~~~~~~~~~~~~" + print("~ For all commands, if the application is not specified, the current directory is used") + print("~ Use 'play help cmd' to get more help on a specific command") + print("~") + print("~ Core commands:") + print("~ ~~~~~~~~~~~~~~") for cmd in sorted(commands): if not isCore(commands[cmd], play_env): modules_commands.append(cmd) continue if 'HELP' in dir(commands[cmd]) and cmd in commands[cmd].HELP: - print "~ " + cmd + (' ' * (16 - len(cmd))) + commands[cmd].HELP[cmd] + print("~ " + cmd + (' ' * (16 - len(cmd))) + commands[cmd].HELP[cmd]) if len(modules_commands) > 0: - print "~" - print "~ Modules commands:" - print "~ ~~~~~~~~~~~~~~~~~" + print("~") + print("~ Modules commands:") + print("~ ~~~~~~~~~~~~~~~~~") for cmd in modules_commands: if 'HELP' in dir(commands[cmd]) and cmd in commands[cmd].HELP: - print "~ " + cmd + (' ' * (20 - len(cmd))) + commands[cmd].HELP[cmd] - print "~" - print "~ Also refer to documentation at https://www.playframework.com/documentation" - print "~" + print("~ " + cmd + (' ' * (20 - len(cmd))) + commands[cmd].HELP[cmd]) + print("~") + print("~ Also refer to documentation at https://www.playframework.com/documentation") + print("~") def isCore(mod, play_env): path = os.path.realpath(mod.__file__) diff --git a/framework/pym/play/commands/intellij.py b/framework/pym/play/commands/intellij.py index a5f1e675dc..4c61e9c7d6 100644 --- a/framework/pym/play/commands/intellij.py +++ b/framework/pym/play/commands/intellij.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil @@ -66,7 +67,7 @@ def execute(**kargs): replaceAll(iprFile, 
r'%PROJECT_NAME%', application_name) - print "~ OK, the application is ready for Intellij Idea" - print "~ Use File, Open Project... to open \"" + application_name + ".ipr\"" - print "~" + print("~ OK, the application is ready for Intellij Idea") + print("~ Use File, Open Project... to open \"" + application_name + ".ipr\"") + print("~") diff --git a/framework/pym/play/commands/javadoc.py b/framework/pym/play/commands/javadoc.py index dcde0319cc..b366c6dbbf 100644 --- a/framework/pym/play/commands/javadoc.py +++ b/framework/pym/play/commands/javadoc.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil import subprocess @@ -19,7 +20,7 @@ def execute(**kargs): args = kargs.get("args") play_env = kargs.get("env") - if not os.environ.has_key('JAVA_HOME'): + if 'JAVA_HOME' not in os.environ: javadoc_path = "javadoc" else: javadoc_path = os.path.normpath("%s/bin/javadoc" % os.environ['JAVA_HOME']) @@ -38,7 +39,7 @@ def execute(**kargs): defineJavadocFiles(app, outdir) javadoc_cmd = [javadoc_path, '@'+os.path.join(outdir,'javadocOptions'), '@'+os.path.join(outdir,'javadocFiles')] - print "Generating Javadoc in " + outdir + "..." + print("Generating Javadoc in " + outdir + "...") return_code = subprocess.call(javadoc_cmd, env=os.environ, stdout=sout, stderr=serr) # Remove configuration file @@ -47,10 +48,10 @@ def execute(**kargs): # Display the status if return_code != 0: - print "Unable to create Javadocs. See " + os.path.join(app.log_path(), 'javadoc.err') + " for errors." + print("Unable to create Javadocs. See " + os.path.join(app.log_path(), 'javadoc.err') + " for errors.") sys.exit(return_code) - print "Done! You can open " + os.path.join(outdir, 'overview-tree.html') + " in your browser." + print("Done! 
You can open " + os.path.join(outdir, 'overview-tree.html') + " in your browser.") @@ -69,21 +70,21 @@ def defineJavadocOptions(app, outdir, args): f.write(' -footer "' + app.readConf('application.name') + '"') if args.count('--links'): - print "~ Build project Javadoc with links to :" + print("~ Build project Javadoc with links to :") args.remove('--links') # Add link to JavaDoc of JAVA javaVersion = getJavaVersion() - print "~ using java version \"%s\"" % javaVersion + print("~ using java version \"%s\"" % javaVersion) if javaVersion.startswith("1.5"): - print "~ Java(TM) Platform, Platform Standard Edition 5.0" - print "~ Java(TM) EE 5 Specification APIs" + print("~ Java(TM) Platform, Platform Standard Edition 5.0") + print("~ Java(TM) EE 5 Specification APIs") f.write(' -link http://docs.oracle.com/javase/1.5.0/docs/api/') f.write(' -link http://docs.oracle.com/javaee/5/api/') else: urlVersion = javaVersion[2:3] - print "~ Java(TM) Platform, Standard Edition " + urlVersion + " API Specification" - print "~ Java(TM) EE " + urlVersion + " Specification APIs" + print("~ Java(TM) Platform, Standard Edition " + urlVersion + " API Specification") + print("~ Java(TM) EE " + urlVersion + " Specification APIs") f.write(' -link http://docs.oracle.com/javase/' + urlVersion + '/docs/api/') f.write(' -link http://docs.oracle.com/javaee/' + urlVersion + '/api/') @@ -91,10 +92,10 @@ def defineJavadocOptions(app, outdir, args): # Add link to JavaDoc of Play Framework playVersion = app.play_env['version'] if "localbuild" in playVersion: - print "~ API documentation to Play! Framework V" + playVersion + " doesn't exist => link to V" + DEFAULT_API_VERSION + print("~ API documentation to Play! 
Framework V" + playVersion + " doesn't exist => link to V" + DEFAULT_API_VERSION) playVersion = DEFAULT_API_VERSION - print "~ Play Framework V" + playVersion + " API documentation" + print("~ Play Framework V" + playVersion + " API documentation") f.write(' -link https://www.playframework.com/documentation/' + playVersion + '/api/') diff --git a/framework/pym/play/commands/modulesrepo.py b/framework/pym/play/commands/modulesrepo.py index d8de6670fe..c05dbd125d 100644 --- a/framework/pym/play/commands/modulesrepo.py +++ b/framework/pym/play/commands/modulesrepo.py @@ -1,14 +1,20 @@ +from __future__ import print_function +from __future__ import division +from builtins import str +from builtins import input +from builtins import range +from builtins import object import os import subprocess import sys import re import zipfile -import urllib2 +import urllib.request, urllib.error, urllib.parse import shutil import string import imp import time -import urllib +import urllib.request, urllib.parse, urllib.error import yaml from play.utils import * @@ -78,12 +84,12 @@ class Downloader(object): before = .0 history = [] cycles = 0 - average = lambda self: sum(self.history) / (len(self.history) or 1) + average = lambda self: sum(self.history) // (len(self.history) or 1) def __init__(self, width=55): self.width = width - self.kibi = lambda bits: bits / 2 ** 10 - self.proc = lambda a, b: a / (b * 0.01) + self.kibi = lambda bits: bits // (2 ** 10) + self.proc = lambda a, b: a // (b * 0.01) def retrieve(self, url, destination, callback=None): self.size = 0 @@ -92,12 +98,12 @@ def retrieve(self, url, destination, callback=None): headers={'User-Agent':DEFAULT_USER_AGENT, 'Accept': 'application/json' } - req = urllib2.Request(url, headers=headers) - result = urllib2.urlopen(req) + req = urllib.request.Request(url, headers=headers) + result = urllib.request.urlopen(req) self.chunk_read(result, destination, report_hook=self.chunk_report) except KeyboardInterrupt: - print '\n~ Download 
cancelled' - print '~' + print('\n~ Download cancelled') + print('~') for i in range(5): try: os.remove(destination) @@ -107,7 +113,7 @@ def retrieve(self, url, destination, callback=None): else: raise if callback: callback() sys.exit() - print '' + print('') return self.size def chunk_read(self, response, destination, chunk_size=8192, report_hook=None): @@ -151,13 +157,13 @@ def progress(self, bytes_so_far, blocksize, filesize): now = time.clock() elapsed = now-self.before if elapsed: - speed = self.kibi(blocksize * 3 / elapsed) + speed = self.kibi(blocksize * 3 // elapsed) self.history.append(speed) self.history = self.history[-4:] self.before = now - average = round(sum(self.history[-4:]) / 4, 1) + average = round(sum(self.history[-4:]) // 4, 1) self.size = self.kibi(bits) - print '\r~ [%s] %s KiB/s ' % (bar, str(average)), + print('\r~ [%s] %s KiB/s ' % (bar, str(average)), end=' ') def bar(self, bytes_so_far, filesize, done): span = self.width * done * 0.01 @@ -165,7 +171,7 @@ def bar(self, bytes_so_far, filesize, done): result = ('%s of %s KiB (%d%%)' % (self.kibi(bytes_so_far), self.kibi(filesize), done,)).center(self.width) return result.replace(' ', '-', int(span - offset)) -class Unzip: +class Unzip(object): def __init__(self, verbose = False, percent = 10): self.verbose = verbose self.percent = percent @@ -178,12 +184,12 @@ def extract(self, file, dir): self._createstructure(file, dir) num_files = len(zf.namelist()) percent = self.percent - divisions = 100 / percent - perc = int(num_files / divisions) + divisions = 100 // percent + perc = int( num_files / divisions) # extract files to directory structure for i, name in enumerate(zf.namelist()): if self.verbose == True: - print "Extracting %s" % name + print("Extracting %s" % name) elif perc > 0 and (i % perc) == 0 and i > 0: complete = int (i / perc) * percent if not name.endswith('/'): @@ -219,12 +225,12 @@ def _listdirs(self, file): def new(app, args, play_env): if os.path.exists(app.path): - print "~ 
Oops. %s already exists" % app.path - print "~" + print("~ Oops. %s already exists" % app.path) + print("~") sys.exit(-1) - print "~ The new module will be created in %s" % os.path.normpath(app.path) - print "~" + print("~ The new module will be created in %s" % os.path.normpath(app.path)) + print("~") application_name = os.path.basename(app.path) copy_directory(os.path.join(play_env["basedir"], 'resources/module-skel'), app.path) # check_application() @@ -247,25 +253,25 @@ def new(app, args, play_env): os.mkdir(os.path.join(app.path, 'src/play/modules')) os.mkdir(os.path.join(app.path, 'src/play/modules/%s' % application_name)) - print "~ OK, the module is created." - print "~ Start using it by adding it to the dependencies.yml of your project, as decribed in the documentation." - print "~" - print "~ Have fun!" - print "~" + print("~ OK, the module is created.") + print("~ Start using it by adding it to the dependencies.yml of your project, as decribed in the documentation.") + print("~") + print("~ Have fun!") + print("~") def list(app, args): - print "~ You can also browse this list online at:" + print("~ You can also browse this list online at:") for repo in repositories: - print "~ %s/modules" % repo - print "~" + print("~ %s/modules" % repo) + print("~") modules_list = load_module_list() for mod in modules_list: - print "~ [%s]" % mod['name'] - print "~ %s" % mod['fullname'] - print "~ %s/modules/%s" % (mod['server'], mod['name']) + print("~ [%s]" % mod['name']) + print("~ %s" % mod['fullname']) + print("~ %s/modules/%s" % (mod['server'], mod['name'])) vl = '' i = 0 @@ -276,17 +282,17 @@ def list(app, args): vl += ', ' if vl: - print "~ Versions: %s" % vl + print("~ Versions: %s" % vl) else: - print "~ (No versions released yet)" - print "~" + print("~ (No versions released yet)") + print("~") - print "~ To install one of these modules use:" - print "~ play install module-version (eg: play install scala-1.0)" - print "~" - print "~ Or you can just install 
the default release of a module using:" - print "~ play install module (eg: play install scala)" - print "~" + print("~ To install one of these modules use:") + print("~ play install module-version (eg: play install scala-1.0)") + print("~") + print("~ Or you can just install the default release of a module using:") + print("~ play install module (eg: play install scala)") + print("~") def build(app, args, env): @@ -305,9 +311,9 @@ def build(app, args, env): version = a if o in ('--require'): fwkMatch = a - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ " + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ ") sys.exit(-1) deps_file = os.path.join(app.path, 'conf', 'dependencies.yml') @@ -324,7 +330,7 @@ def build(app, args, env): version = splitted.pop() name = splitted.pop() for dep in deps["require"]: - if isinstance(dep, basestring): + if isinstance(dep, str): splitted = dep.split(" ") if len(splitted) == 2 and splitted[0] == "play": fwkMatch = splitted[1] @@ -334,9 +340,9 @@ def build(app, args, env): if name is None: name = os.path.basename(app.path) if version is None: - version = raw_input("~ What is the module version number? ") + version = input("~ What is the module version number? ") if fwkMatch is None: - fwkMatch = raw_input("~ What are the playframework versions required? ") + fwkMatch = input("~ What are the playframework versions required? ") if os.path.exists(deps_file): f = open(deps_file) @@ -358,11 +364,11 @@ def build(app, args, env): build_file = os.path.join(app.path, 'build.xml') if os.path.exists(build_file): - print "~" - print "~ Building..." - print "~" + print("~") + print("~ Building...") + print("~") status = subprocess.call('ant -f %s -Dplay.path=%s' % (build_file, ftb), shell=True) - print "~" + print("~") if status: sys.exit(status) @@ -404,16 +410,16 @@ def build(app, args, env): except: pass - print "~" - print "~ Done!" 
- print "~ Package is available at %s" % os.path.join(dist_dir, '%s.zip' % mv) - print "~" + print("~") + print("~ Done!") + print("~ Package is available at %s" % os.path.join(dist_dir, '%s.zip' % mv)) + print("~") def install(app, args, env): if len(sys.argv) < 3: help_file = os.path.join(env["basedir"], 'documentation/commands/cmd-install.txt') - print open(help_file, 'r').read() + print(open(help_file, 'r').read()) sys.exit(0) name = cmd = sys.argv[2] @@ -422,7 +428,7 @@ def install(app, args, env): version = groups.group(3) server = None - if args is not None: + if args != None: for param in args: if param.startswith("--force-server="): server = param[15:] @@ -433,48 +439,48 @@ def install(app, args, env): if mod['name'] == module: for v in mod['versions']: if version is None and v['isDefault']: - print '~ Will install %s-%s' % (module, v['version']) - print '~ This module is compatible with: %s' % v['matches'] - ok = raw_input('~ Do you want to install this version (y/n)? ') + print('~ Will install %s-%s' % (module, v['version'])) + print('~ This module is compatible with: %s' % v['matches']) + ok = input('~ Do you want to install this version (y/n)? ') if not ok == 'y': - print '~' + print('~') sys.exit(-1) - print '~ Installing module %s-%s...' % (module, v['version']) + print('~ Installing module %s-%s...' % (module, v['version'])) fetch = '%s/modules/%s-%s.zip' % (mod['server'], module, v['version']) break if version == v['version']: - print '~ Will install %s-%s' % (module, v['version']) - print '~ This module is compatible with: %s' % v['matches'] - ok = raw_input('~ Do you want to install this version (y/n)? ') + print('~ Will install %s-%s' % (module, v['version'])) + print('~ This module is compatible with: %s' % v['matches']) + ok = input('~ Do you want to install this version (y/n)? ') if not ok == 'y': - print '~' + print('~') sys.exit(-1) - print '~ Installing module %s-%s...' % (module, v['version']) + print('~ Installing module %s-%s...' 
% (module, v['version'])) fetch = '%s/modules/%s-%s.zip' % (mod['server'], module, v['version']) break if fetch is None: - print '~ No module found \'%s\'' % name - print '~ Try play list-modules to get the modules list' - print '~' + print('~ No module found \'%s\'' % name) + print('~ Try play list-modules to get the modules list') + print('~') sys.exit(-1) archive = os.path.join(env["basedir"], 'modules/%s-%s.zip' % (module, v['version'])) if os.path.exists(archive): os.remove(archive) - print '~' - print '~ Fetching %s' % fetch + print('~') + print('~ Fetching %s' % fetch) Downloader().retrieve(fetch, archive) if not os.path.exists(archive): - print '~ Oops, file does not exist' - print '~' + print('~ Oops, file does not exist') + print('~') sys.exist(-1) - print '~ Unzipping...' + print('~ Unzipping...') if os.path.exists(os.path.join(env["basedir"], 'modules/%s-%s' % (module, v['version']))): shutil.rmtree(os.path.join(env["basedir"], 'modules/%s-%s' % (module, v['version']))) @@ -482,13 +488,13 @@ def install(app, args, env): Unzip().extract(archive, os.path.join(env["basedir"], 'modules/%s-%s' % (module, v['version']))) os.remove(archive) - print '~' - print '~ Module %s-%s is installed!' % (module, v['version']) - print '~ You can now use it by adding it to the dependencies.yml file:' - print '~' - print '~ require:' - print '~ play -> %s %s' % (module, v['version']) - print '~' + print('~') + print('~ Module %s-%s is installed!' 
% (module, v['version'])) + print('~ You can now use it by adding it to the dependencies.yml file:') + print('~') + print('~ require:') + print('~ play -> %s %s' % (module, v['version'])) + print('~') sys.exit(0) @@ -501,20 +507,20 @@ def add(app, args, env): for o, a in optlist: if o in ('--module'): m = a - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ " + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ ") sys.exit(-1) if m is None: - print "~ Usage: play add --module=" - print "~ " + print("~ Usage: play add --module=") + print("~ ") sys.exit(-1) appConf = os.path.join(app.path, 'conf/application.conf') if not fileHas(appConf, '# ---- MODULES ----'): - print "~ Line '---- MODULES ----' missing in your application.conf. Add it to use this command." - print "~ " + print("~ Line '---- MODULES ----' missing in your application.conf. Add it to use this command.") + print("~ ") sys.exit(-1) mn = m @@ -522,13 +528,13 @@ def add(app, args, env): mn = mn[:mn.find('-')] if mn in app.module_names(): - print "~ Module %s already declared in application.conf, not doing anything." % mn - print "~ " + print("~ Module %s already declared in application.conf, not doing anything." % mn) + print("~ ") sys.exit(-1) replaceAll(appConf, r'# ---- MODULES ----', '# ---- MODULES ----\nmodule.%s=${play.path}/modules/%s' % (mn, m) ) - print "~ Module %s add to application %s." % (mn, app.name()) - print "~ " + print("~ Module %s add to application %s." 
% (mn, app.name())) + print("~ ") def load_module_list(custom_server=None): @@ -543,7 +549,7 @@ def any(arr, func): return False modules = None - if custom_server is not None: + if custom_server != None: rev = [custom_server] else: rev = repositories[:] # clone @@ -552,7 +558,7 @@ def any(arr, func): for repo in rev: result = load_modules_from(repo) if modules is None: - modules = map(lambda m: addServer(m, repo), result['modules']) + modules = [addServer(m, repo) for m in result['modules']] else: for module in result['modules']: if not any(modules, lambda m: m['name'] == module['name']): @@ -566,17 +572,17 @@ def load_modules_from(modules_server): headers={'User-Agent':DEFAULT_USER_AGENT, 'Accept': 'application/json' } - req = urllib2.Request(url, headers=headers) - result = urllib2.urlopen(req) + req = urllib.request.Request(url, headers=headers) + result = urllib.request.urlopen(req) return json.loads(result.read()) - except urllib2.HTTPError, e: - print "~ Oops," - print "~ Cannot fetch the modules list from %s (%s)..." % (url, e.code) - print e.reason - print "~" + except urllib.error.HTTPError as e: + print("~ Oops,") + print("~ Cannot fetch the modules list from %s (%s)..." % (url, e.code)) + print(e.reason) + print("~") sys.exit(-1) - except urllib2.URLError, e: - print "~ Oops," - print "~ Cannot fetch the modules list from %s ..." % (url) - print "~" + except urllib.error.URLError as e: + print("~ Oops,") + print("~ Cannot fetch the modules list from %s ..." 
% (url)) + print("~") sys.exit(-1) diff --git a/framework/pym/play/commands/netbeans.py b/framework/pym/play/commands/netbeans.py index f0ce397ed7..cca700f087 100644 --- a/framework/pym/play/commands/netbeans.py +++ b/framework/pym/play/commands/netbeans.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil import time @@ -43,8 +44,8 @@ def execute(**kargs): if not re.search("\.[svn|git|hg|scc|vssscc]", dir): mr = '%s' % (dir, dir) replaceAll(os.path.join(nbproject, 'project.xml'), r'%MORE%', mr) - print "~ OK, the application is ready for netbeans" - print "~ Just open %s as a netbeans project" % os.path.normpath(app.path) - print "~" - print "~ Use netbeansify again when you want to update netbeans configuration files, then close and open you project again." - print "~" + print("~ OK, the application is ready for netbeans") + print("~ Just open %s as a netbeans project" % os.path.normpath(app.path)) + print("~") + print("~ Use netbeansify again when you want to update netbeans configuration files, then close and open you project again.") + print("~") diff --git a/framework/pym/play/commands/precompile.py b/framework/pym/play/commands/precompile.py index 9b3c1e1784..9064d79af3 100644 --- a/framework/pym/play/commands/precompile.py +++ b/framework/pym/play/commands/precompile.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os, os.path import shutil import subprocess @@ -26,6 +27,6 @@ def execute(**kargs): try: return subprocess.call(java_cmd, env=os.environ) except OSError: - print "~ Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " - print "~ " + print("~ Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). 
") + print("~ ") sys.exit(-1) diff --git a/framework/pym/play/commands/secret.py b/framework/pym/play/commands/secret.py index a5237d24bd..a67fe733fe 100644 --- a/framework/pym/play/commands/secret.py +++ b/framework/pym/play/commands/secret.py @@ -1,3 +1,4 @@ +from __future__ import print_function from play.utils import * COMMANDS = ['secret'] @@ -10,8 +11,8 @@ def execute(**kargs): app = kargs.get("app") app.check() - print "~ Generating the secret key..." + print("~ Generating the secret key...") sk = secretKey() replaceAll(os.path.join(app.path, 'conf', 'application.conf'), r'application.secret=.*', 'application.secret=%s' % sk, True) - print "~ Keep the secret : %s" % sk - print "~" + print("~ Keep the secret : %s" % sk) + print("~") diff --git a/framework/pym/play/commands/status.py b/framework/pym/play/commands/status.py index a053d8ba6a..538b190d6c 100644 --- a/framework/pym/play/commands/status.py +++ b/framework/pym/play/commands/status.py @@ -1,7 +1,9 @@ +from __future__ import print_function +from builtins import str import os, os.path import shutil import getopt -import urllib2 +import urllib.request, urllib.error, urllib.parse from play.utils import * @@ -29,9 +31,9 @@ def execute(**kargs): url = a + '/@status' if o in ('--secret'): secret_key = a - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ " + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ ") sys.exit(-1) if not url or not secret_key: @@ -43,21 +45,21 @@ def execute(**kargs): secret_key = app.readConf('application.statusKey') try: - proxy_handler = urllib2.ProxyHandler({}) - req = urllib2.Request(url) + proxy_handler = urllib.request.ProxyHandler({}) + req = urllib.request.Request(url) req.add_header('Authorization', secret_key) - opener = urllib2.build_opener(proxy_handler) + opener = urllib.request.build_opener(proxy_handler) status = opener.open(req) - print '~ Status from %s,' % url - print '~' - print status.read() - print '~' - except 
urllib2.HTTPError, e: - print "~ Cannot retrieve the application status... (%s)" % (e.code) - print "~" + print('~ Status from %s,' % url) + print('~') + print(status.read()) + print('~') + except urllib.error.HTTPError as e: + print("~ Cannot retrieve the application status... (%s)" % (e.code)) + print("~") sys.exit(-1) - except urllib2.URLError, e: - print "~ Cannot contact the application..." - print "~" + except urllib.error.URLError as e: + print("~ Cannot contact the application...") + print("~") sys.exit(-1) - print + print() diff --git a/framework/pym/play/commands/test.py b/framework/pym/play/commands/test.py index f956c11072..2288e2819d 100644 --- a/framework/pym/play/commands/test.py +++ b/framework/pym/play/commands/test.py @@ -1,3 +1,4 @@ +from __future__ import print_function import sys import subprocess @@ -21,17 +22,17 @@ def execute(**kargs): def test(app, args): app.check() java_cmd = app.java_cmd(args) - print "~ Running in test mode" - print "~ Ctrl+C to stop" - print "~ " + print("~ Running in test mode") + print("~ Ctrl+C to stop") + print("~ ") try: return_code = subprocess.call(java_cmd, env=os.environ) if 0 != return_code: sys.exit(return_code) except OSError: - print "Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). " + print("Could not execute the java executable, please make sure the JAVA_HOME environment variable is set properly (the java executable should reside at JAVA_HOME/bin/java). 
") sys.exit(-1) - print "~ " + print("~ ") diff --git a/framework/pym/play/commands/version.py b/framework/pym/play/commands/version.py index 309d6e187c..83512cb01f 100644 --- a/framework/pym/play/commands/version.py +++ b/framework/pym/play/commands/version.py @@ -1,3 +1,4 @@ +from __future__ import print_function COMMANDS = ['version'] @@ -11,4 +12,4 @@ def execute(**kargs): # If we've shown the logo, then the version has already been printed if not showLogo: - print env["version"] + print(env["version"]) diff --git a/framework/pym/play/commands/war.py b/framework/pym/play/commands/war.py index cf69f33087..35eac30f7c 100644 --- a/framework/pym/play/commands/war.py +++ b/framework/pym/play/commands/war.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from builtins import str import sys import os import getopt @@ -32,50 +34,50 @@ def execute(**kargs): war_zip_path = war_path + '.war' if o in ('--exclude'): war_exclusion_list = a.split(':') - print "~ Excluding these directories :" + print("~ Excluding these directories :") for excluded in war_exclusion_list: - print "~ %s" %excluded - except getopt.GetoptError, err: - print "~ %s" % str(err) - print "~ Please specify a path where to generate the WAR, using the -o or --output option." - print "~ To exclude some directories, use the --exclude option and ':'-separator (eg: --exclude .svn:target:logs:tmp)." - print "~ " + print("~ %s" %excluded) + except getopt.GetoptError as err: + print("~ %s" % str(err)) + print("~ Please specify a path where to generate the WAR, using the -o or --output option.") + print("~ To exclude some directories, use the --exclude option and ':'-separator (eg: --exclude .svn:target:logs:tmp).") + print("~ ") sys.exit(-1) if not war_path: - print "~ Oops. Please specify a path where to generate the WAR, using the -o or --output option" - print "~ To exclude some directories, use the --exclude option and ':'-separator (eg: --exclude .svn:target:logs:tmp)." - print "~" + print("~ Oops. 
Please specify a path where to generate the WAR, using the -o or --output option") + print("~ To exclude some directories, use the --exclude option and ':'-separator (eg: --exclude .svn:target:logs:tmp).") + print("~") sys.exit(-1) if os.path.exists(war_path) and not os.path.exists(os.path.join(war_path, 'WEB-INF')): - print "~ Oops. The destination path already exists but does not seem to host a valid WAR structure" - print "~" + print("~ Oops. The destination path already exists but does not seem to host a valid WAR structure") + print("~") sys.exit(-1) if isParentOf(app.path, war_path) and not isExcluded(war_path, war_exclusion_list): - print "~ Oops. Please specify a destination directory outside of the application" - print "~ or exclude war destination directory using the --exclude option and ':'-separator " - print "~ (eg: --exclude .svn:target:logs:tmp)." - print "~" + print("~ Oops. Please specify a destination directory outside of the application") + print("~ or exclude war destination directory using the --exclude option and ':'-separator ") + print("~ (eg: --exclude .svn:target:logs:tmp).") + print("~") sys.exit(-1) # Precompile first precompilation_result = play.commands.precompile.execute(command=command, app=app, args=args, env=env) if precompilation_result != 0: - print "~ Please fix compilation errors before packaging WAR" - print "~" + print("~ Please fix compilation errors before packaging WAR") + print("~") sys.exit(precompilation_result) # Package package_as_war(app, env, war_path, war_zip_path, war_exclusion_list) - print "~ Done !" - print "~" - print "~ You can now load %s as a standard WAR into your servlet container" % (os.path.normpath(war_path)) - print "~ You can't use play standard commands to run/stop/debug the WAR application..." - print "~ ... just use your servlet container commands instead" - print "~" - print "~ Have fun!" 
- print "~" + print("~ Done !") + print("~") + print("~ You can now load %s as a standard WAR into your servlet container" % (os.path.normpath(war_path))) + print("~ You can't use play standard commands to run/stop/debug the WAR application...") + print("~ ... just use your servlet container commands instead") + print("~") + print("~ Have fun!") + print("~") diff --git a/framework/pym/play/utils.py b/framework/pym/play/utils.py index 88866ba1f7..d695c9b225 100644 --- a/framework/pym/play/utils.py +++ b/framework/pym/play/utils.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from builtins import str +from builtins import range import sys import os, os.path import re @@ -75,8 +78,8 @@ def getWithModules(args, env): dirname = os.path.join(env["basedir"], 'modules/%s' % f) break if not dirname: - print "~ Oops. Module " + m + " not found (try running `play install " + m + "`)" - print "~" + print("~ Oops. Module " + m + " not found (try running `play install " + m + "`)") + print("~") sys.exit(-1) md.append(dirname) @@ -91,23 +94,23 @@ def package_as_war(app, env, war_path, war_zip_path, war_exclusion_list = None): classpath = app.getClasspath() if not war_path: - print "~ Oops. Please specify a path where to generate the WAR, using the -o or --output option" - print "~" + print("~ Oops. Please specify a path where to generate the WAR, using the -o or --output option") + print("~") sys.exit(-1) if os.path.exists(war_path) and not os.path.exists(os.path.join(war_path, 'WEB-INF')): - print "~ Oops. The destination path already exists but does not seem to host a valid WAR structure" - print "~" + print("~ Oops. The destination path already exists but does not seem to host a valid WAR structure") + print("~") sys.exit(-1) if isParentOf(app.path, war_path) and not isExcluded(war_path, war_exclusion_list): - print "~ Oops. 
Please specify a destination directory outside of the application" - print "~ or exclude war destination directory using the --exclude option and ':'-separator " - print "~ (eg: --exclude .svn:target:logs:tmp)." - print "~" + print("~ Oops. Please specify a destination directory outside of the application") + print("~ or exclude war destination directory using the --exclude option and ':'-separator ") + print("~ (eg: --exclude .svn:target:logs:tmp).") + print("~") sys.exit(-1) - print "~ Packaging current version of the framework and the application to %s ..." % (os.path.normpath(war_path)) + print("~ Packaging current version of the framework and the application to %s ..." % (os.path.normpath(war_path))) if os.path.exists(war_path): shutil.rmtree(war_path) if os.path.exists(os.path.join(app.path, 'war')): copy_directory(os.path.join(app.path, 'war'), war_path) @@ -118,7 +121,7 @@ def package_as_war(app, env, war_path, war_zip_path, war_exclusion_list = None): shutil.copyfile(os.path.join(env["basedir"], 'resources/war/web.xml'), os.path.join(war_path, 'WEB-INF/web.xml')) application_name = app.readConf('application.name') replaceAll(os.path.join(war_path, 'WEB-INF/web.xml'), r'%APPLICATION_NAME%', application_name) - if env["id"] is not "": + if env["id"] != "": replaceAll(os.path.join(war_path, 'WEB-INF/web.xml'), r'%PLAY_ID%', env["id"]) else: replaceAll(os.path.join(war_path, 'WEB-INF/web.xml'), r'%PLAY_ID%', 'war') @@ -167,7 +170,7 @@ def package_as_war(app, env, war_path, war_zip_path, war_exclusion_list = None): shutil.copyfile(os.path.join(env["basedir"], 'resources/messages'), os.path.join(war_path, 'WEB-INF/resources/messages')) if war_zip_path: - print "~ Creating zipped archive to %s ..." % (os.path.normpath(war_zip_path)) + print("~ Creating zipped archive to %s ..." 
% (os.path.normpath(war_zip_path))) if os.path.exists(war_zip_path): os.remove(war_zip_path) zip = zipfile.ZipFile(war_zip_path, 'w', zipfile.ZIP_STORED) @@ -242,7 +245,7 @@ def isTestFrameworkId( framework_id ): return (framework_id == 'test' or (framework_id.startswith('test-') and framework_id.__len__() >= 6 )) def java_path(): - if not os.environ.has_key('JAVA_HOME'): + if 'JAVA_HOME' not in os.environ: return "java" else: return os.path.normpath("%s/bin/java" % os.environ['JAVA_HOME']) @@ -257,5 +260,5 @@ def getJavaVersion(): if result: return result.group(1) else: - print "Unable to retrieve java version from " + javaVersion + print("Unable to retrieve java version from " + javaVersion) return "" diff --git a/framework/pym/simplejson/__init__.py b/framework/pym/simplejson/__init__.py index d5b4d39913..2e3b838ee8 100644 --- a/framework/pym/simplejson/__init__.py +++ b/framework/pym/simplejson/__init__.py @@ -5,24 +5,23 @@ :mod:`simplejson` exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. It is the externally maintained version of the :mod:`json` library contained in Python 2.6, but maintains -compatibility with Python 2.4 and Python 2.5 and (currently) has -significant performance advantages, even without using the optional C -extension for speedups. +compatibility back to Python 2.5 and (currently) has significant performance +advantages, even without using the optional C extension for speedups. 
Encoding basic Python object hierarchies:: >>> import simplejson as json >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) '["foo", {"bar": ["baz", null, 1.0, 2]}]' - >>> print json.dumps("\"foo\bar") + >>> print(json.dumps("\"foo\bar")) "\"foo\bar" - >>> print json.dumps(u'\u1234') + >>> print(json.dumps(u'\u1234')) "\u1234" - >>> print json.dumps('\\') + >>> print(json.dumps('\\')) "\\" - >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)) {"a": 0, "b": 0, "c": 0} - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO() >>> json.dump(['streaming API'], io) >>> io.getvalue() @@ -31,14 +30,14 @@ Compact encoding:: >>> import simplejson as json - >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + >>> obj = [1,2,3,{'4': 5, '6': 7}] + >>> json.dumps(obj, separators=(',',':'), sort_keys=True) '[1,2,3,{"4":5,"6":7}]' Pretty printing:: >>> import simplejson as json - >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) - >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) + >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ')) { "4": 5, "6": 7 @@ -52,7 +51,7 @@ True >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' True - >>> from StringIO import StringIO + >>> from simplejson.compat import StringIO >>> io = StringIO('["streaming API"]') >>> json.load(io)[0] == 'streaming API' True @@ -68,8 +67,8 @@ >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', ... object_hook=as_complex) (1+2j) - >>> import decimal - >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1') + >>> from decimal import Decimal + >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') True Specializing JSON object encoding:: @@ -78,7 +77,8 @@ >>> def encode_complex(obj): ... if isinstance(obj, complex): ... return [obj.real, obj.imag] - ... raise TypeError(repr(o) + " is not JSON serializable") + ... 
raise TypeError('Object of type %s is not JSON serializable' % + ... obj.__class__.__name__) ... >>> json.dumps(2 + 1j, default=encode_complex) '[2.0, 1.0]' @@ -87,7 +87,6 @@ >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) '[2.0, 1.0]' - Using simplejson.tool from the shell to validate and pretty-print:: $ echo '{"json":"obj"}' | python -m simplejson.tool @@ -95,18 +94,60 @@ "json": "obj" } $ echo '{ 1.2:3.4}' | python -m simplejson.tool - Expecting property name: line 1 column 2 (char 2) + Expecting property name: line 1 column 3 (char 2) + +Parsing multiple documents serialized as JSON lines (newline-delimited JSON):: + + >>> import simplejson as json + >>> def loads_lines(docs): + ... for doc in docs.splitlines(): + ... yield json.loads(doc) + ... + >>> sum(doc["count"] for doc in loads_lines('{"count":1}\n{"count":2}\n{"count":3}\n')) + 6 + +Serializing multiple objects to JSON lines (newline-delimited JSON):: + + >>> import simplejson as json + >>> def dumps_lines(objs): + ... for obj in objs: + ... yield json.dumps(obj, separators=(',',':')) + '\n' + ... + >>> ''.join(dumps_lines([{'count': 1}, {'count': 2}, {'count': 3}])) + '{"count":1}\n{"count":2}\n{"count":3}\n' + """ -__version__ = '2.0.9' +from __future__ import absolute_import +__version__ = '3.17.2' __all__ = [ 'dump', 'dumps', 'load', 'loads', - 'JSONDecoder', 'JSONEncoder', + 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', + 'OrderedDict', 'simple_first', 'RawJSON' ] __author__ = 'Bob Ippolito ' -from decoder import JSONDecoder -from encoder import JSONEncoder +from decimal import Decimal + +from .errors import JSONDecodeError +from .raw_json import RawJSON +from .decoder import JSONDecoder +from .encoder import JSONEncoder, JSONEncoderForHTML +def _import_OrderedDict(): + import collections + try: + return collections.OrderedDict + except AttributeError: + from . 
import ordered_dict + return ordered_dict.OrderedDict +OrderedDict = _import_OrderedDict() + +def _import_c_make_encoder(): + try: + from ._speedups import make_encoder + return make_encoder + except ImportError: + return None _default_encoder = JSONEncoder( skipkeys=False, @@ -117,56 +158,115 @@ separators=None, encoding='utf-8', default=None, + use_decimal=True, + namedtuple_as_object=True, + tuple_as_array=True, + iterable_as_array=False, + bigint_as_string=False, + item_sort_key=None, + for_json=False, + ignore_nan=False, + int_as_string_bitcount=None, ) def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, + iterable_as_array=False, **kw): """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a ``.write()``-supporting file-like object). - If ``skipkeys`` is true then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + If *skipkeys* is true then ``dict`` keys that are not basic types + (``str``, ``int``, ``long``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is false, then the some chunks written to ``fp`` - may be ``unicode`` instances, subject to normal Python ``str`` to - ``unicode`` coercion rules. Unless ``fp.write()`` explicitly - understands ``unicode`` (as in ``codecs.getwriter()``) this is likely - to cause an error. - - If ``check_circular`` is false, then the circular reference check - for container types will be skipped and a circular reference will - result in an ``OverflowError`` (or worse). 
+ If *ensure_ascii* is false (default: ``True``), then the output may + contain non-ASCII characters, so long as they do not need to be escaped + by JSON. When it is true, all non-ASCII characters are escaped. - If ``allow_nan`` is false, then it will be a ``ValueError`` to + If *allow_nan* is false, then it will be a ``ValueError`` to serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) - in strict compliance of the JSON specification, instead of using the - JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + in strict compliance of the original JSON specification, instead of using + the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). See + *ignore_nan* for ECMA-262 compliant behavior. - If ``indent`` is a non-negative integer, then JSON array elements and object - members will be pretty-printed with that indent level. An indent level - of 0 will only insert newlines. ``None`` is the most compact representation. + If *indent* is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. - If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If specified, *separators* should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. - ``encoding`` is the character encoding for str instances, default is UTF-8. + *encoding* is the character encoding for str instances, default is UTF-8. - ``default(obj)`` is a function that should return a serializable version - of obj or raise TypeError. 
The default simply raises TypeError. + *default(obj)* is a function that should return a serializable version + of obj or raise ``TypeError``. The default simply raises ``TypeError``. + + If *use_decimal* is true (default: ``True``) then decimal.Decimal + will be natively serialized to JSON with full precision. + + If *namedtuple_as_object* is true (default: ``True``), + :class:`tuple` subclasses with ``_asdict()`` methods will be encoded + as JSON objects. + + If *tuple_as_array* is true (default: ``True``), + :class:`tuple` (and subclasses) will be encoded as JSON arrays. + + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + + If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. Note that this is still a + lossy operation that will not round-trip correctly and should be used + sparingly. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precedence over + *sort_keys*. + + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. 
To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead + of subclassing whenever possible. """ # cached encoder if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array and not iterable_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and int_as_string_bitcount is None + and not kw + ): iterable = _default_encoder.iterencode(obj) else: if cls is None: @@ -174,7 +274,17 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, - default=default, **kw).iterencode(obj) + default=default, use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + iterable_as_array=iterable_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, + **kw).iterencode(obj) # could accelerate with writelines in some versions of Python, at # a debuggability cost for chunk in iterable: @@ -182,17 +292,21 @@ def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, - allow_nan=True, cls=None, indent=None, separators=None, - encoding='utf-8', default=None, **kw): + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', default=None, use_decimal=True, + 
namedtuple_as_object=True, tuple_as_array=True, + bigint_as_string=False, sort_keys=False, item_sort_key=None, + for_json=False, ignore_nan=False, int_as_string_bitcount=None, + iterable_as_array=False, **kw): """Serialize ``obj`` to a JSON formatted ``str``. If ``skipkeys`` is false then ``dict`` keys that are not basic types - (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + (``str``, ``int``, ``long``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is false, then the return value will be a - ``unicode`` instance subject to normal Python ``str`` to ``unicode`` - coercion rules instead of being escaped to an ASCII ``str``. + If *ensure_ascii* is false (default: ``True``), then the output may + contain non-ASCII characters, so long as they do not need to be escaped + by JSON. When it is true, all non-ASCII characters are escaped. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will @@ -203,30 +317,81 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, strict compliance of the JSON specification, instead of using the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). - If ``indent`` is a non-negative integer, then JSON array elements and - object members will be pretty-printed with that indent level. An indent - level of 0 will only insert newlines. ``None`` is the most compact - representation. + If ``indent`` is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. 
- If ``separators`` is an ``(item_separator, dict_separator)`` tuple - then it will be used instead of the default ``(', ', ': ')`` separators. - ``(',', ':')`` is the most compact JSON representation. + If specified, ``separators`` should be an + ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')`` + if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most + compact JSON representation, you should specify ``(',', ':')`` to eliminate + whitespace. - ``encoding`` is the character encoding for str instances, default is UTF-8. + ``encoding`` is the character encoding for bytes instances, default is + UTF-8. ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. + If *use_decimal* is true (default: ``True``) then decimal.Decimal + will be natively serialized to JSON with full precision. + + If *namedtuple_as_object* is true (default: ``True``), + :class:`tuple` subclasses with ``_asdict()`` methods will be encoded + as JSON objects. + + If *tuple_as_array* is true (default: ``True``), + :class:`tuple` (and subclasses) will be encoded as JSON arrays. + + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + + If *bigint_as_string* is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. This is to avoid the + rounding that happens in Javascript otherwise. + + If *int_as_string_bitcount* is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, *item_sort_key* is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. This option takes precendence over + *sort_keys*. 
+ + If *sort_keys* is true (default: ``False``), the output of dictionaries + will be sorted by item. + + If *for_json* is true (default: ``False``), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as + ``null`` in compliance with the ECMA-262 specification. If true, this will + override *allow_nan*. + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the ``.default()`` method to serialize additional types), specify it with - the ``cls`` kwarg. + the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing + whenever possible. """ # cached encoder if (not skipkeys and ensure_ascii and check_circular and allow_nan and cls is None and indent is None and separators is None and - encoding == 'utf-8' and default is None and not kw): + encoding == 'utf-8' and default is None and use_decimal + and namedtuple_as_object and tuple_as_array and not iterable_as_array + and not bigint_as_string and not sort_keys + and not item_sort_key and not for_json + and not ignore_nan and int_as_string_bitcount is None + and not kw + ): return _default_encoder.encode(obj) if cls is None: cls = JSONEncoder @@ -234,85 +399,186 @@ def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, skipkeys=skipkeys, ensure_ascii=ensure_ascii, check_circular=check_circular, allow_nan=allow_nan, indent=indent, separators=separators, encoding=encoding, default=default, + use_decimal=use_decimal, + namedtuple_as_object=namedtuple_as_object, + tuple_as_array=tuple_as_array, + iterable_as_array=iterable_as_array, + bigint_as_string=bigint_as_string, + sort_keys=sort_keys, + item_sort_key=item_sort_key, + for_json=for_json, + ignore_nan=ignore_nan, + int_as_string_bitcount=int_as_string_bitcount, **kw).encode(obj) -_default_decoder = 
JSONDecoder(encoding=None, object_hook=None) +_default_decoder = JSONDecoder(encoding=None, object_hook=None, + object_pairs_hook=None) def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, namedtuple_as_object=True, tuple_as_array=True, + **kw): """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing - a JSON document) to a Python object. - - If the contents of ``fp`` is encoded with an ASCII based encoding other - than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must - be specified. Encodings that are not ASCII based (such as UCS-2) are - not allowed, and should be wrapped with - ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` - object and passed to ``loads()`` - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). + a JSON document as `str` or `bytes`) to a Python object. + + *encoding* determines the encoding used to interpret any + `bytes` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding `str` objects. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. 
This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. """ return loads(fp.read(), encoding=encoding, cls=cls, object_hook=object_hook, parse_float=parse_float, parse_int=parse_int, - parse_constant=parse_constant, **kw) + parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, + use_decimal=use_decimal, **kw) def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, **kw): + parse_int=None, parse_constant=None, object_pairs_hook=None, + use_decimal=False, **kw): """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) to a Python object. 
- If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding - other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name - must be specified. Encodings that are not ASCII based (such as UCS-2) - are not allowed and should be decoded to ``unicode`` first. - - ``object_hook`` is an optional function that will be called with the - result of any object literal decode (a ``dict``). The return value of - ``object_hook`` will be used instead of the ``dict``. This feature - can be used to implement custom decoders (e.g. JSON-RPC class hinting). - - ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN, null, true, false. - This can be used to raise an exception if invalid JSON numbers - are encountered. + *encoding* determines the encoding used to interpret any + :class:`bytes` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. + + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. 
This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. + + If *use_decimal* is true (default: ``False``) then it implies + parse_float=decimal.Decimal for parity with ``dump``. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` - kwarg. + kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead + of subclassing whenever possible. 
""" if (cls is None and encoding is None and object_hook is None and parse_int is None and parse_float is None and - parse_constant is None and not kw): + parse_constant is None and object_pairs_hook is None + and not use_decimal and not kw): return _default_decoder.decode(s) if cls is None: cls = JSONDecoder if object_hook is not None: kw['object_hook'] = object_hook + if object_pairs_hook is not None: + kw['object_pairs_hook'] = object_pairs_hook if parse_float is not None: kw['parse_float'] = parse_float if parse_int is not None: kw['parse_int'] = parse_int if parse_constant is not None: kw['parse_constant'] = parse_constant + if use_decimal: + if parse_float is not None: + raise TypeError("use_decimal=True implies parse_float=Decimal") + kw['parse_float'] = Decimal return cls(encoding=encoding, **kw).decode(s) + + +def _toggle_speedups(enabled): + from . import decoder as dec + from . import encoder as enc + from . import scanner as scan + c_make_encoder = _import_c_make_encoder() + if enabled: + dec.scanstring = dec.c_scanstring or dec.py_scanstring + enc.c_make_encoder = c_make_encoder + enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or + enc.py_encode_basestring_ascii) + scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner + else: + dec.scanstring = dec.py_scanstring + enc.c_make_encoder = None + enc.encode_basestring_ascii = enc.py_encode_basestring_ascii + scan.make_scanner = scan.py_make_scanner + dec.make_scanner = scan.make_scanner + global _default_decoder + _default_decoder = JSONDecoder( + encoding=None, + object_hook=None, + object_pairs_hook=None, + ) + global _default_encoder + _default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, + ) + +def simple_first(kv): + """Helper function to pass to item_sort_key to sort simple + elements to the top, then container elements. 
+ """ + return (isinstance(kv[1], (list, dict, tuple)), kv[0]) diff --git a/framework/pym/simplejson/_speedups.c b/framework/pym/simplejson/_speedups.c deleted file mode 100644 index 23b5f4a6e6..0000000000 --- a/framework/pym/simplejson/_speedups.c +++ /dev/null @@ -1,2329 +0,0 @@ -#include "Python.h" -#include "structmember.h" -#if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) -#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) -#endif -#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) -typedef int Py_ssize_t; -#define PY_SSIZE_T_MAX INT_MAX -#define PY_SSIZE_T_MIN INT_MIN -#define PyInt_FromSsize_t PyInt_FromLong -#define PyInt_AsSsize_t PyInt_AsLong -#endif -#ifndef Py_IS_FINITE -#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) -#endif - -#ifdef __GNUC__ -#define UNUSED __attribute__((__unused__)) -#else -#define UNUSED -#endif - -#define DEFAULT_ENCODING "utf-8" - -#define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) -#define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) -#define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) -#define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) - -static PyTypeObject PyScannerType; -static PyTypeObject PyEncoderType; - -typedef struct _PyScannerObject { - PyObject_HEAD - PyObject *encoding; - PyObject *strict; - PyObject *object_hook; - PyObject *parse_float; - PyObject *parse_int; - PyObject *parse_constant; -} PyScannerObject; - -static PyMemberDef scanner_members[] = { - {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, - {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, - {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, - {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, - {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, - {"parse_constant", T_OBJECT, 
offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, - {NULL} -}; - -typedef struct _PyEncoderObject { - PyObject_HEAD - PyObject *markers; - PyObject *defaultfn; - PyObject *encoder; - PyObject *indent; - PyObject *key_separator; - PyObject *item_separator; - PyObject *sort_keys; - PyObject *skipkeys; - int fast_encode; - int allow_nan; -} PyEncoderObject; - -static PyMemberDef encoder_members[] = { - {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, - {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, - {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, - {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, - {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, - {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, - {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, - {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, - {NULL} -}; - -static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); -static PyObject * -ascii_escape_unicode(PyObject *pystr); -static PyObject * -ascii_escape_str(PyObject *pystr); -static PyObject * -py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); -void init_speedups(void); -static PyObject * -scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); -static PyObject * -scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); -static PyObject * -_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); -static PyObject * -scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); -static int -scanner_init(PyObject *self, PyObject *args, PyObject *kwds); -static void -scanner_dealloc(PyObject *self); -static int 
-scanner_clear(PyObject *self); -static PyObject * -encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); -static int -encoder_init(PyObject *self, PyObject *args, PyObject *kwds); -static void -encoder_dealloc(PyObject *self); -static int -encoder_clear(PyObject *self); -static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); -static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); -static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); -static PyObject * -_encoded_const(PyObject *const); -static void -raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj); -static int -_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); -static PyObject * -_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); -static PyObject * -encoder_encode_float(PyEncoderObject *s, PyObject *obj); - -#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') -#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) - -#define MIN_EXPANSION 6 -#ifdef Py_UNICODE_WIDE -#define MAX_EXPANSION (2 * MIN_EXPANSION) -#else -#define MAX_EXPANSION MIN_EXPANSION -#endif - -static int -_convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) -{ - /* PyObject to Py_ssize_t converter */ - *size_ptr = PyInt_AsSsize_t(o); - if (*size_ptr == -1 && PyErr_Occurred()); - return 1; - return 0; -} - -static PyObject * -_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) -{ - /* Py_ssize_t to PyObject converter */ - return PyInt_FromSsize_t(*size_ptr); -} - -static Py_ssize_t -ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) -{ - /* Escape unicode code point c to ASCII escape sequences - in char *output. 
output must have at least 12 bytes unused to - accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ - output[chars++] = '\\'; - switch (c) { - case '\\': output[chars++] = (char)c; break; - case '"': output[chars++] = (char)c; break; - case '\b': output[chars++] = 'b'; break; - case '\f': output[chars++] = 'f'; break; - case '\n': output[chars++] = 'n'; break; - case '\r': output[chars++] = 'r'; break; - case '\t': output[chars++] = 't'; break; - default: -#ifdef Py_UNICODE_WIDE - if (c >= 0x10000) { - /* UTF-16 surrogate pair */ - Py_UNICODE v = c - 0x10000; - c = 0xd800 | ((v >> 10) & 0x3ff); - output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; - c = 0xdc00 | (v & 0x3ff); - output[chars++] = '\\'; - } -#endif - output[chars++] = 'u'; - output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; - output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; - output[chars++] = "0123456789abcdef"[(c ) & 0xf]; - } - return chars; -} - -static PyObject * -ascii_escape_unicode(PyObject *pystr) -{ - /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ - Py_ssize_t i; - Py_ssize_t input_chars; - Py_ssize_t output_size; - Py_ssize_t max_output_size; - Py_ssize_t chars; - PyObject *rval; - char *output; - Py_UNICODE *input_unicode; - - input_chars = PyUnicode_GET_SIZE(pystr); - input_unicode = PyUnicode_AS_UNICODE(pystr); - - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + input_chars; - max_output_size = 2 + (input_chars * MAX_EXPANSION); - rval = PyString_FromStringAndSize(NULL, output_size); - if (rval == NULL) { - return NULL; - } - output = PyString_AS_STRING(rval); - chars = 0; - output[chars++] = '"'; - for (i = 0; i < input_chars; i++) { - 
Py_UNICODE c = input_unicode[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - if (output_size - chars < (1 + MAX_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - Py_ssize_t new_output_size = output_size * 2; - /* This is an upper bound */ - if (new_output_size > max_output_size) { - new_output_size = max_output_size; - } - /* Make sure that the output size changed before resizing */ - if (new_output_size != output_size) { - output_size = new_output_size; - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } - } - } - output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } - return rval; -} - -static PyObject * -ascii_escape_str(PyObject *pystr) -{ - /* Take a PyString pystr and return a new ASCII-only escaped PyString */ - Py_ssize_t i; - Py_ssize_t input_chars; - Py_ssize_t output_size; - Py_ssize_t chars; - PyObject *rval; - char *output; - char *input_str; - - input_chars = PyString_GET_SIZE(pystr); - input_str = PyString_AS_STRING(pystr); - - /* Fast path for a string that's already ASCII */ - for (i = 0; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (!S_CHAR(c)) { - /* If we have to escape something, scan the string for unicode */ - Py_ssize_t j; - for (j = i; j < input_chars; j++) { - c = (Py_UNICODE)(unsigned char)input_str[j]; - if (c > 0x7f) { - /* We hit a non-ASCII character, bail to unicode mode */ - PyObject *uni; - uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); - if (uni == NULL) { - return NULL; - } - rval = ascii_escape_unicode(uni); - Py_DECREF(uni); - return rval; - } - } - break; - } - } - - if (i == input_chars) { - /* Input is already ASCII */ - output_size = 2 + input_chars; - } - else { - /* One char input can be up to 6 chars output, estimate 4 of these */ - output_size = 2 + (MIN_EXPANSION * 4) + 
input_chars; - } - rval = PyString_FromStringAndSize(NULL, output_size); - if (rval == NULL) { - return NULL; - } - output = PyString_AS_STRING(rval); - output[0] = '"'; - - /* We know that everything up to i is ASCII already */ - chars = i + 1; - memcpy(&output[1], input_str, i); - - for (; i < input_chars; i++) { - Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; - if (S_CHAR(c)) { - output[chars++] = (char)c; - } - else { - chars = ascii_escape_char(c, output, chars); - } - /* An ASCII char can't possibly expand to a surrogate! */ - if (output_size - chars < (1 + MIN_EXPANSION)) { - /* There's more than four, so let's resize by a lot */ - output_size *= 2; - if (output_size > 2 + (input_chars * MIN_EXPANSION)) { - output_size = 2 + (input_chars * MIN_EXPANSION); - } - if (_PyString_Resize(&rval, output_size) == -1) { - return NULL; - } - output = PyString_AS_STRING(rval); - } - } - output[chars++] = '"'; - if (_PyString_Resize(&rval, chars) == -1) { - return NULL; - } - return rval; -} - -static void -raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) -{ - /* Use the Python function simplejson.decoder.errmsg to raise a nice - looking ValueError exception */ - static PyObject *errmsg_fn = NULL; - PyObject *pymsg; - if (errmsg_fn == NULL) { - PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); - if (decoder == NULL) - return; - errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); - Py_DECREF(decoder); - if (errmsg_fn == NULL) - return; - } - pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); - if (pymsg) { - PyErr_SetObject(PyExc_ValueError, pymsg); - Py_DECREF(pymsg); - } -} - -static PyObject * -join_list_unicode(PyObject *lst) -{ - /* return u''.join(lst) */ - static PyObject *joinfn = NULL; - if (joinfn == NULL) { - PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); - if (ustr == NULL) - return NULL; - - joinfn = PyObject_GetAttrString(ustr, "join"); - Py_DECREF(ustr); - if (joinfn == NULL) - 
return NULL; - } - return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); -} - -static PyObject * -join_list_string(PyObject *lst) -{ - /* return ''.join(lst) */ - static PyObject *joinfn = NULL; - if (joinfn == NULL) { - PyObject *ustr = PyString_FromStringAndSize(NULL, 0); - if (ustr == NULL) - return NULL; - - joinfn = PyObject_GetAttrString(ustr, "join"); - Py_DECREF(ustr); - if (joinfn == NULL) - return NULL; - } - return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); -} - -static PyObject * -_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { - /* return (rval, idx) tuple, stealing reference to rval */ - PyObject *tpl; - PyObject *pyidx; - /* - steal a reference to rval, returns (rval, idx) - */ - if (rval == NULL) { - return NULL; - } - pyidx = PyInt_FromSsize_t(idx); - if (pyidx == NULL) { - Py_DECREF(rval); - return NULL; - } - tpl = PyTuple_New(2); - if (tpl == NULL) { - Py_DECREF(pyidx); - Py_DECREF(rval); - return NULL; - } - PyTuple_SET_ITEM(tpl, 0, rval); - PyTuple_SET_ITEM(tpl, 1, pyidx); - return tpl; -} - -static PyObject * -scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) -{ - /* Read the JSON string from PyString pystr. - end is the index of the first character after the quote. 
- encoding is the encoding of pystr (must be an ASCII superset) - if strict is zero then literal control characters are allowed - *next_end_ptr is a return-by-reference index of the character - after the end quote - - Return value is a new PyString (if ASCII-only) or PyUnicode - */ - PyObject *rval; - Py_ssize_t len = PyString_GET_SIZE(pystr); - Py_ssize_t begin = end - 1; - Py_ssize_t next = begin; - int has_unicode = 0; - char *buf = PyString_AS_STRING(pystr); - PyObject *chunks = PyList_New(0); - if (chunks == NULL) { - goto bail; - } - if (end < 0 || len <= end) { - PyErr_SetString(PyExc_ValueError, "end is out of bounds"); - goto bail; - } - while (1) { - /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; - PyObject *chunk = NULL; - for (next = end; next < len; next++) { - c = (unsigned char)buf[next]; - if (c == '"' || c == '\\') { - break; - } - else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); - goto bail; - } - else if (c > 0x7f) { - has_unicode = 1; - } - } - if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - /* Pick up this chunk if it's not zero length */ - if (next != end) { - PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); - if (strchunk == NULL) { - goto bail; - } - if (has_unicode) { - chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); - Py_DECREF(strchunk); - if (chunk == NULL) { - goto bail; - } - } - else { - chunk = strchunk; - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - next++; - if (c == '"') { - end = next; - break; - } - if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - c = buf[next]; - if (c != 'u') { - /* Non-unicode backslash escapes */ - end = next + 1; - switch (c) { - case '"': break; - case '\\': break; - case '/': break; - case 'b': c = '\b'; break; - case 'f': 
c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - default: c = 0; - } - if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); - goto bail; - } - } - else { - c = 0; - next++; - end = next + 4; - if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); - goto bail; - } - /* Decode 4 hex digits */ - for (; next < end; next++) { - Py_UNICODE digit = buf[next]; - c <<= 4; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } -#ifdef Py_UNICODE_WIDE - /* Surrogate pair */ - if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - c2 <<= 4; - Py_UNICODE digit = buf[next]; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c2 |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c2 |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c2 |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); - } - else if ((c & 0xfc00) == 0xdc00) { - 
raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; - } -#endif - } - if (c > 0x7f) { - has_unicode = 1; - } - if (has_unicode) { - chunk = PyUnicode_FromUnicode(&c, 1); - if (chunk == NULL) { - goto bail; - } - } - else { - char c_char = Py_CHARMASK(c); - chunk = PyString_FromStringAndSize(&c_char, 1); - if (chunk == NULL) { - goto bail; - } - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - - rval = join_list_string(chunks); - if (rval == NULL) { - goto bail; - } - Py_CLEAR(chunks); - *next_end_ptr = end; - return rval; -bail: - *next_end_ptr = -1; - Py_XDECREF(chunks); - return NULL; -} - - -static PyObject * -scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) -{ - /* Read the JSON string from PyUnicode pystr. - end is the index of the first character after the quote. - if strict is zero then literal control characters are allowed - *next_end_ptr is a return-by-reference index of the character - after the end quote - - Return value is a new PyUnicode - */ - PyObject *rval; - Py_ssize_t len = PyUnicode_GET_SIZE(pystr); - Py_ssize_t begin = end - 1; - Py_ssize_t next = begin; - const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); - PyObject *chunks = PyList_New(0); - if (chunks == NULL) { - goto bail; - } - if (end < 0 || len <= end) { - PyErr_SetString(PyExc_ValueError, "end is out of bounds"); - goto bail; - } - while (1) { - /* Find the end of the string or the next escape */ - Py_UNICODE c = 0; - PyObject *chunk = NULL; - for (next = end; next < len; next++) { - c = buf[next]; - if (c == '"' || c == '\\') { - break; - } - else if (strict && c <= 0x1f) { - raise_errmsg("Invalid control character at", pystr, next); - goto bail; - } - } - if (!(c == '"' || c == '\\')) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - /* Pick up this chunk if it's not zero length */ - if (next != end) { - chunk = 
PyUnicode_FromUnicode(&buf[end], next - end); - if (chunk == NULL) { - goto bail; - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - next++; - if (c == '"') { - end = next; - break; - } - if (next == len) { - raise_errmsg("Unterminated string starting at", pystr, begin); - goto bail; - } - c = buf[next]; - if (c != 'u') { - /* Non-unicode backslash escapes */ - end = next + 1; - switch (c) { - case '"': break; - case '\\': break; - case '/': break; - case 'b': c = '\b'; break; - case 'f': c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - default: c = 0; - } - if (c == 0) { - raise_errmsg("Invalid \\escape", pystr, end - 2); - goto bail; - } - } - else { - c = 0; - next++; - end = next + 4; - if (end >= len) { - raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); - goto bail; - } - /* Decode 4 hex digits */ - for (; next < end; next++) { - Py_UNICODE digit = buf[next]; - c <<= 4; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - c |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } -#ifdef Py_UNICODE_WIDE - /* Surrogate pair */ - if ((c & 0xfc00) == 0xd800) { - Py_UNICODE c2 = 0; - if (end + 6 >= len) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - if (buf[next++] != '\\' || buf[next++] != 'u') { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - end += 6; - /* Decode 4 hex digits */ - for (; next < end; next++) { - c2 <<= 4; - Py_UNICODE digit = buf[next]; - switch (digit) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': 
case '9': - c2 |= (digit - '0'); break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - c2 |= (digit - 'a' + 10); break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - c2 |= (digit - 'A' + 10); break; - default: - raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); - goto bail; - } - } - if ((c2 & 0xfc00) != 0xdc00) { - raise_errmsg("Unpaired high surrogate", pystr, end - 5); - goto bail; - } - c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); - } - else if ((c & 0xfc00) == 0xdc00) { - raise_errmsg("Unpaired low surrogate", pystr, end - 5); - goto bail; - } -#endif - } - chunk = PyUnicode_FromUnicode(&c, 1); - if (chunk == NULL) { - goto bail; - } - if (PyList_Append(chunks, chunk)) { - Py_DECREF(chunk); - goto bail; - } - Py_DECREF(chunk); - } - - rval = join_list_unicode(chunks); - if (rval == NULL) { - goto bail; - } - Py_DECREF(chunks); - *next_end_ptr = end; - return rval; -bail: - *next_end_ptr = -1; - Py_XDECREF(chunks); - return NULL; -} - -PyDoc_STRVAR(pydoc_scanstring, - "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" - "\n" - "Scan the string s for a JSON string. End is the index of the\n" - "character in s after the quote that started the JSON string.\n" - "Unescapes all valid JSON string escape sequences and raises ValueError\n" - "on attempt to decode an invalid string. If strict is False then literal\n" - "control characters are allowed in the string.\n" - "\n" - "Returns a tuple of the decoded string and the index of the character in s\n" - "after the end quote." 
-); - -static PyObject * -py_scanstring(PyObject* self UNUSED, PyObject *args) -{ - PyObject *pystr; - PyObject *rval; - Py_ssize_t end; - Py_ssize_t next_end = -1; - char *encoding = NULL; - int strict = 1; - if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { - return NULL; - } - if (encoding == NULL) { - encoding = DEFAULT_ENCODING; - } - if (PyString_Check(pystr)) { - rval = scanstring_str(pystr, end, encoding, strict, &next_end); - } - else if (PyUnicode_Check(pystr)) { - rval = scanstring_unicode(pystr, end, strict, &next_end); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; - } - return _build_rval_index_tuple(rval, next_end); -} - -PyDoc_STRVAR(pydoc_encode_basestring_ascii, - "encode_basestring_ascii(basestring) -> str\n" - "\n" - "Return an ASCII-only JSON representation of a Python string" -); - -static PyObject * -py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) -{ - /* Return an ASCII-only JSON representation of a Python string */ - /* METH_O */ - if (PyString_Check(pystr)) { - return ascii_escape_str(pystr); - } - else if (PyUnicode_Check(pystr)) { - return ascii_escape_unicode(pystr); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; - } -} - -static void -scanner_dealloc(PyObject *self) -{ - /* Deallocate scanner object */ - scanner_clear(self); - Py_TYPE(self)->tp_free(self); -} - -static int -scanner_traverse(PyObject *self, visitproc visit, void *arg) -{ - PyScannerObject *s; - assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - Py_VISIT(s->encoding); - Py_VISIT(s->strict); - Py_VISIT(s->object_hook); - Py_VISIT(s->parse_float); - Py_VISIT(s->parse_int); - Py_VISIT(s->parse_constant); - return 0; -} - -static int -scanner_clear(PyObject *self) -{ - PyScannerObject *s; - 
assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - Py_CLEAR(s->encoding); - Py_CLEAR(s->strict); - Py_CLEAR(s->object_hook); - Py_CLEAR(s->parse_float); - Py_CLEAR(s->parse_int); - Py_CLEAR(s->parse_constant); - return 0; -} - -static PyObject * -_parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON object from PyString pystr. - idx is the index of the first character after the opening curly brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing curly brace. - - Returns a new PyObject (usually a dict, but object_hook can change that) - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *rval = PyDict_New(); - PyObject *key = NULL; - PyObject *val = NULL; - char *encoding = PyString_AS_STRING(s->encoding); - int strict = PyObject_IsTrue(s->strict); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { - while (idx <= end_idx) { - /* read key */ - if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); - goto bail; - } - key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); - if (key == NULL) - goto bail; - idx = next_idx; - - /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); - goto bail; - } - idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* read any JSON data type */ - val = scan_once_str(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyDict_SetItem(rval, key, val) == -1) - goto bail; - - Py_CLEAR(key); - Py_CLEAR(val); - idx = next_idx; - - /* skip 
whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the object is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == '}') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - /* if object_hook is not None: rval = object_hook(rval) */ - if (s->object_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); - if (val == NULL) - goto bail; - Py_DECREF(rval); - rval = val; - val = NULL; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(key); - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON object from PyUnicode pystr. - idx is the index of the first character after the opening curly brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing curly brace. 
- - Returns a new PyObject (usually a dict, but object_hook can change that) - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyDict_New(); - PyObject *key = NULL; - int strict = PyObject_IsTrue(s->strict); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after { */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the object is non-empty */ - if (idx <= end_idx && str[idx] != '}') { - while (idx <= end_idx) { - /* read key */ - if (str[idx] != '"') { - raise_errmsg("Expecting property name", pystr, idx); - goto bail; - } - key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); - if (key == NULL) - goto bail; - idx = next_idx; - - /* skip whitespace between key and : delimiter, read :, skip whitespace */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - if (idx > end_idx || str[idx] != ':') { - raise_errmsg("Expecting : delimiter", pystr, idx); - goto bail; - } - idx++; - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* read any JSON term */ - val = scan_once_unicode(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyDict_SetItem(rval, key, val) == -1) - goto bail; - - Py_CLEAR(key); - Py_CLEAR(val); - idx = next_idx; - - /* skip whitespace before } or , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the object is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == '}') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , delimiter */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be '}' */ - if (idx > end_idx || str[idx] != '}') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - - /* if object_hook 
is not None: rval = object_hook(rval) */ - if (s->object_hook != Py_None) { - val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); - if (val == NULL) - goto bail; - Py_DECREF(rval); - rval = val; - val = NULL; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(key); - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON array from PyString pystr. - idx is the index of the first character after the opening brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing brace. - - Returns a new PyList - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyList_New(0); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { - while (idx <= end_idx) { - - /* read any JSON term and de-tuplefy the (rval, idx) */ - val = scan_once_str(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyList_Append(rval, val) == -1) - goto bail; - - Py_CLEAR(val); - idx = next_idx; - - /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the array is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == ']') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - *next_idx_ptr = 
idx + 1; - return rval; -bail: - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON array from PyString pystr. - idx is the index of the first character after the opening brace. - *next_idx_ptr is a return-by-reference index to the first character after - the closing brace. - - Returns a new PyList - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - PyObject *val = NULL; - PyObject *rval = PyList_New(0); - Py_ssize_t next_idx; - if (rval == NULL) - return NULL; - - /* skip whitespace after [ */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* only loop if the array is non-empty */ - if (idx <= end_idx && str[idx] != ']') { - while (idx <= end_idx) { - - /* read any JSON term */ - val = scan_once_unicode(s, pystr, idx, &next_idx); - if (val == NULL) - goto bail; - - if (PyList_Append(rval, val) == -1) - goto bail; - - Py_CLEAR(val); - idx = next_idx; - - /* skip whitespace between term and , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - - /* bail if the array is closed or we didn't get the , delimiter */ - if (idx > end_idx) break; - if (str[idx] == ']') { - break; - } - else if (str[idx] != ',') { - raise_errmsg("Expecting , delimiter", pystr, idx); - goto bail; - } - idx++; - - /* skip whitespace after , */ - while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; - } - } - - /* verify that idx < end_idx, str[idx] should be ']' */ - if (idx > end_idx || str[idx] != ']') { - raise_errmsg("Expecting object", pystr, end_idx); - goto bail; - } - *next_idx_ptr = idx + 1; - return rval; -bail: - Py_XDECREF(val); - Py_DECREF(rval); - return NULL; -} - -static PyObject * -_parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { - /* Read a JSON constant from PyString pystr. 
- constant is the constant string that was found - ("NaN", "Infinity", "-Infinity"). - idx is the index of the first character of the constant - *next_idx_ptr is a return-by-reference index to the first character after - the constant. - - Returns the result of parse_constant - */ - PyObject *cstr; - PyObject *rval; - /* constant is "NaN", "Infinity", or "-Infinity" */ - cstr = PyString_InternFromString(constant); - if (cstr == NULL) - return NULL; - - /* rval = parse_constant(constant) */ - rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); - idx += PyString_GET_SIZE(cstr); - Py_DECREF(cstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * -_match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { - /* Read a JSON number from PyString pystr. - idx is the index of the first character of the number - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of that number: - PyInt, PyLong, or PyFloat. - May return other types if parse_int or parse_float are set - */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; - Py_ssize_t idx = start; - int is_float = 0; - PyObject *rval; - PyObject *numstr; - - /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - } - - /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { - idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { - idx++; - } - /* no integer digits, error */ - else { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - /* if the next char is '.' 
followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { - is_float = 1; - idx += 2; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - - /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { - - /* save the index of the 'e' or 'E' just in case we need to backtrack */ - Py_ssize_t e_start = idx; - idx++; - - /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; - - /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - - /* if we got a digit, then parse as float. if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { - is_float = 1; - } - else { - idx = e_start; - } - } - - /* copy the section we determined to be a number */ - numstr = PyString_FromStringAndSize(&str[start], idx - start); - if (numstr == NULL) - return NULL; - if (is_float) { - /* parse as a float using a fast path if available, otherwise call user defined method */ - if (s->parse_float != (PyObject *)&PyFloat_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); - } - else { - rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); - } - } - else { - /* parse as an int using a fast path if available, otherwise call user defined method */ - if (s->parse_int != (PyObject *)&PyInt_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); - } - else { - rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); - } - } - Py_DECREF(numstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * -_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { - /* Read a JSON number from PyUnicode pystr. 
- idx is the index of the first character of the number - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of that number: - PyInt, PyLong, or PyFloat. - May return other types if parse_int or parse_float are set - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; - Py_ssize_t idx = start; - int is_float = 0; - PyObject *rval; - PyObject *numstr; - - /* read a sign if it's there, make sure it's not the end of the string */ - if (str[idx] == '-') { - idx++; - if (idx > end_idx) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - } - - /* read as many integer digits as we find as long as it doesn't start with 0 */ - if (str[idx] >= '1' && str[idx] <= '9') { - idx++; - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - /* if it starts with 0 we only expect one integer digit */ - else if (str[idx] == '0') { - idx++; - } - /* no integer digits, error */ - else { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - - /* if the next char is '.' followed by a digit then read all float digits */ - if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { - is_float = 1; - idx += 2; - while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - } - - /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ - if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { - Py_ssize_t e_start = idx; - idx++; - - /* read an exponent sign if present */ - if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; - - /* read all digits */ - while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; - - /* if we got a digit, then parse as float. 
if not, backtrack */ - if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { - is_float = 1; - } - else { - idx = e_start; - } - } - - /* copy the section we determined to be a number */ - numstr = PyUnicode_FromUnicode(&str[start], idx - start); - if (numstr == NULL) - return NULL; - if (is_float) { - /* parse as a float using a fast path if available, otherwise call user defined method */ - if (s->parse_float != (PyObject *)&PyFloat_Type) { - rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); - } - else { - rval = PyFloat_FromString(numstr, NULL); - } - } - else { - /* no fast path for unicode -> int, just call */ - rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); - } - Py_DECREF(numstr); - *next_idx_ptr = idx; - return rval; -} - -static PyObject * -scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) -{ - /* Read one JSON term (of any kind) from PyString pystr. - idx is the index of the first character of the term - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of the term. 
- */ - char *str = PyString_AS_STRING(pystr); - Py_ssize_t length = PyString_GET_SIZE(pystr); - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - switch (str[idx]) { - case '"': - /* string */ - return scanstring_str(pystr, idx + 1, - PyString_AS_STRING(s->encoding), - PyObject_IsTrue(s->strict), - next_idx_ptr); - case '{': - /* object */ - return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); - case '[': - /* array */ - return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); - case 'n': - /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { - Py_INCREF(Py_None); - *next_idx_ptr = idx + 4; - return Py_None; - } - break; - case 't': - /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { - Py_INCREF(Py_True); - *next_idx_ptr = idx + 4; - return Py_True; - } - break; - case 'f': - /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { - Py_INCREF(Py_False); - *next_idx_ptr = idx + 5; - return Py_False; - } - break; - case 'N': - /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); - } - break; - case 'I': - /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return _parse_constant(s, "Infinity", idx, next_idx_ptr); - } - break; - case '-': - /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); - } - break; - } - /* Didn't find a string, object, array, or named constant. 
Look for a number. */ - return _match_number_str(s, pystr, idx, next_idx_ptr); -} - -static PyObject * -scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) -{ - /* Read one JSON term (of any kind) from PyUnicode pystr. - idx is the index of the first character of the term - *next_idx_ptr is a return-by-reference index to the first character after - the number. - - Returns a new PyObject representation of the term. - */ - Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); - Py_ssize_t length = PyUnicode_GET_SIZE(pystr); - if (idx >= length) { - PyErr_SetNone(PyExc_StopIteration); - return NULL; - } - switch (str[idx]) { - case '"': - /* string */ - return scanstring_unicode(pystr, idx + 1, - PyObject_IsTrue(s->strict), - next_idx_ptr); - case '{': - /* object */ - return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); - case '[': - /* array */ - return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); - case 'n': - /* null */ - if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { - Py_INCREF(Py_None); - *next_idx_ptr = idx + 4; - return Py_None; - } - break; - case 't': - /* true */ - if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { - Py_INCREF(Py_True); - *next_idx_ptr = idx + 4; - return Py_True; - } - break; - case 'f': - /* false */ - if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { - Py_INCREF(Py_False); - *next_idx_ptr = idx + 5; - return Py_False; - } - break; - case 'N': - /* NaN */ - if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { - return _parse_constant(s, "NaN", idx, next_idx_ptr); - } - break; - case 'I': - /* Infinity */ - if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { - return 
_parse_constant(s, "Infinity", idx, next_idx_ptr); - } - break; - case '-': - /* -Infinity */ - if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { - return _parse_constant(s, "-Infinity", idx, next_idx_ptr); - } - break; - } - /* Didn't find a string, object, array, or named constant. Look for a number. */ - return _match_number_unicode(s, pystr, idx, next_idx_ptr); -} - -static PyObject * -scanner_call(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* Python callable interface to scan_once_{str,unicode} */ - PyObject *pystr; - PyObject *rval; - Py_ssize_t idx; - Py_ssize_t next_idx = -1; - static char *kwlist[] = {"string", "idx", NULL}; - PyScannerObject *s; - assert(PyScanner_Check(self)); - s = (PyScannerObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) - return NULL; - - if (PyString_Check(pystr)) { - rval = scan_once_str(s, pystr, idx, &next_idx); - } - else if (PyUnicode_Check(pystr)) { - rval = scan_once_unicode(s, pystr, idx, &next_idx); - } - else { - PyErr_Format(PyExc_TypeError, - "first argument must be a string, not %.80s", - Py_TYPE(pystr)->tp_name); - return NULL; - } - return _build_rval_index_tuple(rval, next_idx); -} - -static PyObject * -scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyScannerObject *s; - s = (PyScannerObject *)type->tp_alloc(type, 0); - if (s != NULL) { - s->encoding = NULL; - s->strict = NULL; - s->object_hook = NULL; - s->parse_float = NULL; - s->parse_int = NULL; - s->parse_constant = NULL; - } - return (PyObject *)s; -} - -static int -scanner_init(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* Initialize Scanner object */ - PyObject *ctx; - static char *kwlist[] = {"context", NULL}; - PyScannerObject *s; - - assert(PyScanner_Check(self)); - s = 
(PyScannerObject *)self; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) - return -1; - - /* PyString_AS_STRING is used on encoding */ - s->encoding = PyObject_GetAttrString(ctx, "encoding"); - if (s->encoding == Py_None) { - Py_DECREF(Py_None); - s->encoding = PyString_InternFromString(DEFAULT_ENCODING); - } - else if (PyUnicode_Check(s->encoding)) { - PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); - Py_DECREF(s->encoding); - s->encoding = tmp; - } - if (s->encoding == NULL || !PyString_Check(s->encoding)) - goto bail; - - /* All of these will fail "gracefully" so we don't need to verify them */ - s->strict = PyObject_GetAttrString(ctx, "strict"); - if (s->strict == NULL) - goto bail; - s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); - if (s->object_hook == NULL) - goto bail; - s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); - if (s->parse_float == NULL) - goto bail; - s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); - if (s->parse_int == NULL) - goto bail; - s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); - if (s->parse_constant == NULL) - goto bail; - - return 0; - -bail: - Py_CLEAR(s->encoding); - Py_CLEAR(s->strict); - Py_CLEAR(s->object_hook); - Py_CLEAR(s->parse_float); - Py_CLEAR(s->parse_int); - Py_CLEAR(s->parse_constant); - return -1; -} - -PyDoc_STRVAR(scanner_doc, "JSON scanner object"); - -static -PyTypeObject PyScannerType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ - "simplejson._speedups.Scanner", /* tp_name */ - sizeof(PyScannerObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - scanner_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - scanner_call, /* tp_call */ - 0, /* tp_str */ - 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ - 0,/* 
PyObject_GenericSetAttr, */ /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ - scanner_doc, /* tp_doc */ - scanner_traverse, /* tp_traverse */ - scanner_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - scanner_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - scanner_init, /* tp_init */ - 0,/* PyType_GenericAlloc, */ /* tp_alloc */ - scanner_new, /* tp_new */ - 0,/* PyObject_GC_Del, */ /* tp_free */ -}; - -static PyObject * -encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) -{ - PyEncoderObject *s; - s = (PyEncoderObject *)type->tp_alloc(type, 0); - if (s != NULL) { - s->markers = NULL; - s->defaultfn = NULL; - s->encoder = NULL; - s->indent = NULL; - s->key_separator = NULL; - s->item_separator = NULL; - s->sort_keys = NULL; - s->skipkeys = NULL; - } - return (PyObject *)s; -} - -static int -encoder_init(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* initialize Encoder object */ - static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; - - PyEncoderObject *s; - PyObject *allow_nan; - - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, - &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) - return -1; - - Py_INCREF(s->markers); - Py_INCREF(s->defaultfn); - Py_INCREF(s->encoder); - Py_INCREF(s->indent); - Py_INCREF(s->key_separator); - Py_INCREF(s->item_separator); - Py_INCREF(s->sort_keys); - Py_INCREF(s->skipkeys); - s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == 
(PyCFunction)py_encode_basestring_ascii); - s->allow_nan = PyObject_IsTrue(allow_nan); - return 0; -} - -static PyObject * -encoder_call(PyObject *self, PyObject *args, PyObject *kwds) -{ - /* Python callable interface to encode_listencode_obj */ - static char *kwlist[] = {"obj", "_current_indent_level", NULL}; - PyObject *obj; - PyObject *rval; - Py_ssize_t indent_level; - PyEncoderObject *s; - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, - &obj, _convertPyInt_AsSsize_t, &indent_level)) - return NULL; - rval = PyList_New(0); - if (rval == NULL) - return NULL; - if (encoder_listencode_obj(s, rval, obj, indent_level)) { - Py_DECREF(rval); - return NULL; - } - return rval; -} - -static PyObject * -_encoded_const(PyObject *obj) -{ - /* Return the JSON string representation of None, True, False */ - if (obj == Py_None) { - static PyObject *s_null = NULL; - if (s_null == NULL) { - s_null = PyString_InternFromString("null"); - } - Py_INCREF(s_null); - return s_null; - } - else if (obj == Py_True) { - static PyObject *s_true = NULL; - if (s_true == NULL) { - s_true = PyString_InternFromString("true"); - } - Py_INCREF(s_true); - return s_true; - } - else if (obj == Py_False) { - static PyObject *s_false = NULL; - if (s_false == NULL) { - s_false = PyString_InternFromString("false"); - } - Py_INCREF(s_false); - return s_false; - } - else { - PyErr_SetString(PyExc_ValueError, "not a const"); - return NULL; - } -} - -static PyObject * -encoder_encode_float(PyEncoderObject *s, PyObject *obj) -{ - /* Return the JSON representation of a PyFloat */ - double i = PyFloat_AS_DOUBLE(obj); - if (!Py_IS_FINITE(i)) { - if (!s->allow_nan) { - PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); - return NULL; - } - if (i > 0) { - return PyString_FromString("Infinity"); - } - else if (i < 0) { - return PyString_FromString("-Infinity"); - } - else { - return 
PyString_FromString("NaN"); - } - } - /* Use a better float format here? */ - return PyObject_Repr(obj); -} - -static PyObject * -encoder_encode_string(PyEncoderObject *s, PyObject *obj) -{ - /* Return the JSON representation of a string */ - if (s->fast_encode) - return py_encode_basestring_ascii(NULL, obj); - else - return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); -} - -static int -_steal_list_append(PyObject *lst, PyObject *stolen) -{ - /* Append stolen and then decrement its reference count */ - int rval = PyList_Append(lst, stolen); - Py_DECREF(stolen); - return rval; -} - -static int -encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) -{ - /* Encode Python object obj to a JSON term, rval is a PyList */ - PyObject *newobj; - int rv; - - if (obj == Py_None || obj == Py_True || obj == Py_False) { - PyObject *cstr = _encoded_const(obj); - if (cstr == NULL) - return -1; - return _steal_list_append(rval, cstr); - } - else if (PyString_Check(obj) || PyUnicode_Check(obj)) - { - PyObject *encoded = encoder_encode_string(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyInt_Check(obj) || PyLong_Check(obj)) { - PyObject *encoded = PyObject_Str(obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyFloat_Check(obj)) { - PyObject *encoded = encoder_encode_float(s, obj); - if (encoded == NULL) - return -1; - return _steal_list_append(rval, encoded); - } - else if (PyList_Check(obj) || PyTuple_Check(obj)) { - return encoder_listencode_list(s, rval, obj, indent_level); - } - else if (PyDict_Check(obj)) { - return encoder_listencode_dict(s, rval, obj, indent_level); - } - else { - PyObject *ident = NULL; - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(obj); - if (ident == NULL) - return -1; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - 
PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - Py_DECREF(ident); - return -1; - } - if (PyDict_SetItem(s->markers, ident, obj)) { - Py_DECREF(ident); - return -1; - } - } - newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); - if (newobj == NULL) { - Py_XDECREF(ident); - return -1; - } - rv = encoder_listencode_obj(s, rval, newobj, indent_level); - Py_DECREF(newobj); - if (rv) { - Py_XDECREF(ident); - return -1; - } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) { - Py_XDECREF(ident); - return -1; - } - Py_XDECREF(ident); - } - return rv; - } -} - -static int -encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) -{ - /* Encode Python dict dct a JSON term, rval is a PyList */ - static PyObject *open_dict = NULL; - static PyObject *close_dict = NULL; - static PyObject *empty_dict = NULL; - PyObject *kstr = NULL; - PyObject *ident = NULL; - PyObject *key, *value; - Py_ssize_t pos; - int skipkeys; - Py_ssize_t idx; - - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { - open_dict = PyString_InternFromString("{"); - close_dict = PyString_InternFromString("}"); - empty_dict = PyString_InternFromString("{}"); - if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) - return -1; - } - if (PyDict_Size(dct) == 0) - return PyList_Append(rval, empty_dict); - - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(dct); - if (ident == NULL) - goto bail; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - goto bail; - } - if (PyDict_SetItem(s->markers, ident, dct)) { - goto bail; - } - } - - if (PyList_Append(rval, open_dict)) - goto bail; - - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level += 1; - /* - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) - separator = _item_separator + 
newline_indent - buf += newline_indent - */ - } - - /* TODO: C speedup not implemented for sort_keys */ - - pos = 0; - skipkeys = PyObject_IsTrue(s->skipkeys); - idx = 0; - while (PyDict_Next(dct, &pos, &key, &value)) { - PyObject *encoded; - - if (PyString_Check(key) || PyUnicode_Check(key)) { - Py_INCREF(key); - kstr = key; - } - else if (PyFloat_Check(key)) { - kstr = encoder_encode_float(s, key); - if (kstr == NULL) - goto bail; - } - else if (PyInt_Check(key) || PyLong_Check(key)) { - kstr = PyObject_Str(key); - if (kstr == NULL) - goto bail; - } - else if (key == Py_True || key == Py_False || key == Py_None) { - kstr = _encoded_const(key); - if (kstr == NULL) - goto bail; - } - else if (skipkeys) { - continue; - } - else { - /* TODO: include repr of key */ - PyErr_SetString(PyExc_ValueError, "keys must be a string"); - goto bail; - } - - if (idx) { - if (PyList_Append(rval, s->item_separator)) - goto bail; - } - - encoded = encoder_encode_string(s, kstr); - Py_CLEAR(kstr); - if (encoded == NULL) - goto bail; - if (PyList_Append(rval, encoded)) { - Py_DECREF(encoded); - goto bail; - } - Py_DECREF(encoded); - if (PyList_Append(rval, s->key_separator)) - goto bail; - if (encoder_listencode_obj(s, rval, value, indent_level)) - goto bail; - idx += 1; - } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) - goto bail; - Py_CLEAR(ident); - } - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level -= 1; - /* - yield '\n' + (' ' * (_indent * _current_indent_level)) - */ - } - if (PyList_Append(rval, close_dict)) - goto bail; - return 0; - -bail: - Py_XDECREF(kstr); - Py_XDECREF(ident); - return -1; -} - - -static int -encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) -{ - /* Encode Python list seq to a JSON term, rval is a PyList */ - static PyObject *open_array = NULL; - static PyObject *close_array = NULL; - static PyObject *empty_array = NULL; - PyObject *ident = NULL; - PyObject *s_fast 
= NULL; - Py_ssize_t num_items; - PyObject **seq_items; - Py_ssize_t i; - - if (open_array == NULL || close_array == NULL || empty_array == NULL) { - open_array = PyString_InternFromString("["); - close_array = PyString_InternFromString("]"); - empty_array = PyString_InternFromString("[]"); - if (open_array == NULL || close_array == NULL || empty_array == NULL) - return -1; - } - ident = NULL; - s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); - if (s_fast == NULL) - return -1; - num_items = PySequence_Fast_GET_SIZE(s_fast); - if (num_items == 0) { - Py_DECREF(s_fast); - return PyList_Append(rval, empty_array); - } - - if (s->markers != Py_None) { - int has_key; - ident = PyLong_FromVoidPtr(seq); - if (ident == NULL) - goto bail; - has_key = PyDict_Contains(s->markers, ident); - if (has_key) { - if (has_key != -1) - PyErr_SetString(PyExc_ValueError, "Circular reference detected"); - goto bail; - } - if (PyDict_SetItem(s->markers, ident, seq)) { - goto bail; - } - } - - seq_items = PySequence_Fast_ITEMS(s_fast); - if (PyList_Append(rval, open_array)) - goto bail; - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level += 1; - /* - newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) - separator = _item_separator + newline_indent - buf += newline_indent - */ - } - for (i = 0; i < num_items; i++) { - PyObject *obj = seq_items[i]; - if (i) { - if (PyList_Append(rval, s->item_separator)) - goto bail; - } - if (encoder_listencode_obj(s, rval, obj, indent_level)) - goto bail; - } - if (ident != NULL) { - if (PyDict_DelItem(s->markers, ident)) - goto bail; - Py_CLEAR(ident); - } - if (s->indent != Py_None) { - /* TODO: DOES NOT RUN */ - indent_level -= 1; - /* - yield '\n' + (' ' * (_indent * _current_indent_level)) - */ - } - if (PyList_Append(rval, close_array)) - goto bail; - Py_DECREF(s_fast); - return 0; - -bail: - Py_XDECREF(ident); - Py_DECREF(s_fast); - return -1; -} - -static void -encoder_dealloc(PyObject *self) -{ 
- /* Deallocate Encoder */ - encoder_clear(self); - Py_TYPE(self)->tp_free(self); -} - -static int -encoder_traverse(PyObject *self, visitproc visit, void *arg) -{ - PyEncoderObject *s; - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - Py_VISIT(s->markers); - Py_VISIT(s->defaultfn); - Py_VISIT(s->encoder); - Py_VISIT(s->indent); - Py_VISIT(s->key_separator); - Py_VISIT(s->item_separator); - Py_VISIT(s->sort_keys); - Py_VISIT(s->skipkeys); - return 0; -} - -static int -encoder_clear(PyObject *self) -{ - /* Deallocate Encoder */ - PyEncoderObject *s; - assert(PyEncoder_Check(self)); - s = (PyEncoderObject *)self; - Py_CLEAR(s->markers); - Py_CLEAR(s->defaultfn); - Py_CLEAR(s->encoder); - Py_CLEAR(s->indent); - Py_CLEAR(s->key_separator); - Py_CLEAR(s->item_separator); - Py_CLEAR(s->sort_keys); - Py_CLEAR(s->skipkeys); - return 0; -} - -PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); - -static -PyTypeObject PyEncoderType = { - PyObject_HEAD_INIT(NULL) - 0, /* tp_internal */ - "simplejson._speedups.Encoder", /* tp_name */ - sizeof(PyEncoderObject), /* tp_basicsize */ - 0, /* tp_itemsize */ - encoder_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - encoder_call, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ - encoder_doc, /* tp_doc */ - encoder_traverse, /* tp_traverse */ - encoder_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - encoder_members, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - encoder_init, /* tp_init */ - 0, /* tp_alloc */ 
- encoder_new, /* tp_new */ - 0, /* tp_free */ -}; - -static PyMethodDef speedups_methods[] = { - {"encode_basestring_ascii", - (PyCFunction)py_encode_basestring_ascii, - METH_O, - pydoc_encode_basestring_ascii}, - {"scanstring", - (PyCFunction)py_scanstring, - METH_VARARGS, - pydoc_scanstring}, - {NULL, NULL, 0, NULL} -}; - -PyDoc_STRVAR(module_doc, -"simplejson speedups\n"); - -void -init_speedups(void) -{ - PyObject *m; - PyScannerType.tp_new = PyType_GenericNew; - if (PyType_Ready(&PyScannerType) < 0) - return; - PyEncoderType.tp_new = PyType_GenericNew; - if (PyType_Ready(&PyEncoderType) < 0) - return; - m = Py_InitModule3("_speedups", speedups_methods, module_doc); - Py_INCREF((PyObject*)&PyScannerType); - PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); - Py_INCREF((PyObject*)&PyEncoderType); - PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); -} diff --git a/framework/pym/simplejson/compat.py b/framework/pym/simplejson/compat.py new file mode 100644 index 0000000000..5fc1412844 --- /dev/null +++ b/framework/pym/simplejson/compat.py @@ -0,0 +1,34 @@ +"""Python 3 compatibility shims +""" +import sys +if sys.version_info[0] < 3: + PY3 = False + def b(s): + return s + try: + from cStringIO import StringIO + except ImportError: + from StringIO import StringIO + BytesIO = StringIO + text_type = unicode + binary_type = str + string_types = (basestring,) + integer_types = (int, long) + unichr = unichr + reload_module = reload +else: + PY3 = True + if sys.version_info[:2] >= (3, 4): + from importlib import reload as reload_module + else: + from imp import reload as reload_module + def b(s): + return bytes(s, 'latin1') + from io import StringIO, BytesIO + text_type = str + binary_type = bytes + string_types = (str,) + integer_types = (int,) + unichr = chr + +long_type = integer_types[-1] diff --git a/framework/pym/simplejson/decoder.py b/framework/pym/simplejson/decoder.py index b769ea486c..7f0b0568fd 100644 --- 
a/framework/pym/simplejson/decoder.py +++ b/framework/pym/simplejson/decoder.py @@ -1,53 +1,37 @@ """Implementation of JSONDecoder """ +from __future__ import absolute_import import re import sys import struct - -from simplejson.scanner import make_scanner -try: - from simplejson._speedups import scanstring as c_scanstring -except ImportError: - c_scanstring = None - +from .compat import PY3, unichr +from .scanner import make_scanner, JSONDecodeError + +def _import_c_scanstring(): + try: + from ._speedups import scanstring + return scanstring + except ImportError: + return None +c_scanstring = _import_c_scanstring() + +# NOTE (3.1.0): JSONDecodeError may still be imported from this module for +# compatibility, but it was never in the __all__ __all__ = ['JSONDecoder'] FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL def _floatconstants(): - _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') - if sys.byteorder != 'big': - _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] - nan, inf = struct.unpack('dd', _BYTES) + if sys.version_info < (2, 6): + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + nan, inf = struct.unpack('>dd', _BYTES) + else: + nan = float('nan') + inf = float('inf') return nan, inf, -inf NaN, PosInf, NegInf = _floatconstants() - -def linecol(doc, pos): - lineno = doc.count('\n', 0, pos) + 1 - if lineno == 1: - colno = pos - else: - colno = pos - doc.rindex('\n', 0, pos) - return lineno, colno - - -def errmsg(msg, doc, pos, end=None): - # Note that this function is called from _speedups - lineno, colno = linecol(doc, pos) - if end is None: - #fmt = '{0}: line {1} column {2} (char {3})' - #return fmt.format(msg, lineno, colno, pos) - fmt = '%s: line %d column %d (char %d)' - return fmt % (msg, lineno, colno, pos) - endlineno, endcolno = linecol(doc, end) - #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' - #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) - fmt = '%s: line %d column %d - line %d 
column %d (char %d - %d)' - return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) - - _CONSTANTS = { '-Infinity': NegInf, 'Infinity': PosInf, @@ -62,13 +46,15 @@ def errmsg(msg, doc, pos, end=None): DEFAULT_ENCODING = "utf-8" -def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): +def py_scanstring(s, end, encoding=None, strict=True, + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join, + _PY3=PY3, _maxunicode=sys.maxunicode): """Scan the string s for a JSON string. End is the index of the character in s after the quote that started the JSON string. Unescapes all valid JSON string escape sequences and raises ValueError on attempt to decode an invalid string. If strict is False then literal control characters are allowed in the string. - + Returns a tuple of the decoded string and the index of the character in s after the end quote.""" if encoding is None: @@ -79,13 +65,13 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU while 1: chunk = _m(s, end) if chunk is None: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) + raise JSONDecodeError( + "Unterminated string starting at", s, begin) end = chunk.end() content, terminator = chunk.groups() # Content is contains zero or more unescaped string characters if content: - if not isinstance(content, unicode): + if not _PY3 and not isinstance(content, unicode): content = unicode(content, encoding) _append(content) # Terminator is the end of string, a literal control character, @@ -94,49 +80,57 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU break elif terminator != '\\': if strict: - msg = "Invalid control character %r at" % (terminator,) - #msg = "Invalid control character {0!r} at".format(terminator) - raise ValueError(errmsg(msg, s, end)) + msg = "Invalid control character %r at" + raise JSONDecodeError(msg, s, end) else: _append(terminator) continue try: esc = s[end] except 
IndexError: - raise ValueError( - errmsg("Unterminated string starting at", s, begin)) + raise JSONDecodeError( + "Unterminated string starting at", s, begin) # If not a unicode escape sequence, must be in the lookup table if esc != 'u': try: char = _b[esc] except KeyError: - msg = "Invalid \\escape: " + repr(esc) - raise ValueError(errmsg(msg, s, end)) + msg = "Invalid \\X escape sequence %r" + raise JSONDecodeError(msg, s, end) end += 1 else: # Unicode escape sequence + msg = "Invalid \\uXXXX escape sequence" esc = s[end + 1:end + 5] - next_end = end + 5 - if len(esc) != 4: - msg = "Invalid \\uXXXX escape" - raise ValueError(errmsg(msg, s, end)) - uni = int(esc, 16) + escX = esc[1:2] + if len(esc) != 4 or escX == 'x' or escX == 'X': + raise JSONDecodeError(msg, s, end - 1) + try: + uni = int(esc, 16) + except ValueError: + raise JSONDecodeError(msg, s, end - 1) + end += 5 # Check for surrogate pair on UCS-4 systems - if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: - msg = "Invalid \\uXXXX\\uXXXX surrogate pair" - if not s[end + 5:end + 7] == '\\u': - raise ValueError(errmsg(msg, s, end)) - esc2 = s[end + 7:end + 11] - if len(esc2) != 4: - raise ValueError(errmsg(msg, s, end)) - uni2 = int(esc2, 16) - uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) - next_end += 6 + # Note that this will join high/low surrogate pairs + # but will also pass unpaired surrogates through + if (_maxunicode > 65535 and + uni & 0xfc00 == 0xd800 and + s[end:end + 2] == '\\u'): + esc2 = s[end + 2:end + 6] + escX = esc2[1:2] + if len(esc2) == 4 and not (escX == 'x' or escX == 'X'): + try: + uni2 = int(esc2, 16) + except ValueError: + raise JSONDecodeError(msg, s, end) + if uni2 & 0xfc00 == 0xdc00: + uni = 0x10000 + (((uni - 0xd800) << 10) | + (uni2 - 0xdc00)) + end += 6 char = unichr(uni) - end = next_end # Append the unescaped character _append(char) - return u''.join(chunks), end + return _join(chunks), end # Use speedup if available @@ -145,8 +139,15 @@ def 
py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) WHITESPACE_STR = ' \t\n\r' -def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR): - pairs = {} +def JSONObject(state, encoding, strict, scan_once, object_hook, + object_pairs_hook, memo=None, + _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state + # Backwards compatibility + if memo is None: + memo = {} + memo_get = memo.setdefault + pairs = [] # Use a slice to prevent IndexError from being raised, the following # check will raise a more specific ValueError if the string is empty nextchar = s[end:end + 1] @@ -157,19 +158,28 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE nextchar = s[end:end + 1] # Trivial empty object if nextchar == '}': + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + 1 + pairs = {} + if object_hook is not None: + pairs = object_hook(pairs) return pairs, end + 1 elif nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end)) + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end) end += 1 while True: key, end = scanstring(s, end, encoding, strict) + key = memo_get(key, key) # To skip some function call overhead we optimize the fast paths where # the JSON key separator is ": " or just ":". 
if s[end:end + 1] != ':': end = _w(s, end).end() if s[end:end + 1] != ':': - raise ValueError(errmsg("Expecting : delimiter", s, end)) + raise JSONDecodeError("Expecting ':' delimiter", s, end) end += 1 @@ -181,11 +191,8 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE except IndexError: pass - try: - value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) - pairs[key] = value + value, end = scan_once(s, end) + pairs.append((key, value)) try: nextchar = s[end] @@ -199,7 +206,7 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE if nextchar == '}': break elif nextchar != ',': - raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1) try: nextchar = s[end] @@ -214,13 +221,20 @@ def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE end += 1 if nextchar != '"': - raise ValueError(errmsg("Expecting property name", s, end - 1)) - + raise JSONDecodeError( + "Expecting property name enclosed in double quotes", + s, end - 1) + + if object_pairs_hook is not None: + result = object_pairs_hook(pairs) + return result, end + pairs = dict(pairs) if object_hook is not None: pairs = object_hook(pairs) return pairs, end -def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): +def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): + (s, end) = state values = [] nextchar = s[end:end + 1] if nextchar in _ws: @@ -229,12 +243,11 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): # Look-ahead for trivial empty array if nextchar == ']': return values, end + 1 + elif nextchar == '': + raise JSONDecodeError("Expecting value or ']'", s, end) _append = values.append while True: - try: - value, end = scan_once(s, end) - except StopIteration: - raise ValueError(errmsg("Expecting object", s, end)) + value, 
end = scan_once(s, end) _append(value) nextchar = s[end:end + 1] if nextchar in _ws: @@ -244,7 +257,7 @@ def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): if nextchar == ']': break elif nextchar != ',': - raise ValueError(errmsg("Expecting , delimiter", s, end)) + raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1) try: if s[end] in _ws: @@ -268,7 +281,7 @@ class JSONDecoder(object): +---------------+-------------------+ | array | list | +---------------+-------------------+ - | string | unicode | + | string | str, unicode | +---------------+-------------------+ | number (int) | int, long | +---------------+-------------------+ @@ -287,37 +300,56 @@ class JSONDecoder(object): """ def __init__(self, encoding=None, object_hook=None, parse_float=None, - parse_int=None, parse_constant=None, strict=True): - """``encoding`` determines the encoding used to interpret any ``str`` - objects decoded by this instance (utf-8 by default). It has no - effect when decoding ``unicode`` objects. + parse_int=None, parse_constant=None, strict=True, + object_pairs_hook=None): + """ + *encoding* determines the encoding used to interpret any + :class:`str` objects decoded by this instance (``'utf-8'`` by + default). It has no effect when decoding :class:`unicode` objects. Note that currently only encodings that are a superset of ASCII work, - strings of other encodings should be passed in as ``unicode``. + strings of other encodings should be passed in as :class:`unicode`. - ``object_hook``, if specified, will be called with the result - of every JSON object decoded and its return value will be used in - place of the given ``dict``. This can be used to provide custom + *object_hook*, if specified, will be called with the result of every + JSON object decoded and its return value will be used in place of the + given :class:`dict`. This can be used to provide custom deserializations (e.g. to support JSON-RPC class hinting). 
- ``parse_float``, if specified, will be called with the string - of every JSON float to be decoded. By default this is equivalent to - float(num_str). This can be used to use another datatype or parser - for JSON floats (e.g. decimal.Decimal). - - ``parse_int``, if specified, will be called with the string - of every JSON int to be decoded. By default this is equivalent to - int(num_str). This can be used to use another datatype or parser - for JSON integers (e.g. float). - - ``parse_constant``, if specified, will be called with one of the - following strings: -Infinity, Infinity, NaN. - This can be used to raise an exception if invalid JSON numbers - are encountered. + *object_pairs_hook* is an optional function that will be called with + the result of any object literal decode with an ordered list of pairs. + The return value of *object_pairs_hook* will be used instead of the + :class:`dict`. This feature can be used to implement custom decoders + that rely on the order that the key and value pairs are decoded (for + example, :func:`collections.OrderedDict` will remember the order of + insertion). If *object_hook* is also defined, the *object_pairs_hook* + takes priority. + + *parse_float*, if specified, will be called with the string of every + JSON float to be decoded. By default, this is equivalent to + ``float(num_str)``. This can be used to use another datatype or parser + for JSON floats (e.g. :class:`decimal.Decimal`). + + *parse_int*, if specified, will be called with the string of every + JSON int to be decoded. By default, this is equivalent to + ``int(num_str)``. This can be used to use another datatype or parser + for JSON integers (e.g. :class:`float`). + + *parse_constant*, if specified, will be called with one of the + following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This + can be used to raise an exception if invalid JSON numbers are + encountered. 
+ + *strict* controls the parser's behavior when it encounters an + invalid control character in a string. The default setting of + ``True`` means that unescaped control characters are parse errors, if + ``False`` then control characters will be allowed in strings. """ + if encoding is None: + encoding = DEFAULT_ENCODING self.encoding = encoding self.object_hook = object_hook + self.object_pairs_hook = object_pairs_hook self.parse_float = parse_float or float self.parse_int = parse_int or int self.parse_constant = parse_constant or _CONSTANTS.__getitem__ @@ -325,30 +357,44 @@ def __init__(self, encoding=None, object_hook=None, parse_float=None, self.parse_object = JSONObject self.parse_array = JSONArray self.parse_string = scanstring + self.memo = {} self.scan_once = make_scanner(self) - def decode(self, s, _w=WHITESPACE.match): + def decode(self, s, _w=WHITESPACE.match, _PY3=PY3): """Return the Python representation of ``s`` (a ``str`` or ``unicode`` instance containing a JSON document) """ - obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + if _PY3 and isinstance(s, bytes): + s = str(s, self.encoding) + obj, end = self.raw_decode(s) end = _w(s, end).end() if end != len(s): - raise ValueError(errmsg("Extra data", s, end, len(s))) + raise JSONDecodeError("Extra data", s, end, len(s)) return obj - def raw_decode(self, s, idx=0): - """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning - with a JSON document) and return a 2-tuple of the Python + def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3): + """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` + beginning with a JSON document) and return a 2-tuple of the Python representation and the index in ``s`` where the document ended. + Optionally, ``idx`` can be used to specify an offset in ``s`` where + the JSON document begins. This can be used to decode a JSON document from a string that may have extraneous data at the end. 
""" - try: - obj, end = self.scan_once(s, idx) - except StopIteration: - raise ValueError("No JSON object could be decoded") - return obj, end + if idx < 0: + # Ensure that raw_decode bails on negative indexes, the regex + # would otherwise mask this behavior. #98 + raise JSONDecodeError('Expecting value', s, idx) + if _PY3 and not isinstance(s, str): + raise TypeError("Input string must be text, not bytes") + # strip UTF-8 bom + if len(s) > idx: + ord0 = ord(s[idx]) + if ord0 == 0xfeff: + idx += 1 + elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf': + idx += 3 + return self.scan_once(s, idx=_w(s, idx).end()) diff --git a/framework/pym/simplejson/encoder.py b/framework/pym/simplejson/encoder.py index cf58290366..7ea172e7d2 100644 --- a/framework/pym/simplejson/encoder.py +++ b/framework/pym/simplejson/encoder.py @@ -1,17 +1,23 @@ """Implementation of JSONEncoder """ +from __future__ import absolute_import import re - -try: - from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii -except ImportError: - c_encode_basestring_ascii = None -try: - from simplejson._speedups import make_encoder as c_make_encoder -except ImportError: - c_make_encoder = None - -ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') +from operator import itemgetter +# Do not import Decimal directly to avoid reload issues +import decimal +from .compat import unichr, binary_type, text_type, string_types, integer_types, PY3 +def _import_speedups(): + try: + from . 
import _speedups + return _speedups.encode_basestring_ascii, _speedups.make_encoder + except ImportError: + return None, None +c_encode_basestring_ascii, c_make_encoder = _import_speedups() + +from .decoder import PosInf +from .raw_json import RawJSON + +ESCAPE = re.compile(r'[\x00-\x1f\\"]') ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') HAS_UTF8 = re.compile(r'[\x80-\xff]') ESCAPE_DCT = { @@ -27,25 +33,57 @@ #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) -# Assume this produces an infinity on all machines (probably not guaranteed) -INFINITY = float('1e66666') FLOAT_REPR = repr -def encode_basestring(s): +def encode_basestring(s, _PY3=PY3, _q=u'"'): """Return a JSON representation of a Python string """ + if _PY3: + if isinstance(s, bytes): + s = str(s, 'utf-8') + elif type(s) is not str: + # convert an str subclass instance to exact str + # raise a TypeError otherwise + s = str.__str__(s) + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = unicode(s, 'utf-8') + elif type(s) not in (str, unicode): + # convert an str subclass instance to exact str + # convert a unicode subclass instance to exact unicode + # raise a TypeError otherwise + if isinstance(s, str): + s = str.__str__(s) + else: + s = unicode.__getnewargs__(s)[0] def replace(match): return ESCAPE_DCT[match.group(0)] - return '"' + ESCAPE.sub(replace, s) + '"' + return _q + ESCAPE.sub(replace, s) + _q -def py_encode_basestring_ascii(s): +def py_encode_basestring_ascii(s, _PY3=PY3): """Return an ASCII-only JSON representation of a Python string """ - if isinstance(s, str) and HAS_UTF8.search(s) is not None: - s = s.decode('utf-8') + if _PY3: + if isinstance(s, bytes): + s = str(s, 'utf-8') + elif type(s) is not str: + # convert an str subclass instance to exact str + # raise a TypeError otherwise + s = str.__str__(s) + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = unicode(s, 'utf-8') + elif type(s) not in 
(str, unicode): + # convert an str subclass instance to exact str + # convert a unicode subclass instance to exact unicode + # raise a TypeError otherwise + if isinstance(s, str): + s = str.__str__(s) + else: + s = unicode.__getnewargs__(s)[0] def replace(match): s = match.group(0) try: @@ -65,7 +103,8 @@ def replace(match): return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' -encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii +encode_basestring_ascii = ( + c_encode_basestring_ascii or py_encode_basestring_ascii) class JSONEncoder(object): """Extensible JSON encoder for Python data structures. @@ -75,7 +114,7 @@ class JSONEncoder(object): +-------------------+---------------+ | Python | JSON | +===================+===============+ - | dict | object | + | dict, namedtuple | object | +-------------------+---------------+ | list, tuple | array | +-------------------+---------------+ @@ -98,9 +137,14 @@ class JSONEncoder(object): """ item_separator = ', ' key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, - check_circular=True, allow_nan=True, sort_keys=False, - indent=None, separators=None, encoding='utf-8', default=None): + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8', default=None, + use_decimal=True, namedtuple_as_object=True, + tuple_as_array=True, bigint_as_string=False, + item_sort_key=None, for_json=False, ignore_nan=False, + int_as_string_bitcount=None, iterable_as_array=False): """Constructor for JSONEncoder, with sensible defaults. If skipkeys is false, then it is a TypeError to attempt @@ -125,14 +169,17 @@ def __init__(self, skipkeys=False, ensure_ascii=True, sorted by key; this is useful for regression tests to ensure that JSON serializations can be compared on a day-to-day basis. - If indent is a non-negative integer, then JSON array - elements and object members will be pretty-printed with that - indent level. 
An indent level of 0 will only insert newlines. - None is the most compact representation. + If indent is a string, then JSON array elements and object members + will be pretty-printed with a newline followed by that string repeated + for each level of nesting. ``None`` (the default) selects the most compact + representation without any newlines. For backwards compatibility with + versions of simplejson earlier than 2.1.0, an integer is also accepted + and is converted to a string with that many spaces. - If specified, separators should be a (item_separator, key_separator) - tuple. The default is (', ', ': '). To get the most compact JSON - representation you should specify (',', ':') to eliminate whitespace. + If specified, separators should be an (item_separator, key_separator) + tuple. The default is (', ', ': ') if *indent* is ``None`` and + (',', ': ') otherwise. To get the most compact JSON representation, + you should specify (',', ':') to eliminate whitespace. If specified, default is a function that gets called for objects that can't otherwise be serialized. It should return a JSON encodable @@ -142,6 +189,41 @@ def __init__(self, skipkeys=False, ensure_ascii=True, transformed into unicode using that encoding prior to JSON-encoding. The default is UTF-8. + If use_decimal is true (default: ``True``), ``decimal.Decimal`` will + be supported directly by the encoder. For the inverse, decode JSON + with ``parse_float=decimal.Decimal``. + + If namedtuple_as_object is true (the default), objects with + ``_asdict()`` methods will be encoded as JSON objects. + + If tuple_as_array is true (the default), tuple (and subclasses) will + be encoded as JSON arrays. + + If *iterable_as_array* is true (default: ``False``), + any object not in the above table that implements ``__iter__()`` + will be encoded as a JSON array. + + If bigint_as_string is true (not the default), ints 2**53 and higher + or lower than -2**53 will be encoded as strings. 
This is to avoid the + rounding that happens in Javascript otherwise. + + If int_as_string_bitcount is a positive number (n), then int of size + greater than or equal to 2**n or lower than or equal to -2**n will be + encoded as strings. + + If specified, item_sort_key is a callable used to sort the items in + each dictionary. This is useful if you want to sort items other than + in alphabetical order by key. + + If for_json is true (not the default), objects with a ``for_json()`` + method will use the return value of that method for encoding as JSON + instead of the object. + + If *ignore_nan* is true (default: ``False``), then out of range + :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized + as ``null`` in compliance with the ECMA-262 specification. If true, + this will override *allow_nan*. + """ self.skipkeys = skipkeys @@ -149,9 +231,22 @@ def __init__(self, skipkeys=False, ensure_ascii=True, self.check_circular = check_circular self.allow_nan = allow_nan self.sort_keys = sort_keys + self.use_decimal = use_decimal + self.namedtuple_as_object = namedtuple_as_object + self.tuple_as_array = tuple_as_array + self.iterable_as_array = iterable_as_array + self.bigint_as_string = bigint_as_string + self.item_sort_key = item_sort_key + self.for_json = for_json + self.ignore_nan = ignore_nan + self.int_as_string_bitcount = int_as_string_bitcount + if indent is not None and not isinstance(indent, string_types): + indent = indent * ' ' self.indent = indent if separators is not None: self.item_separator, self.key_separator = separators + elif indent is not None: + self.item_separator = ',' if default is not None: self.default = default self.encoding = encoding @@ -174,22 +269,23 @@ def default(self, o): return JSONEncoder.default(self, o) """ - raise TypeError(repr(o) + " is not JSON serializable") + raise TypeError('Object of type %s is not JSON serializable' % + o.__class__.__name__) def encode(self, o): """Return a JSON string representation of a 
Python data structure. + >>> from simplejson import JSONEncoder >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) '{"foo": ["bar", "baz"]}' """ # This is for extremely simple cases and benchmarks. - if isinstance(o, basestring): - if isinstance(o, str): - _encoding = self.encoding - if (_encoding is not None - and not (_encoding == 'utf-8')): - o = o.decode(_encoding) + if isinstance(o, binary_type): + _encoding = self.encoding + if (_encoding is not None and not (_encoding == 'utf-8')): + o = text_type(o, _encoding) + if isinstance(o, string_types): if self.ensure_ascii: return encode_basestring_ascii(o) else: @@ -200,7 +296,10 @@ def encode(self, o): chunks = self.iterencode(o, _one_shot=True) if not isinstance(chunks, (list, tuple)): chunks = list(chunks) - return ''.join(chunks) + if self.ensure_ascii: + return ''.join(chunks) + else: + return u''.join(chunks) def iterencode(self, o, _one_shot=False): """Encode the given object and yield each string @@ -220,15 +319,17 @@ def iterencode(self, o, _one_shot=False): _encoder = encode_basestring_ascii else: _encoder = encode_basestring - if self.encoding != 'utf-8': + if self.encoding != 'utf-8' and self.encoding is not None: def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): - if isinstance(o, str): - o = o.decode(_encoding) + if isinstance(o, binary_type): + o = text_type(o, _encoding) return _orig_encoder(o) - def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY): - # Check for specials. Note that this type of test is processor- and/or - # platform-specific, so do tests which don't depend on the internals. + def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan, + _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): + # Check for specials. Note that this type of test is processor + # and/or platform-specific, so do tests which don't depend on + # the internals. 
if o != o: text = 'NaN' @@ -237,44 +338,135 @@ def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _negi elif o == _neginf: text = '-Infinity' else: + if type(o) != float: + # See #118, do not trust custom str/repr + o = float(o) return _repr(o) - if not allow_nan: + if ignore_nan: + text = 'null' + elif not allow_nan: raise ValueError( "Out of range float values are not JSON compliant: " + repr(o)) return text - - if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys: + key_memo = {} + int_as_string_bitcount = ( + 53 if self.bigint_as_string else self.int_as_string_bitcount) + if (_one_shot and c_make_encoder is not None + and self.indent is None): _iterencode = c_make_encoder( markers, self.default, _encoder, self.indent, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, self.allow_nan) + self.skipkeys, self.allow_nan, key_memo, self.use_decimal, + self.namedtuple_as_object, self.tuple_as_array, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.ignore_nan, decimal.Decimal, self.iterable_as_array) else: _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, - self.skipkeys, _one_shot) - return _iterencode(o, 0) + self.skipkeys, _one_shot, self.use_decimal, + self.namedtuple_as_object, self.tuple_as_array, + int_as_string_bitcount, + self.item_sort_key, self.encoding, self.for_json, + self.iterable_as_array, Decimal=decimal.Decimal) + try: + return _iterencode(o, 0) + finally: + key_memo.clear() + + +class JSONEncoderForHTML(JSONEncoder): + """An encoder that produces JSON safe to embed in HTML. -def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + To embed JSON content in, say, a script tag on a web page, the + characters &, < and > should be escaped. 
They cannot be escaped + with the usual entities (e.g. &) because they are not expanded + within ') + # => <script> do_nasty_stuff() </script> + # sanitize_html('Click here for $100') + # => Click here for $100 + def sanitize_token(self, token): + + # accommodate filters which use token_type differently + token_type = token["type"] + if token_type in ("StartTag", "EndTag", "EmptyTag"): + name = token["name"] + namespace = token["namespace"] + if ((namespace, name) in self.allowed_elements or + (namespace is None and + (namespaces["html"], name) in self.allowed_elements)): + return self.allowed_token(token) + else: + return self.disallowed_token(token) + elif token_type == "Comment": + pass + else: + return token + + def allowed_token(self, token): + if "data" in token: + attrs = token["data"] + attr_names = set(attrs.keys()) + + # Remove forbidden attributes + for to_remove in (attr_names - self.allowed_attributes): + del token["data"][to_remove] + attr_names.remove(to_remove) + + # Remove attributes with disallowed URL values + for attr in (attr_names & self.attr_val_is_uri): + assert attr in attrs + # I don't have a clue where this regexp comes from or why it matches those + # characters, nor why we call unescape. I just know it's always been here. + # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all + # this will do is remove *more* than it otherwise would. 
+ val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '', + unescape(attrs[attr])).lower() + # remove replacement characters from unescaped characters + val_unescaped = val_unescaped.replace("\ufffd", "") + try: + uri = urlparse.urlparse(val_unescaped) + except ValueError: + uri = None + del attrs[attr] + if uri and uri.scheme: + if uri.scheme not in self.allowed_protocols: + del attrs[attr] + if uri.scheme == 'data': + m = data_content_type.match(uri.path) + if not m: + del attrs[attr] + elif m.group('content_type') not in self.allowed_content_types: + del attrs[attr] + + for attr in self.svg_attr_val_allows_ref: + if attr in attrs: + attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', + ' ', + unescape(attrs[attr])) + if (token["name"] in self.svg_allow_local_href and + (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*', + attrs[(namespaces['xlink'], 'href')])): + del attrs[(namespaces['xlink'], 'href')] + if (None, 'style') in attrs: + attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')]) + token["data"] = attrs + return token + + def disallowed_token(self, token): + token_type = token["type"] + if token_type == "EndTag": + token["data"] = "" % token["name"] + elif token["data"]: + assert token_type in ("StartTag", "EmptyTag") + attrs = [] + for (ns, name), v in token["data"].items(): + attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) + token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) + else: + token["data"] = "<%s>" % token["name"] + if token.get("selfClosing"): + token["data"] = token["data"][:-1] + "/>" + + token["type"] = "Characters" + + del token["name"] + return token + + def sanitize_css(self, style): + # disallow urls + style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) + + # gauntlet + if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): + return '' + if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", 
style): + return '' + + clean = [] + for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style): + if not value: + continue + if prop.lower() in self.allowed_css_properties: + clean.append(prop + ': ' + value + ';') + elif prop.split('-')[0].lower() in ['background', 'border', 'margin', + 'padding']: + for keyword in value.split(): + if keyword not in self.allowed_css_keywords and \ + not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa + break + else: + clean.append(prop + ': ' + value + ';') + elif prop.lower() in self.allowed_svg_properties: + clean.append(prop + ': ' + value + ';') + + return ' '.join(clean) diff --git a/samples-and-tests/i-am-a-developer/html5lib/filters/whitespace.py b/samples-and-tests/i-am-a-developer/html5lib/filters/whitespace.py new file mode 100644 index 0000000000..0d12584b45 --- /dev/null +++ b/samples-and-tests/i-am-a-developer/html5lib/filters/whitespace.py @@ -0,0 +1,38 @@ +from __future__ import absolute_import, division, unicode_literals + +import re + +from . 
import base +from ..constants import rcdataElements, spaceCharacters +spaceCharacters = "".join(spaceCharacters) + +SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) + + +class Filter(base.Filter): + """Collapses whitespace except in pre, textarea, and script elements""" + spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) + + def __iter__(self): + preserve = 0 + for token in base.Filter.__iter__(self): + type = token["type"] + if type == "StartTag" \ + and (preserve or token["name"] in self.spacePreserveElements): + preserve += 1 + + elif type == "EndTag" and preserve: + preserve -= 1 + + elif not preserve and type == "SpaceCharacters" and token["data"]: + # Test on token["data"] above to not introduce spaces where there were not + token["data"] = " " + + elif not preserve and type == "Characters": + token["data"] = collapse_spaces(token["data"]) + + yield token + + +def collapse_spaces(text): + return SPACES_REGEX.sub(' ', text) diff --git a/samples-and-tests/i-am-a-developer/html5lib/html5parser.py b/samples-and-tests/i-am-a-developer/html5lib/html5parser.py new file mode 100644 index 0000000000..74d829d984 --- /dev/null +++ b/samples-and-tests/i-am-a-developer/html5lib/html5parser.py @@ -0,0 +1,2795 @@ +from __future__ import absolute_import, division, unicode_literals +from six import with_metaclass, viewkeys + +import types + +from . import _inputstream +from . import _tokenizer + +from . import treebuilders +from .treebuilders.base import Marker + +from . 
import _utils +from .constants import ( + spaceCharacters, asciiUpper2Lower, + specialElements, headingElements, cdataElements, rcdataElements, + tokenTypes, tagTokenTypes, + namespaces, + htmlIntegrationPointElements, mathmlTextIntegrationPointElements, + adjustForeignAttributes as adjustForeignAttributesMap, + adjustMathMLAttributes, adjustSVGAttributes, + E, + _ReparseException +) + + +def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs): + """Parse an HTML document as a string or file-like object into a tree + + :arg doc: the document to parse as a string or file-like object + + :arg treebuilder: the treebuilder to use when parsing + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import parse + >>> parse('

This is a doc

') + + + """ + tb = treebuilders.getTreeBuilder(treebuilder) + p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) + return p.parse(doc, **kwargs) + + +def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs): + """Parse an HTML fragment as a string or file-like object into a tree + + :arg doc: the fragment to parse as a string or file-like object + + :arg container: the container context to parse the fragment in + + :arg treebuilder: the treebuilder to use when parsing + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :returns: parsed tree + + Example: + + >>> from html5lib.html5libparser import parseFragment + >>> parseFragment('this is a fragment') + + + """ + tb = treebuilders.getTreeBuilder(treebuilder) + p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) + return p.parseFragment(doc, container=container, **kwargs) + + +def method_decorator_metaclass(function): + class Decorated(type): + def __new__(meta, classname, bases, classDict): + for attributeName, attribute in classDict.items(): + if isinstance(attribute, types.FunctionType): + attribute = function(attribute) + + classDict[attributeName] = attribute + return type.__new__(meta, classname, bases, classDict) + return Decorated + + +class HTMLParser(object): + """HTML parser + + Generates a tree structure from a stream of (possibly malformed) HTML. + + """ + + def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): + """ + :arg tree: a treebuilder class controlling the type of tree that will be + returned. 
Built in treebuilders can be accessed through + html5lib.treebuilders.getTreeBuilder(treeType) + + :arg strict: raise an exception when a parse error is encountered + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :arg debug: whether or not to enable debug mode which logs things + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() # generates parser with etree builder + >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict + + """ + + # Raise an exception on the first error encountered + self.strict = strict + + if tree is None: + tree = treebuilders.getTreeBuilder("etree") + self.tree = tree(namespaceHTMLElements) + self.errors = [] + + self.phases = {name: cls(self, self.tree) for name, cls in + getPhases(debug).items()} + + def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): + + self.innerHTMLMode = innerHTML + self.container = container + self.scripting = scripting + self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs) + self.reset() + + try: + self.mainLoop() + except _ReparseException: + self.reset() + self.mainLoop() + + def reset(self): + self.tree.reset() + self.firstStartTag = False + self.errors = [] + self.log = [] # only used with debug mode + # "quirks" / "limited quirks" / "no quirks" + self.compatMode = "no quirks" + + if self.innerHTMLMode: + self.innerHTML = self.container.lower() + + if self.innerHTML in cdataElements: + self.tokenizer.state = self.tokenizer.rcdataState + elif self.innerHTML in rcdataElements: + self.tokenizer.state = self.tokenizer.rawtextState + elif self.innerHTML == 'plaintext': + self.tokenizer.state = self.tokenizer.plaintextState + else: + # state already is data state + # self.tokenizer.state = self.tokenizer.dataState + pass + self.phase = self.phases["beforeHtml"] + self.phase.insertHtmlElement() + self.resetInsertionMode() + else: + self.innerHTML = 
False # pylint:disable=redefined-variable-type + self.phase = self.phases["initial"] + + self.lastPhase = None + + self.beforeRCDataPhase = None + + self.framesetOK = True + + @property + def documentEncoding(self): + """Name of the character encoding that was used to decode the input stream, or + :obj:`None` if that is not determined yet + + """ + if not hasattr(self, 'tokenizer'): + return None + return self.tokenizer.stream.charEncoding[0].name + + def isHTMLIntegrationPoint(self, element): + if (element.name == "annotation-xml" and + element.namespace == namespaces["mathml"]): + return ("encoding" in element.attributes and + element.attributes["encoding"].translate( + asciiUpper2Lower) in + ("text/html", "application/xhtml+xml")) + else: + return (element.namespace, element.name) in htmlIntegrationPointElements + + def isMathMLTextIntegrationPoint(self, element): + return (element.namespace, element.name) in mathmlTextIntegrationPointElements + + def mainLoop(self): + CharactersToken = tokenTypes["Characters"] + SpaceCharactersToken = tokenTypes["SpaceCharacters"] + StartTagToken = tokenTypes["StartTag"] + EndTagToken = tokenTypes["EndTag"] + CommentToken = tokenTypes["Comment"] + DoctypeToken = tokenTypes["Doctype"] + ParseErrorToken = tokenTypes["ParseError"] + + for token in self.tokenizer: + prev_token = None + new_token = token + while new_token is not None: + prev_token = new_token + currentNode = self.tree.openElements[-1] if self.tree.openElements else None + currentNodeNamespace = currentNode.namespace if currentNode else None + currentNodeName = currentNode.name if currentNode else None + + type = new_token["type"] + + if type == ParseErrorToken: + self.parseError(new_token["data"], new_token.get("datavars", {})) + new_token = None + else: + if (len(self.tree.openElements) == 0 or + currentNodeNamespace == self.tree.defaultNamespace or + (self.isMathMLTextIntegrationPoint(currentNode) and + ((type == StartTagToken and + token["name"] not in 
frozenset(["mglyph", "malignmark"])) or + type in (CharactersToken, SpaceCharactersToken))) or + (currentNodeNamespace == namespaces["mathml"] and + currentNodeName == "annotation-xml" and + type == StartTagToken and + token["name"] == "svg") or + (self.isHTMLIntegrationPoint(currentNode) and + type in (StartTagToken, CharactersToken, SpaceCharactersToken))): + phase = self.phase + else: + phase = self.phases["inForeignContent"] + + if type == CharactersToken: + new_token = phase.processCharacters(new_token) + elif type == SpaceCharactersToken: + new_token = phase.processSpaceCharacters(new_token) + elif type == StartTagToken: + new_token = phase.processStartTag(new_token) + elif type == EndTagToken: + new_token = phase.processEndTag(new_token) + elif type == CommentToken: + new_token = phase.processComment(new_token) + elif type == DoctypeToken: + new_token = phase.processDoctype(new_token) + + if (type == StartTagToken and prev_token["selfClosing"] and + not prev_token["selfClosingAcknowledged"]): + self.parseError("non-void-element-with-trailing-solidus", + {"name": prev_token["name"]}) + + # When the loop finishes it's EOF + reprocess = True + phases = [] + while reprocess: + phases.append(self.phase) + reprocess = self.phase.processEOF() + if reprocess: + assert self.phase not in phases + + def parse(self, stream, *args, **kwargs): + """Parse a HTML document into a well-formed tree + + :arg stream: a file-like object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element). + + :arg scripting: treat noscript elements as if JavaScript was turned on + + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() + >>> parser.parse('

This is a doc

') + + + """ + self._parse(stream, False, None, *args, **kwargs) + return self.tree.getDocument() + + def parseFragment(self, stream, *args, **kwargs): + """Parse a HTML fragment into a well-formed tree fragment + + :arg container: name of the element we're setting the innerHTML + property if set to None, default to 'div' + + :arg stream: a file-like object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + + :arg scripting: treat noscript elements as if JavaScript was turned on + + :returns: parsed tree + + Example: + + >>> from html5lib.html5libparser import HTMLParser + >>> parser = HTMLParser() + >>> parser.parseFragment('this is a fragment') + + + """ + self._parse(stream, True, *args, **kwargs) + return self.tree.getFragment() + + def parseError(self, errorcode="XXX-undefined-error", datavars=None): + # XXX The idea is to make errorcode mandatory. + if datavars is None: + datavars = {} + self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) + if self.strict: + raise ParseError(E[errorcode] % datavars) + + def adjustMathMLAttributes(self, token): + adjust_attributes(token, adjustMathMLAttributes) + + def adjustSVGAttributes(self, token): + adjust_attributes(token, adjustSVGAttributes) + + def adjustForeignAttributes(self, token): + adjust_attributes(token, adjustForeignAttributesMap) + + def reparseTokenNormal(self, token): + # pylint:disable=unused-argument + self.parser.phase() + + def resetInsertionMode(self): + # The name of this method is mostly historical. (It's also used in the + # specification.) 
+ last = False + newModes = { + "select": "inSelect", + "td": "inCell", + "th": "inCell", + "tr": "inRow", + "tbody": "inTableBody", + "thead": "inTableBody", + "tfoot": "inTableBody", + "caption": "inCaption", + "colgroup": "inColumnGroup", + "table": "inTable", + "head": "inBody", + "body": "inBody", + "frameset": "inFrameset", + "html": "beforeHead" + } + for node in self.tree.openElements[::-1]: + nodeName = node.name + new_phase = None + if node == self.tree.openElements[0]: + assert self.innerHTML + last = True + nodeName = self.innerHTML + # Check for conditions that should only happen in the innerHTML + # case + if nodeName in ("select", "colgroup", "head", "html"): + assert self.innerHTML + + if not last and node.namespace != self.tree.defaultNamespace: + continue + + if nodeName in newModes: + new_phase = self.phases[newModes[nodeName]] + break + elif last: + new_phase = self.phases["inBody"] + break + + self.phase = new_phase + + def parseRCDataRawtext(self, token, contentType): + # Generic RCDATA/RAWTEXT Parsing algorithm + assert contentType in ("RAWTEXT", "RCDATA") + + self.tree.insertElement(token) + + if contentType == "RAWTEXT": + self.tokenizer.state = self.tokenizer.rawtextState + else: + self.tokenizer.state = self.tokenizer.rcdataState + + self.originalPhase = self.phase + + self.phase = self.phases["text"] + + +@_utils.memoize +def getPhases(debug): + def log(function): + """Logger that records which phase processes each token""" + type_names = {value: key for key, value in tokenTypes.items()} + + def wrapped(self, *args, **kwargs): + if function.__name__.startswith("process") and len(args) > 0: + token = args[0] + info = {"type": type_names[token['type']]} + if token['type'] in tagTokenTypes: + info["name"] = token['name'] + + self.parser.log.append((self.parser.tokenizer.state.__name__, + self.parser.phase.__class__.__name__, + self.__class__.__name__, + function.__name__, + info)) + return function(self, *args, **kwargs) + else: + return 
function(self, *args, **kwargs) + return wrapped + + def getMetaclass(use_metaclass, metaclass_func): + if use_metaclass: + return method_decorator_metaclass(metaclass_func) + else: + return type + + # pylint:disable=unused-argument + class Phase(with_metaclass(getMetaclass(debug, log))): + """Base class for helper object that implements each phase of processing + """ + __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") + + def __init__(self, parser, tree): + self.parser = parser + self.tree = tree + self.__startTagCache = {} + self.__endTagCache = {} + + def processEOF(self): + raise NotImplementedError + + def processComment(self, token): + # For most phases the following is correct. Where it's not it will be + # overridden. + self.tree.insertComment(token, self.tree.openElements[-1]) + + def processDoctype(self, token): + self.parser.parseError("unexpected-doctype") + + def processCharacters(self, token): + self.tree.insertText(token["data"]) + + def processSpaceCharacters(self, token): + self.tree.insertText(token["data"]) + + def processStartTag(self, token): + # Note the caching is done here rather than BoundMethodDispatcher as doing it there + # requires a circular reference to the Phase, and this ends up with a significant + # (CPython 2.7, 3.8) GC cost when parsing many short inputs + name = token["name"] + # In Py2, using `in` is quicker in general than try/except KeyError + # In Py3, `in` is quicker when there are few cache hits (typically short inputs) + if name in self.__startTagCache: + func = self.__startTagCache[name] + else: + func = self.__startTagCache[name] = self.startTagHandler[name] + # bound the cache size in case we get loads of unknown tags + while len(self.__startTagCache) > len(self.startTagHandler) * 1.1: + # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7 + self.__startTagCache.pop(next(iter(self.__startTagCache))) + return func(token) + + def startTagHtml(self, token): + if not 
self.parser.firstStartTag and token["name"] == "html": + self.parser.parseError("non-html-root") + # XXX Need a check here to see if the first start tag token emitted is + # this token... If it's not, invoke self.parser.parseError(). + for attr, value in token["data"].items(): + if attr not in self.tree.openElements[0].attributes: + self.tree.openElements[0].attributes[attr] = value + self.parser.firstStartTag = False + + def processEndTag(self, token): + # Note the caching is done here rather than BoundMethodDispatcher as doing it there + # requires a circular reference to the Phase, and this ends up with a significant + # (CPython 2.7, 3.8) GC cost when parsing many short inputs + name = token["name"] + # In Py2, using `in` is quicker in general than try/except KeyError + # In Py3, `in` is quicker when there are few cache hits (typically short inputs) + if name in self.__endTagCache: + func = self.__endTagCache[name] + else: + func = self.__endTagCache[name] = self.endTagHandler[name] + # bound the cache size in case we get loads of unknown tags + while len(self.__endTagCache) > len(self.endTagHandler) * 1.1: + # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7 + self.__endTagCache.pop(next(iter(self.__endTagCache))) + return func(token) + + class InitialPhase(Phase): + __slots__ = tuple() + + def processSpaceCharacters(self, token): + pass + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processDoctype(self, token): + name = token["name"] + publicId = token["publicId"] + systemId = token["systemId"] + correct = token["correct"] + + if (name != "html" or publicId is not None or + systemId is not None and systemId != "about:legacy-compat"): + self.parser.parseError("unknown-doctype") + + if publicId is None: + publicId = "" + + self.tree.insertDoctype(token) + + if publicId != "": + publicId = publicId.translate(asciiUpper2Lower) + + if (not correct or token["name"] != "html" or + 
publicId.startswith( + ("+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. 
corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", + "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", + "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", + "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", + "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", + "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//")) or + publicId in ("-//w3o//dtd w3 html strict 3.0//en//", + "-/w3c/dtd html 4.0 transitional/en", + "html") or + publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is None or + systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): + self.parser.compatMode = "quirks" + elif (publicId.startswith( + ("-//w3c//dtd xhtml 1.0 frameset//", + "-//w3c//dtd xhtml 1.0 transitional//")) or + publicId.startswith( + ("-//w3c//dtd html 4.01 frameset//", + "-//w3c//dtd html 4.01 transitional//")) and + systemId is not None): + self.parser.compatMode = "limited quirks" + + self.parser.phase = self.parser.phases["beforeHtml"] + + def anythingElse(self): + self.parser.compatMode = "quirks" + self.parser.phase = self.parser.phases["beforeHtml"] + + def processCharacters(self, token): + self.parser.parseError("expected-doctype-but-got-chars") + self.anythingElse() + 
return token + + def processStartTag(self, token): + self.parser.parseError("expected-doctype-but-got-start-tag", + {"name": token["name"]}) + self.anythingElse() + return token + + def processEndTag(self, token): + self.parser.parseError("expected-doctype-but-got-end-tag", + {"name": token["name"]}) + self.anythingElse() + return token + + def processEOF(self): + self.parser.parseError("expected-doctype-but-got-eof") + self.anythingElse() + return True + + class BeforeHtmlPhase(Phase): + __slots__ = tuple() + + # helper methods + def insertHtmlElement(self): + self.tree.insertRoot(impliedTagToken("html", "StartTag")) + self.parser.phase = self.parser.phases["beforeHead"] + + # other + def processEOF(self): + self.insertHtmlElement() + return True + + def processComment(self, token): + self.tree.insertComment(token, self.tree.document) + + def processSpaceCharacters(self, token): + pass + + def processCharacters(self, token): + self.insertHtmlElement() + return token + + def processStartTag(self, token): + if token["name"] == "html": + self.parser.firstStartTag = True + self.insertHtmlElement() + return token + + def processEndTag(self, token): + if token["name"] not in ("head", "body", "html", "br"): + self.parser.parseError("unexpected-end-tag-before-html", + {"name": token["name"]}) + else: + self.insertHtmlElement() + return token + + class BeforeHeadPhase(Phase): + __slots__ = tuple() + + def processEOF(self): + self.startTagHead(impliedTagToken("head", "StartTag")) + return True + + def processSpaceCharacters(self, token): + pass + + def processCharacters(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagHead(self, token): + self.tree.insertElement(token) + self.tree.headPointer = self.tree.openElements[-1] + self.parser.phase = self.parser.phases["inHead"] + + def startTagOther(self, token): + 
self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def endTagImplyHead(self, token): + self.startTagHead(impliedTagToken("head", "StartTag")) + return token + + def endTagOther(self, token): + self.parser.parseError("end-tag-after-implied-root", + {"name": token["name"]}) + + startTagHandler = _utils.MethodDispatcher([ + ("html", startTagHtml), + ("head", startTagHead) + ]) + startTagHandler.default = startTagOther + + endTagHandler = _utils.MethodDispatcher([ + (("head", "body", "html", "br"), endTagImplyHead) + ]) + endTagHandler.default = endTagOther + + class InHeadPhase(Phase): + __slots__ = tuple() + + # the real thing + def processEOF(self): + self.anythingElse() + return True + + def processCharacters(self, token): + self.anythingElse() + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagHead(self, token): + self.parser.parseError("two-heads-are-not-better-than-one") + + def startTagBaseLinkCommand(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + def startTagMeta(self, token): + self.tree.insertElement(token) + self.tree.openElements.pop() + token["selfClosingAcknowledged"] = True + + attributes = token["data"] + if self.parser.tokenizer.stream.charEncoding[1] == "tentative": + if "charset" in attributes: + self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) + elif ("content" in attributes and + "http-equiv" in attributes and + attributes["http-equiv"].lower() == "content-type"): + # Encoding it as UTF-8 here is a hack, as really we should pass + # the abstract Unicode string, and just use the + # ContentAttrParser on that, but using UTF-8 allows all chars + # to be encoded and as a ASCII-superset works. 
+ data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8")) + parser = _inputstream.ContentAttrParser(data) + codec = parser.parse() + self.parser.tokenizer.stream.changeEncoding(codec) + + def startTagTitle(self, token): + self.parser.parseRCDataRawtext(token, "RCDATA") + + def startTagNoFramesStyle(self, token): + # Need to decide whether to implement the scripting-disabled case + self.parser.parseRCDataRawtext(token, "RAWTEXT") + + def startTagNoscript(self, token): + if self.parser.scripting: + self.parser.parseRCDataRawtext(token, "RAWTEXT") + else: + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inHeadNoscript"] + + def startTagScript(self, token): + self.tree.insertElement(token) + self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState + self.parser.originalPhase = self.parser.phase + self.parser.phase = self.parser.phases["text"] + + def startTagOther(self, token): + self.anythingElse() + return token + + def endTagHead(self, token): + node = self.parser.tree.openElements.pop() + assert node.name == "head", "Expected head got %s" % node.name + self.parser.phase = self.parser.phases["afterHead"] + + def endTagHtmlBodyBr(self, token): + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + self.endTagHead(impliedTagToken("head")) + + startTagHandler = _utils.MethodDispatcher([ + ("html", startTagHtml), + ("title", startTagTitle), + (("noframes", "style"), startTagNoFramesStyle), + ("noscript", startTagNoscript), + ("script", startTagScript), + (("base", "basefont", "bgsound", "command", "link"), + startTagBaseLinkCommand), + ("meta", startTagMeta), + ("head", startTagHead) + ]) + startTagHandler.default = startTagOther + + endTagHandler = _utils.MethodDispatcher([ + ("head", endTagHead), + (("br", "html", "body"), endTagHtmlBodyBr) + ]) + endTagHandler.default = endTagOther + + class 
InHeadNoscriptPhase(Phase): + __slots__ = tuple() + + def processEOF(self): + self.parser.parseError("eof-in-head-noscript") + self.anythingElse() + return True + + def processComment(self, token): + return self.parser.phases["inHead"].processComment(token) + + def processCharacters(self, token): + self.parser.parseError("char-in-head-noscript") + self.anythingElse() + return token + + def processSpaceCharacters(self, token): + return self.parser.phases["inHead"].processSpaceCharacters(token) + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagBaseLinkCommand(self, token): + return self.parser.phases["inHead"].processStartTag(token) + + def startTagHeadNoscript(self, token): + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + + def startTagOther(self, token): + self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) + self.anythingElse() + return token + + def endTagNoscript(self, token): + node = self.parser.tree.openElements.pop() + assert node.name == "noscript", "Expected noscript got %s" % node.name + self.parser.phase = self.parser.phases["inHead"] + + def endTagBr(self, token): + self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + # Caller must raise parse error first! 
+ self.endTagNoscript(impliedTagToken("noscript")) + + startTagHandler = _utils.MethodDispatcher([ + ("html", startTagHtml), + (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand), + (("head", "noscript"), startTagHeadNoscript), + ]) + startTagHandler.default = startTagOther + + endTagHandler = _utils.MethodDispatcher([ + ("noscript", endTagNoscript), + ("br", endTagBr), + ]) + endTagHandler.default = endTagOther + + class AfterHeadPhase(Phase): + __slots__ = tuple() + + def processEOF(self): + self.anythingElse() + return True + + def processCharacters(self, token): + self.anythingElse() + return token + + def startTagHtml(self, token): + return self.parser.phases["inBody"].processStartTag(token) + + def startTagBody(self, token): + self.parser.framesetOK = False + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inBody"] + + def startTagFrameset(self, token): + self.tree.insertElement(token) + self.parser.phase = self.parser.phases["inFrameset"] + + def startTagFromHead(self, token): + self.parser.parseError("unexpected-start-tag-out-of-my-head", + {"name": token["name"]}) + self.tree.openElements.append(self.tree.headPointer) + self.parser.phases["inHead"].processStartTag(token) + for node in self.tree.openElements[::-1]: + if node.name == "head": + self.tree.openElements.remove(node) + break + + def startTagHead(self, token): + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) + + def startTagOther(self, token): + self.anythingElse() + return token + + def endTagHtmlBodyBr(self, token): + self.anythingElse() + return token + + def endTagOther(self, token): + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) + + def anythingElse(self): + self.tree.insertElement(impliedTagToken("body", "StartTag")) + self.parser.phase = self.parser.phases["inBody"] + self.parser.framesetOK = True + + startTagHandler = _utils.MethodDispatcher([ + ("html", startTagHtml), + ("body", 
startTagBody), + ("frameset", startTagFrameset), + (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", + "style", "title"), + startTagFromHead), + ("head", startTagHead) + ]) + startTagHandler.default = startTagOther + endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"), + endTagHtmlBodyBr)]) + endTagHandler.default = endTagOther + + class InBodyPhase(Phase): + # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody + # the really-really-really-very crazy mode + __slots__ = ("processSpaceCharacters",) + + def __init__(self, *args, **kwargs): + super(InBodyPhase, self).__init__(*args, **kwargs) + # Set this to the default handler + self.processSpaceCharacters = self.processSpaceCharactersNonPre + + def isMatchingFormattingElement(self, node1, node2): + return (node1.name == node2.name and + node1.namespace == node2.namespace and + node1.attributes == node2.attributes) + + # helper + def addFormattingElement(self, token): + self.tree.insertElement(token) + element = self.tree.openElements[-1] + + matchingElements = [] + for node in self.tree.activeFormattingElements[::-1]: + if node is Marker: + break + elif self.isMatchingFormattingElement(node, element): + matchingElements.append(node) + + assert len(matchingElements) <= 3 + if len(matchingElements) == 3: + self.tree.activeFormattingElements.remove(matchingElements[-1]) + self.tree.activeFormattingElements.append(element) + + # the real deal + def processEOF(self): + allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td", + "tfoot", "th", "thead", "tr", "body", + "html")) + for node in self.tree.openElements[::-1]: + if node.name not in allowed_elements: + self.parser.parseError("expected-closing-tag-but-got-eof") + break + # Stop parsing + + def processSpaceCharactersDropNewline(self, token): + # Sometimes (start of
, , and