From 4f44c2ec9824e13c35d53039bf54c5198fe7b4d7 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 14 Apr 2022 21:30:52 -0700 Subject: [PATCH] Post query json parse fix (#711) * post append query: fix json parsing of lists to be identical to cdxj-indexer if json parsing errors occur, log to stderr fixes #709 in a better way * update CHANGES.rst --- CHANGES.rst | 1 + pywb/warcserver/inputrequest.py | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 7e6ca6779..70f06f1aa 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -3,6 +3,7 @@ pywb 2.6.7 changelist * dependency: bump gevent to latest (21.12.0) * rewrite: fix eval rewriting where '._eval' was accidentally being rewritten +* post-to-get conversion: properly handle json with top-level lists, to match cdxj-indexer, print parse errors, fixes `#709 <https://github.com/webrecorder/pywb/issues/709>`_ pywb 2.6.6 changelist ~~~~~~~~~~~~~~~~~~~~~ diff --git a/pywb/warcserver/inputrequest.py b/pywb/warcserver/inputrequest.py index c3dae6595..654610f50 100644 --- a/pywb/warcserver/inputrequest.py +++ b/pywb/warcserver/inputrequest.py @@ -11,6 +11,7 @@ import base64 import cgi import json +import sys #============================================================================= @@ -277,6 +278,7 @@ def handle_binary(query): try: query = self.json_parse(query) except Exception as e: + sys.stderr.write("Ignoring query, error parsing as json: " + query.decode("utf-8") + "\n") query = '' elif mime.startswith('text/plain'): @@ -316,12 +318,17 @@ def get_key(n): dupes[n] += 1 return n + "." 
+ str(dupes[n]) + "_"; - def _parser(dict_var): - for n, v in dict_var.items(): - if isinstance(v, dict): - _parser(v) - else: - data[get_key(n)] = str(v) + def _parser(json_obj, name=""): + if isinstance(json_obj, dict): + for n, v in json_obj.items(): + _parser(v, n) + + elif isinstance(json_obj, list): + for v in json_obj: + _parser(v, name) + + elif name: + data[get_key(name)] = str(json_obj) _parser(json.loads(string)) return urlencode(data)