From bae6ea669f25e48a2ca087c5eabc9b85fbc535e1 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 11 Nov 2019 18:02:45 -0800 Subject: [PATCH 1/3] indexing: restrict POST body appended to query to 16384, avoid reading very large POST requests on indexing --- pywb/warcserver/inputrequest.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pywb/warcserver/inputrequest.py b/pywb/warcserver/inputrequest.py index f910d2e3b..97ce778c3 100644 --- a/pywb/warcserver/inputrequest.py +++ b/pywb/warcserver/inputrequest.py @@ -10,6 +10,7 @@ import base64 import cgi +import sys #============================================================================= @@ -181,6 +182,8 @@ def _get_header(self, name): # ============================================================================ class MethodQueryCanonicalizer(object): + MAX_POST_SIZE = 16384 + def __init__(self, method, mime, length, stream, buffered_stream=None, environ=None): @@ -210,7 +213,9 @@ def __init__(self, method, mime, length, stream, if length <= 0: return - query = b'' + # max POST query allowed, for size considerations, only read up to this size + length = min(length, self.MAX_POST_SIZE) + query = [] while length > 0: buff = stream.read(length) @@ -219,7 +224,9 @@ def __init__(self, method, mime, length, stream, if not buff: break - query += buff + query.append(buff) + + query = b''.join(query) if buffered_stream: buffered_stream.write(query) @@ -236,7 +243,8 @@ def handle_binary(query): if mime.startswith('application/x-www-form-urlencoded'): try: - query = to_native_str(query.decode('utf-8')) + if PY3: + query = query.decode('utf-8') query = unquote_plus(query) except UnicodeDecodeError: query = handle_binary(query) From 4b8a42420807b513406334088b486f6f381238bb Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 11 Nov 2019 21:10:53 -0800 Subject: [PATCH 2/3] fix py27 --- pywb/warcserver/inputrequest.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/pywb/warcserver/inputrequest.py b/pywb/warcserver/inputrequest.py index 97ce778c3..dcd54f6d3 100644 --- a/pywb/warcserver/inputrequest.py +++ b/pywb/warcserver/inputrequest.py @@ -243,8 +243,7 @@ def handle_binary(query): if mime.startswith('application/x-www-form-urlencoded'): try: - if PY3: - query = query.decode('utf-8') + query = to_native_str(query.decode('utf-8')) query = unquote_plus(query) except UnicodeDecodeError: query = handle_binary(query) From 8090955bb7a99de1f1cd43b9ec4d390134239a6c Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Tue, 12 Nov 2019 12:23:08 -0800 Subject: [PATCH 3/3] remove unused import --- pywb/warcserver/inputrequest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pywb/warcserver/inputrequest.py b/pywb/warcserver/inputrequest.py index dcd54f6d3..f616648e9 100644 --- a/pywb/warcserver/inputrequest.py +++ b/pywb/warcserver/inputrequest.py @@ -10,7 +10,6 @@ import base64 import cgi -import sys #=============================================================================