Skip to content

Commit cc61050

Browse files
committed
httputil: Raise errors instead of logging in multipart/form-data parsing
We used to continue after logging an error, which allowed repeated errors to spam the logs. The error raised here will still be logged, but only once per request, consistent with other error handling in Tornado.
1 parent ae4a4e4 commit cc61050

File tree

4 files changed

+34
-30
lines changed

4 files changed

+34
-30
lines changed

tornado/httputil.py

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
3535

3636
from tornado.escape import native_str, parse_qs_bytes, utf8, to_unicode
37-
from tornado.log import gen_log
3837
from tornado.util import ObjectDict, unicode_type
3938

4039

@@ -884,25 +883,22 @@ def parse_body_arguments(
884883
"""
885884
if content_type.startswith("application/x-www-form-urlencoded"):
886885
if headers and "Content-Encoding" in headers:
887-
gen_log.warning(
888-
"Unsupported Content-Encoding: %s", headers["Content-Encoding"]
886+
raise HTTPInputError(
887+
"Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
889888
)
890-
return
891889
try:
892890
# real charset decoding will happen in RequestHandler.decode_argument()
893891
uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
894892
except Exception as e:
895-
gen_log.warning("Invalid x-www-form-urlencoded body: %s", e)
896-
uri_arguments = {}
893+
raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
897894
for name, values in uri_arguments.items():
898895
if values:
899896
arguments.setdefault(name, []).extend(values)
900897
elif content_type.startswith("multipart/form-data"):
901898
if headers and "Content-Encoding" in headers:
902-
gen_log.warning(
903-
"Unsupported Content-Encoding: %s", headers["Content-Encoding"]
899+
raise HTTPInputError(
900+
"Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
904901
)
905-
return
906902
try:
907903
fields = content_type.split(";")
908904
for field in fields:
@@ -911,9 +907,9 @@ def parse_body_arguments(
911907
parse_multipart_form_data(utf8(v), body, arguments, files)
912908
break
913909
else:
914-
raise ValueError("multipart boundary not found")
910+
raise HTTPInputError("multipart boundary not found")
915911
except Exception as e:
916-
gen_log.warning("Invalid multipart/form-data: %s", e)
912+
raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
917913

918914

919915
def parse_multipart_form_data(
@@ -942,26 +938,22 @@ def parse_multipart_form_data(
942938
boundary = boundary[1:-1]
943939
final_boundary_index = data.rfind(b"--" + boundary + b"--")
944940
if final_boundary_index == -1:
945-
gen_log.warning("Invalid multipart/form-data: no final boundary")
946-
return
941+
raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
947942
parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
948943
for part in parts:
949944
if not part:
950945
continue
951946
eoh = part.find(b"\r\n\r\n")
952947
if eoh == -1:
953-
gen_log.warning("multipart/form-data missing headers")
954-
continue
948+
raise HTTPInputError("multipart/form-data missing headers")
955949
headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
956950
disp_header = headers.get("Content-Disposition", "")
957951
disposition, disp_params = _parse_header(disp_header)
958952
if disposition != "form-data" or not part.endswith(b"\r\n"):
959-
gen_log.warning("Invalid multipart/form-data")
960-
continue
953+
raise HTTPInputError("Invalid multipart/form-data")
961954
value = part[eoh + 4 : -2]
962955
if not disp_params.get("name"):
963-
gen_log.warning("multipart/form-data value missing name")
964-
continue
956+
raise HTTPInputError("multipart/form-data missing name")
965957
name = disp_params["name"]
966958
if disp_params.get("filename"):
967959
ctype = headers.get("Content-Type", "application/unknown")

tornado/test/httpserver_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1148,9 +1148,9 @@ def test_gzip_unsupported(self):
11481148
# Gzip support is opt-in; without it the server fails to parse
11491149
# the body (but parsing form bodies is currently just a log message,
11501150
# not a fatal error).
1151-
with ExpectLog(gen_log, "Unsupported Content-Encoding"):
1151+
with ExpectLog(gen_log, ".*Unsupported Content-Encoding"):
11521152
response = self.post_gzip("foo=bar")
1153-
self.assertEqual(json_decode(response.body), {})
1153+
self.assertEqual(response.code, 400)
11541154

11551155

11561156
class StreamingChunkSizeTest(AsyncHTTPTestCase):

tornado/test/httputil_test.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
)
1313
from tornado.escape import utf8, native_str
1414
from tornado.log import gen_log
15-
from tornado.testing import ExpectLog
1615
from tornado.test.util import ignore_deprecation
1716

1817
import copy
@@ -195,7 +194,9 @@ def test_missing_headers(self):
195194
b"\n", b"\r\n"
196195
)
197196
args, files = form_data_args()
198-
with ExpectLog(gen_log, "multipart/form-data missing headers"):
197+
with self.assertRaises(
198+
HTTPInputError, msg="multipart/form-data missing headers"
199+
):
199200
parse_multipart_form_data(b"1234", data, args, files)
200201
self.assertEqual(files, {})
201202

@@ -209,7 +210,7 @@ def test_invalid_content_disposition(self):
209210
b"\n", b"\r\n"
210211
)
211212
args, files = form_data_args()
212-
with ExpectLog(gen_log, "Invalid multipart/form-data"):
213+
with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"):
213214
parse_multipart_form_data(b"1234", data, args, files)
214215
self.assertEqual(files, {})
215216

@@ -222,7 +223,7 @@ def test_line_does_not_end_with_correct_line_break(self):
222223
b"\n", b"\r\n"
223224
)
224225
args, files = form_data_args()
225-
with ExpectLog(gen_log, "Invalid multipart/form-data"):
226+
with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"):
226227
parse_multipart_form_data(b"1234", data, args, files)
227228
self.assertEqual(files, {})
228229

@@ -236,7 +237,9 @@ def test_content_disposition_header_without_name_parameter(self):
236237
b"\n", b"\r\n"
237238
)
238239
args, files = form_data_args()
239-
with ExpectLog(gen_log, "multipart/form-data value missing name"):
240+
with self.assertRaises(
241+
HTTPInputError, msg="multipart/form-data value missing name"
242+
):
240243
parse_multipart_form_data(b"1234", data, args, files)
241244
self.assertEqual(files, {})
242245

tornado/web.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1801,6 +1801,14 @@ async def _execute(
18011801
try:
18021802
if self.request.method not in self.SUPPORTED_METHODS:
18031803
raise HTTPError(405)
1804+
1805+
# If we're not in stream_request_body mode, this is the place where we parse the body.
1806+
if not _has_stream_request_body(self.__class__):
1807+
try:
1808+
self.request._parse_body()
1809+
except httputil.HTTPInputError as e:
1810+
raise HTTPError(400, "Invalid body: %s" % e) from e
1811+
18041812
self.path_args = [self.decode_argument(arg) for arg in args]
18051813
self.path_kwargs = {
18061814
k: self.decode_argument(v, name=k) for (k, v) in kwargs.items()
@@ -1992,7 +2000,7 @@ def _has_stream_request_body(cls: Type[RequestHandler]) -> bool:
19922000

19932001

19942002
def removeslash(
1995-
method: Callable[..., Optional[Awaitable[None]]]
2003+
method: Callable[..., Optional[Awaitable[None]]],
19962004
) -> Callable[..., Optional[Awaitable[None]]]:
19972005
"""Use this decorator to remove trailing slashes from the request path.
19982006
@@ -2021,7 +2029,7 @@ def wrapper( # type: ignore
20212029

20222030

20232031
def addslash(
2024-
method: Callable[..., Optional[Awaitable[None]]]
2032+
method: Callable[..., Optional[Awaitable[None]]],
20252033
) -> Callable[..., Optional[Awaitable[None]]]:
20262034
"""Use this decorator to add a missing trailing slash to the request path.
20272035
@@ -2445,8 +2453,9 @@ def finish(self) -> None:
24452453
if self.stream_request_body:
24462454
future_set_result_unless_cancelled(self.request._body_future, None)
24472455
else:
2456+
# Note that the body gets parsed in RequestHandler._execute so it can be in
2457+
# the right exception handler scope.
24482458
self.request.body = b"".join(self.chunks)
2449-
self.request._parse_body()
24502459
self.execute()
24512460

24522461
def on_connection_close(self) -> None:
@@ -3332,7 +3341,7 @@ def transform_chunk(self, chunk: bytes, finishing: bool) -> bytes:
33323341

33333342

33343343
def authenticated(
3335-
method: Callable[..., Optional[Awaitable[None]]]
3344+
method: Callable[..., Optional[Awaitable[None]]],
33363345
) -> Callable[..., Optional[Awaitable[None]]]:
33373346
"""Decorate methods with this to require that the user be logged in.
33383347

0 commit comments

Comments
 (0)