forked from tornadoweb/tornado
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixed Issue tornadoweb#3369: Improved request body parsing in HTTPServerRequest and RequestHandler.
Enhanced support for JSON, form-encoded, and multipart data, including file uploads. Updated unit tests to cover all scenarios, ensuring robust handling of requests.
- Loading branch information
1 parent
ac0daf7
commit 2df5e2a
Showing
2 changed files
with
220 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
import json | ||
from tornado.testing import AsyncHTTPTestCase | ||
from tornado.web import Application, RequestHandler | ||
from tornado.httputil import HTTPServerRequest | ||
from tornado.web import RequestHandler | ||
from tornado.webmiddleware import RequestParsingMiddleware | ||
|
||
class TestHandler(RequestHandler):
    """Handler that echoes the middleware-parsed request body back as JSON."""

    def prepare(self):
        """Reset ``parsed_body`` and run the request-parsing middleware."""
        # Set a defined value first so ``post`` can always read the attribute.
        self.parsed_body = None
        self._apply_middlewares()

    def _apply_middlewares(self):
        """Run every configured middleware against this handler."""
        middlewares = [RequestParsingMiddleware()]
        for middleware in middlewares:
            middleware.process_request(self)

    def post(self):
        """Write ``parsed_body`` to the response as a JSON document."""
        # RequestHandler.write() accepts only bytes, str and dict, so passing
        # parsed_body straight through raises TypeError (a 500 response) when
        # it is None or a JSON array. Serializing explicitly handles every
        # value the middleware can produce; None becomes ``null``.
        self.set_header("Content-Type", "application/json; charset=UTF-8")
        self.write(json.dumps(self.parsed_body))
|
||
class MiddlewareTest(AsyncHTTPTestCase):
    """POST bodies of each supported type to ``/test`` and check the echo."""

    def get_app(self):
        """Build the application under test with the single echo route."""
        routes = [(r"/test", TestHandler)]
        return Application(routes)

    def _post(self, payload, content_type):
        """POST ``payload`` to ``/test`` with the given Content-Type header."""
        return self.fetch(
            "/test",
            method="POST",
            body=payload,
            headers={"Content-Type": content_type},
        )

    def test_json_parsing(self):
        """A JSON object body is echoed back unchanged."""
        document = {"key": "value"}
        response = self._post(json.dumps(document), "application/json")

        self.assertEqual(response.code, 200)
        self.assertEqual(json.loads(response.body), document)

    def test_form_parsing(self):
        """A URL-encoded body is reported under ``arguments`` with no files."""
        response = self._post("key=value", "application/x-www-form-urlencoded")

        self.assertEqual(response.code, 200)
        # The middleware wraps form data in an arguments/files structure.
        expected = {"arguments": {"key": ["value"]}, "files": {}}
        self.assertEqual(json.loads(response.body), expected)

    def test_multipart_parsing_with_file(self):
        """A multipart body yields both the plain field and the uploaded file."""
        file_content = b"This is a test file."
        file_name = "test_file.txt"
        file_text = file_content.decode('utf-8')
        boundary = "----WebKitFormBoundary7MA4YWxkTrZu0gW"

        # One entry per CRLF-terminated line of the multipart payload; the
        # trailing empty entry produces the final CRLF after the closing
        # boundary.
        lines = [
            f"--{boundary}",
            'Content-Disposition: form-data; name="key"',
            "",
            "value",
            f"--{boundary}",
            f'Content-Disposition: form-data; name="file"; filename="{file_name}"',
            "Content-Type: text/plain",
            "",
            file_text,
            f"--{boundary}--",
            "",
        ]
        payload = "\r\n".join(lines).encode('utf-8')

        response = self._post(payload, f"multipart/form-data; boundary={boundary}")
        self.assertEqual(response.code, 200)

        echoed = json.loads(response.body)
        self.assertEqual(echoed["arguments"], {"key": ["value"]})

        uploads = echoed["files"]["file"]
        self.assertEqual(len(uploads), 1)

        upload = uploads[0]
        self.assertEqual(upload["filename"], file_name)
        self.assertEqual(upload["body"], file_text)
        self.assertEqual(upload["content_type"], "text/plain")
|
||
|
||
if __name__ == "__main__":
    # Executing this file directly runs the tests through unittest's runner.
    from unittest import main as run_tests

    run_tests()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
import json | ||
from typing import Any, Dict, List, Optional | ||
from tornado.httputil import HTTPServerRequest | ||
from tornado.escape import json_decode | ||
from tornado.httputil import parse_body_arguments | ||
|
||
class Middleware:
    """Base class for request middlewares.

    Subclasses override ``process_request``; the implementation here only
    raises ``NotImplementedError``.
    """

    def process_request(self, handler: Any) -> None:
        """Process ``handler``; must be overridden by subclasses.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()
|
||
class RequestParsingMiddleware(Middleware):
    """Parse the request body according to its ``Content-Type`` header.

    ``process_request`` stores the result on the handler's ``parsed_body``
    attribute:

    - JSON (``application/json``): the decoded JSON value, or ``None`` when
      the body cannot be decoded (malformed JSON or invalid UTF-8).
    - Form data (``application/x-www-form-urlencoded``) and multipart data
      (``multipart/form-data``, e.g. file uploads): a dict with two keys.
      ``"arguments"`` maps each field name to a list of string values and
      ``"files"`` maps each field name to a list of dicts holding the
      ``filename``, ``body`` and ``content_type`` of every uploaded file.
    - Any other content type: ``None``.

    Field values and file bodies are decoded as UTF-8 text. Bytes that are
    not valid UTF-8 are replaced with U+FFFD rather than raising, so a binary
    upload does not abort the request; the unmodified bytes are still
    available on ``request.body``.

    Example usage::

        import tornado.ioloop
        import tornado.web
        import json
        from tornado.webmiddleware import RequestParsingMiddleware

        class MainHandler(tornado.web.RequestHandler):
            def prepare(self):
                self.parsed_body = None
                self._apply_middlewares()

            def _apply_middlewares(self):
                middlewares = [RequestParsingMiddleware()]
                for middleware in middlewares:
                    middleware.process_request(self)

            def post(self):
                # Respond with the parsed body as JSON
                self.set_header("Content-Type", "application/json")
                self.write(json.dumps(self.parsed_body))

        def make_app():
            return tornado.web.Application([
                (r"/", MainHandler),
            ])

        if __name__ == "__main__":
            app = make_app()
            app.listen(8888)
            tornado.ioloop.IOLoop.current().start()

    In this example ``MainHandler`` applies the middleware in ``prepare``,
    so a POST to the root endpoint returns the parsed body as JSON.

    Note: the middleware reads ``handler.request.body`` and therefore
    assumes the request body is available when it is called.
    """

    # Content types that are handed to Tornado's form/multipart parser.
    _FORM_CONTENT_TYPES = (
        "application/x-www-form-urlencoded",
        "multipart/form-data",
    )

    def process_request(self, handler: Any) -> None:
        """Set ``handler.parsed_body`` from the request's body.

        Args:
            handler: Object exposing ``request.headers`` and ``request.body``;
                its ``parsed_body`` attribute is overwritten.
        """
        request = handler.request
        content_type = request.headers.get("Content-Type", "")
        if content_type.startswith("application/json"):
            handler.parsed_body = self._parse_json(request)
        elif content_type.startswith(self._FORM_CONTENT_TYPES):
            handler.parsed_body = self._parse_form_or_multipart(request)
        else:
            handler.parsed_body = None

    def _parse_json(self, request: HTTPServerRequest) -> Any:
        """Return the JSON-decoded body, or ``None`` if it cannot be decoded."""
        try:
            return json_decode(request.body)
        except ValueError:
            # ValueError covers json.JSONDecodeError (malformed JSON) and
            # UnicodeDecodeError (body is not valid UTF-8); catching only the
            # former let undecodable bodies crash the request.
            return None

    @staticmethod
    def _to_text(value: Any) -> Any:
        """Decode ``bytes`` as UTF-8 without raising; return other values as-is."""
        if isinstance(value, bytes):
            # errors="replace" keeps non-UTF-8 input (e.g. binary uploads)
            # from raising UnicodeDecodeError.
            return value.decode("utf-8", errors="replace")
        return value

    def _parse_form_or_multipart(self, request: HTTPServerRequest) -> Dict[str, Any]:
        """Parse a form-encoded or multipart body into text-only structures.

        Returns:
            A dict with ``"arguments"`` (field name -> list of values) and
            ``"files"`` (field name -> list of dicts with ``filename``,
            ``body`` and ``content_type``). An empty file body is reported
            as ``None``.
        """
        arguments: Dict[str, List[Any]] = {}
        files: Dict[str, List[Any]] = {}

        # Tornado's parser fills the two dicts in place.
        parse_body_arguments(
            request.headers.get("Content-Type", ""),
            request.body,
            arguments,
            files,
            headers=request.headers,
        )

        return {
            "arguments": {
                name: [self._to_text(value) for value in values]
                for name, values in arguments.items()
            },
            "files": {
                name: [
                    {
                        "filename": upload.filename,
                        "body": self._to_text(upload.body) if upload.body else None,
                        "content_type": upload.content_type,
                    }
                    for upload in uploads
                ]
                for name, uploads in files.items()
            },
        }