Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RequestParsingMiddleware for Simplified Request Body Parsing (Fixes #3369) #3426

82 changes: 82 additions & 0 deletions tornado/test/test_webmiddleware.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import json
from tornado.testing import AsyncHTTPTestCase
from tornado.web import Application, RequestHandler
from tornado.httputil import HTTPServerRequest
from tornado.web import RequestHandler
from tornado.webmiddleware import RequestParsingMiddleware

class TestHandler(RequestHandler):
    """Echo handler used to exercise ``RequestParsingMiddleware``.

    ``prepare`` runs the middleware chain, and ``post`` writes the resulting
    ``parsed_body`` back to the client so the tests can inspect it.
    """

    def prepare(self):
        # Reset the attribute first, then let the middleware chain fill it in.
        self.parsed_body = None
        self._apply_middlewares()

    def _apply_middlewares(self):
        """Invoke ``process_request`` on each configured middleware."""
        for step in (RequestParsingMiddleware(),):
            step.process_request(self)

    def post(self):
        # Echo the parsed body; the tests decode the response as JSON.
        self.write(self.parsed_body)

class MiddlewareTest(AsyncHTTPTestCase):
    """End-to-end tests posting JSON, form and multipart bodies to ``/test``."""

    def get_app(self):
        routes = [(r"/test", TestHandler)]
        return Application(routes)

    def test_json_parsing(self):
        """A JSON body is echoed back unchanged."""
        response = self.fetch(
            "/test",
            method="POST",
            body=json.dumps({"key": "value"}),
            headers={"Content-Type": "application/json"},
        )
        self.assertEqual(response.code, 200)
        echoed = json.loads(response.body)
        self.assertEqual(echoed, {"key": "value"})

    def test_form_parsing(self):
        """URL-encoded fields come back as lists of decoded strings."""
        response = self.fetch(
            "/test",
            method="POST",
            body="key=value",
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        self.assertEqual(response.code, 200)
        echoed = json.loads(response.body)
        self.assertEqual(echoed, {"key": ["value"]})

    def test_multipart_parsing_with_file(self):
        """A multipart body yields both the plain field and the uploaded file."""
        file_content = b"This is a test file."
        file_name = "test_file.txt"
        boundary = "----WebKitFormBoundary7MA4YWxkTrZu0gW"

        # Every multipart line ends with CRLF; the trailing empty entry
        # supplies the final CRLF after the closing delimiter.
        lines = [
            f"--{boundary}",
            "Content-Disposition: form-data; name=\"key\"",
            "",
            "value",
            f"--{boundary}",
            f"Content-Disposition: form-data; name=\"file\"; filename=\"{file_name}\"",
            "Content-Type: text/plain",
            "",
            file_content.decode('utf-8'),
            f"--{boundary}--",
            "",
        ]
        body = "\r\n".join(lines)

        response = self.fetch(
            "/test",
            method="POST",
            body=body.encode('utf-8'),
            headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
        )
        self.assertEqual(response.code, 200)

        parsed_body = json.loads(response.body)

        # Regular form field.
        self.assertEqual(parsed_body["arguments"], {"key": ["value"]})

        # Exactly one uploaded file, with its metadata and content intact.
        uploads = parsed_body["files"]["file"]
        self.assertEqual(len(uploads), 1)
        uploaded_file = uploads[0]
        self.assertEqual(uploaded_file["filename"], file_name)
        self.assertEqual(uploaded_file["body"], file_content.decode('utf-8'))
        self.assertEqual(uploaded_file["content_type"], "text/plain")


if __name__ == "__main__":
    # Allow running this test module directly as a script.
    from unittest import main as run_tests

    run_tests()
10 changes: 8 additions & 2 deletions tornado/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ async def main():
import types
import urllib.parse
from urllib.parse import urlencode

from tornado.webmiddleware import RequestParsingMiddleware
from tornado.concurrent import Future, future_set_result_unless_cancelled
from tornado import escape
from tornado import gen
Expand Down Expand Up @@ -294,7 +294,13 @@ def prepare(self) -> Optional[Awaitable[None]]:
.. versionadded:: 3.1
Asynchronous support.
"""
pass
self.parsed_body = None
self._apply_middlewares()

def _apply_middlewares(self):
    """Run the built-in request middlewares against this handler.

    A new ``RequestParsingMiddleware`` is instantiated on every call and
    its ``process_request`` hook is invoked with this handler.
    """
    for step in (RequestParsingMiddleware(),):
        step.process_request(self)

def on_finish(self) -> None:
"""Called after the end of a request.
Expand Down
130 changes: 130 additions & 0 deletions tornado/webmiddleware.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import json
from typing import Any, Dict, List, Optional
from tornado.httputil import HTTPServerRequest
from tornado.escape import json_decode

class Middleware:
    """Base class for request middlewares.

    Subclasses are expected to override :meth:`process_request`; the base
    implementation always raises ``NotImplementedError``.
    """

    def process_request(self, handler: Any) -> None:
        """Hook invoked with the request handler.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

class RequestParsingMiddleware(Middleware):
    """Parse the request body according to its ``Content-Type`` header.

    The result is stored on the handler as ``handler.parsed_body``:

    - ``application/json``: the decoded JSON value, or ``None`` when the
      body is not valid JSON (including bodies that are not valid UTF-8).
    - ``application/x-www-form-urlencoded``: a dict mapping each body
      field name to a list of its values, decoded as UTF-8.
    - ``multipart/form-data``: a dict with two keys, ``"arguments"`` (same
      shape as the form case) and ``"files"`` (field name -> list of dicts
      with ``filename``, ``body`` and ``content_type``).
    - anything else: ``None``.

    The Content-Type comparison is case-insensitive.

    Example usage::

        import tornado.ioloop
        import tornado.web
        import json
        from tornado.webmiddleware import RequestParsingMiddleware

        class MainHandler(tornado.web.RequestHandler):
            def prepare(self):
                self.parsed_body = None
                self._apply_middlewares()

            def _apply_middlewares(self):
                middlewares = [RequestParsingMiddleware()]
                for middleware in middlewares:
                    middleware.process_request(self)

            def post(self):
                # Respond with the parsed body as JSON
                self.set_header("Content-Type", "application/json")
                self.write(json.dumps(self.parsed_body))

        def make_app():
            return tornado.web.Application([
                (r"/", MainHandler),
            ])

        if __name__ == "__main__":
            app = make_app()
            app.listen(8888)
            tornado.ioloop.IOLoop.current().start()

    Note: this middleware reads ``request.body``, ``request.body_arguments``
    and ``request.files``, so it assumes the request body has already been
    received when ``process_request`` is called.
    """

    def process_request(self, handler: Any) -> None:
        """Populate ``handler.parsed_body`` from ``handler.request``.

        Args:
            handler: the request handler; must expose ``request`` and accept
                assignment of a ``parsed_body`` attribute.
        """
        # Media types are case-insensitive, so normalise before matching
        # (e.g. ``Application/JSON`` must be treated as JSON).
        content_type = handler.request.headers.get("Content-Type", "")
        content_type = content_type.strip().lower()

        if content_type.startswith("application/json"):
            handler.parsed_body = self._parse_json(handler.request)
        elif content_type.startswith("application/x-www-form-urlencoded"):
            handler.parsed_body = self._parse_form(handler.request)
        elif content_type.startswith("multipart/form-data"):
            handler.parsed_body = self._parse_multipart(handler.request)
        else:
            handler.parsed_body = None

    @staticmethod
    def _decode_values(values: List[Any]) -> List[Any]:
        """Decode every ``bytes`` item of ``values`` as UTF-8; keep others as-is."""
        return [v.decode('utf-8') if isinstance(v, bytes) else v for v in values]

    @staticmethod
    def _decode_file_body(body: Optional[bytes]) -> Any:
        """Return an uploaded file's content in the most convenient form.

        Returns ``None`` for an empty body, a ``str`` when the content is
        valid UTF-8, and the raw ``bytes`` otherwise, so that binary uploads
        are preserved instead of crashing the request.
        """
        if not body:
            return None
        try:
            return body.decode('utf-8')
        except UnicodeDecodeError:
            return body

    def _parse_json(self, request: HTTPServerRequest) -> Any:
        """Decode ``request.body`` as JSON; return ``None`` if it is malformed."""
        try:
            return json_decode(request.body)
        except (json.JSONDecodeError, UnicodeDecodeError):
            # json_decode first decodes bytes as UTF-8, so an invalid byte
            # sequence surfaces as UnicodeDecodeError rather than
            # JSONDecodeError; both mean "not parseable JSON".
            return None

    def _parse_form(self, request: HTTPServerRequest) -> Dict[str, List[str]]:
        """Return the URL-encoded body fields as ``{name: [values...]}``.

        Only ``request.body_arguments`` is consulted, so query-string
        parameters are not mixed into the parsed body.

        Raises:
            UnicodeDecodeError: if a field value is not valid UTF-8.
        """
        return {
            name: self._decode_values(values)
            for name, values in request.body_arguments.items()
        }

    def _parse_multipart(self, request: HTTPServerRequest) -> Dict[str, Any]:
        """Return the multipart fields and uploaded files of ``request``.

        Returns:
            A dict with ``"arguments"`` (``{name: [values...]}``) and
            ``"files"`` (``{name: [{"filename", "body", "content_type"}]}``).
            A file ``body`` is ``None`` when empty, a ``str`` when it is
            valid UTF-8, and raw ``bytes`` otherwise.

        Raises:
            UnicodeDecodeError: if a non-file field value is not valid UTF-8.
        """
        arguments = {
            name: self._decode_values(values)
            for name, values in request.body_arguments.items()
        }
        files = {
            name: [
                {
                    "filename": upload.filename,
                    "body": self._decode_file_body(upload.body),
                    "content_type": upload.content_type,
                }
                for upload in uploads
            ]
            for name, uploads in request.files.items()
        }
        return {"arguments": arguments, "files": files}