Skip to content

Commit

Permalink
Fixed Issue tornadoweb#3369: Improved request body parsing in HTTPServerRequest and RequestHandler.
Browse files Browse the repository at this point in the history
Enhanced support for JSON, form-encoded, and multipart data, including file uploads. Updated unit tests to cover all scenarios, ensuring robust handling of requests.
  • Loading branch information
Nirab123456 committed Oct 18, 2024
1 parent ac0daf7 commit 2df5e2a
Show file tree
Hide file tree
Showing 2 changed files with 220 additions and 0 deletions.
89 changes: 89 additions & 0 deletions tornado/test/test_webmiddleware.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import json
from tornado.testing import AsyncHTTPTestCase
from tornado.web import Application, RequestHandler
from tornado.httputil import HTTPServerRequest
from tornado.web import RequestHandler
from tornado.webmiddleware import RequestParsingMiddleware

class TestHandler(RequestHandler):
    """Handler used by the tests: parses the request body, then echoes it.

    ``prepare`` resets ``parsed_body`` and runs the middleware chain, so by
    the time ``post`` executes ``parsed_body`` holds whatever
    ``RequestParsingMiddleware`` produced for the request.
    """

    def prepare(self):
        """Reset the parsed body and run every middleware before the verb method."""
        self.parsed_body = None
        self._apply_middlewares()

    def _apply_middlewares(self):
        """Run each configured middleware against this handler, in order."""
        for middleware in (RequestParsingMiddleware(),):
            middleware.process_request(self)

    def post(self):
        """Write the parsed body back to the client."""
        self.write(self.parsed_body)

class MiddlewareTest(AsyncHTTPTestCase):
    """End-to-end tests for ``RequestParsingMiddleware`` through ``TestHandler``."""

    def get_app(self):
        """Build an application exposing ``TestHandler`` at ``/test``."""
        routes = [
            (r"/test", TestHandler),
        ]
        return Application(routes)

    def _post_body(self, body, content_type):
        """POST ``body`` to ``/test`` with the given Content-Type header."""
        return self.fetch(
            "/test",
            method="POST",
            body=body,
            headers={"Content-Type": content_type},
        )

    def test_json_parsing(self):
        """A JSON body is decoded and echoed back unchanged."""
        payload = {"key": "value"}
        response = self._post_body(json.dumps(payload), "application/json")
        self.assertEqual(response.code, 200)
        self.assertEqual(json.loads(response.body), {"key": "value"})

    def test_form_parsing(self):
        """A URL-encoded body is reported as arguments with no files."""
        response = self._post_body(
            "key=value", "application/x-www-form-urlencoded"
        )
        self.assertEqual(response.code, 200)

        # The middleware wraps form data in an arguments/files envelope.
        expected = {
            "arguments": {
                "key": ["value"]
            },
            "files": {}
        }
        self.assertEqual(json.loads(response.body), expected)

    def test_multipart_parsing_with_file(self):
        """A multipart body yields both the plain field and the uploaded file."""
        file_name = "test_file.txt"
        file_content = b"This is a test file."
        boundary = "----WebKitFormBoundary7MA4YWxkTrZu0gW"

        # One plain form field followed by one text file part.
        pieces = [
            f"--{boundary}\r\n",
            "Content-Disposition: form-data; name=\"key\"\r\n\r\n",
            "value\r\n",
            f"--{boundary}\r\n",
            f"Content-Disposition: form-data; name=\"file\"; filename=\"{file_name}\"\r\n",
            "Content-Type: text/plain\r\n\r\n",
            file_content.decode('utf-8'),
            f"\r\n--{boundary}--\r\n",
        ]
        body = "".join(pieces)

        response = self._post_body(
            body.encode('utf-8'),
            f"multipart/form-data; boundary={boundary}",
        )
        self.assertEqual(response.code, 200)

        parsed_body = json.loads(response.body)

        # Plain fields land under "arguments", uploads under "files".
        self.assertEqual(parsed_body["arguments"], {"key": ["value"]})
        uploads = parsed_body["files"]["file"]
        self.assertEqual(len(uploads), 1)

        uploaded_file = uploads[0]
        self.assertEqual(uploaded_file["filename"], file_name)
        self.assertEqual(uploaded_file["body"], file_content.decode('utf-8'))
        self.assertEqual(uploaded_file["content_type"], "text/plain")


# Allow running this test module directly as a script.
if __name__ == "__main__":
    from unittest import main

    main()
131 changes: 131 additions & 0 deletions tornado/webmiddleware.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import json
from typing import Any, Dict, List, Optional
from tornado.httputil import HTTPServerRequest
from tornado.escape import json_decode
from tornado.httputil import parse_body_arguments

class Middleware:
    """Base class for request middleware.

    Subclasses override `process_request`; the implementation here always
    raises ``NotImplementedError``.
    """

    def process_request(self, handler: Any) -> None:
        """Process the request attached to ``handler``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

class RequestParsingMiddleware(Middleware):
    """Parse the request body according to the ``Content-Type`` header.

    The result is stored on the handler as ``handler.parsed_body``:

    - ``application/json``: the decoded JSON value, or ``None`` when the
      body is not valid JSON (including bodies that are not valid UTF-8).
    - ``application/x-www-form-urlencoded`` and ``multipart/form-data``:
      a dict with two keys. ``"arguments"`` maps each field name to a list
      of values; ``"files"`` maps each field name to a list of dicts with
      ``"filename"``, ``"body"`` and ``"content_type"`` keys.
    - any other content type: ``None``.

    Argument values and file bodies are decoded as UTF-8 when possible.
    Values that are not valid UTF-8 (for example binary uploads) are kept
    as raw ``bytes`` instead of raising, so callers that serialize
    ``parsed_body`` must be prepared for ``bytes`` in that case. An empty
    file body is reported as ``None``.

    Example usage::

        import tornado.ioloop
        import tornado.web
        import json
        from tornado.webmiddleware import RequestParsingMiddleware

        class MainHandler(tornado.web.RequestHandler):
            def prepare(self):
                self.parsed_body = None
                self._apply_middlewares()

            def _apply_middlewares(self):
                middlewares = [RequestParsingMiddleware()]
                for middleware in middlewares:
                    middleware.process_request(self)

            def post(self):
                # Respond with the parsed body as JSON
                self.set_header("Content-Type", "application/json")
                self.write(json.dumps(self.parsed_body))

        def make_app():
            return tornado.web.Application([
                (r"/", MainHandler),
            ])

        if __name__ == "__main__":
            app = make_app()
            app.listen(8888)
            tornado.ioloop.IOLoop.current().start()

    Note: this middleware reads ``handler.request.body``, so it assumes
    the full request body is available when it runs.
    """

    def process_request(self, handler: Any) -> None:
        """Set ``handler.parsed_body`` from the request's body.

        Args:
            handler: object exposing ``request.headers`` and
                ``request.body``; receives the ``parsed_body`` attribute.
        """
        content_type = handler.request.headers.get("Content-Type", "")
        if content_type.startswith("application/json"):
            handler.parsed_body = self._parse_json(handler.request)
        elif content_type.startswith(
            ("application/x-www-form-urlencoded", "multipart/form-data")
        ):
            handler.parsed_body = self._parse_form_or_multipart(handler.request)
        else:
            handler.parsed_body = None

    def _parse_json(self, request: HTTPServerRequest) -> Any:
        """Return the decoded JSON body, or ``None`` if it cannot be decoded."""
        try:
            return json_decode(request.body)
        except (json.JSONDecodeError, UnicodeDecodeError):
            # json_decode decodes bytes as UTF-8 before parsing, so a body
            # with invalid UTF-8 fails with UnicodeDecodeError rather than
            # JSONDecodeError; treat both as "not parseable".
            return None

    @staticmethod
    def _decode_text(value: Any) -> Any:
        """Decode ``bytes`` as UTF-8, returning the input unchanged on failure.

        Non-bytes values are returned as-is. Undecodable bytes are kept raw
        so binary payloads neither crash the request nor get corrupted.
        """
        if not isinstance(value, bytes):
            return value
        try:
            return value.decode("utf-8")
        except UnicodeDecodeError:
            return value

    def _parse_form_or_multipart(self, request: HTTPServerRequest) -> Dict[str, Any]:
        """Parse a form-encoded or multipart body into arguments and files.

        Returns:
            A dict with ``"arguments"`` (field name -> list of values) and
            ``"files"`` (field name -> list of file-description dicts).
        """
        arguments: Dict[str, Any] = {}
        files: Dict[str, Any] = {}

        # Tornado's parser fills both dicts in place.
        parse_body_arguments(
            request.headers.get("Content-Type", ""),
            request.body,
            arguments,
            files,
            headers=request.headers,
        )

        decode = self._decode_text
        return {
            "arguments": {
                name: [decode(value) for value in values]
                for name, values in arguments.items()
            },
            "files": {
                name: [
                    {
                        "filename": upload.filename,
                        # Empty uploads are reported as None.
                        "body": decode(upload.body) if upload.body else None,
                        "content_type": upload.content_type,
                    }
                    for upload in uploads
                ]
                for name, uploads in files.items()
            },
        }

0 comments on commit 2df5e2a

Please sign in to comment.