Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

♻️ Refactor header option parser to use the standard library instead of a custom RegEx #75

Merged
merged 1 commit into from
Feb 3, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 26 additions & 24 deletions multipart/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import tempfile
from io import BytesIO
from numbers import Number
from email.message import Message
from typing import Dict, Union, Tuple

# Unique missing object.
_missing = object()
Expand Down Expand Up @@ -76,44 +78,44 @@
QUOTE = b'"'[0]


def parse_options_header(value):
def parse_options_header(value: Union[str, bytes]) -> Tuple[bytes, Dict[bytes, bytes]]:
"""
Parses a Content-Type header into a value in the following format:
(content_type, {parameters})
"""
# Uses email.message.Message to parse the header as described in PEP 594.
# Ref: https://peps.python.org/pep-0594/#cgi
if not value:
return (b'', {})

# If we are passed a string, we assume that it conforms to WSGI and does
# not contain any code point that's not in latin-1.
if isinstance(value, str): # pragma: no cover
value = value.encode('latin-1')
# If we are passed bytes, we assume that it conforms to WSGI, encoding in latin-1.
if isinstance(value, bytes): # pragma: no cover
value = value.decode('latin-1')

# For types
assert isinstance(value, str), 'Value should be a string by now'

# If we have no options, return the string as-is.
if b';' not in value:
return (value.lower().strip(), {})
if ";" not in value:
return (value.lower().strip().encode('latin-1'), {})

# Split at the first semicolon, to get our value and then options.
ctype, rest = value.split(b';', 1)
# ctype, rest = value.split(b';', 1)
message = Message()
message['content-type'] = value
params = message.get_params()
# If there were no parameters, this would have already returned above
assert params, 'At least the content type value should be present'
ctype = params.pop(0)[0].encode('latin-1')
options = {}

# Parse the options.
for match in OPTION_RE.finditer(rest):
key = match.group(1).lower()
value = match.group(2)
if value[0] == QUOTE and value[-1] == QUOTE:
# Unquote the value.
value = value[1:-1]
value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"')

for param in params:
key, value = param
# If the value is a filename, we need to fix a bug on IE6 that sends
# the full file path instead of the filename.
if key == b'filename':
if value[1:3] == b':\\' or value[:2] == b'\\\\':
value = value.split(b'\\')[-1]

options[key] = value

if key == 'filename':
if value[1:3] == ':\\' or value[:2] == '\\\\':
value = value.split('\\')[-1]
options[key.encode('latin-1')] = value.encode('latin-1')
return ctype, options


Expand Down
Loading