diff --git a/gunicorn/util.py b/gunicorn/util.py index ecd817472..b90e7fb18 100644 --- a/gunicorn/util.py +++ b/gunicorn/util.py @@ -31,6 +31,7 @@ import urllib.parse REDIRECT_TO = getattr(os, 'devnull', '/dev/null') +REASON_PHRASE_RE = re.compile(rb'[ \t\x21-\x7e\x80-\xff]*') # Server and Date aren't technically hop-by-hop # headers, but they are in the purview of the @@ -311,6 +312,16 @@ def write_nonblock(sock, data, chunked=False): def write_error(sock, status_int, reason, mesg): + # we may reflect user input in mesg + # .. as long as it is escaped appropriately for indicated Content-Type + # we should send our own reason text + # .. we shall never send misleading or invalid HTTP status lines + if not REASON_PHRASE_RE.fullmatch(reason.encode("latin-1")): + raise AssertionError("Attempted to return malformed error reason: %r" % (reason, )) + # we should avoid choosing status codes that are already in use + # indicating special handling in our proxies + if not (100 <= status_int <= 599): # RFC9110 15 + raise AssertionError("Attempted to return invalid error status code: %r" % (status_int, )) html_error = textwrap.dedent("""\ diff --git a/tests/test_http.py b/tests/test_http.py index 3aa4808f9..2741b49d8 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -104,6 +104,23 @@ def test_http_header_encoding(): with pytest.raises(UnicodeEncodeError): mocked_socket.sendall(util.to_bytestring(header_str, "ascii")) +def test_http_reflected_xss_in_error(): + """ If we put arbitrary user input into the HTTP status line, our proxy could get confused """ + + mocked_socket = mock.MagicMock() + with pytest.raises(UnicodeEncodeError): + util.write_error( + mocked_socket, 501, + "Not latin-1: \N{egg}", + "unused_", + ) + + with pytest.raises(AssertionError): + util.write_error( + mocked_socket, 501, + "Extra newline shall not appear in HTTP Status line: \n", + "harmless, will appear properly quoted in html", + ) def test_http_invalid_response_header(): """ tests whether 
http response headers are contains control chars """