diff --git a/src/brpc/details/http_message.cpp b/src/brpc/details/http_message.cpp index 2846b37c00..1dbef80147 100644 --- a/src/brpc/details/http_message.cpp +++ b/src/brpc/details/http_message.cpp @@ -33,6 +33,8 @@ namespace brpc { +DEFINE_bool(allow_chunked_length, false, + "Allow both Transfer-Encoding and Content-Length headers are present."); DEFINE_bool(http_verbose, false, "[DEBUG] Print EVERY http request/response"); DEFINE_int32(http_verbose_max_body_length, 512, @@ -172,6 +174,28 @@ int HttpMessage::on_headers_complete(http_parser *parser) { } } + // https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.3 + // If a message is received with both a Transfer-Encoding and a + // Content-Length header field, the Transfer-Encoding overrides the + // Content-Length. Such a message might indicate an attempt to + // perform request smuggling (Section 9.5) or response splitting + // (Section 9.4) and ought to be handled as an error. A sender MUST + // remove the received Content-Length field prior to forwarding such + // a message. + + // Reject message if both Transfer-Encoding and Content-Length headers + // are present or if allowed by gflag and 'Transfer-Encoding' + // is chunked - remove Content-Length and serve request. + if (parser->uses_transfer_encoding && parser->flags & F_CONTENTLENGTH) { + if (parser->flags & F_CHUNKED && FLAGS_allow_chunked_length) { + http_message->header().RemoveHeader("Content-Length"); + } else { + LOG(ERROR) << "HTTP/1.1 protocol error: both Content-Length " + << "and Transfer-Encoding are set."; + return -1; + } + } + // If server receives a response to a HEAD request, returns 1 and then // the parser will interpret that as saying that this message has no body. if (parser->type == HTTP_RESPONSE && @@ -401,6 +425,7 @@ HttpMessage::HttpMessage(bool read_body_progressively, , _cur_value(NULL) , _vbodylen(0) { http_parser_init(&_parser, HTTP_BOTH); + _parser.allow_chunked_length = 1; _parser.data = this; } @@ -489,6 +514,9 @@ static void DescribeHttpParserFlags(std::ostream& os, unsigned int flags) { if (flags & F_SKIPBODY) { os << "F_SKIPBODY|"; } + if (flags & F_CONTENTLENGTH) { + os << "F_CONTENTLENGTH|"; + } } std::ostream& operator<<(std::ostream& os, const http_parser& parser) { @@ -548,7 +576,13 @@ void MakeRawHttpRequest(butil::IOBuf* request, << h->minor_version() << BRPC_CRLF; // Never use "Content-Length" set by user. h->RemoveHeader("Content-Length"); - if (h->method() != HTTP_METHOD_GET) { + const std::string* transfer_encoding = h->GetHeader("Transfer-Encoding"); + if (h->method() == HTTP_METHOD_GET) { + h->RemoveHeader("Transfer-Encoding"); + } else if (!transfer_encoding) { + // https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.2 + // A sender MUST NOT send a Content-Length header field in any message + // that contains a Transfer-Encoding header field. os << "Content-Length: " << (content ? content->length() : 0) << BRPC_CRLF; } @@ -638,25 +672,36 @@ void MakeRawHttpResponse(butil::IOBuf* response, // A server MUST NOT send a Content-Length header field in any response // with a status code of 1xx (Informational) or 204 (No Content). h->RemoveHeader("Content-Length"); - } else if (content) { - const std::string* content_length = h->GetHeader("Content-Length"); - if (is_head_req) { - // Prioritize "Content-Length" set by user. - // If "Content-Length" is not set, set it to the length of content. - if (!content_length) { - os << "Content-Length: " << content->length() << BRPC_CRLF; - } - } else { - if (content_length) { - h->RemoveHeader("Content-Length"); + } else { + const std::string* transfer_encoding = h->GetHeader("Transfer-Encoding"); + // https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.2 + // A sender MUST NOT send a Content-Length header field in any message + // that contains a Transfer-Encoding header field. + if (transfer_encoding) { + h->RemoveHeader("Content-Length"); + } + if (content) { + const std::string* content_length = h->GetHeader("Content-Length"); + if (is_head_req) { + if (!content_length && !transfer_encoding) { + // Prioritize "Content-Length" set by user. + // If "Content-Length" is not set, set it to the length of content. + os << "Content-Length: " << content->length() << BRPC_CRLF; + } + } else { + if (!transfer_encoding) { + if (content_length) { + h->RemoveHeader("Content-Length"); + } + // Never use "Content-Length" set by user. + // Always set Content-Length since lighttpd requires the header to be + // set to 0 for empty content. + os << "Content-Length: " << content->length() << BRPC_CRLF; + } } - // Never use "Content-Length" set by user. - // Always set Content-Length since lighttpd requires the header to be - // set to 0 for empty content. - os << "Content-Length: " << content->length() << BRPC_CRLF; } } - if (!h->content_type().empty()) { + if (!is_invalid_content && !h->content_type().empty()) { os << "Content-Type: " << h->content_type() << BRPC_CRLF; } diff --git a/src/brpc/details/http_parser.cpp b/src/brpc/details/http_parser.cpp index 29e88fc28f..0f9e906a1a 100644 --- a/src/brpc/details/http_parser.cpp +++ b/src/brpc/details/http_parser.cpp @@ -410,7 +410,10 @@ enum header_states , h_transfer_encoding , h_upgrade + , h_matching_transfer_encoding_token_start , h_matching_transfer_encoding_chunked + , h_matching_transfer_encoding_token + , h_matching_connection_keep_alive , h_matching_connection_close @@ -499,6 +502,12 @@ bool is_url_char(char c) { return IS_URL_CHAR(c); } #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) +/** + * Verify that a char is a valid visible (printable) US-ASCII + * character or %x80-FF + **/ +#define IS_HEADER_CHAR(ch) \ + (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127)) #if BRPC_HTTP_PARSER_STRICT # define STRICT_CHECK(cond) \ @@ -691,6 +700,8 @@ size_t http_parser_execute (http_parser *parser, const char *url_mark = 0; const char *body_mark = 0; const char *status_mark = 0; + const unsigned int lenient = parser->lenient_http_headers; + const unsigned int allow_chunked_length = parser->allow_chunked_length; /* We're in an error state. Don't bother doing anything. */ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { @@ -782,6 +793,7 @@ size_t http_parser_execute (http_parser *parser, if (ch == CR || ch == LF) break; parser->flags = 0; + parser->uses_transfer_encoding = 0; parser->content_length = ULLONG_MAX; if (ch == 'H') { @@ -817,6 +829,7 @@ size_t http_parser_execute (http_parser *parser, case s_start_res: { parser->flags = 0; + parser->uses_transfer_encoding = 0; parser->content_length = ULLONG_MAX; switch (ch) { @@ -1015,6 +1028,7 @@ size_t http_parser_execute (http_parser *parser, if (ch == CR || ch == LF) break; parser->flags = 0; + parser->uses_transfer_encoding = 0; parser->content_length = ULLONG_MAX; if (!IS_ALPHA(ch)) { @@ -1470,6 +1484,7 @@ size_t http_parser_execute (http_parser *parser, parser->header_state = h_general; } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { parser->header_state = h_transfer_encoding; + parser->uses_transfer_encoding = 1; } break; @@ -1544,16 +1559,26 @@ size_t http_parser_execute (http_parser *parser, if ('c' == c) { parser->header_state = h_matching_transfer_encoding_chunked; } else { - parser->header_state = h_general; + parser->header_state = h_matching_transfer_encoding_token; } break; + /* Multi-value `Transfer-Encoding` header */ + case h_matching_transfer_encoding_token_start: + break; + case h_content_length: if (!IS_NUM(ch)) { SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); goto error; } + if (parser->flags & F_CONTENTLENGTH) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; + } + + parser->flags |= F_CONTENTLENGTH; parser->content_length = ch - '0'; break; @@ -1591,6 +1616,11 @@ size_t http_parser_execute (http_parser *parser, goto reexecute_byte; } + if (!lenient && !IS_HEADER_CHAR(ch)) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + c = LOWER(ch); switch (parser->header_state) { @@ -1627,17 +1657,47 @@ size_t http_parser_execute (http_parser *parser, break; } + /* Transfer-Encoding: chunked */ + case h_matching_transfer_encoding_token_start: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + parser->header_state = h_matching_transfer_encoding_chunked; + } else if (TOKEN(c)) { + /* NOTE(gejun): Not use strict mode for these macros since the additional + * characeters seem to be OK. + */ + + /* TODO(indutny): similar code below does this, but why? + * At the very least it seems to be inconsistent given that + * h_matching_transfer_encoding_token does not check for + * `STRICT_TOKEN` + */ + parser->header_state = h_matching_transfer_encoding_token; + } else if (c == ' ' || c == '\t') { + /* Skip lws */ + } else { + parser->header_state = h_general; + } + break; + /* Transfer-Encoding: chunked */ case h_matching_transfer_encoding_chunked: parser->index++; if (parser->index > sizeof(CHUNKED)-1 || c != CHUNKED[parser->index]) { - parser->header_state = h_general; + parser->header_state = h_matching_transfer_encoding_token; } else if (parser->index == sizeof(CHUNKED)-2) { parser->header_state = h_transfer_encoding_chunked; } break; + case h_matching_transfer_encoding_token: + if (ch == ',') { + parser->header_state = h_matching_transfer_encoding_token_start; + parser->index = 0; + } + break; + /* looking for 'Connection: keep-alive' */ case h_matching_connection_keep_alive: parser->index++; @@ -1660,6 +1720,9 @@ size_t http_parser_execute (http_parser *parser, break; case h_transfer_encoding_chunked: + if (ch != ' ') parser->header_state = h_matching_transfer_encoding_token; + break; + case h_connection_keep_alive: case h_connection_close: if (ch != ' ') parser->header_state = h_general; @@ -1739,6 +1802,24 @@ size_t http_parser_execute (http_parser *parser, break; } + /* Cannot use transfer-encoding and a content-length header together + per the HTTP specification. (RFC 7230 Section 3.3.3) */ + if ((parser->uses_transfer_encoding == 1) && + (parser->flags & F_CONTENTLENGTH)) { + /* Allow it for lenient parsing as long as `Transfer-Encoding` is + * not `chunked` or allow_length_with_encoding is set + */ + if (parser->flags & F_CHUNKED) { + if (!allow_chunked_length) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; + } + } else if (!lenient) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; + } + } + parser->state = s_headers_done; /* Here we call the headers_complete callback. This is somewhat @@ -1791,6 +1872,26 @@ size_t http_parser_execute (http_parser *parser, } else if (parser->flags & F_CHUNKED) { /* chunked encoding - ignore Content-Length header */ parser->state = s_chunk_size_start; + } else if (parser->uses_transfer_encoding == 1) { + if (parser->type == HTTP_REQUEST && !lenient) { + /* RFC 7230 3.3.3 */ + /* If a Transfer-Encoding header field + * is present in a request and the chunked transfer coding is not + * the final encoding, the message body length cannot be determined + * reliably; the server MUST respond with the 400 (Bad Request) + * status code and then close the connection. + */ + SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING); + return (p - data); /* Error */ + } else { + /* RFC 7230 3.3.3 */ + /* If a Transfer-Encoding header field is present in a response and + * the chunked transfer coding is not the final encoding, the + * message body length is determined by reading the connection until + * it is closed by the server. + */ + parser->state = s_body_identity_eof; + } } else { if (parser->content_length == 0) { /* Content-Length header given but zero: Content-Length: 0\r\n */ @@ -2037,6 +2138,12 @@ http_message_needs_eof (const http_parser *parser) return 0; } + /* RFC 7230 3.3.3, see `s_headers_almost_done` */ + if ((parser->uses_transfer_encoding == 1) && + (parser->flags & F_CHUNKED) == 0) { + return 1; + } + if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { return 0; } diff --git a/src/brpc/details/http_parser.h b/src/brpc/details/http_parser.h index b64eb00d8c..0ea4efd051 100644 --- a/src/brpc/details/http_parser.h +++ b/src/brpc/details/http_parser.h @@ -138,6 +138,7 @@ enum http_parser_flags , F_TRAILING = 1 << 3 , F_UPGRADE = 1 << 4 , F_SKIPBODY = 1 << 5 + , F_CONTENTLENGTH = 1 << 6 }; @@ -178,13 +179,17 @@ enum http_parser_flags XX(INVALID_HEADER_TOKEN, "invalid character in header") \ XX(INVALID_CONTENT_LENGTH, \ "invalid character in content-length header") \ + XX(UNEXPECTED_CONTENT_LENGTH, \ + "unexpected content-length header") \ XX(INVALID_CHUNK_SIZE, \ "invalid character in chunk size header") \ XX(INVALID_CONSTANT, "invalid constant string") \ XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ XX(STRICT, "strict mode assertion failed") \ XX(PAUSED, "parser is paused") \ - XX(UNKNOWN, "an unknown error occurred") + XX(UNKNOWN, "an unknown error occurred") \ + XX(INVALID_TRANSFER_ENCODING, \ + "request has invalid transfer-encoding") /* Define HPE_* values for each errno value above */ @@ -202,10 +207,15 @@ enum http_errno { struct http_parser { /** PRIVATE **/ unsigned int type : 2; /* enum http_parser_type */ - unsigned int flags : 6; /* F_* values from 'flags' enum; semi-public */ - unsigned int state : 8; /* enum state from http_parser.c */ - unsigned int header_state : 8; /* enum header_state from http_parser.c */ - unsigned int index : 8; /* index into current matcher */ + unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */ + unsigned int state : 7; /* enum state from http_parser.c */ + unsigned int header_state : 7; /* enum header_state from http_parser.c */ + unsigned int index : 5; /* index into current matcher */ + unsigned int uses_transfer_encoding : 1; /* Transfer-Encoding header is present */ + unsigned int allow_chunked_length : 1; /* Allow headers with both + * `Content-Length` and + * `Transfer-Encoding: chunked` set */ + unsigned int lenient_http_headers : 1; uint32_t nread; /* # bytes read in various scenarios */ uint64_t content_length; /* # bytes in body. `(uint64_t) -1` (all bits one) diff --git a/test/brpc_http_message_unittest.cpp b/test/brpc_http_message_unittest.cpp index b48fff61b2..568162cd5d 100644 --- a/test/brpc_http_message_unittest.cpp +++ b/test/brpc_http_message_unittest.cpp @@ -26,6 +26,9 @@ #include "echo.pb.h" namespace brpc { + +DECLARE_bool(allow_chunked_length); + namespace policy { Server::MethodProperty* FindMethodPropertyByURI(const std::string& uri_path, const Server* server, @@ -259,6 +262,93 @@ TEST(HttpMessageTest, parse_http_head_response) { ASSERT_EQ("chunked", *transfer_encoding); } +TEST(HttpMessageTest, cl_and_te) { + // https://datatracker.ietf.org/doc/html/rfc2616#section-14.41 + // If multiple encodings have been applied to an entity, the transfer- + // codings MUST be listed in the order in which they were applied. + const char* request_buf1 = "POST /chunked_w_content_length HTTP/1.1\r\n" + "Content-Length: 10\r\n" + "Transfer-Encoding: gzip,chunked\r\n" + "\r\n" + "5; ilovew3;whattheluck=aretheseparametersfor\r\nhello\r\n" + "6; blahblah; blah\r\n world\r\n" + "0\r\n" + "\r\n"; + butil::IOBuf request1; + request1.append(request_buf1); + + const char* request_buf2 = "POST /chunked_w_content_length HTTP/1.1\r\n" + "Content-Length: 19\r\n" + "Transfer-Encoding: chunked,gzip\r\n" + "\r\n" + "Message Body sdfsdf"; + butil::IOBuf request2; + request2.append(request_buf2); + + const char* response_buf1 = "HTTP/1.1 200 OK\r\n" + "Content-Length: 10\r\n" + "Transfer-Encoding: gzip,chunked\r\n" + "\r\n" + "5; ilovew3;whattheluck=aretheseparametersfor\r\nhello\r\n" + "6; blahblah; blah\r\n world\r\n" + "0\r\n" + "\r\n"; + butil::IOBuf response1; + response1.append(response_buf1); + + const char* response_buf2 = "HTTP/1.1 200 OK\r\n" + "Content-Length: 19\r\n" + "Transfer-Encoding: chunked,gzip\r\n" + "\r\n" + "Message Body sdfsdf"; + butil::IOBuf response2; + response2.append(response_buf2); + + brpc::FLAGS_allow_chunked_length = false; + { + brpc::HttpMessage http_message; + ASSERT_EQ(http_message.ParseFromIOBuf(request1), -1) + << http_message._parser; + } + { + brpc::HttpMessage http_message; + ASSERT_EQ(http_message.ParseFromIOBuf(request2), -1) + << http_message._parser; + } + { + brpc::HttpMessage http_message; + ASSERT_EQ(http_message.ParseFromIOBuf(response1), -1) + << http_message._parser; + } + { + brpc::HttpMessage http_message; + ASSERT_EQ(http_message.ParseFromIOBuf(response2), -1) + << http_message._parser; + } + + brpc::FLAGS_allow_chunked_length = true; + { + brpc::HttpMessage http_message; + ASSERT_EQ(http_message.ParseFromIOBuf(request1), request1.size()) + << http_message._parser; + } + { + brpc::HttpMessage http_message; + ASSERT_EQ(http_message.ParseFromIOBuf(request2), -1) + << http_message._parser; + } + { + brpc::HttpMessage http_message; + ASSERT_EQ(http_message.ParseFromIOBuf(response1), response1.size()) + << http_message._parser; + } + { + brpc::HttpMessage http_message; + ASSERT_EQ(http_message.ParseFromIOBuf(response2), -1) + << http_message._parser; + } +} + TEST(HttpMessageTest, find_method_property_by_uri) { brpc::Server server; ASSERT_EQ(0, server.AddService(new test::EchoService(), @@ -408,6 +498,10 @@ TEST(HttpMessageTest, serialize_http_request) { MakeRawHttpRequest(&request, &header, ep, &content); ASSERT_EQ("POST / HTTP/1.1\r\nContent-Length: 4\r\naccePT: blahblah\r\nuser-AGENT: myUA\r\nauthorization: myAuthString\r\nFoo: Bar\r\nHost: MyHost: 4321\r\n\r\ndata", request); + header.SetHeader("Transfer-Encoding", "chunked"); + MakeRawHttpRequest(&request, &header, ep, &content); + ASSERT_EQ("POST / HTTP/1.1\r\naccePT: blahblah\r\nTransfer-Encoding: chunked\r\nuser-AGENT: myUA\r\nauthorization: myAuthString\r\nFoo: Bar\r\nHost: MyHost: 4321\r\n\r\ndata", request); + // GET does not serialize content and user-set content-length is ignored. header.set_method(brpc::HTTP_METHOD_GET); header.SetHeader("Content-Length", "100"); @@ -434,12 +528,25 @@ TEST(HttpMessageTest, serialize_http_response) { ASSERT_EQ("HTTP/1.1 200 OK\r\nContent-Length: 100\r\nFoo: Bar\r\n\r\n", response) << butil::ToPrintable(response); + header.SetHeader("Transfer-Encoding", "chunked"); + MakeRawHttpResponse(&response, &header, NULL); + ASSERT_EQ("HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\nFoo: Bar\r\n\r\n", response) + << butil::ToPrintable(response); + header.RemoveHeader("Transfer-Encoding"); + // User-set content-length is ignored. content.append("data2"); MakeRawHttpResponse(&response, &header, &content); ASSERT_EQ("HTTP/1.1 200 OK\r\nContent-Length: 5\r\nFoo: Bar\r\n\r\ndata2", response) << butil::ToPrintable(response); + header.SetHeader("Content-Length", "100"); + header.SetHeader("Transfer-Encoding", "chunked"); + MakeRawHttpResponse(&response, &header, NULL); + ASSERT_EQ("HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\nFoo: Bar\r\n\r\n", response) + << butil::ToPrintable(response); + header.RemoveHeader("Transfer-Encoding"); + // User-set content-length and transfer-encoding is ignored when status code is 204 or 1xx. // 204 No Content. header.SetHeader("Content-Length", "100");