From 37ac7e5b03ba7f9504b4c198204f3110666376bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20B=C3=A9ky?= Date: Tue, 10 May 2022 10:23:46 -0400 Subject: [PATCH 1/3] [balsa] Verify URL. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes Http1ServerConnectionImplTest.Http11InvalidRequest, and adds extensive testing to the URL validator that both http-parser and the new isUrlValid in BalsaParser pass. Tested by patching in #22039 and running bazel test //test/common/http/http1:codec_impl_test --test_arg=--runtime-feature-override-for-tests=envoy.reloadable_features.http1_use_balsa_parser Tracking issue: #21245 Signed-off-by: Bence Béky --- source/common/http/http1/balsa_parser.cc | 67 ++++++++++++++++- test/common/http/http1/codec_impl_test.cc | 91 +++++++++++++++++++++++ 2 files changed, 157 insertions(+), 1 deletion(-) diff --git a/source/common/http/http1/balsa_parser.cc b/source/common/http/http1/balsa_parser.cc index ede3211c8ddd..37d5546aa73f 100644 --- a/source/common/http/http1/balsa_parser.cc +++ b/source/common/http/http1/balsa_parser.cc @@ -13,9 +13,67 @@ namespace Envoy { namespace Http { namespace Http1 { +namespace { + using ::quiche::BalsaFrameEnums; using ::quiche::BalsaHeaders; +constexpr absl::string_view kColonSlashSlash = "://"; + +// This method is crafted to match the URL validation behavior of the http-parser library. +bool isUrlValid(absl::string_view url, bool is_connect) { + if (url.empty()) { + return false; + } + + // Same set of characters are allowed for path and query. + const auto is_valid_path_query_char = [](char c) { + return c == 9 || c == 12 || ('!' <= c && c <= 126); + }; + + // The URL may start with a path. + if (auto it = url.begin(); *it == '/' || *it == '*') { + ++it; + return std::all_of(it, url.end(), is_valid_path_query_char); + } + + // If method is not CONNECT, parse scheme. + if (!is_connect) { + // Scheme must be alpha and non-empty. + auto it = std::find_if_not(url.begin(), url.end(), [](char c) { return std::isalpha(c); }); + if (it == url.begin()) { + return false; + } + url.remove_prefix(it - url.begin()); + if (!absl::StartsWith(url, kColonSlashSlash)) { + return false; + } + url.remove_prefix(kColonSlashSlash.length()); + } + + // Path and query start with the first '/' or '?' character. + const auto is_path_query_start = [](char c) { return c == '/' || c == '?'; }; + + // Divide the rest of the URL into two sections: host, and path/query/fragments. + auto path_query_begin = std::find_if(url.begin(), url.end(), is_path_query_start); + const absl::string_view host = url.substr(0, path_query_begin - url.begin()); + const absl::string_view path_query = url.substr(path_query_begin - url.begin()); + + const auto valid_host_char = [](char c) { + return std::isalnum(c) || c == '!' || c == '$' || c == '%' || c == '&' || c == '\'' || + c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == '-' || c == '.' || + c == ':' || c == ';' || c == '=' || c == '@' || c == '[' || c == ']' || c == '_' || + c == '~'; + }; + + // Match http-parser's quirk of allowing any number of '@' characters in host + // as long as they are not consecutive. + return std::all_of(host.begin(), host.end(), valid_host_char) && !absl::StrContains(host, "@@") && + std::all_of(path_query.begin(), path_query.end(), is_valid_path_query_char); +} + +} // anonymous namespace + BalsaParser::BalsaParser(MessageType type, ParserCallbacks* connection, size_t max_header_length) : connection_(connection) { ASSERT(connection_ != nullptr); @@ -124,7 +182,7 @@ void BalsaParser::ProcessTrailers(const BalsaHeaders& trailer) { } void BalsaParser::OnRequestFirstLineInput(absl::string_view /*line_input*/, - absl::string_view /*method_input*/, + absl::string_view method_input, absl::string_view request_uri, absl::string_view /*version_input*/) { if (status_ == ParserStatus::Error) { @@ -134,6 +192,13 @@ void BalsaParser::OnRequestFirstLineInput(absl::string_view /*line_input*/, if (status_ == ParserStatus::Error) { return; } + const bool is_connect = method_input == Headers::get().MethodValues.Connect; + if (!isUrlValid(request_uri, is_connect)) { + status_ = ParserStatus::Error; + // Error message matching that of http-parser. + error_message_ = "HPE_INVALID_URL"; + return; + } status_ = convertResult(connection_->onUrl(request_uri.data(), request_uri.size())); } diff --git a/test/common/http/http1/codec_impl_test.cc b/test/common/http/http1/codec_impl_test.cc index 8f024e4d3fea..b0e1f1e668af 100644 --- a/test/common/http/http1/codec_impl_test.cc +++ b/test/common/http/http1/codec_impl_test.cc @@ -3478,5 +3478,96 @@ TEST_F(Http1ClientConnectionImplTest, ShouldDumpCorrespondingRequestWithoutAlloc EXPECT_THAT(ostream.contents(), testing::HasSubstr("Dumping corresponding downstream request:")); } +// Test the URL validation logic of the Parser. This is executed as soon as the +// first line of the request is parsed. Note that the additional URL parsing +// code in ServerConnectionImpl::handlePath() is intentionally not exercised in +// this test. Http1Settings members like `allow_absolute_url` do not matter as +// they are not passed on to the Parser. + +const char* kValidFirstLines[] = { + "GET http://www.somewhere.com HTTP/1.1\r\n", + "GET scheme://$,www]][-_..!~\'(&=+.com/path HTTP/1.1\r\n", + "GET foo:///www.this.is.not.host.but.path.com HTTP/1.1\r\n", + "GET http://www/* HTTP/1.1\r\n", + "GET http://www?query HTTP/1.1\r\n", + "GET http://www*?query HTTP/1.1\r\n", + "GET http://?query?more##fragment??more#? HTTP/1.1\r\n", + "GET http://@ HTTP/1.1\r\n", + "GET http://@www@]@[]@?#?# HTTP/1.1\r\n", + "GET http://* HTTP/1.1\r\n", + "GET /path HTTP/1.1\r\n", // "://" is the path + "GET /path?query HTTP/1.1\r\n", // "://" is the path + "GET * HTTP/1.1\r\n", + "GET ** HTTP/1.1\r\n", + "GET *path@@/foo?# HTTP/1.1\r\n", + "GET /@@path& HTTP/1.1\r\n", + "GET http://host://this.is.path.com HTTP/1.1\r\n", + "CONNECT www.somewhere.com HTTP/1.1\r\n", + "CONNECT http://there.is.no.scheme.com HTTP/1.1\r\n", +}; + +const char* kInvalidFirstLines[] = { + "GET www.somewhere.com HTTP/1.1\r\n", + "GET http11://www.somewhere.com HTTP/1.1\r\n", + "GET 0ttp:/\r\n", + "GET http:/www.somewhere.com HTTP/1.1\r\n", + "GET http:://www.somewhere.com HTTP/1.1\r\n", + "GET http/somewhere HTTP/1.1\r\n", + "GET http://www@@ HTTP/1.1\r\n", + "GET http://www@w@w@@ HTTP/1.1\r\n", + "GET http://www@]@[]@# HTTP/1.1\r\n", + "GET http://www.hash#mark.com HTTP/1.1\r\n", + "GET # HTTP/1.1\r\n", + "GET ? HTTP/1.1\r\n", + "GET ?query HTTP/1.1\r\n", + "GET ?#query#@@ HTTP/1.1\r\n", + "GET @ HTTP/1.1\r\n", +}; + +TEST_F(Http1ServerConnectionImplTest, ParseUrl) { + for (const char* valid_first_line : kValidFirstLines) { + initialize(); + + StrictMock decoder; + Http::ResponseEncoder* response_encoder = nullptr; + EXPECT_CALL(callbacks_, newStream(_, _)) + .WillOnce(Invoke([&](ResponseEncoder& encoder, bool) -> RequestDecoder& { + response_encoder = &encoder; + return decoder; + })); + + Buffer::OwnedImpl buffer(valid_first_line); + auto status = codec_->dispatch(buffer); + + EXPECT_EQ(Protocol::Http11, codec_->protocol()); + EXPECT_TRUE(status.ok()) << valid_first_line; + EXPECT_EQ("", status.message()); + EXPECT_EQ("", response_encoder->getStream().responseDetails()); + } + + for (const char* invalid_first_line : kInvalidFirstLines) { + initialize(); + + InSequence sequence; + + StrictMock decoder; + Http::ResponseEncoder* response_encoder = nullptr; + EXPECT_CALL(callbacks_, newStream(_, _)) + .WillOnce(Invoke([&](ResponseEncoder& encoder, bool) -> RequestDecoder& { + response_encoder = &encoder; + return decoder; + })); + EXPECT_CALL(decoder, sendLocalReply(Http::Code::BadRequest, "Bad Request", _, _, _)); + + Buffer::OwnedImpl buffer(invalid_first_line); + auto status = codec_->dispatch(buffer); + + EXPECT_EQ(Protocol::Http11, codec_->protocol()); + EXPECT_TRUE(isCodecProtocolError(status)) << invalid_first_line; + EXPECT_EQ("http/1.1 protocol error: HPE_INVALID_URL", status.message()); + EXPECT_EQ("http1.codec_error", response_encoder->getStream().responseDetails()); + } +} + } // namespace Http } // namespace Envoy From 86ac031f71afaa6d892177b11f13239f32ce78aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20B=C3=A9ky?= Date: Wed, 6 Jul 2022 11:00:28 -0400 Subject: [PATCH 2/3] Silence spellcheck errors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bence Béky --- test/common/http/http1/codec_impl_test.cc | 2 ++ tools/spelling/spelling_dictionary.txt | 1 + 2 files changed, 3 insertions(+) diff --git a/test/common/http/http1/codec_impl_test.cc b/test/common/http/http1/codec_impl_test.cc index b0e1f1e668af..aea7d20e6851 100644 --- a/test/common/http/http1/codec_impl_test.cc +++ b/test/common/http/http1/codec_impl_test.cc @@ -3484,6 +3484,7 @@ TEST_F(Http1ClientConnectionImplTest, ShouldDumpCorrespondingRequestWithoutAlloc // this test. Http1Settings members like `allow_absolute_url` do not matter as // they are not passed on to the Parser. +// SPELLCHECKER(off) const char* kValidFirstLines[] = { "GET http://www.somewhere.com HTTP/1.1\r\n", "GET scheme://$,www]][-_..!~\'(&=+.com/path HTTP/1.1\r\n", @@ -3523,6 +3524,7 @@ const char* kInvalidFirstLines[] = { "GET ?#query#@@ HTTP/1.1\r\n", "GET @ HTTP/1.1\r\n", }; +// SPELLCHECKER(on) TEST_F(Http1ServerConnectionImplTest, ParseUrl) { for (const char* valid_first_line : kValidFirstLines) { diff --git a/tools/spelling/spelling_dictionary.txt b/tools/spelling/spelling_dictionary.txt index d9c0423f6cbc..c1a5c2867272 100644 --- a/tools/spelling/spelling_dictionary.txt +++ b/tools/spelling/spelling_dictionary.txt @@ -53,6 +53,7 @@ ETX FS HEXDIG HEXDIGIT +HPE LRU NAK OWS From aa095b239b12c880e4a655fa0238a227ea0dbc5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bence=20B=C3=A9ky?= Date: Fri, 8 Jul 2022 08:28:09 -0400 Subject: [PATCH 3/3] Delete misplaced comment. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bence Béky --- test/common/http/http1/codec_impl_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/common/http/http1/codec_impl_test.cc b/test/common/http/http1/codec_impl_test.cc index aea7d20e6851..bd750d8a2d1f 100644 --- a/test/common/http/http1/codec_impl_test.cc +++ b/test/common/http/http1/codec_impl_test.cc @@ -3496,8 +3496,8 @@ const char* kValidFirstLines[] = { "GET http://@ HTTP/1.1\r\n", "GET http://@www@]@[]@?#?# HTTP/1.1\r\n", "GET http://* HTTP/1.1\r\n", - "GET /path HTTP/1.1\r\n", // "://" is the path - "GET /path?query HTTP/1.1\r\n", // "://" is the path + "GET /path HTTP/1.1\r\n", + "GET /path?query HTTP/1.1\r\n", "GET * HTTP/1.1\r\n", "GET ** HTTP/1.1\r\n", "GET *path@@/foo?# HTTP/1.1\r\n",