Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[balsa] Verify URL. #22040

Merged
merged 4 commits into from
Jul 18, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 66 additions & 1 deletion source/common/http/http1/balsa_parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,67 @@ namespace Envoy {
namespace Http {
namespace Http1 {

namespace {

using ::quiche::BalsaFrameEnums;
using ::quiche::BalsaHeaders;

constexpr absl::string_view kColonSlashSlash = "://";

// This method is crafted to match the URL validation behavior of the http-parser library.
bool isUrlValid(absl::string_view url, bool is_connect) {
if (url.empty()) {
return false;
}

// Same set of characters are allowed for path and query.
const auto is_valid_path_query_char = [](char c) {
return c == 9 || c == 12 || ('!' <= c && c <= 126);
bencebeky marked this conversation as resolved.
Show resolved Hide resolved
};

// The URL may start with a path.
if (auto it = url.begin(); *it == '/' || *it == '*') {
++it;
return std::all_of(it, url.end(), is_valid_path_query_char);
}

// If method is not CONNECT, parse scheme.
if (!is_connect) {
// Scheme must be alpha and non-empty.
auto it = std::find_if_not(url.begin(), url.end(), [](char c) { return std::isalpha(c); });
if (it == url.begin()) {
return false;
}
url.remove_prefix(it - url.begin());
if (!absl::StartsWith(url, kColonSlashSlash)) {
return false;
}
url.remove_prefix(kColonSlashSlash.length());
}

// Path and query start with the first '/' or '?' character.
const auto is_path_query_start = [](char c) { return c == '/' || c == '?'; };

// Divide the rest of the URL into two sections: host, and path/query/fragments.
auto path_query_begin = std::find_if(url.begin(), url.end(), is_path_query_start);
const absl::string_view host = url.substr(0, path_query_begin - url.begin());
const absl::string_view path_query = url.substr(path_query_begin - url.begin());

const auto valid_host_char = [](char c) {
return std::isalnum(c) || c == '!' || c == '$' || c == '%' || c == '&' || c == '\'' ||
c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == '-' || c == '.' ||
c == ':' || c == ';' || c == '=' || c == '@' || c == '[' || c == ']' || c == '_' ||
c == '~';
};

// Match http-parser's quirk of allowing any number of '@' characters in host
// as long as they are not consecutive.
return std::all_of(host.begin(), host.end(), valid_host_char) && !absl::StrContains(host, "@@") &&
std::all_of(path_query.begin(), path_query.end(), is_valid_path_query_char);
}

} // anonymous namespace

BalsaParser::BalsaParser(MessageType type, ParserCallbacks* connection, size_t max_header_length)
: connection_(connection) {
ASSERT(connection_ != nullptr);
Expand Down Expand Up @@ -124,7 +182,7 @@ void BalsaParser::ProcessTrailers(const BalsaHeaders& trailer) {
}

void BalsaParser::OnRequestFirstLineInput(absl::string_view /*line_input*/,
absl::string_view /*method_input*/,
absl::string_view method_input,
absl::string_view request_uri,
absl::string_view /*version_input*/) {
if (status_ == ParserStatus::Error) {
Expand All @@ -134,6 +192,13 @@ void BalsaParser::OnRequestFirstLineInput(absl::string_view /*line_input*/,
if (status_ == ParserStatus::Error) {
return;
}
const bool is_connect = method_input == Headers::get().MethodValues.Connect;
if (!isUrlValid(request_uri, is_connect)) {
status_ = ParserStatus::Error;
// Error message matching that of http-parser.
error_message_ = "HPE_INVALID_URL";
return;
}
status_ = convertResult(connection_->onUrl(request_uri.data(), request_uri.size()));
}

Expand Down
93 changes: 93 additions & 0 deletions test/common/http/http1/codec_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3478,5 +3478,98 @@ TEST_F(Http1ClientConnectionImplTest, ShouldDumpCorrespondingRequestWithoutAlloc
EXPECT_THAT(ostream.contents(), testing::HasSubstr("Dumping corresponding downstream request:"));
}

// Test the URL validation logic of the Parser. This is executed as soon as the
// first line of the request is parsed. Note that the additional URL parsing
// code in ServerConnectionImpl::handlePath() is intentionally not exercised in
// this test. Http1Settings members like `allow_absolute_url` do not matter as
// they are not passed on to the Parser.

// SPELLCHECKER(off)
const char* kValidFirstLines[] = {
"GET http://www.somewhere.com HTTP/1.1\r\n",
"GET scheme://$,www]][-_..!~\'(&=+.com/path HTTP/1.1\r\n",
"GET foo:///www.this.is.not.host.but.path.com HTTP/1.1\r\n",
"GET http://www/* HTTP/1.1\r\n",
"GET http://www?query HTTP/1.1\r\n",
"GET http://www*?query HTTP/1.1\r\n",
"GET http://?query?more##fragment??more#? HTTP/1.1\r\n",
"GET http://@ HTTP/1.1\r\n",
"GET http://@www@]@[]@?#?# HTTP/1.1\r\n",
"GET http://* HTTP/1.1\r\n",
"GET /path HTTP/1.1\r\n",
"GET /path?query HTTP/1.1\r\n",
"GET * HTTP/1.1\r\n",
"GET ** HTTP/1.1\r\n",
"GET *path@@/foo?# HTTP/1.1\r\n",
"GET /@@path& HTTP/1.1\r\n",
"GET http://host://this.is.path.com HTTP/1.1\r\n",
"CONNECT www.somewhere.com HTTP/1.1\r\n",
"CONNECT http://there.is.no.scheme.com HTTP/1.1\r\n",
};

const char* kInvalidFirstLines[] = {
"GET www.somewhere.com HTTP/1.1\r\n",
"GET http11://www.somewhere.com HTTP/1.1\r\n",
"GET 0ttp:/\r\n",
"GET http:/www.somewhere.com HTTP/1.1\r\n",
"GET http:://www.somewhere.com HTTP/1.1\r\n",
"GET http/somewhere HTTP/1.1\r\n",
"GET http://www@@ HTTP/1.1\r\n",
"GET http://www@w@w@@ HTTP/1.1\r\n",
"GET http://www@]@[]@# HTTP/1.1\r\n",
"GET http://www.hash#mark.com HTTP/1.1\r\n",
"GET # HTTP/1.1\r\n",
"GET ? HTTP/1.1\r\n",
"GET ?query HTTP/1.1\r\n",
"GET ?#query#@@ HTTP/1.1\r\n",
"GET @ HTTP/1.1\r\n",
};
// SPELLCHECKER(on)

TEST_F(Http1ServerConnectionImplTest, ParseUrl) {
for (const char* valid_first_line : kValidFirstLines) {
initialize();

StrictMock<MockRequestDecoder> decoder;
Http::ResponseEncoder* response_encoder = nullptr;
EXPECT_CALL(callbacks_, newStream(_, _))
.WillOnce(Invoke([&](ResponseEncoder& encoder, bool) -> RequestDecoder& {
response_encoder = &encoder;
return decoder;
}));

Buffer::OwnedImpl buffer(valid_first_line);
auto status = codec_->dispatch(buffer);

EXPECT_EQ(Protocol::Http11, codec_->protocol());
EXPECT_TRUE(status.ok()) << valid_first_line;
EXPECT_EQ("", status.message());
EXPECT_EQ("", response_encoder->getStream().responseDetails());
}

for (const char* invalid_first_line : kInvalidFirstLines) {
initialize();

InSequence sequence;

StrictMock<MockRequestDecoder> decoder;
Http::ResponseEncoder* response_encoder = nullptr;
EXPECT_CALL(callbacks_, newStream(_, _))
.WillOnce(Invoke([&](ResponseEncoder& encoder, bool) -> RequestDecoder& {
response_encoder = &encoder;
return decoder;
}));
EXPECT_CALL(decoder, sendLocalReply(Http::Code::BadRequest, "Bad Request", _, _, _));

Buffer::OwnedImpl buffer(invalid_first_line);
auto status = codec_->dispatch(buffer);

EXPECT_EQ(Protocol::Http11, codec_->protocol());
EXPECT_TRUE(isCodecProtocolError(status)) << invalid_first_line;
EXPECT_EQ("http/1.1 protocol error: HPE_INVALID_URL", status.message());
EXPECT_EQ("http1.codec_error", response_encoder->getStream().responseDetails());
}
}

} // namespace Http
} // namespace Envoy
1 change: 1 addition & 0 deletions tools/spelling/spelling_dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ ETX
FS
HEXDIG
HEXDIGIT
HPE
LRU
NAK
OWS
Expand Down