Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scheme parsing fix (https) and extending (ws, wss) #1570

Merged
merged 6 commits into from
Feb 25, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
316 changes: 183 additions & 133 deletions fw/http_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -380,31 +380,31 @@ do { \
__FSM_I_MATCH_MOVE_fixup_finish(alphabet, to, flag, {})

/* Conditional transition from state @st to @st_next. */
#define __FSM_TX_COND(st, condition, st_next, field) \
__FSM_STATE(st) { \
#define __FSM_TX_COND(st, condition, st_next, field, ...) \
__FSM_STATE(st, __VA_ARGS__) { \
if (likely(condition)) \
__FSM_MOVE_f(st_next, field); \
TFW_PARSER_BLOCK(st); \
}

#define __FSM_TX_COND_nofixup(st, condition, st_next) \
__FSM_STATE(st) { \
#define __FSM_TX_COND_nofixup(st, condition, st_next, ...) \
__FSM_STATE(st, __VA_ARGS__) { \
if (likely(condition)) \
__FSM_MOVE_nofixup(st_next); \
TFW_PARSER_BLOCK(st); \
}

/* Automaton transition from state @st to @st_next on character @ch. */
#define __FSM_TX(st, ch, st_next) \
__FSM_TX_COND(st, c == (ch), st_next, &parser->hdr)
#define __FSM_TX_nofixup(st, ch, st_next) \
__FSM_TX_COND_nofixup(st, c == (ch), st_next)
#define __FSM_TX(st, ch, st_next, ...) \
__FSM_TX_COND(st, c == (ch), st_next, &parser->hdr, __VA_ARGS__)
#define __FSM_TX_nofixup(st, ch, st_next, ...) \
__FSM_TX_COND_nofixup(st, c == (ch), st_next, __VA_ARGS__)

/* Case-insensitive version of __FSM_TX(). */
#define __FSM_TX_LC(st, ch, st_next, field) \
__FSM_TX_COND(st, TFW_LC(c) == (ch), st_next, field)
#define __FSM_TX_LC_nofixup(st, ch, st_next) \
__FSM_TX_COND_nofixup(st, TFW_LC(c) == (ch), st_next)
#define __FSM_TX_LC(st, ch, st_next, field, ...) \
__FSM_TX_COND(st, TFW_LC(c) == (ch), st_next, field, __VA_ARGS__)
#define __FSM_TX_LC_nofixup(st, ch, st_next, ...) \
__FSM_TX_COND_nofixup(st, TFW_LC(c) == (ch), st_next, __VA_ARGS__)

/*
* Automaton transition with alphabet checking and fallback state.
Expand Down Expand Up @@ -3837,119 +3837,8 @@ tfw_http_parse_req(void *req_data, unsigned char *data, size_t len,
__FSM_STATE(Req_Uri, hot) {
if (likely(c == '/'))
__FSM_JMP(Req_UriMark);

if (likely(__data_available(p, 7)
&& C4_INT_LCM(p, 'h', 't', 't', 'p')
&& *(p + 4) == ':' && *(p + 5) == '/'
&& *(p + 6) == '/'))
__FSM_MOVE_nofixup_n(Req_UriAuthorityStart, 7);

/* "http://" slow path - step char-by-char. */
if (likely(TFW_LC(c) == 'h'))
__FSM_MOVE_nofixup(Req_UriSchH);

TFW_PARSER_BLOCK(Req_Uri);
}

/*
* URI host part.
* RFC 3986 chapter 3.2: authority = [userinfo@]host[:port]
*
* Authority parsing: it can be "host" or "userinfo@host" (port is
* parsed later). At the beginning we don't know, which of variants we
* have. So we fill req->host, and if we get '@', we copy host to
* req->userinfo, reset req->host and fill it.
*/
__FSM_STATE(Req_UriAuthorityStart) {
if (likely(isalnum(c) || c == '.' || c == '-')) {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthority, &req->host);
} else if (likely(c == '/')) {
/*
* The case where "Host:" header value is empty.
* A special TfwStr{} string is created that has
* a valid pointer and the length of zero.
*/
T_DBG3("Handling http:///path\n");
tfw_http_msg_set_str_data(msg, &req->host, p);
req->host.flags |= TFW_STR_COMPLETE;
__FSM_JMP(Req_UriMark);
} else if (c == '[') {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
}
TFW_PARSER_BLOCK(Req_UriAuthorityStart);
}

__FSM_STATE(Req_UriAuthority) {
if (likely(isalnum(c) || c == '.' || c == '-' || c == '@')) {
if (unlikely(c == '@')) {
if (!TFW_STR_EMPTY(&req->userinfo)) {
T_DBG("Second '@' in authority\n");
TFW_PARSER_BLOCK(Req_UriAuthority);
}
T_DBG3("Authority contains userinfo\n");
/* copy current host to userinfo */
req->userinfo = req->host;
__msg_field_finish(&req->userinfo, p);
TFW_STR_INIT(&req->host);

__FSM_MOVE_nofixup(Req_UriAuthorityResetHost);
}

__FSM_MOVE_f(Req_UriAuthority, &req->host);
}
__FSM_JMP(Req_UriAuthorityEnd);
}

__FSM_STATE(Req_UriAuthorityIPv6) {
if (likely(isxdigit(c) || c == ':')) {
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
} else if(c == ']') {
__FSM_MOVE_f(Req_UriAuthorityEnd, &req->host);
}
TFW_PARSER_BLOCK(Req_UriAuthorityIPv6);
}

__FSM_STATE(Req_UriAuthorityResetHost) {
if (likely(isalnum(c) || c == '.' || c == '-')) {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthority, &req->host);
} else if (c == '[') {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
}
__FSM_JMP(Req_UriAuthorityEnd);
}

__FSM_STATE(Req_UriAuthorityEnd) {
if (c == ':')
__FSM_MOVE_f(Req_UriPort, &req->host);
/* Authority End */
__msg_field_finish(&req->host, p);
T_DBG3("Userinfo len = %i, host len = %i\n",
(int)req->userinfo.len, (int)req->host.len);
if (likely(c == '/')) {
__FSM_JMP(Req_UriMark);
}
else if (c == ' ') {
__FSM_MOVE_nofixup(Req_HttpVer);
}
TFW_PARSER_BLOCK(Req_UriAuthorityEnd);
}

/* Host port in URI */
__FSM_STATE(Req_UriPort) {
if (likely(isdigit(c)))
__FSM_MOVE_f(Req_UriPort, &req->host);
__msg_field_finish(&req->host, p);
if (likely(c == '/')) {
__FSM_JMP(Req_UriMark);
}
else if (c == ' ') {
__FSM_MOVE_nofixup(Req_HttpVer);
}
TFW_PARSER_BLOCK(Req_UriPort);
else
__FSM_JMP(Req_UriRareForms);
ttaym marked this conversation as resolved.
Show resolved Hide resolved
}

__FSM_STATE(Req_UriMark, hot) {
Expand Down Expand Up @@ -4735,13 +4624,174 @@ Req_Method_1CharStep: __attribute__((cold))
__FSM_MOVE_nofixup_n(Req_MUSpace, 0);
}

/* process URI scheme: "http://" */
__FSM_TX_LC_nofixup(Req_UriSchH, 't', Req_UriSchHt);
__FSM_TX_LC_nofixup(Req_UriSchHt, 't', Req_UriSchHtt);
__FSM_TX_LC_nofixup(Req_UriSchHtt, 'p', Req_UriSchHttp);
__FSM_TX_nofixup(Req_UriSchHttp, ':', Req_UriSchHttpColon);
__FSM_TX_nofixup(Req_UriSchHttpColon, '/', Req_UriSchHttpColonSlash);
__FSM_TX_nofixup(Req_UriSchHttpColonSlash, '/', Req_UriAuthorityStart);
__FSM_STATE(Req_UriRareForms, cold) {
/* TODO: Support the authority form (RFC 7230, section 5.3.3)
* if the CONNECT method is ever added. */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't plan to support CONNECT and it seems we don't need it in terms of HTTP/1 for websockets.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's right: the CONNECT method is not needed for WebSockets, and it may not be needed for Tempesta FW at all, because it is not an explicit proxy. However, I have seen mentions of CONNECT and related TODOs in the code:

root@devvps:~/tempesta# grep -r -I -w CONNECT
fw/t/unit/test_http_parser.c:   TEST_REQ_UNKNOWN(CONNECT);
fw/http_parser.c:        * DELETE, TRACE and CONNECT requests has no defined semantics  \
fw/http_parser.c:                * (Successful) response to a CONNECT request
fw/http_parser.c:        * in any 2xx (Successful) response to a CONNECT request.
fw/http_parser.c:                * when CONNECT method will be added */
fw/http_parser.c:                       /* TODO: Add (req == CONNECT && resp == 2xx) */
fw/http.h:/* TODO: When CONNECT will be added, add it to tfw_handle_validation_req()
fw/cache.c:     /* TODO: Add CONNECT */

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be consistent with that intention, I added the comment for clarity and completeness. I'll change its wording to reflect your note.

/* Asterisk form as in RFC7230#section-5.3.4 */
if (req->method == TFW_HTTP_METH_OPTIONS && c == '*')
__FSM_MOVE_nofixup(Req_UriMarkEnd);
/* Absolute form as in RFC7230#section-5.3.2 */
__FSM_JMP(Req_UriAbsoluteForm);
}

/*
* First character of an absolute-form request target (scheme start).
* The character is compared through TFW_LC(): 'h' enters the
* Req_UriSchH chain and 'w' enters the Req_UriSchW chain. Any other
* character reaches TFW_PARSER_BLOCK().
*/
__FSM_STATE(Req_UriAbsoluteForm, cold) {
/* This is a rare form, so there is no need to speed up matching
* with a fast-path prefix check. */
if (likely(TFW_LC(c) == 'h'))
__FSM_MOVE_nofixup(Req_UriSchH);
else if (TFW_LC(c) == 'w')
__FSM_MOVE_nofixup(Req_UriSchW);

/* Neither 'h' nor 'w': no scheme chain starts with this character. */
TFW_PARSER_BLOCK(Req_UriAbsoluteForm);
}

/* process URI scheme */
/* path for 'http://' and 'https://' */
__FSM_TX_LC_nofixup(Req_UriSchH, 't', Req_UriSchHt, cold);
__FSM_TX_LC_nofixup(Req_UriSchHt, 't', Req_UriSchHtt, cold);
__FSM_TX_LC_nofixup(Req_UriSchHtt, 'p', Req_UriSchHttp, cold);
/*
* Branch point reached after the Req_UriSchH -> Ht -> Htt chain:
* ':' continues with the Req_UriSchHttpColon chain, 's' continues with
* the Req_UriSchHttps chain. Any other character reaches
* TFW_PARSER_BLOCK().
*
* NOTE(review): the cases have no 'break'; this relies on
* __FSM_MOVE_nofixup() always leaving the state (its definition is not
* visible here) - confirm.
*/
__FSM_STATE(Req_UriSchHttp, cold) {
switch (TFW_LC(c)) {
case ':':
__FSM_MOVE_nofixup(Req_UriSchHttpColon);
case 's':
__FSM_MOVE_nofixup(Req_UriSchHttps);
}
TFW_PARSER_BLOCK(Req_UriSchHttp);
}
/* http */
__FSM_TX_nofixup(Req_UriSchHttpColon, '/', Req_UriSchHttpColonSlash,
cold);
__FSM_TX_nofixup(Req_UriSchHttpColonSlash, '/', Req_UriAuthorityStart,
cold);
/* https */
__FSM_TX_nofixup(Req_UriSchHttps, ':', Req_UriSchHttpsColon, cold);
__FSM_TX_nofixup(Req_UriSchHttpsColon, '/', Req_UriSchHttpsColonSlash,
cold);
__FSM_TX_nofixup(Req_UriSchHttpsColonSlash, '/', Req_UriAuthorityStart,
cold);
/* path for 'ws://' and 'wss://' */
__FSM_TX_LC_nofixup(Req_UriSchW, 's', Req_UriSchWs, cold);
/*
* Branch point reached after Req_UriSchW matched 's':
* ':' continues with the Req_UriSchWsColon chain, a second 's'
* continues with the Req_UriSchWss chain. Any other character reaches
* TFW_PARSER_BLOCK().
*
* NOTE(review): the cases have no 'break'; this relies on
* __FSM_MOVE_nofixup() always leaving the state (its definition is not
* visible here) - confirm.
*/
__FSM_STATE(Req_UriSchWs, cold) {
switch (TFW_LC(c)) {
case ':':
__FSM_MOVE_nofixup(Req_UriSchWsColon);
case 's':
__FSM_MOVE_nofixup(Req_UriSchWss);
}
TFW_PARSER_BLOCK(Req_UriSchWs);
}
/* ws */
__FSM_TX_nofixup(Req_UriSchWsColon, '/', Req_UriSchWsColonSlash, cold);
__FSM_TX_nofixup(Req_UriSchWsColonSlash, '/', Req_UriAuthorityStart,
cold);
/* wss */
__FSM_TX_nofixup(Req_UriSchWss, ':', Req_UriSchWssColon, cold);
__FSM_TX_nofixup(Req_UriSchWssColon, '/', Req_UriSchWssColonSlash,
cold);
__FSM_TX_nofixup(Req_UriSchWssColonSlash, '/', Req_UriAuthorityStart,
cold);

/*
* URI host part.
* RFC 3986 chapter 3.2: authority = [userinfo@]host[:port]
*
* Authority parsing: it can be "host" or "userinfo@host" (port is
* parsed later). At the beginning we don't know, which of variants we
* have. So we fill req->host, and if we get '@', we copy host to
* req->userinfo, reset req->host and fill it.
*/
/*
* First character after "scheme://". Three continuations are accepted;
* anything else reaches TFW_PARSER_BLOCK().
*/
__FSM_STATE(Req_UriAuthorityStart, cold) {
if (likely(isalnum(c) || c == '.' || c == '-')) {
/* Start collecting into req->host at the current position. */
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthority, &req->host);
} else if (likely(c == '/')) {
/*
* The case where "Host:" header value is empty.
* A special TfwStr{} string is created that has
* a valid pointer and the length of zero.
*/
T_DBG3("Handling http:///path\n");
tfw_http_msg_set_str_data(msg, &req->host, p);
req->host.flags |= TFW_STR_COMPLETE;
__FSM_JMP(Req_UriMark);
} else if (c == '[') {
/* '[' switches to the bracketed (IPv6) host state. */
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
}
TFW_PARSER_BLOCK(Req_UriAuthorityStart);
}

/*
* Body of the authority component. Characters are collected into
* req->host until '@' shows that the collected part was userinfo, or
* until a character outside the accepted set hands over to
* Req_UriAuthorityEnd.
*/
__FSM_STATE(Req_UriAuthority, cold) {
if (likely(isalnum(c) || c == '.' || c == '-' || c == '@')) {
if (unlikely(c == '@')) {
/* userinfo is already filled, so this is a second '@'. */
if (!TFW_STR_EMPTY(&req->userinfo)) {
T_DBG("Second '@' in authority\n");
TFW_PARSER_BLOCK(Req_UriAuthority);
}
T_DBG3("Authority contains userinfo\n");
/* copy current host to userinfo */
req->userinfo = req->host;
__msg_field_finish(&req->userinfo, p);
/* Reset req->host; it is reopened in the next state. */
TFW_STR_INIT(&req->host);

__FSM_MOVE_nofixup(Req_UriAuthorityResetHost);
}

__FSM_MOVE_f(Req_UriAuthority, &req->host);
}
/*
* Not an authority character: let Req_UriAuthorityEnd decide
* (presumably on the same character, since __FSM_JMP is used
* rather than __FSM_MOVE - confirm against the macro).
*/
__FSM_JMP(Req_UriAuthorityEnd);
}

/*
* Inside a bracketed host entered on '['. Hex digits and ':' are
* collected into req->host; ']' is collected as well and moves to
* Req_UriAuthorityEnd. Any other character reaches TFW_PARSER_BLOCK().
* Only the character set is checked in this state.
*/
__FSM_STATE(Req_UriAuthorityIPv6, cold) {
if (likely(isxdigit(c) || c == ':')) {
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
} else if(c == ']') {
__FSM_MOVE_f(Req_UriAuthorityEnd, &req->host);
}
TFW_PARSER_BLOCK(Req_UriAuthorityIPv6);
}

/*
* Entered from Req_UriAuthority right after '@', when req->host has
* been reinitialized. Reopens req->host for the host that follows the
* userinfo, either a plain name or a bracketed host starting with '['.
* Any other character is handed to Req_UriAuthorityEnd.
*/
__FSM_STATE(Req_UriAuthorityResetHost, cold) {
if (likely(isalnum(c) || c == '.' || c == '-')) {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthority, &req->host);
} else if (c == '[') {
__msg_field_open(&req->host, p);
__FSM_MOVE_f(Req_UriAuthorityIPv6, &req->host);
}
__FSM_JMP(Req_UriAuthorityEnd);
}

/*
* End of the host part. ':' continues with the port, which is still
* collected into req->host. Otherwise req->host is finished and the
* parser continues with the path on '/' or with the HTTP version on
* ' '. Any other character reaches TFW_PARSER_BLOCK().
*/
__FSM_STATE(Req_UriAuthorityEnd, cold) {
if (c == ':')
__FSM_MOVE_f(Req_UriPort, &req->host);
/* Authority End */
__msg_field_finish(&req->host, p);
T_DBG3("Userinfo len = %i, host len = %i\n",
(int)req->userinfo.len, (int)req->host.len);
if (likely(c == '/')) {
__FSM_JMP(Req_UriMark);
}
else if (c == ' ') {
/* Target without a path: the space is skipped. */
__FSM_MOVE_nofixup(Req_HttpVer);
}
TFW_PARSER_BLOCK(Req_UriAuthorityEnd);
}

/* Host port in URI */
/*
* Port digits are collected into req->host (the same field as the
* host). The first non-digit finishes req->host; '/' continues with
* the path, ' ' continues with the HTTP version, and any other
* character reaches TFW_PARSER_BLOCK().
*/
__FSM_STATE(Req_UriPort, cold) {
if (likely(isdigit(c)))
__FSM_MOVE_f(Req_UriPort, &req->host);
__msg_field_finish(&req->host, p);
if (likely(c == '/')) {
__FSM_JMP(Req_UriMark);
}
else if (c == ' ') {
__FSM_MOVE_nofixup(Req_HttpVer);
}
TFW_PARSER_BLOCK(Req_UriPort);
}

/* Parse HTTP version (1.1 and 1.0 are supported). */
__FSM_TX_nofixup(Req_HttpVerT1, 'T', Req_HttpVerT2);
Expand Down Expand Up @@ -5731,7 +5781,7 @@ do { \
__FSM_I_field_chunk_flags(fld, TFW_STR_HDR_VALUE); \
__FSM_EXIT(CSTR_POSTPONE); \
}

#define H2_TRY_STR_LAMBDA_fixup(str, fld, lambda, curr_st, next_st) \
H2_TRY_STR_2LAMBDA_fixup(str, fld, {}, lambda, curr_st, next_st)

Expand Down
19 changes: 19 additions & 0 deletions fw/t/unit/test_http_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,25 @@ TEST(http_parser, parses_req_uri)
EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com");
}

/*
* Absolute-form targets with the "https", "ws" and "wss" schemes:
* the part before '@' must not end up in req->host.
*/
FOR_REQ("GET https://userame@natsys-lab.com HTTP/1.1\r\n\r\n")
{
EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com");
}

FOR_REQ("GET ws://userame@natsys-lab.com HTTP/1.1\r\n\r\n")
{
EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com");
}

FOR_REQ("GET wss://userame@natsys-lab.com HTTP/1.1\r\n\r\n")
{
EXPECT_TFWSTR_EQ(&req->host, "natsys-lab.com");
}

/* Asterisk-form target with the OPTIONS method. */
FOR_REQ("OPTIONS * HTTP/1.1\r\n\r\n");

/* A scheme other than http/https/ws/wss is expected to be blocked. */
EXPECT_BLOCK_REQ("GET sch://userame@natsys-lab.com HTTP/1.1\r\n\r\n");

EXPECT_BLOCK_REQ("GET \x7f HTTP/1.1\r\n"
"Host: test\r\n"
"\r\n");
Expand Down