From 8a92830fd3a9d814d04e410304cd4a1b3937ffc7 Mon Sep 17 00:00:00 2001 From: Fedor Indutny Date: Tue, 19 Nov 2019 18:45:41 -0800 Subject: [PATCH] deps: update llhttp to 2.0.1 Changelog: * Optional SSE4.2 support (at compile time) * Lenient mode of operation PR-URL: https://github.com/nodejs/node/pull/30553 Reviewed-By: Gus Caplan Reviewed-By: Jiawen Geng Reviewed-By: Anna Henningsen Reviewed-By: Ben Noordhuis Reviewed-By: David Carlier Reviewed-By: Colin Ihrig Reviewed-By: Myles Borins --- deps/llhttp/README.md | 9 +- deps/llhttp/include/llhttp.h | 25 ++- deps/llhttp/src/api.c | 9 + deps/llhttp/src/http.c | 4 +- deps/llhttp/src/llhttp.c | 374 +++++++++++++++++++++++++---------- 5 files changed, 306 insertions(+), 115 deletions(-) diff --git a/deps/llhttp/README.md b/deps/llhttp/README.md index 7010b90f7ad9e5..c6c061238127f3 100644 --- a/deps/llhttp/README.md +++ b/deps/llhttp/README.md @@ -14,6 +14,8 @@ This project aims to: * Verifiable * Improving benchmarks where possible +More details in [Fedor Indutny's talk at JSConf EU 2019](https://youtu.be/x3k_5Mi66sY) + ## How? Over time, different approaches for improving [http_parser][0]'s code base @@ -30,11 +32,10 @@ So far llhttp outperforms http_parser: | | input size | bandwidth | reqs/sec | time | |:----------------|-----------:|-------------:|-----------:|--------:| -| **llhttp** _(C)_ | 8192.00 mb | 1497.88 mb/s | 3020458.87 ops/sec | 5.47 s | -| **llhttp** _(bitcode)_ | 8192.00 mb | 1131.75 mb/s | 2282171.24 ops/sec | 7.24 s | +| **llhttp** _(C)_ | 8192.00 mb | 1777.24 mb/s | 3583799.39 ops/sec | 4.61 s | | **http_parser** | 8192.00 mb | 694.66 mb/s | 1406180.33 req/sec | 11.79 s | -llhttp is faster by approximately **116%**. +llhttp is faster by approximately **156%**. ## Maintenance @@ -77,8 +78,6 @@ settings.on_message_complete = handle_on_message_complete; */ llhttp_init(&parser, HTTP_BOTH, &settings); -/* Use `llhttp_set_type(&parser, HTTP_REQUEST);` to override the mode */ - /* Parse request! 
*/ const char* request = "GET / HTTP/1.1\r\n\r\n"; int request_len = strlen(request); diff --git a/deps/llhttp/include/llhttp.h b/deps/llhttp/include/llhttp.h index 1671af4d088d26..719abe8aed2ba5 100644 --- a/deps/llhttp/include/llhttp.h +++ b/deps/llhttp/include/llhttp.h @@ -1,9 +1,9 @@ #ifndef INCLUDE_LLHTTP_H_ #define INCLUDE_LLHTTP_H_ -#define LLHTTP_VERSION_MAJOR 1 -#define LLHTTP_VERSION_MINOR 1 -#define LLHTTP_VERSION_PATCH 4 +#define LLHTTP_VERSION_MAJOR 2 +#define LLHTTP_VERSION_MINOR 0 +#define LLHTTP_VERSION_PATCH 1 #ifndef INCLUDE_LLHTTP_ITSELF_H_ #define INCLUDE_LLHTTP_ITSELF_H_ @@ -29,7 +29,7 @@ struct llhttp__internal_s { uint8_t http_major; uint8_t http_minor; uint8_t header_state; - uint8_t flags; + uint16_t flags; uint8_t upgrade; uint16_t status_code; uint8_t finish; @@ -85,7 +85,8 @@ enum llhttp_flags { F_UPGRADE = 0x10, F_CONTENT_LENGTH = 0x20, F_SKIPBODY = 0x40, - F_TRAILING = 0x80 + F_TRAILING = 0x80, + F_LENIENT = 0x100 }; typedef enum llhttp_flags llhttp_flags_t; @@ -297,7 +298,7 @@ llhttp_errno_t llhttp_finish(llhttp_t* parser); int llhttp_message_needs_eof(const llhttp_t* parser); /* Returns `1` if there might be any other messages following the last that was - * successfuly parsed. + * successfully parsed. */ int llhttp_should_keep_alive(const llhttp_t* parser); @@ -353,6 +354,18 @@ const char* llhttp_errno_name(llhttp_errno_t err); /* Returns textual name of HTTP method */ const char* llhttp_method_name(llhttp_method_t method); + +/* Enables/disables lenient header value parsing (disabled by default). + * + * Lenient parsing disables header value token checks, extending llhttp's + * protocol support to highly non-compliant clients/server. No + * `HPE_INVALID_HEADER_TOKEN` will be raised for incorrect header values when + * lenient parsing is "on". 
+ * + * **(USE AT YOUR OWN RISK)** + */ +void llhttp_set_lenient(llhttp_t* parser, int enabled); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/deps/llhttp/src/api.c b/deps/llhttp/src/api.c index 45227b35afb209..6f7246546dfe1a 100644 --- a/deps/llhttp/src/api.c +++ b/deps/llhttp/src/api.c @@ -127,6 +127,15 @@ const char* llhttp_method_name(llhttp_method_t method) { } +void llhttp_set_lenient(llhttp_t* parser, int enabled) { + if (enabled) { + parser->flags |= F_LENIENT; + } else { + parser->flags &= ~F_LENIENT; + } +} + + /* Callbacks */ diff --git a/deps/llhttp/src/http.c b/deps/llhttp/src/http.c index 67834c2d377c49..65d2ee677e4d33 100644 --- a/deps/llhttp/src/http.c +++ b/deps/llhttp/src/http.c @@ -74,9 +74,11 @@ int llhttp__after_message_complete(llhttp_t* parser, const char* p, int should_keep_alive; should_keep_alive = llhttp_should_keep_alive(parser); - parser->flags = 0; parser->finish = HTTP_FINISH_SAFE; + /* Keep `F_LENIENT` flag between messages, but reset every other flag */ + parser->flags &= F_LENIENT; + /* NOTE: this is ignored in loose parsing mode */ return should_keep_alive; } diff --git a/deps/llhttp/src/llhttp.c b/deps/llhttp/src/llhttp.c index 2786638f3ed6d5..698230f93fe08f 100644 --- a/deps/llhttp/src/llhttp.c +++ b/deps/llhttp/src/llhttp.c @@ -2,6 +2,20 @@ #include <stdint.h> #include <string.h> +#ifdef __SSE4_2__ + #ifdef _MSC_VER + #include <nmmintrin.h> + #else /* !_MSC_VER */ + #include <nmmintrin.h> + #endif /* _MSC_VER */ +#endif /* __SSE4_2__ */ + +#ifdef _MSC_VER + #define ALIGN(n) _declspec(align(n)) +#else /* !_MSC_VER */ + #define ALIGN(n) __attribute__((aligned(n))) +#endif /* _MSC_VER */ + #include "llhttp.h" typedef int (*llhttp__internal__span_cb)( @@ -10,147 +24,161 @@ typedef int (*llhttp__internal__span_cb)( static const unsigned char llparse_blob0[] = { 'C', 'L' }; -static const unsigned char llparse_blob1[] = { - 'o', 'n' +static const unsigned char ALIGN(16) llparse_blob1[] = { + 0x9, 0x9, 0xc, 0xc, '!', '"', '$', '>', '@', '~', 0x80, + 0xff }; static const
unsigned char llparse_blob2[] = { - 'e', 'c', 't', 'i', 'o', 'n' + 'o', 'n' }; static const unsigned char llparse_blob3[] = { - 'l', 'o', 's', 'e' + 'e', 'c', 't', 'i', 'o', 'n' }; static const unsigned char llparse_blob4[] = { - 'e', 'e', 'p', '-', 'a', 'l', 'i', 'v', 'e' + 'l', 'o', 's', 'e' }; static const unsigned char llparse_blob5[] = { - 'p', 'g', 'r', 'a', 'd', 'e' + 'e', 'e', 'p', '-', 'a', 'l', 'i', 'v', 'e' }; static const unsigned char llparse_blob6[] = { + 'p', 'g', 'r', 'a', 'd', 'e' +}; +static const unsigned char ALIGN(16) llparse_blob7[] = { + 0x9, 0x9, ' ', '~', 0x80, 0xfe +}; +static const unsigned char llparse_blob8[] = { 'h', 'u', 'n', 'k', 'e', 'd' }; -static const unsigned char llparse_blob7[] = { +static const unsigned char ALIGN(16) llparse_blob9[] = { + ' ', '!', '#', '\'', '*', '+', '-', '.', '0', '9', 'A', + 'Z', '^', 'z', '|', '|' +}; +static const unsigned char ALIGN(16) llparse_blob10[] = { + '~', '~' +}; +static const unsigned char llparse_blob11[] = { 'e', 'n', 't', '-', 'l', 'e', 'n', 'g', 't', 'h' }; -static const unsigned char llparse_blob8[] = { +static const unsigned char llparse_blob12[] = { 'r', 'o', 'x', 'y', '-', 'c', 'o', 'n', 'n', 'e', 'c', 't', 'i', 'o', 'n' }; -static const unsigned char llparse_blob9[] = { +static const unsigned char llparse_blob13[] = { 'r', 'a', 'n', 's', 'f', 'e', 'r', '-', 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g' }; -static const unsigned char llparse_blob10[] = { +static const unsigned char llparse_blob14[] = { 'p', 'g', 'r', 'a', 'd', 'e' }; -static const unsigned char llparse_blob11[] = { +static const unsigned char llparse_blob15[] = { 0xd, 0xa }; -static const unsigned char llparse_blob12[] = { +static const unsigned char llparse_blob16[] = { 'T', 'T', 'P', '/' }; -static const unsigned char llparse_blob13[] = { +static const unsigned char llparse_blob17[] = { 'C', 'E', '/' }; -static const unsigned char llparse_blob14[] = { +static const unsigned char llparse_blob18[] = { 'I', 'N', 'D' }; 
-static const unsigned char llparse_blob15[] = { +static const unsigned char llparse_blob19[] = { 'E', 'C', 'K', 'O', 'U', 'T' }; -static const unsigned char llparse_blob16[] = { +static const unsigned char llparse_blob20[] = { 'N', 'E', 'C', 'T' }; -static const unsigned char llparse_blob17[] = { +static const unsigned char llparse_blob21[] = { 'E', 'L', 'E', 'T', 'E' }; -static const unsigned char llparse_blob18[] = { +static const unsigned char llparse_blob22[] = { 'E', 'T' }; -static const unsigned char llparse_blob19[] = { +static const unsigned char llparse_blob23[] = { 'E', 'A', 'D' }; -static const unsigned char llparse_blob20[] = { +static const unsigned char llparse_blob24[] = { 'N', 'K' }; -static const unsigned char llparse_blob21[] = { +static const unsigned char llparse_blob25[] = { 'C', 'K' }; -static const unsigned char llparse_blob22[] = { +static const unsigned char llparse_blob26[] = { 'S', 'E', 'A', 'R', 'C', 'H' }; -static const unsigned char llparse_blob23[] = { +static const unsigned char llparse_blob27[] = { 'R', 'G', 'E' }; -static const unsigned char llparse_blob24[] = { +static const unsigned char llparse_blob28[] = { 'C', 'T', 'I', 'V', 'I', 'T', 'Y' }; -static const unsigned char llparse_blob25[] = { +static const unsigned char llparse_blob29[] = { 'L', 'E', 'N', 'D', 'A', 'R' }; -static const unsigned char llparse_blob26[] = { +static const unsigned char llparse_blob30[] = { 'V', 'E' }; -static const unsigned char llparse_blob27[] = { +static const unsigned char llparse_blob31[] = { 'O', 'T', 'I', 'F', 'Y' }; -static const unsigned char llparse_blob28[] = { +static const unsigned char llparse_blob32[] = { 'P', 'T', 'I', 'O', 'N', 'S' }; -static const unsigned char llparse_blob29[] = { +static const unsigned char llparse_blob33[] = { 'T', 'C', 'H' }; -static const unsigned char llparse_blob30[] = { +static const unsigned char llparse_blob34[] = { 'S', 'T' }; -static const unsigned char llparse_blob31[] = { +static const unsigned char 
llparse_blob35[] = { 'O', 'P' }; -static const unsigned char llparse_blob32[] = { +static const unsigned char llparse_blob36[] = { 'I', 'N', 'D' }; -static const unsigned char llparse_blob33[] = { +static const unsigned char llparse_blob37[] = { 'A', 'T', 'C', 'H' }; -static const unsigned char llparse_blob34[] = { +static const unsigned char llparse_blob38[] = { 'G', 'E' }; -static const unsigned char llparse_blob35[] = { +static const unsigned char llparse_blob39[] = { 'I', 'N', 'D' }; -static const unsigned char llparse_blob36[] = { +static const unsigned char llparse_blob40[] = { 'O', 'R', 'T' }; -static const unsigned char llparse_blob37[] = { +static const unsigned char llparse_blob41[] = { 'A', 'R', 'C', 'H' }; -static const unsigned char llparse_blob38[] = { +static const unsigned char llparse_blob42[] = { 'U', 'R', 'C', 'E' }; -static const unsigned char llparse_blob39[] = { +static const unsigned char llparse_blob43[] = { 'B', 'S', 'C', 'R', 'I', 'B', 'E' }; -static const unsigned char llparse_blob40[] = { +static const unsigned char llparse_blob44[] = { 'R', 'A', 'C', 'E' }; -static const unsigned char llparse_blob41[] = { +static const unsigned char llparse_blob45[] = { 'I', 'N', 'D' }; -static const unsigned char llparse_blob42[] = { +static const unsigned char llparse_blob46[] = { 'N', 'K' }; -static const unsigned char llparse_blob43[] = { +static const unsigned char llparse_blob47[] = { 'C', 'K' }; -static const unsigned char llparse_blob44[] = { +static const unsigned char llparse_blob48[] = { 'U', 'B', 'S', 'C', 'R', 'I', 'B', 'E' }; -static const unsigned char llparse_blob45[] = { +static const unsigned char llparse_blob49[] = { 'H', 'T', 'T', 'P', '/' }; -static const unsigned char llparse_blob46[] = { +static const unsigned char llparse_blob50[] = { 'A', 'D' }; -static const unsigned char llparse_blob47[] = { +static const unsigned char llparse_blob51[] = { 'T', 'P', '/' }; @@ -259,6 +287,7 @@ enum llparse_state_e { 
s_n_llhttp__internal__n_header_value_discard_ws_almost_done, s_n_llhttp__internal__n_header_value_lws, s_n_llhttp__internal__n_header_value_almost_done, + s_n_llhttp__internal__n_header_value_lenient, s_n_llhttp__internal__n_header_value_otherwise, s_n_llhttp__internal__n_header_value_connection_token, s_n_llhttp__internal__n_header_value_connection_ws, @@ -655,6 +684,13 @@ int llhttp__internal__c_update_header_state_2( return 0; } +int llhttp__internal__c_test_flags_2( + llhttp__internal_t* state, + const unsigned char* p, + const unsigned char* endp) { + return (state->flags & 256) == 256; +} + int llhttp__internal__c_update_header_state_4( llhttp__internal_t* state, const unsigned char* p, @@ -679,7 +715,7 @@ int llhttp__internal__c_update_header_state_6( return 0; } -int llhttp__internal__c_test_flags_2( +int llhttp__internal__c_test_flags_3( llhttp__internal_t* state, const unsigned char* p, const unsigned char* endp) { @@ -1394,6 +1430,26 @@ static llparse_state_t llhttp__internal__run( /* UNREACHABLE */; abort(); } + case s_n_llhttp__internal__n_header_value_lenient: + s_n_llhttp__internal__n_header_value_lenient: { + if (p == endp) { + return s_n_llhttp__internal__n_header_value_lenient; + } + switch (*p) { + case 10: { + goto s_n_llhttp__internal__n_span_end_llhttp__on_header_value_1; + } + case 13: { + goto s_n_llhttp__internal__n_span_end_llhttp__on_header_value_3; + } + default: { + p++; + goto s_n_llhttp__internal__n_header_value_lenient; + } + } + /* UNREACHABLE */; + abort(); + } case s_n_llhttp__internal__n_header_value_otherwise: s_n_llhttp__internal__n_header_value_otherwise: { if (p == endp) { @@ -1407,7 +1463,7 @@ static llparse_state_t llhttp__internal__run( goto s_n_llhttp__internal__n_span_end_llhttp__on_header_value_2; } default: { - goto s_n_llhttp__internal__n_error_13; + goto s_n_llhttp__internal__n_invoke_test_flags_2; } } /* UNREACHABLE */; @@ -1486,7 +1542,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return 
s_n_llhttp__internal__n_header_value_connection_1; } - match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob3, 4); + match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob4, 4); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -1510,7 +1566,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_header_value_connection_2; } - match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob4, 9); + match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob5, 9); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -1534,7 +1590,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_header_value_connection_3; } - match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob5, 6); + match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob6, 6); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -1617,6 +1673,30 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_header_value; } + #ifdef __SSE4_2__ + if (endp - p >= 16) { + __m128i ranges; + __m128i input; + int avail; + int match_len; + + /* Load input */ + input = _mm_loadu_si128((__m128i const*) p); + ranges = _mm_loadu_si128((__m128i const*) llparse_blob7); + + /* Find first character that does not match `ranges` */ + match_len = _mm_cmpestri(ranges, 6, + input, 16, + _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | + _SIDD_NEGATIVE_POLARITY); + + if (match_len != 0) { + p += match_len; + goto s_n_llhttp__internal__n_header_value; + } + goto s_n_llhttp__internal__n_header_value_otherwise; + } + #endif /* __SSE4_2__ */ switch (lookup_table[(uint8_t) *p]) { case 1: { p++; @@ -1679,7 +1759,7 @@ static llparse_state_t llhttp__internal__run( goto 
s_n_llhttp__internal__n_header_value_content_length_ws; } default: { - goto s_n_llhttp__internal__n_span_end_llhttp__on_header_value_4; + goto s_n_llhttp__internal__n_span_end_llhttp__on_header_value_5; } } /* UNREACHABLE */; @@ -1755,7 +1835,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_header_value_te_chunked_1; } - match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob6, 6); + match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob8, 6); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -1877,6 +1957,42 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_header_field_general; } + #ifdef __SSE4_2__ + if (endp - p >= 16) { + __m128i ranges; + __m128i input; + int avail; + int match_len; + + /* Load input */ + input = _mm_loadu_si128((__m128i const*) p); + ranges = _mm_loadu_si128((__m128i const*) llparse_blob9); + + /* Find first character that does not match `ranges` */ + match_len = _mm_cmpestri(ranges, 16, + input, 16, + _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | + _SIDD_NEGATIVE_POLARITY); + + if (match_len != 0) { + p += match_len; + goto s_n_llhttp__internal__n_header_field_general; + } + ranges = _mm_loadu_si128((__m128i const*) llparse_blob10); + + /* Find first character that does not match `ranges` */ + match_len = _mm_cmpestri(ranges, 2, + input, 16, + _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | + _SIDD_NEGATIVE_POLARITY); + + if (match_len != 0) { + p += match_len; + goto s_n_llhttp__internal__n_header_field_general; + } + goto s_n_llhttp__internal__n_header_field_general_otherwise; + } + #endif /* __SSE4_2__ */ switch (lookup_table[(uint8_t) *p]) { case 1: { p++; @@ -1916,7 +2032,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_header_field_3; } - match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob2, 6); + 
match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob3, 6); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -1941,7 +2057,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_header_field_4; } - match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob7, 10); + match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob11, 10); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -1987,7 +2103,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_header_field_1; } - match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob1, 2); + match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob2, 2); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -2011,7 +2127,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_header_field_5; } - match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob8, 15); + match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob12, 15); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -2036,7 +2152,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_header_field_6; } - match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob9, 16); + match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob13, 16); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -2061,7 +2177,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_header_field_7; } - match_seq = llparse__match_sequence_to_lower_unsafe(state, p, endp, llparse_blob10, 6); + match_seq = llparse__match_sequence_to_lower_unsafe(state, 
p, endp, llparse_blob14, 6); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -2156,7 +2272,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_url_skip_lf_to_http09; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob11, 2); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob15, 2); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -2361,7 +2477,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_req_http_start_1; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob12, 4); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob16, 4); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -2385,7 +2501,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_req_http_start_2; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob13, 3); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob17, 3); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -2591,6 +2707,30 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_url_path; } + #ifdef __SSE4_2__ + if (endp - p >= 16) { + __m128i ranges; + __m128i input; + int avail; + int match_len; + + /* Load input */ + input = _mm_loadu_si128((__m128i const*) p); + ranges = _mm_loadu_si128((__m128i const*) llparse_blob1); + + /* Find first character that does not match `ranges` */ + match_len = _mm_cmpestri(ranges, 12, + input, 16, + _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | + _SIDD_NEGATIVE_POLARITY); + + if (match_len != 0) { + p += match_len; + goto s_n_llhttp__internal__n_url_path; + } + goto s_n_llhttp__internal__n_url_query_or_fragment; + } + #endif /* __SSE4_2__ */ switch (lookup_table[(uint8_t) *p]) { case 1: { p++; @@ -2970,7 +3110,7 @@ static 
llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_2; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob14, 3); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob18, 3); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -2995,7 +3135,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_4; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob15, 6); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob19, 6); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3020,7 +3160,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_6; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob16, 4); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob20, 4); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3105,7 +3245,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_8; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob17, 5); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob21, 5); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3130,7 +3270,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_9; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob18, 2); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob22, 2); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3155,7 +3295,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_10; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob19, 3); + match_seq = 
llparse__match_sequence_id(state, p, endp, llparse_blob23, 3); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3180,7 +3320,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_12; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob20, 2); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob24, 2); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3205,7 +3345,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_13; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob21, 2); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob25, 2); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3251,7 +3391,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_15; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob22, 6); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob26, 6); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3276,7 +3416,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_16; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob23, 3); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob27, 3); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3301,7 +3441,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_18; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob24, 7); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob28, 7); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3326,7 +3466,7 @@ static llparse_state_t 
llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_20; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob25, 6); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob29, 6); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3411,7 +3551,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_22; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob26, 2); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob30, 2); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3465,7 +3605,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_23; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob27, 5); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob31, 5); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3490,7 +3630,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_24; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob28, 6); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob32, 6); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3515,7 +3655,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_26; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob29, 3); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob33, 3); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3540,7 +3680,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_27; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob30, 2); + match_seq = 
llparse__match_sequence_id(state, p, endp, llparse_blob34, 2); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3565,7 +3705,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_30; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob32, 3); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob36, 3); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3590,7 +3730,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_31; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob33, 4); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob37, 4); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3636,7 +3776,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_28; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob31, 2); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob35, 2); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3660,7 +3800,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_33; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob34, 2); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob38, 2); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3736,7 +3876,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_36; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob35, 3); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob39, 3); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3761,7 +3901,7 @@ static llparse_state_t 
llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_37; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob36, 3); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob40, 3); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3824,7 +3964,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_39; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob37, 4); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob41, 4); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3849,7 +3989,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_40; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob38, 4); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob42, 4); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3874,7 +4014,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_41; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob39, 7); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob43, 7); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3924,7 +4064,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_42; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob40, 4); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob44, 4); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3949,7 +4089,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_45; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob41, 3); + match_seq = 
llparse__match_sequence_id(state, p, endp, llparse_blob45, 3); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3974,7 +4114,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_47; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob42, 2); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob46, 2); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -3999,7 +4139,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_48; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob43, 2); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob47, 2); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -4045,7 +4185,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_req_49; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob44, 8); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob48, 8); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -4493,7 +4633,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_start_res; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob45, 5); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob49, 5); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -4517,7 +4657,7 @@ static llparse_state_t llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_req_or_res_method_2; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob46, 2); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob50, 2); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -4542,7 +4682,7 @@ static llparse_state_t 
llhttp__internal__run( if (p == endp) { return s_n_llhttp__internal__n_req_or_res_method_3; } - match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob47, 3); + match_seq = llparse__match_sequence_id(state, p, endp, llparse_blob51, 3); p = match_seq.current; switch (match_seq.status) { case kMatchComplete: { @@ -5191,6 +5331,24 @@ static llparse_state_t llhttp__internal__run( /* UNREACHABLE */; abort(); } + s_n_llhttp__internal__n_span_end_llhttp__on_header_value_3: { + const unsigned char* start; + int err; + + start = state->_span_pos0; + state->_span_pos0 = NULL; + err = llhttp__on_header_value(state, start, p); + if (err != 0) { + state->error = err; + state->error_pos = (const char*) (p + 1); + state->_current = (void*) (intptr_t) s_n_llhttp__internal__n_header_value_almost_done; + return s_error; + } + p++; + goto s_n_llhttp__internal__n_header_value_almost_done; + /* UNREACHABLE */; + abort(); + } s_n_llhttp__internal__n_error_13: { state->error = 0xa; state->reason = "Invalid header value char"; @@ -5200,6 +5358,16 @@ static llparse_state_t llhttp__internal__run( /* UNREACHABLE */; abort(); } + s_n_llhttp__internal__n_invoke_test_flags_2: { + switch (llhttp__internal__c_test_flags_2(state, p, endp)) { + case 1: + goto s_n_llhttp__internal__n_header_value_lenient; + default: + goto s_n_llhttp__internal__n_error_13; + } + /* UNREACHABLE */; + abort(); + } s_n_llhttp__internal__n_invoke_update_header_state_3: { switch (llhttp__internal__c_update_header_state(state, p, endp)) { default: @@ -5288,7 +5456,7 @@ static llparse_state_t llhttp__internal__run( /* UNREACHABLE */; abort(); } - s_n_llhttp__internal__n_span_end_llhttp__on_header_value_3: { + s_n_llhttp__internal__n_span_end_llhttp__on_header_value_4: { const unsigned char* start; int err; @@ -5308,7 +5476,7 @@ static llparse_state_t llhttp__internal__run( s_n_llhttp__internal__n_invoke_mul_add_content_length_1: { switch (llhttp__internal__c_mul_add_content_length_1(state, p, endp, match)) { 
case 1: - goto s_n_llhttp__internal__n_span_end_llhttp__on_header_value_3; + goto s_n_llhttp__internal__n_span_end_llhttp__on_header_value_4; default: goto s_n_llhttp__internal__n_header_value_content_length; } @@ -5331,7 +5499,7 @@ static llparse_state_t llhttp__internal__run( /* UNREACHABLE */; abort(); } - s_n_llhttp__internal__n_span_end_llhttp__on_header_value_4: { + s_n_llhttp__internal__n_span_end_llhttp__on_header_value_5: { const unsigned char* start; int err; @@ -5357,8 +5525,8 @@ static llparse_state_t llhttp__internal__run( /* UNREACHABLE */; abort(); } - s_n_llhttp__internal__n_invoke_test_flags_2: { - switch (llhttp__internal__c_test_flags_2(state, p, endp)) { + s_n_llhttp__internal__n_invoke_test_flags_3: { + switch (llhttp__internal__c_test_flags_3(state, p, endp)) { case 0: goto s_n_llhttp__internal__n_header_value_content_length; default: @@ -5388,7 +5556,7 @@ static llparse_state_t llhttp__internal__run( case 1: goto s_n_llhttp__internal__n_header_value_connection; case 2: - goto s_n_llhttp__internal__n_invoke_test_flags_2; + goto s_n_llhttp__internal__n_invoke_test_flags_3; case 3: goto s_n_llhttp__internal__n_header_value_te_chunked; case 4: