From 2031b266429ff58a600e106e8861ad2c8905dfca Mon Sep 17 00:00:00 2001 From: Alexander Ostapenko Date: Mon, 9 Jul 2018 10:59:10 +0300 Subject: [PATCH 1/4] Fix #1033: Change header name format in HTTP tables configuration (#1033). --- etc/tempesta_fw.conf | 32 ++---- tempesta_fw/cfg.c | 45 +++++++- tempesta_fw/cfg.h | 1 + tempesta_fw/http_match.c | 156 ++++++++++++++++---------- tempesta_fw/http_match.h | 23 ++-- tempesta_fw/http_tbl.c | 53 ++++----- tempesta_fw/t/unit/test_http_match.c | 162 ++++++++++++++++++++------- tempesta_fw/t/unit/test_http_tbl.c | 24 ++-- 8 files changed, 308 insertions(+), 188 deletions(-) diff --git a/etc/tempesta_fw.conf b/etc/tempesta_fw.conf index 1707ef8284..2d0d96578b 100644 --- a/etc/tempesta_fw.conf +++ b/etc/tempesta_fw.conf @@ -242,7 +242,7 @@ # # Syntax: # http_chain { -# [ FIELD == (!=) ARG ] -> ACTION [ = VAL]; +# [ FIELD [HDR_NAME] == (!=) ARG ] -> ACTION [ = VAL]; # ... # } # @@ -259,28 +259,14 @@ # HTTP request method. Supported ARG values for this field are: "copy", # "delete", "get", "head", "lock", "mkcol", "move", "options", "patch", # "post", "propfind", "proppatch", "put", "trace", "unlock", "purge". -# - hdr_host -# The value of the "Host" header field. -# - hdr_conn -# The value of the "Connection" header field. -# - hdr_ctype -# The value of the "Content-Type" header field. -# - hdr_uagent -# The value of the "User-Agent" header field. -# - hdr_cookie -# The value of the "Cookie" header field. -# - hdr_ref -# The value of the "Referer" header field. -# - hdr_nmatch -# The value of the "If-None-Match" header field. -# - hdr_xfrwd -# The value of the "X-Forwarded-For" header field. -# - hdr_raw -# The contents of any other HTTP header field as specified by ARG. -# ARG must include contents of an HTTP header starting with the header -# field name. The suffix OP is not supported for this FIELD. Processing -# of hdr_raw may be slow as it requires walking over all headers of an -# HTTP request. 
+# - hdr +# The content of specific HTTP header. In this case HDR_NAME field is +# used to specify the name of header; the value of header should be +# specified in ARG. Matching of special headers: "X-Forwarded-For", +# "If-None-Match", "Referer", "Cookie", "User-Agent", "Content-Type", +# "Connection", "Host" - is accelerated; processing of other headers +# may be slow as it requires walking over all headers of an HTTP +# request. Also, suffix OP is not supported for non-special headers. # - mark # The value of netfilter mark of request's skb. # diff --git a/tempesta_fw/cfg.c b/tempesta_fw/cfg.c index 763f7149f7..d601ce38fa 100644 --- a/tempesta_fw/cfg.c +++ b/tempesta_fw/cfg.c @@ -200,6 +200,7 @@ static inline void rule_reset(TfwCfgRule *rule) { kfree(rule->fst); + kfree(rule->fst_ext); kfree(rule->snd); kfree(rule->act); kfree(rule->val); @@ -770,20 +771,23 @@ parse_cfg_entry(TfwCfgParserState *ps) /* Every _PFSM_MOVE() invokes _read_next_token(), so when we enter * any state, we get a new token automatically. - * Three different situations may occur here: + * Four different situations may occur here: * 1. In case of plain directive parsing: * name key = value; * ^ * 2. In case of rule parsing: * key == (!=) value -> action [= val] * ^ - * 3. In case of parsing of pure action rule: + * 3. In case of extended rule parsing: + * key key_ext == (!=) value -> action [= val] + * ^ + * 4. In case of parsing of pure action rule: * -> action [= val] * ^ - * current token is here; so at first we need to differentiate third - * situation, and in first two ones - save first token in special location + * current token is here; so at first we need to differentiate fourth + * situation, and in first three ones - save first token in special location * to decide later whether use it as name for plain directive or as - * condition key for rule; in last two cases predefined rule name is used. + * condition key for rule; in last three cases predefined rule name is used. 
*/ FSM_STATE(PS_START_NEW_ENTRY) { entry_reset(&ps->e); @@ -802,11 +806,42 @@ parse_cfg_entry(TfwCfgParserState *ps) PFSM_COND_MOVE(ps->t == TOKEN_DEQSIGN || ps->t == TOKEN_NEQSIGN, PS_RULE_COND); + PFSM_COND_MOVE(ps->t == TOKEN_LITERAL, PS_PLAIN_OR_LONG_RULE); + + /* Jump to plain val/attr scheme to make remained checks + * for left brace and semicolon. */ ps->err = entry_set_name(&ps->e); FSM_COND_JMP(ps->err, PS_EXIT); FSM_JMP(PS_VAL_OR_ATTR); } + FSM_STATE(PS_PLAIN_OR_LONG_RULE) { + FSM_COND_JMP(ps->t == TOKEN_DEQSIGN || + ps->t == TOKEN_NEQSIGN, + PS_LONG_RULE_COND); + + /* This is not rule (simple or extended), so jump to + * plain val/attr scheme. */ + ps->err = entry_set_name(&ps->e); + FSM_COND_JMP(ps->err, PS_EXIT); + FSM_COND_JMP(ps->t == TOKEN_EQSIGN, PS_STORE_ATTR_PREV); + FSM_COND_JMP(ps->t == TOKEN_LITERAL || + ps->t == TOKEN_SEMICOLON || + ps->t == TOKEN_LBRACE, + PS_STORE_VAL_PREV); + + ps->err = -EINVAL; + FSM_JMP(PS_EXIT); + } + + FSM_STATE(PS_LONG_RULE_COND) { + ps->err = entry_add_rule_param(&ps->e.rule.fst_ext, + ps->prev_lit, + ps->prev_lit_len); + FSM_COND_JMP(ps->err, PS_EXIT); + PFSM_MOVE(PS_RULE_COND); + } + FSM_STATE(PS_RULE_COND) { PFSM_COND_JMP_EXIT_ERROR(ps->t != TOKEN_LITERAL); ps->err = entry_set_cond(&ps->e, ps->prev_t, ps->lit, diff --git a/tempesta_fw/cfg.h b/tempesta_fw/cfg.h index 8bbfb80421..64cba7fd7b 100644 --- a/tempesta_fw/cfg.h +++ b/tempesta_fw/cfg.h @@ -125,6 +125,7 @@ */ typedef struct { const char *fst; + const char *fst_ext; const char *snd; const char *act; const char *val; diff --git a/tempesta_fw/http_match.c b/tempesta_fw/http_match.c index 75167086a5..66e5def7e8 100644 --- a/tempesta_fw/http_match.c +++ b/tempesta_fw/http_match.c @@ -4,7 +4,7 @@ * HTTP table logic. 
* * The matching process is driven by a "chain" of rules that look like this: - * @field == (!=) @arg -> @action [ = @action_val ] + * @field [ @hdr_name ] == (!=) @arg -> @action [ = @action_val ] * { TFW_HTTP_MATCH_F_HOST, "*example.com", TFW_HTTP_MATCH_ACT_CHAIN }, * { TFW_HTTP_MATCH_F_URI, "/foo/bar*", TFW_HTTP_MATCH_ACT_VHOST }, * { TFW_HTTP_MATCH_F_URI, "/", TFW_HTTP_MATCH_ACT_MARK }, @@ -12,7 +12,8 @@ * The table is represented by a list of linked chains, that contain rules * of TfwHttpMatchRule type that has the fields described above: * - @field is the first argument in rule - the field of a parsed HTTP request: - * method/uri/host/header/etc. + * method/uri/host/header/etc; @hdr_name is used only in cases when + * @field == 'hdr', to specify the name of desired header. * - @arg is the second argument in rule, its type is determined dynamically * depending on the @field (may be number/string/addr/etc); comparison * operator for @field and @arg depends on "==" ("!=") sign and on wildcard @@ -183,33 +184,6 @@ match_host(const TfwHttpReq *req, const TfwHttpMatchRule *rule) return tfw_str_eq_cstr(host, arg->str, arg->len, flags); } -static bool -match_hdr(const TfwHttpReq *req, const TfwHttpMatchRule *rule) -{ - static const tfw_http_hdr_t id_tbl[] = { - [0 ... 
_TFW_HTTP_MATCH_F_COUNT] = -1, - [TFW_HTTP_MATCH_F_HDR_CONN] = TFW_HTTP_HDR_CONNECTION, - [TFW_HTTP_MATCH_F_HDR_HOST] = TFW_HTTP_HDR_HOST, - [TFW_HTTP_MATCH_F_HDR_CTYPE] = TFW_HTTP_HDR_CONTENT_TYPE, - [TFW_HTTP_MATCH_F_HDR_UAGENT] = TFW_HTTP_HDR_USER_AGENT, - [TFW_HTTP_MATCH_F_HDR_COOKIE] = TFW_HTTP_HDR_COOKIE, - [TFW_HTTP_MATCH_F_HDR_REFERER] = TFW_HTTP_HDR_REFERER, - [TFW_HTTP_MATCH_F_HDR_NMATCH] = TFW_HTTP_HDR_IF_NONE_MATCH, - [TFW_HTTP_MATCH_F_HDR_XFRWD] = TFW_HTTP_HDR_X_FORWARDED_FOR, - }; - - const TfwHttpMatchArg *arg = &rule->arg; - tfw_str_eq_flags_t flags = map_op_to_str_eq_flags(rule->op); - tfw_http_hdr_t id = id_tbl[rule->field]; - BUG_ON(id < 0); - - /* There is no general rule, but most headers are case-insensitive. - * TODO: case-sensitive matching for headers when required by RFC. */ - flags |= TFW_STR_EQ_CASEI; - - return hdr_val_eq(req, id, rule->op, arg->str, arg->len, flags); -} - #define _MOVE_TO_COND(p, end, cond) \ while ((p) < (end) && !(cond)) \ (p)++; @@ -223,7 +197,7 @@ match_hdr_raw(const TfwHttpReq *req, const TfwHttpMatchRule *rule) int i; tfw_str_eq_flags_t flags = map_op_to_str_eq_flags(rule->op); - for (i = 0; i < req->h_tbl->off; ++i) { + for (i = TFW_HTTP_HDR_RAW; i < req->h_tbl->off; ++i) { const TfwStr *hdr, *dup, *end, *chunk; const char *c, *cend, *p, *pend; char prev; @@ -335,6 +309,27 @@ match_hdr_raw(const TfwHttpReq *req, const TfwHttpMatchRule *rule) return false; } +static bool +match_hdr(const TfwHttpReq *req, const TfwHttpMatchRule *rule) +{ + const TfwHttpMatchArg *arg; + tfw_str_eq_flags_t flags; + tfw_http_hdr_t id = rule->hid; + + BUG_ON(id < 0); + if (id == TFW_HTTP_HDR_RAW) + return match_hdr_raw(req, rule); + + arg = &rule->arg; + flags = map_op_to_str_eq_flags(rule->op); + + /* There is no general rule, but most headers are case-insensitive. + * TODO: case-sensitive matching for headers when required by RFC. 
*/ + flags |= TFW_STR_EQ_CASEI; + + return hdr_val_eq(req, id, rule->op, arg->str, arg->len, flags); +} + static bool match_wildcard(const TfwHttpReq *req, const TfwHttpMatchRule *rule) { @@ -356,15 +351,7 @@ typedef bool (*match_fn)(const TfwHttpReq *, const TfwHttpMatchRule *); static const match_fn match_fn_tbl[_TFW_HTTP_MATCH_F_COUNT] = { [TFW_HTTP_MATCH_F_WILDCARD] = match_wildcard, - [TFW_HTTP_MATCH_F_HDR_CONN] = match_hdr, - [TFW_HTTP_MATCH_F_HDR_HOST] = match_hdr, - [TFW_HTTP_MATCH_F_HDR_CTYPE] = match_hdr, - [TFW_HTTP_MATCH_F_HDR_UAGENT] = match_hdr, - [TFW_HTTP_MATCH_F_HDR_COOKIE] = match_hdr, - [TFW_HTTP_MATCH_F_HDR_REFERER] = match_hdr, - [TFW_HTTP_MATCH_F_HDR_NMATCH] = match_hdr, - [TFW_HTTP_MATCH_F_HDR_XFRWD] = match_hdr, - [TFW_HTTP_MATCH_F_HDR_RAW] = match_hdr_raw, + [TFW_HTTP_MATCH_F_HDR] = match_hdr, [TFW_HTTP_MATCH_F_HOST] = match_host, [TFW_HTTP_MATCH_F_METHOD] = match_method, [TFW_HTTP_MATCH_F_URI] = match_uri, @@ -509,18 +496,12 @@ tfw_http_rule_new(TfwHttpChain *chain, tfw_http_match_arg_t type, EXPORT_SYMBOL(tfw_http_rule_new); int -tfw_http_rule_init(TfwHttpMatchRule *rule, tfw_http_match_fld_t field, - tfw_http_match_op_t op, tfw_http_match_arg_t type, - const char *arg, size_t arg_len) +tfw_http_rule_arg_init(TfwHttpMatchRule *rule, const char *arg, size_t arg_len) { - rule->field = field; - rule->op = op; - rule->arg.type = type; - - if (type == TFW_HTTP_MATCH_A_WILDCARD) + if (rule->arg.type == TFW_HTTP_MATCH_A_WILDCARD) return 0; - if (type == TFW_HTTP_MATCH_A_NUM) { + if (rule->arg.type == TFW_HTTP_MATCH_A_NUM) { if (tfw_cfg_parse_uint(arg, &rule->arg.num)) { TFW_ERR_NL("http_match: invalid 'mark' condition:" " '%s'\n", arg); @@ -530,7 +511,7 @@ tfw_http_rule_init(TfwHttpMatchRule *rule, tfw_http_match_fld_t field, return 0; } - if (type == TFW_HTTP_MATCH_A_METHOD) { + if (rule->arg.type == TFW_HTTP_MATCH_A_METHOD) { if (tfw_http_tbl_method(arg, &rule->arg.method)) { TFW_ERR_NL("http_tbl: invalid 'method' condition:" " '%s'\n", arg); 
@@ -542,7 +523,9 @@ tfw_http_rule_init(TfwHttpMatchRule *rule, tfw_http_match_fld_t field, rule->arg.len = arg_len; memcpy(rule->arg.str, arg, arg_len); - if (field == TFW_HTTP_MATCH_F_HDR_RAW) { + if (rule->field == TFW_HTTP_MATCH_F_HDR + && rule->hid == TFW_HTTP_HDR_RAW) + { char *p = rule->arg.str; while ((*p = tolower(*p))) p++; @@ -550,38 +533,56 @@ tfw_http_rule_init(TfwHttpMatchRule *rule, tfw_http_match_fld_t field, return 0; } -EXPORT_SYMBOL(tfw_http_rule_init); +EXPORT_SYMBOL(tfw_http_rule_arg_init); const char * -tfw_http_arg_adjust(const char *arg, size_t len, size_t *size_out, - tfw_http_match_op_t *op_out) +tfw_http_arg_adjust(const char *arg, size_t len, const char *h_name, + size_t *size_out, tfw_http_match_op_t *op_out) { + int i; bool escaped; char *arg_out, *pos; - int i; + size_t name_len = 0, full_name_len = 0; + + if (h_name) { + name_len = strlen(h_name); + full_name_len = name_len + SLEN(S_DLM); + } - if (!(arg_out = kzalloc(len + 1, GFP_KERNEL))) { - TFW_ERR_NL("HTTP tables: unable to allocate rule" + if (!(arg_out = kzalloc(full_name_len + len + 1, GFP_KERNEL))) { + TFW_ERR_NL("http_match: unable to allocate rule" " argument.\n"); return NULL; } + if (h_name) { + memcpy(arg_out, h_name, name_len); + memcpy(arg_out + name_len, S_DLM, SLEN(S_DLM)); + } + *op_out = TFW_HTTP_MATCH_O_EQ; if (len > 1 && arg[len - 1] == '*' && arg[len - 2] != '\\') *op_out = TFW_HTTP_MATCH_O_PREFIX; if (arg[0] == '*') { if (*op_out == TFW_HTTP_MATCH_O_PREFIX) - TFW_WARN_NL("HTTP tables: unable to match" + TFW_WARN_NL("http_match: unable to match" " double-wildcard patterns '%s', so" - " prefix pattern is applied\n", arg); + " prefix pattern will be applied\n", arg); + + else if (h_name) + TFW_WARN_NL("http_match: unable to match suffix" + " pattern '%s' in case of raw header" + " specification: '%s', so wildcard pattern" + " will not be applied\n", arg, h_name); + else *op_out = TFW_HTTP_MATCH_O_SUFFIX; } - len = 0; + len = full_name_len; escaped = false; - pos = 
arg_out; + pos = arg_out + full_name_len; for (i = 0; arg[i]; ++i) { if (arg[i] == '*' && !escaped && (i == 0 || !arg[i + 1])) continue; @@ -600,3 +601,38 @@ tfw_http_arg_adjust(const char *arg, size_t len, size_t *size_out, return arg_out; } EXPORT_SYMBOL(tfw_http_arg_adjust); + +int +tfw_http_verify_hdr_field(tfw_http_match_fld_t field, const char **hdr_name, + unsigned int *hid_out) +{ + const char *h_name = *hdr_name; + + if (field != TFW_HTTP_MATCH_F_HDR && h_name) { + TFW_ERR_NL("http_tbl: unnecessary extra field is specified:" + " '%s'\n", h_name); + return -EINVAL; + } else if (field == TFW_HTTP_MATCH_F_HDR && !h_name) { + TFW_ERR_NL("http_tbl: header name missed\n"); + return -EINVAL; + } else if (h_name) { + size_t h_len = strlen(h_name); + const TfwStr tmp_hdr = { + .ptr = (TfwStr []){ + { .ptr = (void *)h_name, .len = h_len }, + { .ptr = S_DLM, .len = SLEN(S_DLM) } + }, + .len = h_len + SLEN(S_DLM), + .eolen = 0, + .flags = 2 << TFW_STR_CN_SHIFT + }; + + *hid_out = tfw_http_msg_req_spec_hid(&tmp_hdr); + + if (*hid_out != TFW_HTTP_HDR_RAW) + *hdr_name = NULL; + } + + return 0; +} +EXPORT_SYMBOL(tfw_http_verify_hdr_field); diff --git a/tempesta_fw/http_match.h b/tempesta_fw/http_match.h index 8c83edb2b5..d0f776a868 100644 --- a/tempesta_fw/http_match.h +++ b/tempesta_fw/http_match.h @@ -30,15 +30,7 @@ typedef enum { TFW_HTTP_MATCH_F_NA = 0, TFW_HTTP_MATCH_F_WILDCARD, - TFW_HTTP_MATCH_F_HDR_CONN, - TFW_HTTP_MATCH_F_HDR_HOST, - TFW_HTTP_MATCH_F_HDR_CTYPE, - TFW_HTTP_MATCH_F_HDR_UAGENT, - TFW_HTTP_MATCH_F_HDR_COOKIE, - TFW_HTTP_MATCH_F_HDR_REFERER, - TFW_HTTP_MATCH_F_HDR_NMATCH, - TFW_HTTP_MATCH_F_HDR_XFRWD, - TFW_HTTP_MATCH_F_HDR_RAW, + TFW_HTTP_MATCH_F_HDR, TFW_HTTP_MATCH_F_HOST, TFW_HTTP_MATCH_F_METHOD, TFW_HTTP_MATCH_F_URI, @@ -101,6 +93,7 @@ typedef struct { tfw_http_match_fld_t field; /* Field of a HTTP message to compare. */ tfw_http_match_op_t op; /* Comparison operator. */ TfwHttpAction act; /* Rule action. */ + unsigned int hid; /* Header ID. 
*/ unsigned int inv; /* Comparison inversion (inequality) flag.*/ TfwHttpMatchArg arg; /* A value to be compared with the field. note: the @arg has variable length. */ @@ -131,12 +124,12 @@ TfwHttpMatchRule *tfw_http_rule_new(TfwHttpChain *chain, tfw_http_match_arg_t type, size_t arg_len); -int tfw_http_rule_init(TfwHttpMatchRule *rule, tfw_http_match_fld_t field, - tfw_http_match_op_t op, tfw_http_match_arg_t type, - const char *arg, size_t arg_len ); - -const char *tfw_http_arg_adjust(const char *arg, size_t len, size_t *size_out, - tfw_http_match_op_t *op_out); +int tfw_http_rule_arg_init(TfwHttpMatchRule *rule, const char *arg, + size_t arg_len); +const char *tfw_http_arg_adjust(const char *arg, size_t len, const char *h_name, + size_t *size_out, tfw_http_match_op_t *op_out); +int tfw_http_verify_hdr_field(tfw_http_match_fld_t field, const char **h_name, + unsigned int *hid_out); #define tfw_http_chain_rules_for_each(chain, func) \ ({ \ diff --git a/tempesta_fw/http_tbl.c b/tempesta_fw/http_tbl.c index 293a7b3204..9d793995f1 100644 --- a/tempesta_fw/http_tbl.c +++ b/tempesta_fw/http_tbl.c @@ -191,16 +191,8 @@ tfw_http_tbl_vhost(TfwMsg *msg, bool *block) static const TfwCfgEnum tfw_http_tbl_cfg_field_enum[] = { { "uri", TFW_HTTP_MATCH_F_URI }, { "host", TFW_HTTP_MATCH_F_HOST }, - { "hdr_host", TFW_HTTP_MATCH_F_HDR_HOST }, - { "hdr_conn", TFW_HTTP_MATCH_F_HDR_CONN }, - { "hdr_ctype", TFW_HTTP_MATCH_F_HDR_CTYPE }, - { "hdr_uagent", TFW_HTTP_MATCH_F_HDR_UAGENT }, - { "hdr_cookie", TFW_HTTP_MATCH_F_HDR_COOKIE }, - { "hdr_ref", TFW_HTTP_MATCH_F_HDR_REFERER }, - { "hdr_nmatch", TFW_HTTP_MATCH_F_HDR_NMATCH }, - { "hdr_xfrwd", TFW_HTTP_MATCH_F_HDR_XFRWD }, + { "hdr", TFW_HTTP_MATCH_F_HDR }, { "mark", TFW_HTTP_MATCH_F_MARK }, - { "hdr_raw", TFW_HTTP_MATCH_F_HDR_RAW }, { "method", TFW_HTTP_MATCH_F_METHOD }, { 0 } }; @@ -228,15 +220,7 @@ static const TfwCfgEnum tfw_http_tbl_cfg_method_enum[] = { static const tfw_http_match_arg_t 
tfw_http_tbl_cfg_arg_types[_TFW_HTTP_MATCH_F_COUNT] = { [TFW_HTTP_MATCH_F_WILDCARD] = TFW_HTTP_MATCH_A_WILDCARD, - [TFW_HTTP_MATCH_F_HDR_CONN] = TFW_HTTP_MATCH_A_STR, - [TFW_HTTP_MATCH_F_HDR_HOST] = TFW_HTTP_MATCH_A_STR, - [TFW_HTTP_MATCH_F_HDR_CTYPE] = TFW_HTTP_MATCH_A_STR, - [TFW_HTTP_MATCH_F_HDR_UAGENT] = TFW_HTTP_MATCH_A_STR, - [TFW_HTTP_MATCH_F_HDR_COOKIE] = TFW_HTTP_MATCH_A_STR, - [TFW_HTTP_MATCH_F_HDR_REFERER] = TFW_HTTP_MATCH_A_STR, - [TFW_HTTP_MATCH_F_HDR_NMATCH] = TFW_HTTP_MATCH_A_STR, - [TFW_HTTP_MATCH_F_HDR_XFRWD] = TFW_HTTP_MATCH_A_STR, - [TFW_HTTP_MATCH_F_HDR_RAW] = TFW_HTTP_MATCH_A_STR, + [TFW_HTTP_MATCH_F_HDR] = TFW_HTTP_MATCH_A_STR, [TFW_HTTP_MATCH_F_HOST] = TFW_HTTP_MATCH_A_STR, [TFW_HTTP_MATCH_F_METHOD] = TFW_HTTP_MATCH_A_METHOD, [TFW_HTTP_MATCH_F_URI] = TFW_HTTP_MATCH_A_STR, @@ -389,14 +373,14 @@ static int tfw_cfgop_http_rule(TfwCfgSpec *cs, TfwCfgEntry *e) { int r; - unsigned int invert; TfwHttpMatchRule *rule; - const char *in_field, *action, *action_val, *in_arg, *arg = NULL; + const char *in_field, *hdr, *action, *action_val, *in_arg, *arg = NULL; + unsigned int invert, hid = TFW_HTTP_HDR_RAW; tfw_http_match_op_t op = TFW_HTTP_MATCH_O_WILDCARD; tfw_http_match_fld_t field = TFW_HTTP_MATCH_F_WILDCARD; tfw_http_match_arg_t type = TFW_HTTP_MATCH_A_WILDCARD; TfwCfgRule *cfg_rule = &e->rule; - size_t len, arg_size = 0; + size_t len = 0, arg_size = 0; TfwHttpChain *chain = NULL; TfwVhost *vhost = NULL; @@ -410,6 +394,7 @@ tfw_cfgop_http_rule(TfwCfgSpec *cs, TfwCfgEntry *e) invert = cfg_rule->inv; in_field = cfg_rule->fst; + hdr = cfg_rule->fst_ext; in_arg = cfg_rule->snd; if (in_arg) len = strlen(in_arg); @@ -434,10 +419,16 @@ tfw_cfgop_http_rule(TfwCfgSpec *cs, TfwCfgEntry *e) in_field); return r; } - type = tfw_http_tbl_cfg_arg_types[field]; - if (!(arg = tfw_http_arg_adjust(in_arg, len, &arg_size, &op))) + if ((r = tfw_http_verify_hdr_field(field, &hdr, &hid))) + return r; + + arg = tfw_http_arg_adjust(in_arg, len, hdr, &arg_size, &op); + if 
(!arg) return -ENOMEM; + + type = tfw_http_tbl_cfg_arg_types[field]; } + rule = tfw_http_rule_new(tfw_chain_entry, type, arg_size); if (!rule) { TFW_ERR_NL("http_tbl: can't allocate memory for rule\n"); @@ -448,9 +439,12 @@ tfw_cfgop_http_rule(TfwCfgSpec *cs, TfwCfgEntry *e) && type != TFW_HTTP_MATCH_A_NUM && type != TFW_HTTP_MATCH_A_METHOD && type != TFW_HTTP_MATCH_A_WILDCARD); + rule->hid = hid; rule->inv = invert; - r = tfw_http_rule_init(rule, field, op, type, arg, arg_size - 1); - if (r) + rule->field = field; + rule->op = op; + rule->arg.type = type; + if ((r = tfw_http_rule_arg_init(rule, arg, arg_size - 1))) goto err; kfree(arg); } @@ -578,13 +572,10 @@ tfw_http_tbl_cfgend(void) r = -ENOMEM; goto err; } - r = tfw_http_rule_init(rule, TFW_HTTP_MATCH_F_WILDCARD, - TFW_HTTP_MATCH_O_WILDCARD, - TFW_HTTP_MATCH_A_WILDCARD, - NULL, 0); - if (r) - goto err; + rule->op = TFW_HTTP_MATCH_O_WILDCARD; + rule->field = TFW_HTTP_MATCH_F_WILDCARD; + rule->arg.type = TFW_HTTP_MATCH_A_WILDCARD; rule->act.type = TFW_HTTP_MATCH_ACT_VHOST; rule->act.vhost = vhost_dflt; diff --git a/tempesta_fw/t/unit/test_http_match.c b/tempesta_fw/t/unit/test_http_match.c index bcd6b71015..42cd4d3c6e 100644 --- a/tempesta_fw/t/unit/test_http_match.c +++ b/tempesta_fw/t/unit/test_http_match.c @@ -106,9 +106,11 @@ http_match_suite_teardown(void) } static void -test_chain_add_rule_str(int test_id, tfw_http_match_fld_t field, const char *in_arg) +test_chain_add_rule_str(int test_id, tfw_http_match_fld_t field, + const char *hdr, const char *in_arg) { MatchEntry *e; + unsigned int hid; tfw_http_match_op_t op; size_t len, arg_size; const char *arg; @@ -116,12 +118,17 @@ test_chain_add_rule_str(int test_id, tfw_http_match_fld_t field, const char *in_ len = strlen(in_arg); BUG_ON(field == TFW_HTTP_MATCH_F_WILDCARD); BUG_ON(in_arg[0] == '*' && len == 1); - arg = tfw_http_arg_adjust(in_arg, len, &arg_size, &op); + + tfw_http_verify_hdr_field(field, &hdr, &hid); + arg = tfw_http_arg_adjust(in_arg, len, 
hdr, &arg_size, &op); e = test_rule_container_new(test_chain, MatchEntry, rule, TFW_HTTP_MATCH_A_STR, arg_size); - tfw_http_rule_init(&e->rule, field, op, TFW_HTTP_MATCH_A_STR, - arg, arg_size - 1); + e->rule.hid = hid; + e->rule.field = field; + e->rule.op = op; + e->rule.arg.type = TFW_HTTP_MATCH_A_STR; + tfw_http_rule_arg_init(&e->rule, arg, arg_size - 1); /* Just dummy action type to avoid BUG_ON in 'do_eval()'. */ e->rule.act.type = TFW_HTTP_MATCH_ACT_CHAIN; e->test_id = test_id; @@ -148,6 +155,23 @@ set_tfw_str(TfwStr *str, const char *cstr) str->len = strlen(cstr); } +static void +set_raw_hdr(const char *cstr) +{ + unsigned int hid; + TfwHttpHdrTbl *h_tbl = test_req->h_tbl; + + hid = h_tbl->off; + + if (hid == h_tbl->size && + tfw_http_msg_grow_hdr_tbl((TfwHttpMsg *)test_req)) + return; + + ++h_tbl->off; + + set_tfw_str(&test_req->h_tbl->tbl[hid], cstr); +} + TEST(tfw_http_match_req, returns_first_matching_rule) { const TfwHttpMatchRule *match; @@ -176,9 +200,9 @@ TEST(http_match, uri_prefix) { int match_id; - test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_URI, "/foo/bar/baz*"); - test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_URI, "/foo/ba*"); - test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_URI, "/*"); + test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_URI, NULL, "/foo/bar/baz*"); + test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_URI, NULL, "/foo/ba*"); + test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_URI, NULL, "/*"); set_tfw_str(&test_req->uri_path, "/foo/bar/baz.html"); match_id = test_chain_match(); @@ -201,9 +225,10 @@ TEST(http_match, uri_suffix) { int match_id; - test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_URI, "*.jpg"); - test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_URI, "*/people.html"); - test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_URI, "*/bar/folks.html"); + test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_URI, NULL, "*.jpg"); + test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_URI, NULL, "*/people.html"); + test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_URI, NULL, + 
"*/bar/folks.html"); set_tfw_str(&test_req->uri_path, "/foo/bar/picture.jpg"); match_id = test_chain_match(); @@ -230,9 +255,12 @@ TEST(http_match, uri_wc_escaped) { int match_id; - test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_URI, "\\*/foo/bar"); - test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_URI, "/foo/\\*people*"); - test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_URI, "*/foo\\*/bar\\*/index.html\\*"); + test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_URI, NULL, + "\\*/foo/bar"); + test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_URI, NULL, + "/foo/\\*people*"); + test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_URI, NULL, + "*/foo\\*/bar\\*/index.html\\*"); set_tfw_str(&test_req->uri_path, "*/foo/bar"); match_id = test_chain_match(); @@ -251,9 +279,12 @@ TEST(http_match, host_eq) { int match_id; - test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_HOST, "www.natsys-lab.com"); - test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_HOST, "natsys-lab"); - test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_HOST, "NATSYS-LAB.COM"); + test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_HOST, NULL, + "www.natsys-lab.com"); + test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_HOST, NULL, + "natsys-lab"); + test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_HOST, NULL, + "NATSYS-LAB.COM"); set_tfw_str(&test_req->host, "natsys-lab.com"); match_id = test_chain_match(); EXPECT_EQ(3, match_id); @@ -261,24 +292,32 @@ TEST(http_match, host_eq) TEST(http_match, headers_eq) { - int match_id; + create_str_pool(); - test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_HDR_RAW, - "User-Agent: U880D/4.0 (CP/M; 8-bit)"); - test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_HDR_RAW, - "Connection: close"); - test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_HDR_RAW, - "Connection: Keep-Alive"); + { + int match_id; - set_tfw_str(&test_req->h_tbl->tbl[TFW_HTTP_HDR_CONNECTION], - "Connection: Keep-Alive"); - match_id = test_chain_match(); - EXPECT_EQ(3, match_id); + /* Special headers must be compound */ + TFW_STR2(hdr1, "Connection: ", "Keep-Alive"); + TFW_STR2(hdr2, 
"Connection: ", "cLoSe"); - set_tfw_str(&test_req->h_tbl->tbl[TFW_HTTP_HDR_CONNECTION], - "Connection: cLoSe"); - match_id = test_chain_match(); - EXPECT_EQ(2, match_id); + test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_HDR, + "User-Agent", "U880D/4.0 (CP/M; 8-bit)"); + test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_HDR, + "Connection", "close"); + test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_HDR, + "Connection", "Keep-Alive"); + + test_req->h_tbl->tbl[TFW_HTTP_HDR_CONNECTION] = *hdr1; + match_id = test_chain_match(); + EXPECT_EQ(3, match_id); + + test_req->h_tbl->tbl[TFW_HTTP_HDR_CONNECTION] = *hdr2; + match_id = test_chain_match(); + EXPECT_EQ(2, match_id); + } + + free_all_str(); } TEST(http_match, hdr_host_prefix) @@ -294,11 +333,12 @@ TEST(http_match, hdr_host_prefix) TFW_STR2(hdr3, "Host: ", "www"); TFW_STR2(hdr4, "Host: ", "WWW.EXAMPLE.COM:8081"); - test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_HDR_CONN, - "Connection: Keep-Alive"); - test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_HDR_HOST, "ex*"); - test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_HDR_HOST, - "www.example.com*"); + test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_HDR, + "Connection", " Keep-Alive"); + test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_HDR, + "Host", "ex*"); + test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_HDR, + "Host", "www.example.com*"); set_tfw_str(&test_req->host, "example.com"); match_id = test_chain_match(); @@ -339,12 +379,16 @@ TEST(http_match, hdr_host_suffix) TFW_STR2(hdr5, "Host: ", "www"); TFW_STR2(hdr6, "Host: ", "TEST.FOLKS.COM"); - test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_HDR_CONN, - "Connection: Keep-Alive"); - test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_HDR_HOST, "*.ru"); - test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_HDR_HOST, "*.biz"); - test_chain_add_rule_str(4, TFW_HTTP_MATCH_F_HDR_HOST, "*.folks.com"); - test_chain_add_rule_str(5, TFW_HTTP_MATCH_F_HDR_HOST, "*.com"); + test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_HDR, + "Connection", " Keep-Alive"); + test_chain_add_rule_str(2, 
TFW_HTTP_MATCH_F_HDR, + "Host", "*.ru"); + test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_HDR, + "Host", "*.biz"); + test_chain_add_rule_str(4, TFW_HTTP_MATCH_F_HDR, + "Host", "*.folks.com"); + test_chain_add_rule_str(5, TFW_HTTP_MATCH_F_HDR, + "Host", "*.com"); set_tfw_str(&test_req->host, "example.com"); match_id = test_chain_match(); @@ -378,6 +422,38 @@ TEST(http_match, hdr_host_suffix) free_all_str(); } +TEST(http_match, raw_header_eq) +{ + int match_id; + + test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_HDR, + "User-Agent", "U880D/4.0 (CP/M; 8-bit)"); + test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_HDR, + "Via", "test_proxy 1.0"); + + set_raw_hdr("Via: test_proxy 1.0"); + match_id = test_chain_match(); + EXPECT_EQ(2, match_id); +} + +TEST(http_match, raw_header_eq_ws) +{ + int match_id; + + test_chain_add_rule_str(1, TFW_HTTP_MATCH_F_HDR, + "User-Agent", "U880D/4.0 (CP/M; 8-bit)"); + test_chain_add_rule_str(2, TFW_HTTP_MATCH_F_HDR, + "Connection", "close"); + test_chain_add_rule_str(3, TFW_HTTP_MATCH_F_HDR, + "Connection", "Keep-Alive"); + test_chain_add_rule_str(4, TFW_HTTP_MATCH_F_HDR, + "Warning", " 123 miscellaneous warning"); + + set_raw_hdr("Warning: 123 miscellaneous warning"); + match_id = test_chain_match(); + EXPECT_EQ(4, match_id); +} + TEST(http_match, method_eq) { int match_id; @@ -427,5 +503,7 @@ TEST_SUITE(http_match) TEST_RUN(http_match, headers_eq); TEST_RUN(http_match, hdr_host_prefix); TEST_RUN(http_match, hdr_host_suffix); + TEST_RUN(http_match, raw_header_eq); + TEST_RUN(http_match, raw_header_eq_ws); TEST_RUN(http_match, method_eq); } diff --git a/tempesta_fw/t/unit/test_http_tbl.c b/tempesta_fw/t/unit/test_http_tbl.c index 26431b0826..7bfafa9666 100644 --- a/tempesta_fw/t/unit/test_http_tbl.c +++ b/tempesta_fw/t/unit/test_http_tbl.c @@ -214,12 +214,12 @@ TEST(http_tbl, some_rules) uri == /foo/bar* -> vh2;\n\ host == natsys-lab.com -> vh3;\n\ host == natsys-lab* -> vh4;\n\ - hdr_host == google.com -> vh5;\n\ - hdr_host == google* -> vh6;\n\ - 
hdr_conn == close -> vh7;\n\ - hdr_conn == Keep* -> vh8;\n\ - hdr_raw == User-Agent:Bot -> vh9;\n\ - hdr_raw == X-Forwarded-For* -> vh10;\n}\n")) { + hdr Host == google.com -> vh5;\n\ + hdr Host == google* -> vh6;\n\ + hdr Connection == close -> vh7;\n\ + hdr Connection == Keep* -> vh8;\n\ + hdr User-Agent == Bot -> vh9;\n\ + hdr X-Forwarded-For == 127* -> vh10;\n}\n")) { TEST_FAIL("can't parse rules\n"); } @@ -282,37 +282,37 @@ TestCase test_cases[] = { }, { .rule_str = "vhost default {\nproxy_pass default;\n}\n\ - http_chain {\nhdr_host == natsys-lab.com -> default;\n}\n", + http_chain {\nhdr Host == natsys-lab.com -> default;\n}\n", .good_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nHost: natsys-lab.com\r\n\r\n", .bad_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nHost: natsys-lab2.com\r\n\r\n", }, { .rule_str = "vhost default {\nproxy_pass default;\n}\n\ - http_chain {\nhdr_host == natsys-lab* -> default;\n}\n", + http_chain {\nhdr Host == natsys-lab* -> default;\n}\n", .good_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nHost: natsys-lab2.com\r\n\r\n", .bad_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nHost: google.com\r\n\r\n", }, { .rule_str = "vhost default {\nproxy_pass default;\n}\n\ - http_chain {\nhdr_conn == Keep-Alive -> default;\n}\n", + http_chain {\nhdr Connection == Keep-Alive -> default;\n}\n", .good_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nConnection: Keep-Alive\r\n\r\n", .bad_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nConnection: close\r\n\r\n", }, { .rule_str = "vhost default {\nproxy_pass default;\n}\n\ - http_chain {\nhdr_conn == Keep* -> default;\n}\n", + http_chain {\nhdr Connection == Keep* -> default;\n}\n", .good_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nConnection: Keep-Alive\r\n\r\n", .bad_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nConnection: close\r\n\r\n", }, { .rule_str = "vhost default {\nproxy_pass default;\n}\n\ - http_chain {\nhdr_raw == User-Agent:Bot -> 
default;\n}\n", + http_chain {\nhdr User-Agent == Bot -> default;\n}\n", .good_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nUser-Agent:Bot\r\n\r\n", .bad_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nUser-Agent:Tot\r\n\r\n", }, { .rule_str = "vhost default {\nproxy_pass default;\n}\n\ - http_chain {\nhdr_raw == User-Agent* -> default;\n}\n", + http_chain {\nhdr User-Agent == B* -> default;\n}\n", .good_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nUser-Agent: Bot\r\n\r\n", .bad_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nConnection: close\r\n\r\n", }, From 8704d49fe23ef92821b09ee0d62366c845157109 Mon Sep 17 00:00:00 2001 From: Alexander Ostapenko Date: Thu, 12 Jul 2018 12:26:55 +0300 Subject: [PATCH 2/4] Correction processing of wildcard '*' argument (#1033). --- tempesta_fw/http_match.c | 185 ++++++++++++++++++--------- tempesta_fw/http_match.h | 6 +- tempesta_fw/http_tbl.c | 21 +-- tempesta_fw/t/unit/test_http_match.c | 20 +-- tempesta_fw/t/unit/test_http_tbl.c | 22 +++- 5 files changed, 161 insertions(+), 93 deletions(-) diff --git a/tempesta_fw/http_match.c b/tempesta_fw/http_match.c index 66e5def7e8..a5a674bdef 100644 --- a/tempesta_fw/http_match.c +++ b/tempesta_fw/http_match.c @@ -72,23 +72,49 @@ #include "http_msg.h" #include "cfg.h" +/** + * Map an operator to that flags passed to tfw_str_eq_*() functions. + */ +static tfw_str_eq_flags_t +map_op_to_str_eq_flags(tfw_http_match_op_t op) +{ + static const tfw_str_eq_flags_t flags_tbl[] = { + [ 0 ... _TFW_HTTP_MATCH_O_COUNT ] = -1, + [TFW_HTTP_MATCH_O_EQ] = TFW_STR_EQ_DEFAULT, + [TFW_HTTP_MATCH_O_PREFIX] = TFW_STR_EQ_PREFIX, + [TFW_HTTP_MATCH_O_SUFFIX] = TFW_STR_EQ_DEFAULT, + }; + BUG_ON(flags_tbl[op] < 0); + return flags_tbl[op]; +} + /** * Look up a header in the @req->h_tbl by given @id, - * and compare @str with the header's value (skipping name and LWS). + * and compare @rule->arg with the header's value (skipping name and LWS). 
* * For example: - * hdr_val_eq(req, TFW_HTTP_HDR_HOST, "natsys-lab", 10, TFW_STR_EQ_PREFIX); + * hdr_val_eq(req, + * { + * .arg.str="natsys-lab", + * .arg.len=10, + * .op=TFW_STR_EQ_PREFIX + * }, + * TFW_HTTP_HDR_HOST); * will match the following headers: * "Host: natsys-lab" * "Host: natsys-lab.com" * "Host : natsys-lab.com" */ static bool -hdr_val_eq(const TfwHttpReq *req, tfw_http_hdr_t id, tfw_http_match_op_t op, - const char *str, int str_len, tfw_str_eq_flags_t flags) +hdr_val_eq(const TfwHttpReq *req, const TfwHttpMatchRule *rule, + tfw_http_hdr_t id) { TfwStr *hdr; TfwStr hdr_val; + tfw_str_eq_flags_t flags; + tfw_http_match_op_t op = rule->op; + const char *str = rule->arg.str; + int str_len = rule->arg.len; BUG_ON(id < 0 || id >= TFW_HTTP_HDR_NUM); @@ -96,8 +122,18 @@ hdr_val_eq(const TfwHttpReq *req, tfw_http_hdr_t id, tfw_http_match_op_t op, if (TFW_STR_EMPTY(hdr)) return false; + if (op == TFW_HTTP_MATCH_O_WILDCARD) + return true; + tfw_http_msg_clnthdr_val(hdr, id, &hdr_val); + flags = map_op_to_str_eq_flags(rule->op); + /* + * There is no general rule, but most headers are case-insensitive. + * TODO: case-sensitive matching for headers when required by RFC. + */ + flags |= TFW_STR_EQ_CASEI; + if (op == TFW_HTTP_MATCH_O_SUFFIX) return tfw_str_eq_cstr_off(&hdr_val, hdr_val.len - str_len, str, str_len, flags); @@ -105,40 +141,28 @@ hdr_val_eq(const TfwHttpReq *req, tfw_http_hdr_t id, tfw_http_match_op_t op, return tfw_str_eq_cstr(&hdr_val, str, str_len, flags); } -/** - * Map an operator to that flags passed to tfw_str_eq_*() functions. - */ -static tfw_str_eq_flags_t -map_op_to_str_eq_flags(tfw_http_match_op_t op) -{ - static const tfw_str_eq_flags_t flags_tbl[] = { - [ 0 ... 
_TFW_HTTP_MATCH_O_COUNT ] = -1, - [TFW_HTTP_MATCH_O_EQ] = TFW_STR_EQ_DEFAULT, - [TFW_HTTP_MATCH_O_PREFIX] = TFW_STR_EQ_PREFIX, - [TFW_HTTP_MATCH_O_SUFFIX] = TFW_STR_EQ_DEFAULT, - }; - BUG_ON(flags_tbl[op] < 0); - return flags_tbl[op]; -} - static bool match_method(const TfwHttpReq *req, const TfwHttpMatchRule *rule) { - if (rule->op == TFW_HTTP_MATCH_O_EQ) - return req->method == rule->arg.method; + /* Only WILDCARD and EQ operators are supported. */ + if (rule->op == TFW_HTTP_MATCH_O_WILDCARD) + return true; - /* Only EQ operator is supported. */ - BUG(); - return 0; + BUG_ON(rule->op != TFW_HTTP_MATCH_O_EQ); + return req->method == rule->arg.method; } static bool match_uri(const TfwHttpReq *req, const TfwHttpMatchRule *rule) { + tfw_str_eq_flags_t flags; const TfwStr *uri_path = &req->uri_path; const TfwHttpMatchArg *arg = &rule->arg; - tfw_str_eq_flags_t flags = map_op_to_str_eq_flags(rule->op); + if (rule->op == TFW_HTTP_MATCH_O_WILDCARD) + return true; + + flags = map_op_to_str_eq_flags(rule->op); /* RFC 7230: * 2.7.3: the comparison is case-insensitive. * @@ -157,10 +181,17 @@ match_uri(const TfwHttpReq *req, const TfwHttpMatchRule *rule) static bool match_host(const TfwHttpReq *req, const TfwHttpMatchRule *rule) { + tfw_str_eq_flags_t flags; + const TfwHttpMatchArg *arg; const TfwStr *host = &req->host; - const TfwHttpMatchArg *arg = &rule->arg; - tfw_str_eq_flags_t flags = map_op_to_str_eq_flags(rule->op); + if (host->len == 0) + return hdr_val_eq(req, rule, TFW_HTTP_HDR_HOST); + + if (rule->op == TFW_HTTP_MATCH_O_WILDCARD) + return true; + + flags = map_op_to_str_eq_flags(rule->op); /* * RFC 7230: * 5.4: Host header must be ignored when URI is absolute. @@ -170,13 +201,8 @@ match_host(const TfwHttpReq *req, const TfwHttpMatchRule *rule) * 5.4, 2.7.3: Port 80 is equal to a non-given/empty port (done by * normalizing the host). 
*/ - flags |= TFW_STR_EQ_CASEI; - - if (host->len == 0) - return hdr_val_eq(req, TFW_HTTP_HDR_HOST, - rule->op, arg->str, arg->len, flags); - + arg = &rule->arg; if (rule->op == TFW_HTTP_MATCH_O_SUFFIX) return tfw_str_eq_cstr_off(host, host->len - arg->len, arg->str, arg->len, flags); @@ -312,22 +338,13 @@ match_hdr_raw(const TfwHttpReq *req, const TfwHttpMatchRule *rule) static bool match_hdr(const TfwHttpReq *req, const TfwHttpMatchRule *rule) { - const TfwHttpMatchArg *arg; - tfw_str_eq_flags_t flags; tfw_http_hdr_t id = rule->hid; BUG_ON(id < 0); if (id == TFW_HTTP_HDR_RAW) return match_hdr_raw(req, rule); - arg = &rule->arg; - flags = map_op_to_str_eq_flags(rule->op); - - /* There is no general rule, but most headers are case-insensitive. - * TODO: case-sensitive matching for headers when required by RFC. */ - flags |= TFW_STR_EQ_CASEI; - - return hdr_val_eq(req, id, rule->op, arg->str, arg->len, flags); + return hdr_val_eq(req, rule, id); } static bool @@ -342,11 +359,18 @@ match_wildcard(const TfwHttpReq *req, const TfwHttpMatchRule *rule) static bool match_mark(const TfwHttpReq *req, const TfwHttpMatchRule *rule) { + unsigned int mark = req->msg.skb_head->mark; + + if (!mark) + return false; + + if (rule->op == TFW_HTTP_MATCH_O_WILDCARD) + return true; + BUG_ON(rule->op != TFW_HTTP_MATCH_O_EQ); - return req->msg.skb_head->mark == rule->arg.num; + return mark == rule->arg.num; } - typedef bool (*match_fn)(const TfwHttpReq *, const TfwHttpMatchRule *); static const match_fn match_fn_tbl[_TFW_HTTP_MATCH_F_COUNT] = { @@ -400,6 +424,23 @@ do_eval(const TfwHttpReq *req, const TfwHttpMatchRule *rule) return true; } +static tfw_http_match_arg_t +tfw_http_tbl_arg_type(tfw_http_match_fld_t field) +{ + static const tfw_http_match_arg_t arg_types[_TFW_HTTP_MATCH_F_COUNT] = { + [TFW_HTTP_MATCH_F_WILDCARD] = TFW_HTTP_MATCH_A_WILDCARD, + [TFW_HTTP_MATCH_F_HDR] = TFW_HTTP_MATCH_A_STR, + [TFW_HTTP_MATCH_F_HOST] = TFW_HTTP_MATCH_A_STR, + [TFW_HTTP_MATCH_F_METHOD] = 
TFW_HTTP_MATCH_A_METHOD, + [TFW_HTTP_MATCH_F_URI] = TFW_HTTP_MATCH_A_STR, + [TFW_HTTP_MATCH_F_MARK] = TFW_HTTP_MATCH_A_NUM, + }; + + BUG_ON(field <= 0 || field >= _TFW_HTTP_MATCH_F_COUNT); + + return arg_types[field]; +} + /** * Match a HTTP request against all rules in @mlst. * Return a first matching rule. @@ -498,11 +539,12 @@ EXPORT_SYMBOL(tfw_http_rule_new); int tfw_http_rule_arg_init(TfwHttpMatchRule *rule, const char *arg, size_t arg_len) { - if (rule->arg.type == TFW_HTTP_MATCH_A_WILDCARD) + if (rule->arg.type == TFW_HTTP_MATCH_A_WILDCARD || + rule->op == TFW_HTTP_MATCH_O_WILDCARD) return 0; if (rule->arg.type == TFW_HTTP_MATCH_A_NUM) { - if (tfw_cfg_parse_uint(arg, &rule->arg.num)) { + if (tfw_cfg_parse_uint(arg, &rule->arg.num) || !rule->arg.num) { TFW_ERR_NL("http_match: invalid 'mark' condition:" " '%s'\n", arg); return -EINVAL; @@ -536,45 +578,68 @@ tfw_http_rule_arg_init(TfwHttpMatchRule *rule, const char *arg, size_t arg_len) EXPORT_SYMBOL(tfw_http_rule_arg_init); const char * -tfw_http_arg_adjust(const char *arg, size_t len, const char *h_name, - size_t *size_out, tfw_http_match_op_t *op_out) +tfw_http_arg_adjust(const char *arg, tfw_http_match_fld_t field, + const char *raw_hdr_name, size_t *size_out, + tfw_http_match_arg_t *type_out, + tfw_http_match_op_t *op_out) { int i; bool escaped; char *arg_out, *pos; - size_t name_len = 0, full_name_len = 0; + size_t name_len = 0, full_name_len = 0, len = strlen(arg); + bool wc_arg = (arg[0] == '*' && len == 1); - if (h_name) { - name_len = strlen(h_name); + *type_out = tfw_http_tbl_arg_type(field); + + /* + * If this is simple wildcard argument and this is not raw + * header case, this is wildcard type case and we do not + * need any argument for matching. 
+ */ + if (wc_arg && !raw_hdr_name) + return NULL; + + if (raw_hdr_name) { + name_len = strlen(raw_hdr_name); full_name_len = name_len + SLEN(S_DLM); } if (!(arg_out = kzalloc(full_name_len + len + 1, GFP_KERNEL))) { TFW_ERR_NL("http_match: unable to allocate rule" " argument.\n"); - return NULL; + return ERR_PTR(-ENOMEM); } - if (h_name) { - memcpy(arg_out, h_name, name_len); + if (raw_hdr_name) { + memcpy(arg_out, raw_hdr_name, name_len); memcpy(arg_out + name_len, S_DLM, SLEN(S_DLM)); } *op_out = TFW_HTTP_MATCH_O_EQ; - if (len > 1 && arg[len - 1] == '*' && arg[len - 2] != '\\') + + /* + * In cases of simple wildcard argument for raw header or + * argument ended with wildcard, the prefix matching pattern + * should be applied. + */ + if (wc_arg || (len > 1 && arg[len - 1] == '*' && arg[len - 2] != '\\')) *op_out = TFW_HTTP_MATCH_O_PREFIX; - if (arg[0] == '*') { + /* + * For argument started with wildcard, the suffix matching + * pattern should be applied. + */ + if (!wc_arg && arg[0] == '*') { if (*op_out == TFW_HTTP_MATCH_O_PREFIX) TFW_WARN_NL("http_match: unable to match" " double-wildcard patterns '%s', so" " prefix pattern will be applied\n", arg); - else if (h_name) + else if (raw_hdr_name) TFW_WARN_NL("http_match: unable to match suffix" " pattern '%s' in case of raw header" " specification: '%s', so wildcard pattern" - " will not be applied\n", arg, h_name); + " will not be applied\n", arg, raw_hdr_name); else *op_out = TFW_HTTP_MATCH_O_SUFFIX; diff --git a/tempesta_fw/http_match.h b/tempesta_fw/http_match.h index d0f776a868..8506b3b77a 100644 --- a/tempesta_fw/http_match.h +++ b/tempesta_fw/http_match.h @@ -126,8 +126,10 @@ TfwHttpMatchRule *tfw_http_rule_new(TfwHttpChain *chain, int tfw_http_rule_arg_init(TfwHttpMatchRule *rule, const char *arg, size_t arg_len); -const char *tfw_http_arg_adjust(const char *arg, size_t len, const char *h_name, - size_t *size_out, tfw_http_match_op_t *op_out); +const char *tfw_http_arg_adjust(const char *arg, 
tfw_http_match_fld_t field, + const char *raw_hdr_name, size_t *size_out, + tfw_http_match_arg_t *type_out, + tfw_http_match_op_t *op_out); int tfw_http_verify_hdr_field(tfw_http_match_fld_t field, const char **h_name, unsigned int *hid_out); diff --git a/tempesta_fw/http_tbl.c b/tempesta_fw/http_tbl.c index 9d793995f1..b789569ecb 100644 --- a/tempesta_fw/http_tbl.c +++ b/tempesta_fw/http_tbl.c @@ -217,16 +217,6 @@ static const TfwCfgEnum tfw_http_tbl_cfg_method_enum[] = { { 0 } }; -static const tfw_http_match_arg_t -tfw_http_tbl_cfg_arg_types[_TFW_HTTP_MATCH_F_COUNT] = { - [TFW_HTTP_MATCH_F_WILDCARD] = TFW_HTTP_MATCH_A_WILDCARD, - [TFW_HTTP_MATCH_F_HDR] = TFW_HTTP_MATCH_A_STR, - [TFW_HTTP_MATCH_F_HOST] = TFW_HTTP_MATCH_A_STR, - [TFW_HTTP_MATCH_F_METHOD] = TFW_HTTP_MATCH_A_METHOD, - [TFW_HTTP_MATCH_F_URI] = TFW_HTTP_MATCH_A_STR, - [TFW_HTTP_MATCH_F_MARK] = TFW_HTTP_MATCH_A_NUM, -}; - int tfw_http_tbl_method(const char *arg, tfw_http_meth_t *method) { @@ -410,7 +400,7 @@ tfw_cfgop_http_rule(TfwCfgSpec *cs, TfwCfgEntry *e) } /* Interpret condition part of the rule. 
*/ - if (in_arg && (in_arg[0] != '*' || len > 1)) { + if (in_arg) { BUG_ON(!in_field); r = tfw_cfg_map_enum(tfw_http_tbl_cfg_field_enum, in_field, &field); @@ -422,11 +412,10 @@ tfw_cfgop_http_rule(TfwCfgSpec *cs, TfwCfgEntry *e) if ((r = tfw_http_verify_hdr_field(field, &hdr, &hid))) return r; - arg = tfw_http_arg_adjust(in_arg, len, hdr, &arg_size, &op); - if (!arg) - return -ENOMEM; - - type = tfw_http_tbl_cfg_arg_types[field]; + arg = tfw_http_arg_adjust(in_arg, field, hdr, &arg_size, + &type, &op); + if (IS_ERR(arg)) + return PTR_ERR(arg); } rule = tfw_http_rule_new(tfw_chain_entry, type, arg_size); diff --git a/tempesta_fw/t/unit/test_http_match.c b/tempesta_fw/t/unit/test_http_match.c index 42cd4d3c6e..b812199a40 100644 --- a/tempesta_fw/t/unit/test_http_match.c +++ b/tempesta_fw/t/unit/test_http_match.c @@ -110,24 +110,24 @@ test_chain_add_rule_str(int test_id, tfw_http_match_fld_t field, const char *hdr, const char *in_arg) { MatchEntry *e; - unsigned int hid; - tfw_http_match_op_t op; - size_t len, arg_size; - const char *arg; + unsigned int hid = TFW_HTTP_HDR_RAW; + tfw_http_match_op_t op = TFW_HTTP_MATCH_O_WILDCARD; + tfw_http_match_arg_t type = TFW_HTTP_MATCH_A_WILDCARD; + size_t arg_size = 0; + const char *arg = NULL; - len = strlen(in_arg); - BUG_ON(field == TFW_HTTP_MATCH_F_WILDCARD); - BUG_ON(in_arg[0] == '*' && len == 1); + BUG_ON(in_arg && field == TFW_HTTP_MATCH_F_WILDCARD); + BUG_ON(!in_arg && field != TFW_HTTP_MATCH_F_WILDCARD); tfw_http_verify_hdr_field(field, &hdr, &hid); - arg = tfw_http_arg_adjust(in_arg, len, hdr, &arg_size, &op); + arg = tfw_http_arg_adjust(in_arg, field, hdr, &arg_size, &type, &op); e = test_rule_container_new(test_chain, MatchEntry, rule, - TFW_HTTP_MATCH_A_STR, arg_size); + type, arg_size); e->rule.hid = hid; e->rule.field = field; e->rule.op = op; - e->rule.arg.type = TFW_HTTP_MATCH_A_STR; + e->rule.arg.type = type; tfw_http_rule_arg_init(&e->rule, arg, arg_size - 1); /* Just dummy action type to avoid BUG_ON in 
'do_eval()'. */ e->rule.act.type = TFW_HTTP_MATCH_ACT_CHAIN; diff --git a/tempesta_fw/t/unit/test_http_tbl.c b/tempesta_fw/t/unit/test_http_tbl.c index 7bfafa9666..49e0993d66 100644 --- a/tempesta_fw/t/unit/test_http_tbl.c +++ b/tempesta_fw/t/unit/test_http_tbl.c @@ -218,8 +218,8 @@ TEST(http_tbl, some_rules) hdr Host == google* -> vh6;\n\ hdr Connection == close -> vh7;\n\ hdr Connection == Keep* -> vh8;\n\ - hdr User-Agent == Bot -> vh9;\n\ - hdr X-Forwarded-For == 127* -> vh10;\n}\n")) { + hdr X-Forwarded-For == * -> vh9;\n\ + hdr User-Agent == Bot -> vh10;\n}\n")) { TEST_FAIL("can't parse rules\n"); } @@ -231,8 +231,8 @@ TEST(http_tbl, some_rules) test_req("GET http://google.com/foo/baz/ HTTP/1.1\r\nHost: google2.com\r\n\r\n", expect_conn6); test_req("GET http://google.com/foo/baz/ HTTP/1.1\r\nConnection: close\r\n\r\n", expect_conn7); test_req("GET http://google.com/foo/baz/ HTTP/1.1\r\nConnection: Keep-Alive\r\n\r\n", expect_conn8); - test_req("GET http://google.com/foo/baz/ HTTP/1.1\r\nUser-Agent:Bot\r\n\r\n", expect_conn9); - test_req("GET http://google.com/foo/baz/ HTTP/1.1\r\nX-Forwarded-For: 127.0.0.1\r\n\r\n", expect_conn10); + test_req("GET http://google.com/foo/baz/ HTTP/1.1\r\nX-Forwarded-For: 127.0.0.1\r\n\r\n", expect_conn9); + test_req("GET http://google.com/foo/baz/ HTTP/1.1\r\nUser-Agent:Bot\r\n\r\n", expect_conn10); test_req("GET http://google.com/foo/baz/ HTTP/1.1\r\n\r\n", NULL); cleanup_cfg(); @@ -312,10 +312,22 @@ TestCase test_cases[] = { }, { .rule_str = "vhost default {\nproxy_pass default;\n}\n\ - http_chain {\nhdr User-Agent == B* -> default;\n}\n", + http_chain {\nhdr User-Agent == * -> default;\n}\n", .good_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nUser-Agent: Bot\r\n\r\n", .bad_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nConnection: close\r\n\r\n", }, + { + .rule_str = "vhost default {\nproxy_pass default;\n}\n\ + http_chain {\nhdr Via == Sever* -> default;\n}\n", + .good_req_str = "GET http://natsys-lab.com/foo 
HTTP/1.1\r\nVia: SeverExample\r\n\r\n", + .bad_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nVia: Proxy\r\n\r\n", + }, + { + .rule_str = "vhost default {\nproxy_pass default;\n}\n\ + http_chain {\nhdr Via == * -> default;\n}\n", + .good_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nVia: Proxy\r\n\r\n", + .bad_req_str = "GET http://natsys-lab.com/foo HTTP/1.1\r\nHost: Proxy\r\n\r\n", + }, }; size_t test_cases_size = ARRAY_SIZE(test_cases); From 041ff43a6a36c78b0f57e26916b6d44fc34298c4 Mon Sep 17 00:00:00 2001 From: Alexander Ostapenko Date: Tue, 17 Jul 2018 23:14:30 +0300 Subject: [PATCH 3/4] Changes according review comments (#1033). --- tempesta_fw/cfg.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tempesta_fw/cfg.h b/tempesta_fw/cfg.h index 64cba7fd7b..6bfae54dd7 100644 --- a/tempesta_fw/cfg.h +++ b/tempesta_fw/cfg.h @@ -110,6 +110,25 @@ * comparison sign interpretation in rule condition part: * "==" => false / "!=" => true * + * Also extended rule form is used in case of specifying HTTP headers. Following + * rule: + * hdr "Referer" == "*example.com" -> mark = 7; + * + * will have following representation: + * TfwCfgEntry { + * .name = "rule", + * ... + * .rule = { + * .fst = "hdr", + * .fst_ext = "Referer", + * .snd = "*example.com", + * .act = "mark", + * .val = "7", + * .inv = false + * }, + * ... + * } + * * @ftoken is an auxiliary internal field of TfwCfgEntry{} structure which * helps parser to differentiate plain directives from rules. * From 736111f214de1f246a78b865e1920d2d86a6141f Mon Sep 17 00:00:00 2001 From: Alexander Ostapenko Date: Wed, 18 Jul 2018 20:54:04 +0300 Subject: [PATCH 4/4] Correct matching of multiple headers in 'match_hdr_raw()' (#1033).
--- tempesta_fw/http_match.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tempesta_fw/http_match.c b/tempesta_fw/http_match.c index a5a674bdef..202673f40e 100644 --- a/tempesta_fw/http_match.c +++ b/tempesta_fw/http_match.c @@ -284,7 +284,7 @@ match_hdr_raw(const TfwHttpReq *req, const TfwHttpMatchRule *rule) } } - return false; + break; } prev = *p++; @@ -315,12 +315,12 @@ match_hdr_raw(const TfwHttpReq *req, const TfwHttpMatchRule *rule) /* If only header field doesn't finished, may be it have * trailing spaces. */ - if (isspace(*c)) { + if (p == pend && isspace(*c)) { c++; goto state_hdr_sp; } - return false; + continue; state_rule_sp: _MOVE_TO_COND(p, pend, !isspace(*p));