From bc1b9229f2026cf1209b8d853c442ed19e27ef2a Mon Sep 17 00:00:00 2001 From: Arran Cudbard-Bell Date: Sat, 9 Nov 2024 17:13:11 -0600 Subject: [PATCH] Remove taint support from regexes, and add regex.safe() --- .../pages/unlang/condition/regex.adoc | 33 +++++++++++++++++++ src/lib/server/tmpl_tokenize.c | 2 +- src/lib/unlang/xlat_builtin.c | 14 ++++++++ src/lib/unlang/xlat_expr.c | 4 +-- src/lib/unlang/xlat_func.h | 2 ++ src/lib/util/regex.h | 4 +++ src/lib/util/value.c | 17 +++++----- src/lib/util/value.h | 2 +- src/tests/keywords/regex-escape | 8 +++-- 9 files changed, 72 insertions(+), 14 deletions(-) diff --git a/doc/antora/modules/reference/pages/unlang/condition/regex.adoc b/doc/antora/modules/reference/pages/unlang/condition/regex.adoc index 18ec7d0bb808..31784bd037ff 100644 --- a/doc/antora/modules/reference/pages/unlang/condition/regex.adoc +++ b/doc/antora/modules/reference/pages/unlang/condition/regex.adoc @@ -136,6 +136,39 @@ if (&User-Name =~ /^(.*)@example\.com$/) { ---- ==== +== Dynamic expressions and escaping + +If the pattern contains a xref:xlat/index.adoc[dynamic expansion], elements +of the expansion could potentially affect pattern evaluation in unanticipated ways. + +To prevent this, the server will, by default, escape any special characters +in the values produced by the dynamic expansion, so that they are treated as +literal characters in the regular expression. + +To allow non-literals in dynamic elements of regular expressions, wrap any literal +values or expansions in `%regex.safe(...)`. + +.Using regex.safe to prevent escaping +==== +[source,unlang] +---- +local pattern +switch (realm) { + case example.com { + pattern = %regex.safe('.*\.example\.com') + } + case test.com { + # '.' does not need to be escaped + pattern = 'test.com' + } +} + +if (&Stripped-User-Name =~ /^%{pattern}$/) { + ... +} +---- +==== + == Pre-Compiled vs Runtime Compiled When the server starts any regular expressions comparisons it finds will be diff --git a/src/lib/server/tmpl_tokenize.c b/src/lib/server/tmpl_tokenize.c index 80b5a71db936..0fcdd1e0aa38 100644 --- a/src/lib/server/tmpl_tokenize.c +++ b/src/lib/server/tmpl_tokenize.c @@ -3485,7 +3485,7 @@ fr_slen_t tmpl_afrom_substr(TALLOC_CTX *ctx, tmpl_t **out, vpt = tmpl_alloc_null(ctx); - slen = xlat_tokenize(vpt, &head, &our_in, p_rules, t_rules, t_rules->literals_safe_for); + slen = xlat_tokenize(vpt, &head, &our_in, p_rules, t_rules, FR_REGEX_SAFE_FOR); if (slen < 0) FR_SBUFF_ERROR_RETURN(&our_in); /* diff --git a/src/lib/unlang/xlat_builtin.c b/src/lib/unlang/xlat_builtin.c index a192a5f3b9d0..4d2a1fefb68a 100644 --- a/src/lib/unlang/xlat_builtin.c +++ b/src/lib/unlang/xlat_builtin.c @@ -4158,6 +4158,20 @@ do { \ if (unlikely((xlat = xlat_func_register(xlat_ctx, "regex", xlat_func_regex, FR_TYPE_STRING)) == NULL)) return -1; xlat_func_flags_set(xlat, XLAT_FUNC_FLAG_INTERNAL); #endif + + { + static xlat_arg_parser_t const xlat_regex_safe_args[] = { + { .type = FR_TYPE_STRING, .variadic = true, .concat = true }, + XLAT_ARG_PARSER_TERMINATOR + }; + + if (unlikely((xlat = xlat_func_register(xlat_ctx, "regex.safe", + xlat_transparent, FR_TYPE_STRING)) == NULL)) return -1; + xlat_func_flags_set(xlat, XLAT_FUNC_FLAG_INTERNAL); + xlat_func_args_set(xlat, xlat_regex_safe_args); + xlat_func_safe_for_set(xlat, FR_REGEX_SAFE_FOR); + } + XLAT_REGISTER_PURE("sha1", xlat_func_sha1, FR_TYPE_OCTETS, xlat_func_sha_arg); #ifdef HAVE_OPENSSL_EVP_H diff --git a/src/lib/unlang/xlat_expr.c b/src/lib/unlang/xlat_expr.c index df96064c95e4..676992c84333 100644 --- a/src/lib/unlang/xlat_expr.c +++ b/src/lib/unlang/xlat_expr.c @@ -631,7 +631,7 @@ static xlat_action_t xlat_regex_match(TALLOC_CTX *ctx, request_t *request, fr_va * Concatenate everything, and escape untrusted inputs. */ if (fr_value_box_list_concat_as_string(NULL, NULL, agg, &list, NULL, 0, ®ex_escape_rules, - FR_VALUE_BOX_LIST_FREE_BOX, true) < 0) { + FR_VALUE_BOX_LIST_FREE_BOX, FR_REGEX_SAFE_FOR, true) < 0) { RPEDEBUG("Failed concatenating regular expression string"); talloc_free(regmatch); return XLAT_ACTION_FAIL; @@ -703,7 +703,7 @@ static xlat_action_t xlat_regex_resume(TALLOC_CTX *ctx, fr_dcursor_t *out, * concatenate it here. We escape the various untrusted inputs. */ if (fr_value_box_list_concat_as_string(NULL, NULL, agg, &rctx->list, NULL, 0, ®ex_escape_rules, - FR_VALUE_BOX_LIST_FREE_BOX, true) < 0) { + FR_VALUE_BOX_LIST_FREE_BOX, FR_REGEX_SAFE_FOR, true) < 0) { RPEDEBUG("Failed concatenating regular expression string"); return XLAT_ACTION_FAIL; } diff --git a/src/lib/unlang/xlat_func.h b/src/lib/unlang/xlat_func.h index 96d4439571d1..6d64d87b17c1 100644 --- a/src/lib/unlang/xlat_func.h +++ b/src/lib/unlang/xlat_func.h @@ -73,6 +73,8 @@ void xlat_func_resolve_set(xlat_t *xlat, xlat_resolve_t func); void xlat_purify_func_set(xlat_t *xlat, xlat_purify_t func); /** Set the escaped values for output boxes + * + * Any boxes output by the xlat function will have their values marked as safe for something. * * @param[in] _xlat function to set the escaped value for (as returned by xlat_register). * @param[in] _escaped escaped value to write to output boxes. diff --git a/src/lib/util/regex.h b/src/lib/util/regex.h index 4285d25c830c..d1674f45ae7a 100644 --- a/src/lib/util/regex.h +++ b/src/lib/util/regex.h @@ -169,6 +169,10 @@ typedef struct { #define REGEX_FLAG_BUFF_SIZE 7 +/** Unique safefor value to prevent escaping for regexes + */ +#define FR_REGEX_SAFE_FOR ((uintptr_t)regex_exec) + ssize_t regex_flags_parse(int *err, fr_regex_flags_t *out, fr_sbuff_t *in, fr_sbuff_term_t const *terminals, bool err_on_dup); diff --git a/src/lib/util/value.c b/src/lib/util/value.c index 6fee0295454a..b586bf3c696b 100644 --- a/src/lib/util/value.c +++ b/src/lib/util/value.c @@ -5468,7 +5468,7 @@ ssize_t fr_value_box_print(fr_sbuff_t *out, fr_value_box_t const *data, fr_sbuff FR_SBUFF_RETURN(fr_value_box_list_concat_as_string, NULL, NULL, &our_out, UNCONST(fr_value_box_list_t *, &data->vb_group), ", ", (sizeof(", ") - 1), e_rules, - 0, false); + 0, 0, false); FR_SBUFF_IN_CHAR_RETURN(&our_out, '}'); break; @@ -5536,6 +5536,7 @@ ssize_t fr_value_box_print_quoted(fr_sbuff_t *out, fr_value_box_t const *data, f * Is not currently applied to any other box type. * @param[in] proc_action What to do with the boxes in the list once * they've been processed. + * @param[in] safe_for if value has this safe_for value, don't apply the escape rules. * @param[in] flatten If true and we encounter a #FR_TYPE_GROUP, * we concat the contents of its children together. * If false, the contents will be cast to #FR_TYPE_STRING. @@ -5546,7 +5547,7 @@ ssize_t fr_value_box_print_quoted(fr_sbuff_t *out, fr_value_box_t const *data, f */ ssize_t fr_value_box_list_concat_as_string(bool *tainted, bool *secret, fr_sbuff_t *sbuff, fr_value_box_list_t *list, char const *sep, size_t sep_len, fr_sbuff_escape_rules_t const *e_rules, - fr_value_box_list_action_t proc_action, bool flatten) + fr_value_box_list_action_t proc_action, fr_value_box_safe_for_t safe_for, bool flatten) { fr_sbuff_t our_sbuff = FR_SBUFF(sbuff); ssize_t slen; @@ -5559,7 +5560,7 @@ ssize_t fr_value_box_list_concat_as_string(bool *tainted, bool *secret, fr_sbuff if (!flatten) goto print; slen = fr_value_box_list_concat_as_string(tainted, secret, &our_sbuff, &vb->vb_group, sep, sep_len, e_rules, - proc_action, flatten); + proc_action, safe_for, flatten); break; case FR_TYPE_OCTETS: @@ -5567,7 +5568,7 @@ ssize_t fr_value_box_list_concat_as_string(bool *tainted, bool *secret, fr_sbuff /* * Copy the raw string over, if necessary with escaping. */ - if (e_rules && (vb->tainted || e_rules->do_oct || e_rules->do_hex)) { + if (e_rules && (!fr_value_box_is_safe_for(vb, safe_for) || e_rules->do_oct || e_rules->do_hex)) { slen = fr_sbuff_in_escape(&our_sbuff, (char const *)vb->vb_strvalue, vb->vb_length, e_rules); } else { slen = fr_sbuff_in_bstrncpy(&our_sbuff, (char const *)vb->vb_strvalue, vb->vb_length); @@ -5575,7 +5576,7 @@ ssize_t fr_value_box_list_concat_as_string(bool *tainted, bool *secret, fr_sbuff break; case FR_TYPE_STRING: - if (vb->tainted && e_rules) goto print; + if (!fr_value_box_is_safe_for(vb, safe_for) && e_rules) goto print; slen = fr_sbuff_in_bstrncpy(&our_sbuff, vb->vb_strvalue, vb->vb_length); break; @@ -5798,7 +5799,7 @@ int fr_value_box_list_concat_in_place(TALLOC_CTX *ctx, */ if (fr_value_box_list_concat_as_string(&tainted, &secret, &sbuff, list, NULL, 0, NULL, - FR_VALUE_BOX_LIST_REMOVE, flatten) < 0) { + FR_VALUE_BOX_LIST_REMOVE, 0, flatten) < 0) { fr_strerror_printf("Concatenation exceeded max_size (%zu)", max_size); error: switch (type) { @@ -5821,7 +5822,7 @@ int fr_value_box_list_concat_in_place(TALLOC_CTX *ctx, */ if (fr_value_box_list_concat_as_string(&tainted, &secret, &sbuff, list, NULL, 0, NULL, - proc_action, flatten) < 0) { + proc_action, 0, flatten) < 0) { fr_value_box_list_insert_head(list, head_vb); goto error; } @@ -5863,7 +5864,7 @@ int fr_value_box_list_concat_in_place(TALLOC_CTX *ctx, case FR_TYPE_STRING: if (fr_value_box_list_concat_as_string(&tainted, &secret, &sbuff, list, NULL, 0, NULL, - proc_action, flatten) < 0) goto error; + proc_action, 0, flatten) < 0) goto error; (void)fr_sbuff_trim_talloc(&sbuff, SIZE_MAX); entry = out->entry; diff --git a/src/lib/util/value.h b/src/lib/util/value.h index 208c0e2541f0..63098a8fae47 100644 --- a/src/lib/util/value.h +++ b/src/lib/util/value.h @@ -1205,7 +1205,7 @@ ssize_t fr_value_box_from_str(TALLOC_CTX *ctx, fr_value_box_t *dst, */ ssize_t fr_value_box_list_concat_as_string(bool *tainted, bool *secret, fr_sbuff_t *sbuff, fr_value_box_list_t *list, char const *sep, size_t sep_len, fr_sbuff_escape_rules_t const *e_rules, - fr_value_box_list_action_t proc_action, bool flatten) + fr_value_box_list_action_t proc_action, fr_value_box_safe_for_t safe_for, bool flatten) CC_HINT(nonnull(3,4)); ssize_t fr_value_box_list_concat_as_octets(bool *tainted, bool *secret, fr_dbuff_t *dbuff, fr_value_box_list_t *list, diff --git a/src/tests/keywords/regex-escape b/src/tests/keywords/regex-escape index 2ec0fc36f236..1e53ecb9243f 100644 --- a/src/tests/keywords/regex-escape +++ b/src/tests/keywords/regex-escape @@ -8,13 +8,17 @@ string test_string2 # Strings which are expanded in a regex have regex special # characters escaped. Because the input strings are unsafe. # -test_string1 := "%taint(example.com)" -test_string2 := "%taint(exampleXcom)" +test_string1 := "example.com" +test_string2 := "exampleXcom" if ("exampleXcom" =~ /%{test_string1}/) { test_fail } +if ("exampleXcom" !~ /%regex.safe(%{test_string1})/) { + test_fail +} + if (test_string2 =~ /%{test_string1}/) { test_fail }