From 9b2125aa88f3db63ebfb411b5f93b940742acc9f Mon Sep 17 00:00:00 2001 From: "data-plane-api(CircleCI)" Date: Fri, 23 Aug 2019 19:50:27 +0000 Subject: [PATCH] introduce safe regex matcher based on re2 engine (#7878) The libstdc++ std::regex implementation is not safe in all cases for user provided input. This change deprecates the used of std::regex in all user facing paths and introduces a new safe regex matcher with an explicitly configurable engine, right now limited to Google's re2 regex engine. This is not a drop in replacement for std::regex as all language features are not supported. As such we will go through a deprecation period for the old regex engine. Fixes https://github.com/envoyproxy/envoy/issues/7728 Signed-off-by: Matt Klein Mirrored from https://github.com/envoyproxy/envoy @ eff020170c6267e6c8dc235473f7fc85c5b1e07d --- docs/BUILD | 1 + envoy/api/v2/route/BUILD | 4 ++ envoy/api/v2/route/route.proto | 88 ++++++++++++++++++++++++++++----- envoy/type/matcher/BUILD | 17 +++++++ envoy/type/matcher/regex.proto | 37 ++++++++++++++ envoy/type/matcher/string.proto | 11 ++++- 6 files changed, 146 insertions(+), 12 deletions(-) create mode 100644 envoy/type/matcher/regex.proto diff --git a/docs/BUILD b/docs/BUILD index da20fe5e7..11ef3876b 100644 --- a/docs/BUILD +++ b/docs/BUILD @@ -102,6 +102,7 @@ proto_library( "//envoy/type:range", "//envoy/type/matcher:metadata", "//envoy/type/matcher:number", + "//envoy/type/matcher:regex", "//envoy/type/matcher:string", ], ) diff --git a/envoy/api/v2/route/BUILD b/envoy/api/v2/route/BUILD index 2fec56ae3..968ab1c67 100644 --- a/envoy/api/v2/route/BUILD +++ b/envoy/api/v2/route/BUILD @@ -10,6 +10,8 @@ api_proto_library_internal( "//envoy/api/v2/core:base", "//envoy/type:percent", "//envoy/type:range", + "//envoy/type/matcher:regex", + "//envoy/type/matcher:string", ], ) @@ -20,5 +22,7 @@ api_go_proto_library( "//envoy/api/v2/core:base_go_proto", "//envoy/type:percent_go_proto", "//envoy/type:range_go_proto", + "//envoy/type/matcher:regex_go_proto", + "//envoy/type/matcher:string_go_proto", ], ) diff --git a/envoy/api/v2/route/route.proto b/envoy/api/v2/route/route.proto index 93d021ddb..e0fbaf0fd 100644 --- a/envoy/api/v2/route/route.proto +++ b/envoy/api/v2/route/route.proto @@ -9,6 +9,8 @@ option go_package = "route"; option java_generic_services = true; import "envoy/api/v2/core/base.proto"; +import "envoy/type/matcher/regex.proto"; +import "envoy/type/matcher/string.proto"; import "envoy/type/percent.proto"; import "envoy/type/range.proto"; @@ -349,7 +351,25 @@ message RouteMatch { // * The regex */b[io]t* matches the path */bot* // * The regex */b[io]t* does not match the path */bite* // * The regex */b[io]t* does not match the path */bit/bot* - string regex = 3 [(validate.rules).string.max_bytes = 1024]; + // + // .. attention:: + // This field has been deprecated in favor of `safe_regex` as it is not safe for use with + // untrusted input in all cases. + string regex = 3 [(validate.rules).string.max_bytes = 1024, deprecated = true]; + + // If specified, the route is a regular expression rule meaning that the + // regex must match the *:path* header once the query string is removed. The entire path + // (without the query string) must match the regex. The rule will not match if only a + // subsequence of the *:path* header matches the regex. + // + // [#next-major-version: In the v3 API we should redo how path specification works such + // that we utilize StringMatcher, and additionally have consistent options around whether we + // strip query strings, do a case sensitive match, etc. In the interim it will be too disruptive + // to deprecate the existing options. We should even consider whether we want to do away with + // path_specifier entirely and just rely on a set of header matchers which can already match + // on :path, etc. The issue with that is it is unclear how to generically deal with query string + // stripping. This needs more thought.] + type.matcher.RegexMatcher safe_regex = 10 [(validate.rules).message.required = true]; } // Indicates that prefix/path matching should be case insensitive. The default @@ -404,12 +424,24 @@ message CorsPolicy { // Specifies the origins that will be allowed to do CORS requests. // // An origin is allowed if either allow_origin or allow_origin_regex match. - repeated string allow_origin = 1; + // + // .. attention:: + // This field has been deprecated in favor of `allow_origin_string_match`. + repeated string allow_origin = 1 [deprecated = true]; // Specifies regex patterns that match allowed origins. // // An origin is allowed if either allow_origin or allow_origin_regex match. - repeated string allow_origin_regex = 8 [(validate.rules).repeated .items.string.max_bytes = 1024]; + // + // .. attention:: + // This field has been deprecated in favor of `allow_origin_string_match` as it is not safe for + // use with untrusted input in all cases. + repeated string allow_origin_regex = 8 + [(validate.rules).repeated .items.string.max_bytes = 1024, deprecated = true]; + + // Specifies string patterns that match allowed origins. An origin is allowed if any of the + // string matchers match. + repeated type.matcher.StringMatcher allow_origin_string_match = 11; // Specifies the content for the *access-control-allow-methods* header. string allow_methods = 2; @@ -1077,18 +1109,28 @@ message VirtualCluster { // * The regex */rides/\d+* matches the path */rides/0* // * The regex */rides/\d+* matches the path */rides/123* // * The regex */rides/\d+* does not match the path */rides/123/456* - string pattern = 1 [(validate.rules).string = {min_bytes: 1, max_bytes: 1024}]; + // + // .. attention:: + // This field has been deprecated in favor of `headers` as it is not safe for use with + // untrusted input in all cases. + string pattern = 1 [(validate.rules).string.max_bytes = 1024, deprecated = true]; + + // Specifies a list of header matchers to use for matching requests. Each specified header must + // match. The pseudo-headers `:path` and `:method` can be used to match the request path and + // method, respectively. + repeated HeaderMatcher headers = 4; - // Specifies the name of the virtual cluster. The virtual cluster name as well + // Specifies the name of the virtual cluster. The virtual cluster name as well // as the virtual host name are used when emitting statistics. The statistics are emitted by the // router filter and are documented :ref:`here `. string name = 2 [(validate.rules).string.min_bytes = 1]; // Optionally specifies the HTTP method to match on. For example GET, PUT, // etc. - // [#comment:TODO(htuch): add (validate.rules).enum.defined_only = true once - // https://github.com/lyft/protoc-gen-validate/issues/42 is resolved.] - core.RequestMethod method = 3; + // + // .. attention:: + // This field has been deprecated in favor of `headers`. + core.RequestMethod method = 3 [deprecated = true]; } // Global rate limiting :ref:`architecture overview `. @@ -1248,6 +1290,7 @@ message RateLimit { // ` header will match, regardless of the header's // value. // +// [#next-major-version: HeaderMatcher should be refactored to use StringMatcher.] message HeaderMatcher { // Specifies the name of the header in the request. string name = 1 [(validate.rules).string.min_bytes = 1]; @@ -1273,7 +1316,16 @@ message HeaderMatcher { // * The regex *\d{3}* matches the value *123* // * The regex *\d{3}* does not match the value *1234* // * The regex *\d{3}* does not match the value *123.456* - string regex_match = 5 [(validate.rules).string.max_bytes = 1024]; + // + // .. attention:: + // This field has been deprecated in favor of `safe_regex_match` as it is not safe for use + // with untrusted input in all cases. + string regex_match = 5 [(validate.rules).string.max_bytes = 1024, deprecated = true]; + + // If specified, this regex string is a regular expression rule which implies the entire request + // header value must match the regex. The rule will not match if only a subsequence of the + // request header value matches the regex. + type.matcher.RegexMatcher safe_regex_match = 11; // If specified, header match will be performed based on range. // The rule will match if the request header value is within this range. @@ -1328,11 +1380,25 @@ message QueryParameterMatcher { // Specifies the value of the key. If the value is absent, a request // that contains the key in its query string will match, whether the // key appears with a value (e.g., "?debug=true") or not (e.g., "?debug") - string value = 3; + // + // ..attention:: + // This field is deprecated. Use an `exact` match inside the `string_match` field. + string value = 3 [deprecated = true]; // Specifies whether the query parameter value is a regular expression. // Defaults to false. The entire query parameter value (i.e., the part to // the right of the equals sign in "key=value") must match the regex. // E.g., the regex "\d+$" will match "123" but not "a123" or "123a". - google.protobuf.BoolValue regex = 4; + // + // ..attention:: + // This field is deprecated. Use a `safe_regex` match inside the `string_match` field. + google.protobuf.BoolValue regex = 4 [deprecated = true]; + + oneof query_parameter_match_specifier { + // Specifies whether a query parameter value should match against a string. + type.matcher.StringMatcher string_match = 5 [(validate.rules).message.required = true]; + + // Specifies whether a query parameter should be present. + bool present_match = 6; + } } diff --git a/envoy/type/matcher/BUILD b/envoy/type/matcher/BUILD index ec4aa09b6..5fe594db4 100644 --- a/envoy/type/matcher/BUILD +++ b/envoy/type/matcher/BUILD @@ -40,11 +40,17 @@ api_proto_library_internal( name = "string", srcs = ["string.proto"], visibility = ["//visibility:public"], + deps = [ + ":regex", + ], ) api_go_proto_library( name = "string", proto = ":string", + deps = [ + ":regex_go_proto", + ], ) api_proto_library_internal( @@ -65,3 +71,14 @@ api_go_proto_library( ":string_go_proto", ], ) + +api_proto_library_internal( + name = "regex", + srcs = ["regex.proto"], + visibility = ["//visibility:public"], +) + +api_go_proto_library( + name = "regex", + proto = ":regex", +) diff --git a/envoy/type/matcher/regex.proto b/envoy/type/matcher/regex.proto new file mode 100644 index 000000000..b3b719444 --- /dev/null +++ b/envoy/type/matcher/regex.proto @@ -0,0 +1,37 @@ +syntax = "proto3"; + +package envoy.type.matcher; + +option java_outer_classname = "StringProto"; +option java_multiple_files = true; +option java_package = "io.envoyproxy.envoy.type.matcher"; +option go_package = "matcher"; + +import "google/protobuf/wrappers.proto"; +import "validate/validate.proto"; + +// [#protodoc-title: RegexMatcher] + +// A regex matcher designed for safety when used with untrusted input. +message RegexMatcher { + // Google's `RE2 `_ regex engine. The regex string must adhere to + // the documented `syntax `_. The engine is designed + // to complete execution in linear time as well as limit the amount of memory used. + message GoogleRE2 { + // This field controls the RE2 "program size" which is a rough estimate of how complex a + // compiled regex is to evaluate. A regex that has a program size greater than the configured + // value will fail to compile. In this case, the configured max program size can be increased + // or the regex can be simplified. If not specified, the default is 100. + google.protobuf.UInt32Value max_program_size = 1; + } + + oneof engine_type { + option (validate.required) = true; + + // Google's RE2 regex engine. + GoogleRE2 google_re2 = 1 [(validate.rules).message.required = true]; + } + + // The regex match string. The string must be supported by the configured engine. + string regex = 2 [(validate.rules).string.min_bytes = 1]; +} diff --git a/envoy/type/matcher/string.proto b/envoy/type/matcher/string.proto index 55f2171af..35628b7cc 100644 --- a/envoy/type/matcher/string.proto +++ b/envoy/type/matcher/string.proto @@ -7,6 +7,8 @@ option java_multiple_files = true; option java_package = "io.envoyproxy.envoy.type.matcher"; option go_package = "matcher"; +import "envoy/type/matcher/regex.proto"; + import "validate/validate.proto"; // [#protodoc-title: StringMatcher] @@ -48,7 +50,14 @@ message StringMatcher { // * The regex *\d{3}* matches the value *123* // * The regex *\d{3}* does not match the value *1234* // * The regex *\d{3}* does not match the value *123.456* - string regex = 4 [(validate.rules).string.max_bytes = 1024]; + // + // .. attention:: + // This field has been deprecated in favor of `safe_regex` as it is not safe for use with + // untrusted input in all cases. + string regex = 4 [(validate.rules).string.max_bytes = 1024, deprecated = true]; + + // The input string must match the regular expression specified here. + RegexMatcher safe_regex = 5 [(validate.rules).message.required = true]; } }