Add the `ingress_upstreaminfo` log format to the VRL `parse_nginx_log` function.

Files touched by this patch:
  * CHANGELOG.md                  - records the new format under "unreleased".
  * benches/stdlib.rs             - adds a bench case for the new format.
  * src/stdlib/log_util.rs        - adds the regex for the format and parses its
                                    numeric captures as integers / floats in `capture_value`.
  * src/stdlib/parse_nginx_log.rs - registers the format (variants, regex, time format,
                                    type definition) and adds two tests.

Captures written as `(-|(?P<name>...))`, and the optional second `[...]` group, stay
unset when the log line holds `-` / `[]`; the matching fields are typed `or_undefined()`.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fbd119f5a8..6baf097b6b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ## unreleased
 
+- `ingress_upstreaminfo` log format has been added to `parse_nginx_log` function (https://github.com/vectordotdev/vrl/pull/193)
+
 ## `0.4.0` (2023-05-11)
 - consolidated all crates into the root `vrl` crate. The external API stayed the same, with the exception of macros, which are now all exported at the root of the `vrl` crate.
 - published VRL to crates.io. Standard crate versioning will now be used instead of git tags.
diff --git a/benches/stdlib.rs b/benches/stdlib.rs
index 9ccd41573d..0682d380d3 100644
--- a/benches/stdlib.rs
+++ b/benches/stdlib.rs
@@ -1736,6 +1736,33 @@ bench_function! {
         })),
     }
 
+    ingress_upstreaminfo {
+        args: func_args![
+            value: r#"0.0.0.0 - - [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7"#,
+            format: "ingress_upstreaminfo",
+        ],
+        want: Ok(value!({
+            "remote_addr" => "0.0.0.0",
+            "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
+            "request" => "GET /some/path HTTP/2.0",
+            "method" => "GET",
+            "path" => "/some/path",
+            "protocol" => "HTTP/2.0",
+            "status" => 200,
+            "body_bytes_size" => 12312,
+            "http_referer" => "https://10.0.0.1/some/referer",
+            "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
+            "request_length" => 462,
+            "request_time" => 0.050,
+            "proxy_upstream_name" => "some-upstream-service-9000",
+            "upstream_addr" => "10.0.50.80:9000",
+            "upstream_response_length" => 19437,
+            "upstream_response_time" => 0.049,
+            "upstream_status" => 200,
+            "req_id" => "752178adb17130b291aefd8c386279e7",
+        })),
+    }
+
     error {
         args: func_args![value: r#"2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081""#,
                          format: "error"
diff --git a/src/stdlib/log_util.rs b/src/stdlib/log_util.rs
index ec17fe4ff9..31d0699167 100644
--- a/src/stdlib/log_util.rs
+++ b/src/stdlib/log_util.rs
@@ -134,6 +134,39 @@ pub(crate) static REGEX_NGINX_COMBINED_LOG: Lazy<Regex> = Lazy::new(|| {
     .expect("failed compiling regex for Nginx combined log")
 });
 
+// - Ingress Nginx docs: https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/log-format/
+#[cfg(feature = "stdlib_parse_nginx_log")]
+pub(crate) static REGEX_INGRESS_NGINX_UPSTREAMINFO_LOG: Lazy<Regex> = Lazy::new(|| {
+    Regex::new(
+        r#"(?x)                                                 # Ignore whitespace and comments in the regex expression.
+        ^\s*                                                    # Start with any number of whitespaces
+        (-|(?P<remote_addr>\S+))\s+                             # Match `-` or any non space character
+        \-\s+                                                   # Always a dash
+        (-|(?P<remote_user>\S+))\s+                             # Match `-` or any non space character
+        \[(?P<timestamp>[^\]]+)\]\s+                            # Match date between brackets
+        "(?P<request>
+            (?P<method>\w+)\s+                                  # Match at least a word
+            (?P<path>\S+)\s+                                    # Match any non space character
+            (?P<protocol>[^"]+)
+        )"\s+                                                   # Match any non double-quote character
+        (?P<status>\d+)\s+                                      # Match numbers
+        (?P<body_bytes_size>\d+)\s+                             # Match numbers
+        "(-|(?P<http_referer>[^"]+))"\s+                        # Match `-` or any non double-quote character
+        "(-|(?P<http_user_agent>[^"]+))"\s+                     # Match `-` or any non double-quote character
+        (?P<request_length>\d+)\s+                              # Match numbers
+        (?P<request_time>\d+\.\d+)\s+                           # Match numbers with dot
+        \[(?P<proxy_upstream_name>[^\]]+)\]\s+                  # Match all characters within square brackets
+        \[(?P<proxy_alternative_upstream_name>[^\]]+)?\]\s+     # Match all characters within square brackets, optional
+        (?P<upstream_addr>\S+)\s+                               # Match any non space character
+        (?P<upstream_response_length>\d+)\s+                    # Match numbers
+        (?P<upstream_response_time>\d+\.\d+)\s+                 # Match numbers with dot
+        (?P<upstream_status>\d+)\s+                             # Match numbers
+        (?P<req_id>\S+)                                         # Match any non space character
+        \s*$                                                    # Match any number of whitespaces (to be discarded).
+    "#)
+    .expect("failed compiling regex for Ingress Nginx upstreaminfo log")
+});
+
 #[cfg(feature = "stdlib_parse_nginx_log")]
 pub(crate) static REGEX_NGINX_ERROR_LOG: Lazy<Regex> = Lazy::new(|| {
     Regex::new(
@@ -180,12 +213,21 @@ fn capture_value(
 ) -> std::result::Result<Value, String> {
     Ok(match name {
         "timestamp" => Value::Timestamp(parse_time(value, timestamp_format, timezone)?),
-        "status" | "size" | "pid" | "tid" | "cid" | "port" => Value::Integer(
+        "status"
+        | "size"
+        | "pid"
+        | "tid"
+        | "cid"
+        | "port"
+        | "body_bytes_size"
+        | "request_length"
+        | "upstream_response_length"
+        | "upstream_status" => Value::Integer(
             value
                 .parse()
                 .map_err(|_| format!("failed parsing {name}"))?,
         ),
-        "excess" => Value::Float(
+        "excess" | "request_time" | "upstream_response_time" => Value::Float(
             value
                 .parse()
                 .map_err(|_| format!("failed parsing {name}"))?,
diff --git a/src/stdlib/parse_nginx_log.rs b/src/stdlib/parse_nginx_log.rs
index 47e0c393b9..9d79761c07 100644
--- a/src/stdlib/parse_nginx_log.rs
+++ b/src/stdlib/parse_nginx_log.rs
@@ -24,7 +24,11 @@ fn parse_nginx_log(
 }
 
 fn variants() -> Vec<Value> {
-    vec![value!("combined"), value!("error")]
+    vec![
+        value!("combined"),
+        value!("error"),
+        value!("ingress_upstreaminfo"),
+    ]
 }
 
 #[derive(Clone, Copy, Debug)]
@@ -100,6 +104,7 @@ impl Function for ParseNginxLog {
 fn regex_for_format(format: &[u8]) -> &Regex {
     match format {
         b"combined" => &log_util::REGEX_NGINX_COMBINED_LOG,
+        b"ingress_upstreaminfo" => &log_util::REGEX_INGRESS_NGINX_UPSTREAMINFO_LOG,
         b"error" => &log_util::REGEX_NGINX_ERROR_LOG,
         _ => unreachable!(),
     }
@@ -108,6 +113,7 @@ fn regex_for_format(format: &[u8]) -> &Regex {
 fn time_format_for_format(format: &[u8]) -> String {
     match format {
         b"combined" => "%d/%b/%Y:%T %z".to_owned(),
+        b"ingress_upstreaminfo" => "%d/%b/%Y:%T %z".to_owned(),
         b"error" => "%Y/%m/%d %H:%M:%S".to_owned(),
         _ => unreachable!(),
     }
@@ -145,6 +151,7 @@ impl FunctionExpression for ParseNginxLogFn {
     fn type_def(&self, _: &state::TypeState) -> TypeDef {
         TypeDef::object(match self.format.as_ref() {
             b"combined" => kind_combined(),
+            b"ingress_upstreaminfo" => kind_ingress_upstreaminfo(),
             b"error" => kind_error(),
             _ => unreachable!(),
         })
@@ -169,6 +176,34 @@ fn kind_combined() -> BTreeMap<Field, Kind> {
     ])
 }
 
+fn kind_ingress_upstreaminfo() -> BTreeMap<Field, Kind> {
+    BTreeMap::from([
+        ("remote_addr".into(), Kind::bytes().or_undefined()),
+        ("remote_user".into(), Kind::bytes().or_undefined()),
+        ("timestamp".into(), Kind::timestamp()),
+        ("request".into(), Kind::bytes()),
+        ("method".into(), Kind::bytes()),
+        ("path".into(), Kind::bytes()),
+        ("protocol".into(), Kind::bytes()),
+        ("status".into(), Kind::integer()),
+        ("body_bytes_size".into(), Kind::integer()),
+        ("http_referer".into(), Kind::bytes().or_undefined()),
+        ("http_user_agent".into(), Kind::bytes().or_undefined()),
+        ("request_length".into(), Kind::integer()),
+        ("request_time".into(), Kind::float()),
+        ("proxy_upstream_name".into(), Kind::bytes()),
+        (
+            "proxy_alternative_upstream_name".into(),
+            Kind::bytes().or_undefined(),
+        ),
+        ("upstream_addr".into(), Kind::bytes()),
+        ("upstream_response_length".into(), Kind::integer()),
+        ("upstream_response_time".into(), Kind::float()),
+        ("upstream_status".into(), Kind::integer()),
+        ("req_id".into(), Kind::bytes()),
+    ])
+}
+
 fn kind_error() -> BTreeMap<Field, Kind> {
     BTreeMap::from([
         ("timestamp".into(), Kind::timestamp()),
@@ -259,6 +294,64 @@ mod tests {
             tdef: TypeDef::object(kind_combined()).fallible(),
         }
 
+        ingress_nginx_upstreaminfo_valid_without_optional_fields {
+            args: func_args![
+                value: r#"0.0.0.0 - - [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7"#,
+                format: "ingress_upstreaminfo"
+            ],
+            want: Ok(btreemap! {
+                "remote_addr" => "0.0.0.0",
+                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
+                "request" => "GET /some/path HTTP/2.0",
+                "method" => "GET",
+                "path" => "/some/path",
+                "protocol" => "HTTP/2.0",
+                "status" => 200,
+                "body_bytes_size" => 12312,
+                "http_referer" => "https://10.0.0.1/some/referer",
+                "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
+                "request_length" => 462,
+                "request_time" => 0.050,
+                "proxy_upstream_name" => "some-upstream-service-9000",
+                "upstream_addr" => "10.0.50.80:9000",
+                "upstream_response_length" => 19437,
+                "upstream_response_time" => 0.049,
+                "upstream_status" => 200,
+                "req_id" => "752178adb17130b291aefd8c386279e7",
+            }),
+            tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
+        }
+
+        ingress_nginx_upstreaminfo_valid_all_fields {
+            args: func_args![
+                value: r#"0.0.0.0 - bob [18/Mar/2023:15:00:00 +0000] "GET /some/path HTTP/2.0" 200 12312 "https://10.0.0.1/some/referer" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36" 462 0.050 [some-upstream-service-9000] [some-other-upstream-5000] 10.0.50.80:9000 19437 0.049 200 752178adb17130b291aefd8c386279e7"#,
+                format: "ingress_upstreaminfo"
+            ],
+            want: Ok(btreemap! {
+                "remote_addr" => "0.0.0.0",
+                "remote_user" => "bob",
+                "timestamp" => Value::Timestamp(DateTime::parse_from_rfc3339("2023-03-18T15:00:00Z").unwrap().into()),
+                "request" => "GET /some/path HTTP/2.0",
+                "method" => "GET",
+                "path" => "/some/path",
+                "protocol" => "HTTP/2.0",
+                "status" => 200,
+                "body_bytes_size" => 12312,
+                "http_referer" => "https://10.0.0.1/some/referer",
+                "http_user_agent" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
+                "request_length" => 462,
+                "request_time" => 0.050,
+                "proxy_upstream_name" => "some-upstream-service-9000",
+                "proxy_alternative_upstream_name" => "some-other-upstream-5000",
+                "upstream_addr" => "10.0.50.80:9000",
+                "upstream_response_length" => 19437,
+                "upstream_response_time" => 0.049,
+                "upstream_status" => 200,
+                "req_id" => "752178adb17130b291aefd8c386279e7",
+            }),
+            tdef: TypeDef::object(kind_ingress_upstreaminfo()).fallible(),
+        }
+
         error_line_valid {
             args: func_args![
                 value: r#"2021/04/01 13:02:31 [error] 31#31: *1 open() "/usr/share/nginx/html/not-found" failed (2: No such file or directory), client: 172.17.0.1, server: localhost, request: "POST /not-found HTTP/1.1", host: "localhost:8081""#,