From b27e5009368121b37ad5574062f81618587bf0f3 Mon Sep 17 00:00:00 2001 From: Joris Bayer <joris.bayer@sentry.io> Date: Mon, 5 Feb 2024 16:41:04 +0100 Subject: [PATCH 1/4] chore(spans): Upgrade sqlparser --- Cargo.lock | 12 +- relay-event-normalization/Cargo.toml | 4 +- .../normalize/span/description/sql/parser.rs | 194 ++++++++++-------- 3 files changed, 119 insertions(+), 91 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8151cd411b8..206c401502d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4840,8 +4840,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.37.0" -source = "git+https://github.com/getsentry/sqlparser-rs.git?rev=0bb7ec6ce661ecfc468f647fd1003b7839d37fa6#0bb7ec6ce661ecfc468f647fd1003b7839d37fa6" +version = "0.43.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f95c4bae5aba7cd30bd506f7140026ade63cff5afd778af8854026f9606bf5d4" dependencies = [ "log", "sqlparser_derive", @@ -4849,12 +4850,13 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.1.1" -source = "git+https://github.com/getsentry/sqlparser-rs.git?rev=0bb7ec6ce661ecfc468f647fd1003b7839d37fa6#0bb7ec6ce661ecfc468f647fd1003b7839d37fa6" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.38", ] [[package]] diff --git a/relay-event-normalization/Cargo.toml b/relay-event-normalization/Cargo.toml index 8987ee98539..7e0447d5112 100644 --- a/relay-event-normalization/Cargo.toml +++ b/relay-event-normalization/Cargo.toml @@ -31,9 +31,7 @@ serde_json = { workspace = true } serde_urlencoded = "0.7.1" smallvec = { workspace = true } # Use a fork of sqlparser until https://github.com/sqlparser-rs/sqlparser-rs/pull/1065 gets merged: -sqlparser = { git = "https://github.com/getsentry/sqlparser-rs.git", rev = "0bb7ec6ce661ecfc468f647fd1003b7839d37fa6", features = [ - "visitor", -] } +sqlparser = { version = "0.43.1", features = ["visitor"] } thiserror = { workspace = true } url = { workspace = true } uuid = { workspace = true } diff --git a/relay-event-normalization/src/normalize/span/description/sql/parser.rs b/relay-event-normalization/src/normalize/span/description/sql/parser.rs index 7ce7f8df132..0a8c8527f8b 100644 --- a/relay-event-normalization/src/normalize/span/description/sql/parser.rs +++ b/relay-event-normalization/src/normalize/span/description/sql/parser.rs @@ -6,8 +6,8 @@ use itertools::Itertools; use once_cell::sync::Lazy; use regex::Regex; use sqlparser::ast::{ - AlterTableOperation, Assignment, BinaryOperator, CloseCursor, ColumnDef, Expr, Ident, - ObjectName, Query, Select, SelectItem, SetExpr, Statement, TableAlias, TableConstraint, + AlterTableOperation, Assignment, BinaryOperator, CloseCursor, ColumnDef, Expr, FunctionArg, + Ident, ObjectName, Query, Select, SelectItem, SetExpr, Statement, TableAlias, TableConstraint, TableFactor, UnaryOperator, Value, VisitMut, VisitorMut, }; use sqlparser::dialect::{Dialect, GenericDialect}; @@ -198,12 +198,41 @@ impl VisitorMut for NormalizeVisitor { Self::simplify_table_alias(alias); } TableFactor::Pivot { - table_alias, - pivot_alias, + value_column, + alias, + .. + } => { + Self::simplify_compound_identifier(value_column); + Self::simplify_table_alias(alias); + } + TableFactor::Function { + name, args, alias, .. + } => { + Self::simplify_compound_identifier(&mut name.0); + for arg in args { + if let FunctionArg::Named { name, .. } = arg { + Self::scrub_name(name); + } + } + Self::simplify_table_alias(alias); + } + TableFactor::JsonTable { columns, alias, .. } => { + for col in columns { + Self::scrub_name(&mut col.name); + Self::simplify_table_alias(alias); + } + } + TableFactor::Unpivot { + value, + name, + columns, + alias, .. } => { - Self::simplify_table_alias(table_alias); - Self::simplify_table_alias(pivot_alias); + Self::scrub_name(value); + Self::scrub_name(name); + Self::simplify_compound_identifier(columns); + Self::simplify_table_alias(alias); } } ControlFlow::Continue(()) @@ -307,13 +336,8 @@ impl VisitorMut for NormalizeVisitor { self.transform_query(query); } // `INSERT INTO col1, col2 VALUES (val1, val2)` becomes `INSERT INTO .. VALUES (%s)`. - Statement::Insert { - columns, source, .. - } => { + Statement::Insert { columns, .. } => { *columns = vec![Self::ellipsis()]; - if let SetExpr::Values(v) = &mut *source.body { - v.rows = vec![vec![Expr::Value(Self::placeholder())]] - } } // Simple lists of col = value assignments are collapsed to `..`. Statement::Update { assignments, .. } => { @@ -350,89 +374,93 @@ impl VisitorMut for NormalizeVisitor { Statement::Close { cursor: CloseCursor::Specific { name }, } => Self::erase_name(name), - Statement::AlterTable { name, operation } => { + Statement::AlterTable { + name, operations, .. + } => { Self::simplify_compound_identifier(&mut name.0); - match operation { - AlterTableOperation::AddConstraint(c) => match c { - TableConstraint::Unique { name, columns, .. } => { - if let Some(name) = name { - Self::scrub_name(name); + for operation in operations { + match operation { + AlterTableOperation::AddConstraint(c) => match c { + TableConstraint::Unique { name, columns, .. } => { + if let Some(name) = name { + Self::scrub_name(name); + } + for column in columns { + Self::scrub_name(column); + } } - for column in columns { - Self::scrub_name(column); + TableConstraint::ForeignKey { + name, + columns, + referred_columns, + .. + } => { + if let Some(name) = name { + Self::scrub_name(name); + } + for column in columns { + Self::scrub_name(column); + } + for column in referred_columns { + Self::scrub_name(column); + } } - } - TableConstraint::ForeignKey { - name, - columns, - referred_columns, - .. - } => { - if let Some(name) = name { - Self::scrub_name(name); + TableConstraint::Check { name, .. } => { + if let Some(name) = name { + Self::scrub_name(name); + } } - for column in columns { - Self::scrub_name(column); + TableConstraint::Index { name, columns, .. } => { + if let Some(name) = name { + Self::scrub_name(name); + } + for column in columns { + Self::scrub_name(column); + } } - for column in referred_columns { - Self::scrub_name(column); + TableConstraint::FulltextOrSpatial { + opt_index_name, + columns, + .. + } => { + if let Some(name) = opt_index_name { + Self::scrub_name(name); + } + for column in columns { + Self::scrub_name(column); + } } + }, + AlterTableOperation::AddColumn { column_def, .. } => { + let ColumnDef { name, .. } = column_def; + Self::scrub_name(name); } - TableConstraint::Check { name, .. } => { - if let Some(name) = name { - Self::scrub_name(name); - } + AlterTableOperation::DropConstraint { name, .. } => Self::scrub_name(name), + AlterTableOperation::DropColumn { column_name, .. } => { + Self::scrub_name(column_name) } - TableConstraint::Index { name, columns, .. } => { - if let Some(name) = name { - Self::scrub_name(name); - } - for column in columns { - Self::scrub_name(column); - } + AlterTableOperation::RenameColumn { + old_column_name, + new_column_name, + } => { + Self::scrub_name(old_column_name); + Self::scrub_name(new_column_name); } - TableConstraint::FulltextOrSpatial { - opt_index_name, - columns, - .. + AlterTableOperation::ChangeColumn { + old_name, new_name, .. } => { - if let Some(name) = opt_index_name { - Self::scrub_name(name); - } - for column in columns { - Self::scrub_name(column); - } + Self::scrub_name(old_name); + Self::scrub_name(new_name); } - }, - AlterTableOperation::AddColumn { column_def, .. } => { - let ColumnDef { name, .. } = column_def; - Self::scrub_name(name); - } - AlterTableOperation::DropConstraint { name, .. } => Self::scrub_name(name), - AlterTableOperation::DropColumn { column_name, .. } => { - Self::scrub_name(column_name) - } - AlterTableOperation::RenameColumn { - old_column_name, - new_column_name, - } => { - Self::scrub_name(old_column_name); - Self::scrub_name(new_column_name); - } - AlterTableOperation::ChangeColumn { - old_name, new_name, .. - } => { - Self::scrub_name(old_name); - Self::scrub_name(new_name); - } - AlterTableOperation::RenameConstraint { old_name, new_name } => { - Self::scrub_name(old_name); - Self::scrub_name(new_name); - } - AlterTableOperation::AlterColumn { column_name, .. } => { - Self::scrub_name(column_name); + AlterTableOperation::RenameConstraint { old_name, new_name } => { + Self::scrub_name(old_name); + Self::scrub_name(new_name); + } + AlterTableOperation::AlterColumn { column_name, .. } => { + Self::scrub_name(column_name); + } + _ => {} } - _ => {} } } From bcb2c024b046809a4aaf34df9b87b93e0b4c064f Mon Sep 17 00:00:00 2001 From: Joris Bayer <joris.bayer@sentry.io> Date: Mon, 5 Feb 2024 16:44:28 +0100 Subject: [PATCH 2/4] doc --- CHANGELOG.md | 1 + relay-event-normalization/Cargo.toml | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0fb9bd064f..f7e741ba548 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ - Drop spans ending outside the valid timestamp range. ([#3013](https://github.com/getsentry/relay/pull/3013)) - Extract INP metrics from spans. ([#2969](https://github.com/getsentry/relay/pull/2969), [#3041](https://github.com/getsentry/relay/pull/3041)) - Add ability to rate limit metric buckets by namespace. ([#2941](https://github.com/getsentry/relay/pull/2941)) +- Upgrade sqlparser to 0.43.1.([#3057](https://github.com/getsentry/relay/pull/3057)) ## 24.1.1 diff --git a/relay-event-normalization/Cargo.toml b/relay-event-normalization/Cargo.toml index 7e0447d5112..61a08b8aec7 100644 --- a/relay-event-normalization/Cargo.toml +++ b/relay-event-normalization/Cargo.toml @@ -30,7 +30,6 @@ serde = { workspace = true } serde_json = { workspace = true } serde_urlencoded = "0.7.1" smallvec = { workspace = true } -# Use a fork of sqlparser until https://github.com/sqlparser-rs/sqlparser-rs/pull/1065 gets merged: sqlparser = { version = "0.43.1", features = ["visitor"] } thiserror = { workspace = true } url = { workspace = true } From ea8750271c818c094082ac4b1b21124d7e1eee07 Mon Sep 17 00:00:00 2001 From: Joris Bayer <joris.bayer@sentry.io> Date: Mon, 5 Feb 2024 17:30:00 +0100 Subject: [PATCH 3/4] fix --- .../src/normalize/span/description/sql/mod.rs | 11 ++++------- .../src/normalize/span/description/sql/parser.rs | 9 ++++++++- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/relay-event-normalization/src/normalize/span/description/sql/mod.rs b/relay-event-normalization/src/normalize/span/description/sql/mod.rs index edeed6434c3..54d0684a431 100644 --- a/relay-event-normalization/src/normalize/span/description/sql/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/sql/mod.rs @@ -383,7 +383,7 @@ mod tests { scrub_sql_test!( strip_prefixes_mysql_generic, r#"SELECT `table`.`foo`, count(*) from `table` WHERE sku = %s"#, - r#"SELECT foo, count(*) from table WHERE sku = %s"# + r#"SELECT foo, count(*) FROM table WHERE sku = %s"# ); scrub_sql_test_with_dialect!( @@ -581,7 +581,7 @@ mod tests { scrub_sql_test!( values_multi, "INSERT INTO a (b, c, d, e) VALuES (%s, %s, %s, %s), (%s, %s, %s, %s), (%s, %s, %s, %s) ON CONFLICT DO NOTHING", - "INSERT INTO a (..) VALUES (%s) ON CONFLICT DO NOTHING" + "INSERT INTO a (..) VALUES (%s) ON CONFLICT DO NOTHING" ); scrub_sql_test!( @@ -876,13 +876,10 @@ mod tests { ); scrub_sql_test_with_dialect!( - dont_fallback_to_regex, + replace_into, "mysql", - // sqlparser cannot parse REPLACE INTO. If we know that - // a query is MySQL, we should give up rather than try to scrub - // with regex r#"REPLACE INTO `foo` (`a`) VALUES ("abcd1234")"#, - "REPLACE INTO foo (a) VALUES (%s)" + "REPLACE INTO foo (..) VALUES (%s)" ); scrub_sql_test!( diff --git a/relay-event-normalization/src/normalize/span/description/sql/parser.rs b/relay-event-normalization/src/normalize/span/description/sql/parser.rs index 0a8c8527f8b..f38fab57f2a 100644 --- a/relay-event-normalization/src/normalize/span/description/sql/parser.rs +++ b/relay-event-normalization/src/normalize/span/description/sql/parser.rs @@ -336,8 +336,15 @@ impl VisitorMut for NormalizeVisitor { self.transform_query(query); } // `INSERT INTO col1, col2 VALUES (val1, val2)` becomes `INSERT INTO .. VALUES (%s)`. - Statement::Insert { columns, .. } => { + Statement::Insert { + columns, source, .. + } => { *columns = vec![Self::ellipsis()]; + if let Some(source) = source.as_mut() { + if let SetExpr::Values(v) = &mut *source.body { + v.rows = vec![vec![Expr::Value(Self::placeholder())]] + } + } } // Simple lists of col = value assignments are collapsed to `..`. Statement::Update { assignments, .. } => { From 0de38dcb15b2c4b20b03ba280eac8df67ecd43ea Mon Sep 17 00:00:00 2001 From: Joris Bayer <joris.bayer@sentry.io> Date: Tue, 6 Feb 2024 09:28:43 +0100 Subject: [PATCH 4/4] ref: simplify outside of loop --- .../src/normalize/span/description/sql/parser.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/relay-event-normalization/src/normalize/span/description/sql/parser.rs b/relay-event-normalization/src/normalize/span/description/sql/parser.rs index f38fab57f2a..05c7edd0265 100644 --- a/relay-event-normalization/src/normalize/span/description/sql/parser.rs +++ b/relay-event-normalization/src/normalize/span/description/sql/parser.rs @@ -217,10 +217,10 @@ impl VisitorMut for NormalizeVisitor { Self::simplify_table_alias(alias); } TableFactor::JsonTable { columns, alias, .. } => { - for col in columns { - Self::scrub_name(&mut col.name); - Self::simplify_table_alias(alias); + for column in columns { + Self::scrub_name(&mut column.name); } + Self::simplify_table_alias(alias); } TableFactor::Unpivot { value,