From e3c56d471666c2a112bd590872de21de103d5089 Mon Sep 17 00:00:00 2001 From: Yanxin Xiang Date: Sun, 23 Feb 2025 23:19:37 -0800 Subject: [PATCH 1/4] correctly treat backslash in datafusion-cli --- datafusion-cli/src/helper.rs | 56 ++++++++++++++++++------- datafusion-cli/tests/cli_integration.rs | 4 ++ datafusion-cli/tests/data/backslash.txt | 1 + 3 files changed, 45 insertions(+), 16 deletions(-) create mode 100644 datafusion-cli/tests/data/backslash.txt diff --git a/datafusion-cli/src/helper.rs b/datafusion-cli/src/helper.rs index 871e6de9d31f..21885fa45be0 100644 --- a/datafusion-cli/src/helper.rs +++ b/datafusion-cli/src/helper.rs @@ -170,28 +170,44 @@ impl Helper for CliHelper {} /// /// The data read from stdio will be escaped, so we need to unescape the input before executing the input pub fn unescape_input(input: &str) -> datafusion::error::Result { - let mut chars = input.chars(); - + let mut chars = input.chars().peekable(); let mut result = String::with_capacity(input.len()); - while let Some(char) = chars.next() { - if char == '\\' { - if let Some(next_char) = chars.next() { - // https://static.rust-lang.org/doc/master/reference.html#literals - result.push(match next_char { - '0' => '\0', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', + + while let Some(ch) = chars.next() { + if ch == '\\' { + if let Some(&next) = chars.peek() { + match next { + '0' => { + chars.next(); + result.push('\0'); + } + 'n' => { + chars.next(); + result.push('\n'); + } + 'r' => { + chars.next(); + result.push('\r'); + } + 't' => { + chars.next(); + result.push('\t'); + } + '\\' | '\'' => result.push('\\'), _ => { - return Err(sql_datafusion_err!(ParserError::TokenizerError( - format!("unsupported escape char: '\\{}'", next_char) + return Err(DataFusionError::Execution(format!( + "Invalid escape sequence: \\{}", + next ))) } - }); + } + } else { + return Err(sql_datafusion_err!(ParserError::TokenizerError( + "incomplete escape sequence: trailing backslash".to_string() + ))); } } else { - result.push(char); + result.push(ch); } } @@ -319,6 +335,14 @@ mod tests { )?; assert!(matches!(result, ValidationResult::Invalid(Some(_)))); + let result = readline_direct( + Cursor::new( + r"select '\', '\\', '\\\\\', 'dsdsds\\\\', '\t', '\0', '\n';".as_bytes(), + ), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + Ok(()) } diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 27cabf15afec..c24851f4bcd9 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -39,6 +39,10 @@ fn init() { ["--command", "select 1; select 2;", "--format", "json", "-q"], "[{\"Int64(1)\":1}]\n[{\"Int64(2)\":2}]\n" )] +#[case::exec_backslash( + ["--file", "tests/data/backslash.txt", "--format", "json", "-q"], + "[{\"Utf8(\\\"\\\\\\\")\":\"\\\\\",\"Utf8(\\\"\\\\\\\\\\\")\":\"\\\\\\\\\",\"Utf8(\\\"\\\\\\\\\\\\\\\\\\\\\\\")\":\"\\\\\\\\\\\\\\\\\\\\\",\"Utf8(\\\"dsdsds\\\\\\\\\\\\\\\\\\\")\":\"dsdsds\\\\\\\\\\\\\\\\\",\"Utf8(\\\"\\t\\\")\":\"\\t\",\"Utf8(\\\"\\u0000\\\")\":\"\\u0000\",\"Utf8(\\\"\\n\\\")\":\"\\n\"}]\n" +)] #[case::exec_from_files( ["--file", "tests/data/sql.txt", "--format", "json", "-q"], "[{\"Int64(1)\":1}]\n" diff --git a/datafusion-cli/tests/data/backslash.txt b/datafusion-cli/tests/data/backslash.txt new file mode 100644 index 000000000000..f2fe2b03746e --- /dev/null +++ b/datafusion-cli/tests/data/backslash.txt @@ -0,0 +1 @@ +select '\', '\\', '\\\\\', 'dsdsds\\\\', '\t', '\0', '\n'; \ No newline at end of file From 87790e3117c99bf6ef7e457f5f58c9e469eb9ec0 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 25 Feb 2025 16:17:45 -0500 Subject: [PATCH 2/4] Remove all cli unescaping --- datafusion-cli/src/exec.rs | 5 +-- datafusion-cli/src/helper.rs | 76 ++++-------------------------------- 2 files changed, 10 insertions(+), 71 deletions(-) diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index d560dee987f5..552d438ca390 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -22,7 +22,7 @@ use crate::helper::split_from_semicolon; use crate::print_format::PrintFormat; use crate::{ command::{Command, OutputFormat}, - helper::{unescape_input, CliHelper}, + helper::CliHelper, object_storage::get_object_store, print_options::{MaxRows, PrintOptions}, }; @@ -172,7 +172,7 @@ pub async fn exec_from_repl( } } Ok(line) => { - let lines = split_from_semicolon(line); + let lines = split_from_semicolon(&line); for line in lines { rl.add_history_entry(line.trim_end())?; tokio::select! { @@ -215,7 +215,6 @@ pub(super) async fn exec_and_print( sql: String, ) -> Result<()> { let now = Instant::now(); - let sql = unescape_input(&sql)?; let task_ctx = ctx.task_ctx(); let dialect = &task_ctx.session_config().options().sql_parser.dialect; let dialect = dialect_from_str(dialect).ok_or_else(|| { diff --git a/datafusion-cli/src/helper.rs b/datafusion-cli/src/helper.rs index 21885fa45be0..5ab8ad2c8958 100644 --- a/datafusion-cli/src/helper.rs +++ b/datafusion-cli/src/helper.rs @@ -22,11 +22,8 @@ use std::borrow::Cow; use crate::highlighter::{NoSyntaxHighlighter, SyntaxHighlighter}; -use datafusion::common::sql_datafusion_err; -use datafusion::error::DataFusionError; use datafusion::sql::parser::{DFParser, Statement}; use datafusion::sql::sqlparser::dialect::dialect_from_str; -use datafusion::sql::sqlparser::parser::ParserError; use rustyline::completion::{Completer, FilenameCompleter, Pair}; use rustyline::error::ReadlineError; @@ -63,15 +60,6 @@ impl CliHelper { fn validate_input(&self, input: &str) -> Result { if let Some(sql) = input.strip_suffix(';') { - let sql = match unescape_input(sql) { - Ok(sql) => sql, - Err(err) => { - return Ok(ValidationResult::Invalid(Some(format!( - " 🤔 Invalid statement: {err}", - )))) - } - }; - let dialect = match dialect_from_str(&self.dialect) { Some(dialect) => dialect, None => { @@ -166,56 +154,8 @@ impl Validator for CliHelper { impl Helper for CliHelper {} -/// Unescape input string from readline. -/// -/// The data read from stdio will be escaped, so we need to unescape the input before executing the input -pub fn unescape_input(input: &str) -> datafusion::error::Result { - let mut chars = input.chars().peekable(); - let mut result = String::with_capacity(input.len()); - - while let Some(ch) = chars.next() { - if ch == '\\' { - if let Some(&next) = chars.peek() { - match next { - '0' => { - chars.next(); - result.push('\0'); - } - 'n' => { - chars.next(); - result.push('\n'); - } - 'r' => { - chars.next(); - result.push('\r'); - } - 't' => { - chars.next(); - result.push('\t'); - } - '\\' | '\'' => result.push('\\'), - _ => { - return Err(DataFusionError::Execution(format!( - "Invalid escape sequence: \\{}", - next - ))) - } - } - } else { - return Err(sql_datafusion_err!(ParserError::TokenizerError( - "incomplete escape sequence: trailing backslash".to_string() - ))); - } - } else { - result.push(ch); - } - } - - Ok(result) -} - /// Splits a string which consists of multiple queries. -pub(crate) fn split_from_semicolon(sql: String) -> Vec { +pub(crate) fn split_from_semicolon(sql: &str) -> Vec { let mut commands = Vec::new(); let mut current_command = String::new(); let mut in_single_quote = false; @@ -370,15 +310,15 @@ mod tests { fn test_split_from_semicolon() { let sql = "SELECT 1; SELECT 2;"; let expected = vec!["SELECT 1;", "SELECT 2;"]; - assert_eq!(split_from_semicolon(sql.to_string()), expected); + assert_eq!(split_from_semicolon(sql), expected); let sql = r#"SELECT ";";"#; let expected = vec![r#"SELECT ";";"#]; - assert_eq!(split_from_semicolon(sql.to_string()), expected); + assert_eq!(split_from_semicolon(sql), expected); let sql = "SELECT ';';"; let expected = vec!["SELECT ';';"]; - assert_eq!(split_from_semicolon(sql.to_string()), expected); + assert_eq!(split_from_semicolon(sql), expected); let sql = r#"SELECT 1; SELECT 'value;value'; SELECT 1 as "text;text";"#; let expected = vec![ @@ -386,18 +326,18 @@ mod tests { "SELECT 'value;value';", r#"SELECT 1 as "text;text";"#, ]; - assert_eq!(split_from_semicolon(sql.to_string()), expected); + assert_eq!(split_from_semicolon(sql), expected); let sql = ""; let expected: Vec = Vec::new(); - assert_eq!(split_from_semicolon(sql.to_string()), expected); + assert_eq!(split_from_semicolon(sql), expected); let sql = "SELECT 1"; let expected = vec!["SELECT 1;"]; - assert_eq!(split_from_semicolon(sql.to_string()), expected); + assert_eq!(split_from_semicolon(sql), expected); let sql = "SELECT 1; "; let expected = vec!["SELECT 1;"]; - assert_eq!(split_from_semicolon(sql.to_string()), expected); + assert_eq!(split_from_semicolon(sql), expected); } } From d304cfcd281f628c0b31f65f00403e30a12c35ef Mon Sep 17 00:00:00 2001 From: Lordworms Date: Tue, 25 Feb 2025 13:47:18 -0800 Subject: [PATCH 3/4] remove unnessasary check --- datafusion-cli/src/helper.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/datafusion-cli/src/helper.rs b/datafusion-cli/src/helper.rs index 21885fa45be0..e1d76b5c34e1 100644 --- a/datafusion-cli/src/helper.rs +++ b/datafusion-cli/src/helper.rs @@ -326,15 +326,6 @@ mod tests { )?; assert!(matches!(result, ValidationResult::Valid(None))); - // should be invalid - let result = readline_direct( - Cursor::new( - r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\u{07}');" - .as_bytes()), - &validator, - )?; - assert!(matches!(result, ValidationResult::Invalid(Some(_)))); - let result = readline_direct( Cursor::new( r"select '\', '\\', '\\\\\', 'dsdsds\\\\', '\t', '\0', '\n';".as_bytes(), From 8c1785b9eaacab3d5e4617bd298c7ec5e30a78cd Mon Sep 17 00:00:00 2001 From: Lordworms Date: Tue, 25 Feb 2025 14:02:44 -0800 Subject: [PATCH 4/4] refine test --- datafusion-cli/tests/cli_integration.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index c24851f4bcd9..fa170ae19259 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -41,7 +41,7 @@ fn init() { )] #[case::exec_backslash( ["--file", "tests/data/backslash.txt", "--format", "json", "-q"], - "[{\"Utf8(\\\"\\\\\\\")\":\"\\\\\",\"Utf8(\\\"\\\\\\\\\\\")\":\"\\\\\\\\\",\"Utf8(\\\"\\\\\\\\\\\\\\\\\\\\\\\")\":\"\\\\\\\\\\\\\\\\\\\\\",\"Utf8(\\\"dsdsds\\\\\\\\\\\\\\\\\\\")\":\"dsdsds\\\\\\\\\\\\\\\\\",\"Utf8(\\\"\\t\\\")\":\"\\t\",\"Utf8(\\\"\\u0000\\\")\":\"\\u0000\",\"Utf8(\\\"\\n\\\")\":\"\\n\"}]\n" + "[{\"Utf8(\\\"\\\\\\\")\":\"\\\\\",\"Utf8(\\\"\\\\\\\\\\\")\":\"\\\\\\\\\",\"Utf8(\\\"\\\\\\\\\\\\\\\\\\\\\\\")\":\"\\\\\\\\\\\\\\\\\\\\\",\"Utf8(\\\"dsdsds\\\\\\\\\\\\\\\\\\\")\":\"dsdsds\\\\\\\\\\\\\\\\\",\"Utf8(\\\"\\\\t\\\")\":\"\\\\t\",\"Utf8(\\\"\\\\0\\\")\":\"\\\\0\",\"Utf8(\\\"\\\\n\\\")\":\"\\\\n\"}]\n" )] #[case::exec_from_files( ["--file", "tests/data/sql.txt", "--format", "json", "-q"],