From 570216bc9e2dc8682743966dc56125557c298a70 Mon Sep 17 00:00:00 2001 From: Anshul Sanghi Date: Sun, 28 Apr 2024 16:15:26 +0530 Subject: [PATCH] Add Proper Handling For Escaped And Non-Escaped Single Quotes In TSVector Words #729,#2705 --- sqlx-postgres/src/types/ts_vector.rs | 58 ++++++++++++++++++++++++---- tests/postgres/types.rs | 18 +++++++++ 2 files changed, 69 insertions(+), 7 deletions(-) diff --git a/sqlx-postgres/src/types/ts_vector.rs b/sqlx-postgres/src/types/ts_vector.rs index 9bc9a897c5..97e14c0257 100644 --- a/sqlx-postgres/src/types/ts_vector.rs +++ b/sqlx-postgres/src/types/ts_vector.rs @@ -110,11 +110,23 @@ pub struct Lexeme { positions: Vec, } +impl Lexeme { + pub fn word(&self) -> &str { + self.word.as_str() + } +} + #[derive(Debug)] pub struct TsVector { words: Vec, } +impl TsVector { + pub fn words(&self) -> &Vec { + &self.words + } +} + impl Display for TsVector { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { use std::fmt::Write; @@ -122,6 +134,9 @@ impl Display for TsVector { let mut words = self.words.iter().peekable(); while let Some(Lexeme { positions, word }) = words.next() { + // Add escaping for any single quotes within the word. + let word = word.replace("'", "''"); + if positions.is_empty() { f.write_str(&format!("'{}'", word))?; } else { @@ -214,16 +229,45 @@ impl TryInto> for &TsVector { } } -fn split_into_ts_vector_words(input: &str) -> impl Iterator { +fn split_into_ts_vector_words(input: &str) -> Vec { let mut wrapped = false; - - input.split(move |character: char| { - if character == '\'' { - wrapped = !wrapped; + let mut words = vec![]; + let mut current_word = String::new(); + let mut escaped = false; + + let mut chars = input.chars().peekable(); + + while let Some(token) = chars.next() { + match token { + '\'' => { + if !escaped { + if chars.peek().is_some_and(|item| *item == '\'') { + escaped = true; + current_word += "'"; + } else { + wrapped = !wrapped; + } + } else { + escaped = false; + } + } + char => { + if char.is_whitespace() && !wrapped { + words.push(current_word); + current_word = String::new(); + } else { + current_word += &char.to_string(); + } + } } + } + + if !current_word.is_empty() { + words.push(current_word); + current_word = String::new(); + } - character.is_whitespace() && !wrapped - }) + words } impl FromStr for TsVector { diff --git a/tests/postgres/types.rs b/tests/postgres/types.rs index 3577920597..d5c10cae56 100644 --- a/tests/postgres/types.rs +++ b/tests/postgres/types.rs @@ -447,6 +447,7 @@ mod full_text_search { use sqlx::postgres::types::TsVector; use sqlx::postgres::PgRow; use sqlx::{Executor, Row}; + use sqlx_core::statement::Statement; use sqlx_test::new; #[sqlx_macros::test] @@ -489,6 +490,23 @@ mod full_text_search { let value = row.get::(0).to_string(); assert_eq!(value, "' A'"); + let row = conn + .fetch_one(r#"SELECT $$'Joe''s' cat$$::tsvector;"#) + .await?; + let value = row.get::(0).to_string(); + assert_eq!(value, "'Joe''s' 'cat'"); + + let sql = r#"SELECT $$'Joe''s' cat$$::tsvector;"#; + let row = conn.fetch_one(sql).await.unwrap(); + let cell = row.get::(0); + assert_eq!(cell.words()[0].word(), "Joe's"); + + let sql = r#"SELECT $$'Joe''s' cat$$::tsvector;"#; + let statement = conn.prepare(sql).await.unwrap(); + let row = statement.query().fetch_one(&mut conn).await.unwrap(); + let cell = row.get::(0); + assert_eq!(cell.to_string(), "'Joe''s' 'cat'"); + Ok(()) } }