From 91540c6c63a513850ccfb7d662a93d215e565ecf Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Tue, 3 Oct 2023 18:22:39 +0900 Subject: [PATCH] Refactor the string_or_struct logic and enables it for TermQuery. (#3906) * Refactor the string_or_struct logic and enables it for TermQuery. Closes #3900 * Allowing integers in term queries. --- .../elastic_query_dsl/match_bool_prefix.rs | 14 +- .../elastic_query_dsl/match_phrase_query.rs | 77 ++-------- .../src/elastic_query_dsl/match_query.rs | 77 ++-------- .../src/elastic_query_dsl/mod.rs | 2 + .../src/elastic_query_dsl/string_or_struct.rs | 95 ++++++++++++ .../src/elastic_query_dsl/term_query.rs | 69 ++++++++- .../es_compatibility/0006-term_query.yaml | 144 ++++++++++++++++++ .../es_compatibility/0009-bool_query.yaml | 2 + .../es_compatibility/0016-misc-query.yaml | 3 +- 9 files changed, 343 insertions(+), 140 deletions(-) create mode 100644 quickwit/quickwit-query/src/elastic_query_dsl/string_or_struct.rs diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/match_bool_prefix.rs b/quickwit/quickwit-query/src/elastic_query_dsl/match_bool_prefix.rs index d613f0c0e54..36a35536817 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/match_bool_prefix.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/match_bool_prefix.rs @@ -19,7 +19,8 @@ use serde::Deserialize; -use crate::elastic_query_dsl::match_query::{MatchQueryParams, MatchQueryParamsForDeserialization}; +use super::StringOrStructForSerialization; +use crate::elastic_query_dsl::match_query::MatchQueryParams; use crate::elastic_query_dsl::{default_max_expansions, ConvertableToQueryAst}; use crate::query_ast::{FullTextParams, FullTextQuery, QueryAst}; use crate::OneFieldMap; @@ -27,10 +28,7 @@ use crate::OneFieldMap; /// `MatchBoolPrefixQuery` as defined in /// #[derive(Deserialize, Clone, Eq, PartialEq, Debug)] -#[serde( - from = "OneFieldMap", - into = "OneFieldMap" -)] +#[serde(from = "OneFieldMap>")] pub(crate) struct MatchBoolPrefixQuery { pub(crate) field: String, pub(crate) params: MatchQueryParams, @@ -54,8 +52,10 @@ impl ConvertableToQueryAst for MatchBoolPrefixQuery { } } -impl From> for MatchBoolPrefixQuery { - fn from(match_query_params: OneFieldMap) -> Self { +impl From>> for MatchBoolPrefixQuery { + fn from( + match_query_params: OneFieldMap>, + ) -> Self { let OneFieldMap { field, value } = match_query_params; MatchBoolPrefixQuery { field, diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs index 48dbbd009e9..07a874db2ea 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/match_phrase_query.rs @@ -17,22 +17,16 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -use std::fmt; +use serde::Deserialize; -use serde::de::{self, MapAccess, Visitor}; -use serde::{Deserialize, Deserializer}; - -use crate::elastic_query_dsl::ConvertableToQueryAst; +use crate::elastic_query_dsl::{ConvertableToQueryAst, StringOrStructForSerialization}; use crate::query_ast::{FullTextMode, FullTextParams, FullTextQuery, QueryAst}; use crate::{MatchAllOrNone, OneFieldMap}; /// `MatchPhraseQuery` as defined in /// #[derive(Deserialize, Clone, Eq, PartialEq, Debug)] -#[serde( - from = "OneFieldMap", - into = "OneFieldMap" -)] +#[serde(from = "OneFieldMap>")] pub(crate) struct MatchPhraseQuery { pub(crate) field: String, pub(crate) params: MatchPhraseQueryParams, @@ -67,36 +61,12 @@ impl ConvertableToQueryAst for MatchPhraseQuery { } } -// -------------- -// -// Below is the Deserialization code -// The difficulty here is to support the two following formats: -// -// `{"field": {"query": "my query", "default_operator": "OR"}}` -// `{"field": "my query"}` -// -// We don't use untagged enum to support this, in order to keep good errors. -// -// The code below is adapted from solution described here: https://serde.rs/string-or-struct.html - -#[derive(Deserialize)] -#[serde(transparent)] -struct MatchPhraseQueryParamsForDeserialization { - #[serde(deserialize_with = "string_or_struct")] - inner: MatchPhraseQueryParams, -} - -impl From for OneFieldMap { - fn from(match_phrase_query: MatchPhraseQuery) -> OneFieldMap { - OneFieldMap { - field: match_phrase_query.field, - value: match_phrase_query.params, - } - } -} - -impl From> for MatchPhraseQuery { - fn from(match_query_params: OneFieldMap) -> Self { +impl From>> + for MatchPhraseQuery +{ + fn from( + match_query_params: OneFieldMap>, + ) -> Self { let OneFieldMap { field, value } = match_query_params; MatchPhraseQuery { field, @@ -105,36 +75,17 @@ impl From> for MatchPhrase } } -struct MatchQueryParamsStringOrStructVisitor; - -impl<'de> Visitor<'de> for MatchQueryParamsStringOrStructVisitor { - type Value = MatchPhraseQueryParams; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("string or map containing the parameters of a match query.") - } - - fn visit_str(self, query: &str) -> Result - where E: serde::de::Error { - Ok(MatchPhraseQueryParams { - query: query.to_string(), +impl From for MatchPhraseQueryParams { + fn from(query: String) -> MatchPhraseQueryParams { + MatchPhraseQueryParams { + query, zero_terms_query: Default::default(), analyzer: None, slop: 0, - }) - } - - fn visit_map(self, map: M) -> Result - where M: MapAccess<'de> { - Deserialize::deserialize(de::value::MapAccessDeserializer::new(map)) + } } } -fn string_or_struct<'de, D>(deserializer: D) -> Result -where D: Deserializer<'de> { - deserializer.deserialize_any(MatchQueryParamsStringOrStructVisitor) -} - #[cfg(test)] mod tests { use super::*; diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs index b05e6125958..60f03d9c352 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs @@ -17,22 +17,18 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -use std::fmt; +use serde::Deserialize; -use serde::de::{self, MapAccess, Visitor}; -use serde::{Deserialize, Deserializer}; - -use crate::elastic_query_dsl::{ConvertableToQueryAst, ElasticQueryDslInner}; +use crate::elastic_query_dsl::{ + ConvertableToQueryAst, ElasticQueryDslInner, StringOrStructForSerialization, +}; use crate::query_ast::{FullTextParams, FullTextQuery, QueryAst}; use crate::{BooleanOperand, MatchAllOrNone, OneFieldMap}; /// `MatchQuery` as defined in /// #[derive(Deserialize, Clone, Eq, PartialEq, Debug)] -#[serde( - from = "OneFieldMap", - into = "OneFieldMap" -)] +#[serde(from = "OneFieldMap>")] pub struct MatchQuery { pub(crate) field: String, pub(crate) params: MatchQueryParams, @@ -74,36 +70,10 @@ impl From for ElasticQueryDslInner { } } -// -------------- -// -// Below is the Deserialization code -// The difficulty here is to support the two following formats: -// -// `{"field": {"query": "my query", "default_operator": "OR"}}` -// `{"field": "my query"}` -// -// We don't use untagged enum to support this, in order to keep good errors. -// -// The code below is adapted from solution described here: https://serde.rs/string-or-struct.html - -#[derive(Deserialize)] -#[serde(transparent)] -pub(crate) struct MatchQueryParamsForDeserialization { - #[serde(deserialize_with = "string_or_struct")] - pub(crate) inner: MatchQueryParams, -} - -impl From for OneFieldMap { - fn from(match_query: MatchQuery) -> OneFieldMap { - OneFieldMap { - field: match_query.field, - value: match_query.params, - } - } -} - -impl From> for MatchQuery { - fn from(match_query_params: OneFieldMap) -> Self { +impl From>> for MatchQuery { + fn from( + match_query_params: OneFieldMap>, + ) -> Self { let OneFieldMap { field, value } = match_query_params; MatchQuery { field, @@ -112,36 +82,17 @@ impl From> for MatchQuery { } } -struct MatchQueryParamsStringOrStructVisitor; - -impl<'de> Visitor<'de> for MatchQueryParamsStringOrStructVisitor { - type Value = MatchQueryParams; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("string or map containing the parameters of a match query.") - } - - fn visit_str(self, query: &str) -> Result - where E: serde::de::Error { - Ok(MatchQueryParams { - query: query.to_string(), +impl From for MatchQueryParams { + fn from(query: String) -> MatchQueryParams { + MatchQueryParams { + query, zero_terms_query: Default::default(), operator: Default::default(), _lenient: false, - }) - } - - fn visit_map(self, map: M) -> Result - where M: MapAccess<'de> { - Deserialize::deserialize(de::value::MapAccessDeserializer::new(map)) + } } } -fn string_or_struct<'de, D>(deserializer: D) -> Result -where D: Deserializer<'de> { - deserializer.deserialize_any(MatchQueryParamsStringOrStructVisitor) -} - #[cfg(test)] mod tests { use super::*; diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs index 6eef160cde6..e1bab738d48 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs @@ -29,6 +29,7 @@ mod one_field_map; mod phrase_prefix_query; mod query_string_query; mod range_query; +mod string_or_struct; mod term_query; mod terms_query; @@ -37,6 +38,7 @@ pub use one_field_map::OneFieldMap; use phrase_prefix_query::MatchPhrasePrefixQuery; pub(crate) use query_string_query::QueryStringQuery; use range_query::RangeQuery; +pub(crate) use string_or_struct::StringOrStructForSerialization; use term_query::TermQuery; use crate::elastic_query_dsl::exists_query::ExistsQuery; diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/string_or_struct.rs b/quickwit/quickwit-query/src/elastic_query_dsl/string_or_struct.rs new file mode 100644 index 00000000000..3695143d31c --- /dev/null +++ b/quickwit/quickwit-query/src/elastic_query_dsl/string_or_struct.rs @@ -0,0 +1,95 @@ +// Copyright (C) 2023 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use std::fmt; +use std::marker::PhantomData; + +use serde::de::{MapAccess, Visitor}; +use serde::{de, Deserialize, Deserializer}; + +/// The point of `StringOrStructForSerialization` is to support +/// the two following formats for various queries. +/// +/// `{"field": {"query": "my query", "default_operator": "OR"}}` +/// +/// and the shorter. +/// `{"field": "my query"}` +/// +/// If a integer is passed, we cast it to string. Floats are not supported. +/// +/// We don't use untagged enum to support this, in order to keep good errors. +/// +/// The code below is adapted from solution described here: https://serde.rs/string-or-struct.html +#[derive(Deserialize)] +#[serde(transparent)] +pub(crate) struct StringOrStructForSerialization +where + T: From, + for<'de2> T: Deserialize<'de2>, +{ + #[serde(deserialize_with = "string_or_struct")] + pub inner: T, +} + +struct StringOrStructVisitor { + phantom_data: PhantomData, +} + +fn string_or_struct<'de, D, T>(deserializer: D) -> Result +where + D: Deserializer<'de>, + T: From + Deserialize<'de>, +{ + deserializer.deserialize_any(StringOrStructVisitor { + phantom_data: Default::default(), + }) +} + +impl<'de, T> Visitor<'de> for StringOrStructVisitor +where + T: From, + T: Deserialize<'de>, +{ + type Value = T; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + let type_str = std::any::type_name::(); + formatter.write_str(&format!("string or map to deserialize {type_str}.")) + } + + fn visit_i64(self, v: i64) -> Result + where E: de::Error { + self.visit_str(&v.to_string()) + } + + fn visit_u64(self, v: u64) -> Result + where E: de::Error { + self.visit_str(&v.to_string()) + } + + fn visit_str(self, query: &str) -> Result + where E: serde::de::Error { + Ok(T::from(query.to_string())) + } + + fn visit_map(self, map: M) -> Result + where M: MapAccess<'de> { + Deserialize::deserialize(de::value::MapAccessDeserializer::new(map)) + } +} diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs index 93ed1294aa3..2ae70d6a9a2 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/term_query.rs @@ -17,18 +17,61 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -use serde::Deserialize; +use serde::{Deserialize, Deserializer, Serialize}; +use super::StringOrStructForSerialization; use crate::elastic_query_dsl::one_field_map::OneFieldMap; use crate::elastic_query_dsl::{ConvertableToQueryAst, ElasticQueryDslInner}; use crate::not_nan_f32::NotNaNf32; use crate::query_ast::{self, QueryAst}; -pub type TermQuery = OneFieldMap; +#[derive(Deserialize, Debug, PartialEq, Eq, Clone)] +#[serde(from = "OneFieldMap>")] +pub struct TermQuery { + pub field: String, + pub value: TermQueryParams, +} + +impl From>> for TermQuery { + fn from(one_field_map: OneFieldMap>) -> Self { + TermQuery { + field: one_field_map.field, + value: one_field_map.value.inner, + } + } +} + +impl From for TermQueryParams { + fn from(query: String) -> TermQueryParams { + TermQueryParams { + value: query, + boost: None, + } + } +} + +#[derive(Deserialize)] +#[serde(untagged)] +enum TermValue { + I64(i64), + U64(u64), + Str(String), +} + +fn deserialize_term_value<'de, D>(deserializer: D) -> Result +where D: Deserializer<'de> { + let term_value = TermValue::deserialize(deserializer)?; + match term_value { + TermValue::I64(i64) => Ok(i64.to_string()), + TermValue::U64(u64) => Ok(u64.to_string()), + TermValue::Str(str) => Ok(str), + } +} -#[derive(PartialEq, Eq, Debug, Deserialize, Clone)] +#[derive(PartialEq, Eq, Debug, Serialize, Deserialize, Clone)] #[serde(deny_unknown_fields)] -pub struct TermQueryValue { +pub struct TermQueryParams { + #[serde(deserialize_with = "deserialize_term_value")] pub value: String, #[serde(default)] pub boost: Option, @@ -37,7 +80,7 @@ pub struct TermQueryValue { pub fn term_query_from_field_value(field: impl ToString, value: impl ToString) -> TermQuery { TermQuery { field: field.to_string(), - value: TermQueryValue { + value: TermQueryParams { value: value.to_string(), boost: None, }, @@ -52,7 +95,7 @@ impl From for ElasticQueryDslInner { impl ConvertableToQueryAst for TermQuery { fn convert_to_query_ast(self) -> anyhow::Result { - let TermQueryValue { value, boost } = self.value; + let TermQueryParams { value, boost } = self.value; let term_ast: QueryAst = query_ast::TermQuery { field: self.field, value, @@ -75,4 +118,18 @@ mod tests { &term_query_from_field_value("product_id", "61809") ); } + + #[test] + fn test_term_query_deserialization_in_short_format() { + let term_query: TermQuery = serde_json::from_str( + r#"{ + "product_id": "61809" + }"#, + ) + .unwrap(); + assert_eq!( + &term_query, + &term_query_from_field_value("product_id", "61809") + ); + } } diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0006-term_query.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0006-term_query.yaml index 03231257e13..14599566959 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/0006-term_query.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0006-term_query.yaml @@ -16,3 +16,147 @@ expected: relation: "eq" hits: $expect: "len(val) == 3" +--- +# Terms must be pushed in their form post tokenization +params: + size: 0 +json: + query: + term: + type: + # this does not match because push event has been lowercased by the tokenizer. + value: "PushEvent" +expected: + hits: + total: + value: 0 + relation: "eq" +--- +params: + size: 0 +json: + query: + term: + type: + value: "pushevent" +expected: + hits: + total: + value: 60 + relation: "eq" +--- +params: + size: 0 +# Testing the format without the "value" object +json: + query: + term: + type: "pushevent" +expected: + hits: + total: + value: 60 + relation: "eq" +# Also testing numbers, and numbers as string in the JSON query +--- +engines: ["elasticsearch"] +params: + size: 0 +json: + query: + term: + actor.id: 1762355 +expected: + hits: + total: + value: 1 + relation: "eq" +--- +params: + size: 0 +json: + query: + term: + actor.id: "1762355" +expected: + hits: + total: + value: 1 + relation: "eq" +--- +params: + size: 0 +json: + query: + term: + actor.id: + value: 1762355 +expected: + hits: + total: + value: 1 + relation: "eq" +--- +params: + size: 0 +json: + query: + term: + actor.id: + value: "1762355" +expected: + hits: + total: + value: 1 + relation: "eq" +--- +# id is a text field +params: + size: 0 +json: + query: + term: + id: + value: "2549961272" +expected: + hits: + total: + value: 1 + relation: "eq" +--- +params: + size: 0 +json: + query: + term: + id: + value: 2549961272 +expected: + hits: + total: + value: 1 + relation: "eq" +--- +params: + size: 0 +json: + query: + term: + id: 2549961272 +expected: + hits: + total: + value: 1 + relation: "eq" +--- +params: + size: 0 +json: + query: + term: + id: "2549961272" +expected: + hits: + total: + value: 1 + relation: "eq" diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0009-bool_query.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0009-bool_query.yaml index a7db3d90c56..b45ed7b9817 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/0009-bool_query.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0009-bool_query.yaml @@ -107,6 +107,8 @@ expected: value: 100 --- # Support null values +# This format is not supported by Elasticsearch +engines: ["quickwit"] json: query: bool: diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0016-misc-query.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0016-misc-query.yaml index c418b76bc76..212911e5d92 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/0016-misc-query.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0016-misc-query.yaml @@ -80,6 +80,7 @@ expected: total: value: 3 --- +engines: ["quickwit"] json: query: exists: @@ -93,7 +94,7 @@ expected: json: query: exists: - field: public + field: public expected: hits: total: