From e6a1acb6e67227ee90b7013ab479530c414bcc9a Mon Sep 17 00:00:00 2001
From: Richard BAYET
Date: Tue, 8 Oct 2024 11:17:39 +0200
Subject: [PATCH] [Core] No edge ngram decomposition in termVectors

To avoid false 'exact' matches on leading parts of a misspelled word.
The logic is similar to the use of a distinct 'search_analyzer' on fields using 'standard_edge_ngram' as 'analyzer'.
---
 .../Search/Adapter/Elasticsuite/Spellchecker.php          | 7 +++++++
 .../Unit/Search/Adapter/Elasticsuite/SpellcheckerTest.php | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/src/module-elasticsuite-core/Search/Adapter/Elasticsuite/Spellchecker.php b/src/module-elasticsuite-core/Search/Adapter/Elasticsuite/Spellchecker.php
index d324ab5e9..a284a9628 100644
--- a/src/module-elasticsuite-core/Search/Adapter/Elasticsuite/Spellchecker.php
+++ b/src/module-elasticsuite-core/Search/Adapter/Elasticsuite/Spellchecker.php
@@ -164,6 +164,7 @@ private function getTermVectors(RequestInterface $request)
                 MappingInterface::DEFAULT_SPELLING_FIELD => $request->getQueryText(),
             ],
         ];
+        $perFieldAnalyzer = [];
 
         if ($request->isUsingReference()) {
             $doc['fields'][] = MappingInterface::DEFAULT_REFERENCE_FIELD . "." . FieldInterface::ANALYZER_REFERENCE;
@@ -172,9 +173,15 @@ private function getTermVectors(RequestInterface $request)
 
         if ($request->isUsingEdgeNgram()) {
             $doc['fields'][] = MappingInterface::DEFAULT_EDGE_NGRAM_FIELD . "." . FieldInterface::ANALYZER_EDGE_NGRAM;
+            $perFieldAnalyzer[MappingInterface::DEFAULT_EDGE_NGRAM_FIELD . "." . FieldInterface::ANALYZER_EDGE_NGRAM]
+                = FieldInterface::ANALYZER_STANDARD;
             $doc['doc'][MappingInterface::DEFAULT_EDGE_NGRAM_FIELD] = $request->getQueryText();
         }
 
+        if (!empty($perFieldAnalyzer)) {
+            $doc['per_field_analyzer'] = $perFieldAnalyzer;
+        }
+
         $docs = [];
 
         // Compute the mtermvector query on all indices.
diff --git a/src/module-elasticsuite-core/Test/Unit/Search/Adapter/Elasticsuite/SpellcheckerTest.php b/src/module-elasticsuite-core/Test/Unit/Search/Adapter/Elasticsuite/SpellcheckerTest.php
index a6c5e1939..ce5da6713 100644
--- a/src/module-elasticsuite-core/Test/Unit/Search/Adapter/Elasticsuite/SpellcheckerTest.php
+++ b/src/module-elasticsuite-core/Test/Unit/Search/Adapter/Elasticsuite/SpellcheckerTest.php
@@ -184,6 +184,10 @@ public function testEdgeNgramTermVectorsParams()
                             MappingInterface::DEFAULT_SEARCH_FIELD . "." . FieldInterface::ANALYZER_WHITESPACE,
                             MappingInterface::DEFAULT_EDGE_NGRAM_FIELD . "." . FieldInterface::ANALYZER_EDGE_NGRAM,
                         ],
+                        'per_field_analyzer' => [
+                            MappingInterface::DEFAULT_EDGE_NGRAM_FIELD . "." . FieldInterface::ANALYZER_EDGE_NGRAM
+                                => FieldInterface::ANALYZER_STANDARD,
+                        ],
                         'doc' => [
                             MappingInterface::DEFAULT_SEARCH_FIELD => $queryText,
                             MappingInterface::DEFAULT_SPELLING_FIELD => $queryText,
@@ -243,6 +247,10 @@ public function testReferenceAndEdgeNgramTermVectorsParams()
                             MappingInterface::DEFAULT_REFERENCE_FIELD . "." . FieldInterface::ANALYZER_REFERENCE,
                             MappingInterface::DEFAULT_EDGE_NGRAM_FIELD . "." . FieldInterface::ANALYZER_EDGE_NGRAM,
                         ],
+                        'per_field_analyzer' => [
+                            MappingInterface::DEFAULT_EDGE_NGRAM_FIELD . "." . FieldInterface::ANALYZER_EDGE_NGRAM
+                                => FieldInterface::ANALYZER_STANDARD,
+                        ],
                         'doc' => [
                             MappingInterface::DEFAULT_SEARCH_FIELD => $queryText,
                             MappingInterface::DEFAULT_SPELLING_FIELD => $queryText,