Skip to content

Commit

Permalink
[Core] No edge ngram decomposition in termVectors
Browse files Browse the repository at this point in the history
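When computing term vectors, analyze the edge ngram field with the standard
analyzer (through 'per_field_analyzer') instead of its edge ngram analyzer.
This avoids false 'exact' matches on leading parts of a misspelled word.
The logic is similar to the use of a distinct 'search_analyzer' on
fields using 'standard_edge_ngram' as 'analyzer'.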
  • Loading branch information
rbayet committed Oct 8, 2024
1 parent b79e5dc commit e6a1acb
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ private function getTermVectors(RequestInterface $request)
MappingInterface::DEFAULT_SPELLING_FIELD => $request->getQueryText(),
],
];
$perFieldAnalyzer = [];

if ($request->isUsingReference()) {
$doc['fields'][] = MappingInterface::DEFAULT_REFERENCE_FIELD . "." . FieldInterface::ANALYZER_REFERENCE;
Expand All @@ -172,9 +173,15 @@ private function getTermVectors(RequestInterface $request)

if ($request->isUsingEdgeNgram()) {
$doc['fields'][] = MappingInterface::DEFAULT_EDGE_NGRAM_FIELD . "." . FieldInterface::ANALYZER_EDGE_NGRAM;
$perFieldAnalyzer[MappingInterface::DEFAULT_EDGE_NGRAM_FIELD . "." . FieldInterface::ANALYZER_EDGE_NGRAM]
= FieldInterface::ANALYZER_STANDARD;
$doc['doc'][MappingInterface::DEFAULT_EDGE_NGRAM_FIELD] = $request->getQueryText();
}

if (!empty($perFieldAnalyzer)) {
$doc['per_field_analyzer'] = $perFieldAnalyzer;
}

$docs = [];

// Compute the mtermvector query on all indices.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,10 @@ public function testEdgeNgramTermVectorsParams()
MappingInterface::DEFAULT_SEARCH_FIELD . "." . FieldInterface::ANALYZER_WHITESPACE,
MappingInterface::DEFAULT_EDGE_NGRAM_FIELD . "." . FieldInterface::ANALYZER_EDGE_NGRAM,
],
'per_field_analyzer' => [
MappingInterface::DEFAULT_EDGE_NGRAM_FIELD . "." . FieldInterface::ANALYZER_EDGE_NGRAM
=> FieldInterface::ANALYZER_STANDARD,
],
'doc' => [
MappingInterface::DEFAULT_SEARCH_FIELD => $queryText,
MappingInterface::DEFAULT_SPELLING_FIELD => $queryText,
Expand Down Expand Up @@ -243,6 +247,10 @@ public function testReferenceAndEdgeNgramTermVectorsParams()
MappingInterface::DEFAULT_REFERENCE_FIELD . "." . FieldInterface::ANALYZER_REFERENCE,
MappingInterface::DEFAULT_EDGE_NGRAM_FIELD . "." . FieldInterface::ANALYZER_EDGE_NGRAM,
],
'per_field_analyzer' => [
MappingInterface::DEFAULT_EDGE_NGRAM_FIELD . "." . FieldInterface::ANALYZER_EDGE_NGRAM
=> FieldInterface::ANALYZER_STANDARD,
],
'doc' => [
MappingInterface::DEFAULT_SEARCH_FIELD => $queryText,
MappingInterface::DEFAULT_SPELLING_FIELD => $queryText,
Expand Down

0 comments on commit e6a1acb

Please sign in to comment.