From ae6cfde22c95e1188ad8b9aa503e0f25cdaabdc9 Mon Sep 17 00:00:00 2001 From: Romain Ruaud Date: Tue, 25 Apr 2023 17:36:20 +0200 Subject: [PATCH] Improve MLT query. --- .../Request/Query/Builder/MoreLikeThis.php | 2 + .../Search/Request/Query/MoreLikeThis.php | 56 +++++++++++++++++-- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/src/module-elasticsuite-core/Search/Adapter/Elasticsuite/Request/Query/Builder/MoreLikeThis.php b/src/module-elasticsuite-core/Search/Adapter/Elasticsuite/Request/Query/Builder/MoreLikeThis.php index da88a5aea..0d2dbbb3f 100644 --- a/src/module-elasticsuite-core/Search/Adapter/Elasticsuite/Request/Query/Builder/MoreLikeThis.php +++ b/src/module-elasticsuite-core/Search/Adapter/Elasticsuite/Request/Query/Builder/MoreLikeThis.php @@ -45,6 +45,8 @@ public function buildQuery(QueryInterface $query) 'min_doc_freq' => $query->getMinDocFreq(), 'max_doc_freq' => $query->getMaxDocFreq(), 'max_query_terms' => $query->getMaxQueryTerms(), + 'min_word_length' => $query->getMinWordLength(), + 'max_word_length' => $query->getMaxWordLength(), 'include' => $query->includeOriginalDocs(), ]; diff --git a/src/module-elasticsuite-core/Search/Request/Query/MoreLikeThis.php b/src/module-elasticsuite-core/Search/Request/Query/MoreLikeThis.php index 3c5449729..812880682 100644 --- a/src/module-elasticsuite-core/Search/Request/Query/MoreLikeThis.php +++ b/src/module-elasticsuite-core/Search/Request/Query/MoreLikeThis.php @@ -53,7 +53,17 @@ class MoreLikeThis implements QueryInterface /** * @var integer */ - const DEFAULT_MAX_DOC_FREQ = 100; + const DEFAULT_MAX_DOC_FREQ = 2147483647; + + /** + * @var integer + */ + const DEFAULT_MIN_WORD_LENGTH = 0; + + /** + * @var integer + */ + const DEFAULT_MAX_WORD_LENGTH = 0; /** * @var string @@ -100,6 +110,16 @@ class MoreLikeThis implements QueryInterface */ private $maxDocFreq; + /** + * @var integer + */ + private $minWordLength; + + /** + * @var integer + */ + private $maxWordLength; + /** * @var integer 
*/ @@ -124,6 +144,8 @@ class MoreLikeThis implements QueryInterface * @param integer $minDocFreq Minimum doc freq for a term to be considered. * @param integer $maxDocFreq Maximum doc freq for a term to be considered. * @param integer $maxQueryTerms Maximum number of term in generated queries. + * @param integer $minWordLength Minimum length of word to consider. + * @param integer $maxWordLength Maximum length of word to consider. * @param integer $includeOriginalDocs Include original doc in the result set. * @param string $name Query name. * @param integer $boost Query boost. @@ -137,6 +159,8 @@ public function __construct( $minDocFreq = self::DEFAULT_MIN_DOC_FREQ, $maxDocFreq = self::DEFAULT_MAX_DOC_FREQ, $maxQueryTerms = self::DEFAULT_MAX_QUERY_TERMS, + $minWordLength = self::DEFAULT_MIN_WORD_LENGTH, + $maxWordLength = self::DEFAULT_MAX_WORD_LENGTH, $includeOriginalDocs = false, $name = null, $boost = QueryInterface::DEFAULT_BOOST_VALUE @@ -152,6 +176,8 @@ public function __construct( $this->name = $name; $this->boost = $boost; $this->includeOriginalDocs = $includeOriginalDocs; + $this->minWordLength = $minWordLength; + $this->maxWordLength = $maxWordLength; } /** @@ -225,7 +251,7 @@ public function getBoostTerms() */ public function getMinTermFreq() { - return $this->minTermFreq; + return (int) $this->minTermFreq; } /** @@ -235,7 +261,7 @@ public function getMinTermFreq() */ public function getMinDocFreq() { - return $this->minDocFreq; + return (int) $this->minDocFreq; } /** @@ -245,7 +271,7 @@ public function getMinDocFreq() */ public function getMaxDocFreq() { - return $this->maxDocFreq; + return (int) $this->maxDocFreq; } /** @@ -255,7 +281,7 @@ public function getMaxDocFreq() */ public function getMaxQueryTerms() { - return $this->maxQueryTerms; + return (int) $this->maxQueryTerms; } /** @@ -267,4 +293,24 @@ public function includeOriginalDocs() { return $this->includeOriginalDocs; } + + /** + * Minimum word length for a term to be considered. 
+ * + * @return integer + */ + public function getMinWordLength() + { + return (int) $this->minWordLength; + } + + /** + * Maximum word length for a term to be considered. + * + * @return integer + */ + public function getMaxWordLength() + { + return (int) $this->maxWordLength; + } }