Skip to content

Commit

Permalink
feat: support lucene query converter support search with long value o…
Browse files Browse the repository at this point in the history
…n string field
  • Loading branch information
QuadStingray committed Mar 4, 2024
1 parent 3df7127 commit 9e73ce1
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 92 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ import scala.jdk.CollectionConverters._

object LuceneQueryConverter extends LazyLogging {

/**
 * Converts a parsed Lucene query into a MongoDB filter.
 *
 * @param query                    the Lucene query to translate
 * @param searchWithValueAndString forwarded to the term and term-range translation; when `true`, a term whose
 *                                 text converts to a non-string value is matched against both the converted
 *                                 value and the raw text. Defaults to `false`, so existing single-argument
 *                                 callers keep their behaviour.
 * @return the filter built by `getMongoDbSearchMap`
 *         (NOTE(review): relies on a Map-to-Bson conversion being in scope — confirm against the imports)
 */
def toDocument(query: Query, searchWithValueAndString: Boolean = false): Bson = {
  getMongoDbSearchMap(query, negated = false, searchWithValueAndString)
}

def parse(queryString: String, defaultField: String): Query = {
Expand All @@ -30,59 +30,59 @@ object LuceneQueryConverter extends LazyLogging {
query
}

/**
 * Translates one Lucene query node into a MongoDB filter map by dispatching on the concrete query type.
 *
 * @param query                    the query node to translate
 * @param negated                  whether the node is prohibited and must be emitted in its negated form
 * @param searchWithValueAndString passed on to the boolean, term-range and term translations
 * @return an immutable snapshot of the filter; it is empty when the query type is not supported
 */
private def getMongoDbSearchMap(query: Query, negated: Boolean, searchWithValueAndString: Boolean): Map[String, Any] = {
  val searchMapResponse = mutable.Map[String, Any]()
  query match {
    case booleanQuery: BooleanQuery =>
      appendBooleanQueryToSearchMap(searchMapResponse, booleanQuery, searchWithValueAndString)
    case termRangeQuery: TermRangeQuery =>
      appendTermRangeQueryToSearchMap(negated, searchMapResponse, termRangeQuery, searchWithValueAndString)
    case termQuery: TermQuery =>
      appendTermQueryToSearchMap(negated, searchMapResponse, termQuery, searchWithValueAndString)
    case prefixQuery: PrefixQuery =>
      appendPrefixQueryToSearchMap(negated, searchMapResponse, prefixQuery)
    case wildcardQuery: WildcardQuery =>
      appendWildCardQueryToSearchMap(negated, searchMapResponse, wildcardQuery)
    case phraseQuery: PhraseQuery =>
      appendPhraseQueryToSearchMap(negated, searchMapResponse, phraseQuery)
    case unsupported: Any =>
      // Unsupported types add nothing to the filter. They are only logged when the class has a
      // non-blank simple name, so types without one are skipped silently.
      Option(unsupported.getClass.getSimpleName)
        .filterNot(_.trim.isEmpty)
        .foreach(simpleName => logger.error(s"Unexpected QueryType <$simpleName>"))
  }
  searchMapResponse.toMap
}
private def appendBooleanQueryToSearchMap(searchMapResponse: mutable.Map[String, Any], booleanQuery: BooleanQuery): Unit = {

private def appendBooleanQueryToSearchMap(
searchMapResponse: mutable.Map[String, Any],
booleanQuery: BooleanQuery,
searchWithValueAndString: Boolean
): Unit = {
val subQueries = booleanQuery.clauses().asScala
val listOfAnd = ArrayBuffer[Map[String, Any]]()
val listOfOr = ArrayBuffer[Map[String, Any]]()
var nextTypeAnd = true
subQueries
.foreach(c => {
val queryMap = getMongoDbSearchMap(c.getQuery, c.isProhibited)
var thisTypeAnd = true
subQueries.foreach(c => {
val queryMap = getMongoDbSearchMap(c.getQuery, c.isProhibited, searchWithValueAndString)
var thisTypeAnd = true

if (c.getOccur == Occur.MUST) {
thisTypeAnd = true
}
else if (c.getOccur == Occur.SHOULD) {
thisTypeAnd = false
}
else if (c.getOccur == Occur.MUST_NOT) {
// searchMapResponse ++= queryMap
}
else {
logger.error(s"Unexpected Occur <${c.getOccur.name()}>")
throw new NotSupportedException(s"${c.getOccur.name()} currently not supported")
}
if (c.getOccur == Occur.MUST) {
thisTypeAnd = true
}
else if (c.getOccur == Occur.SHOULD) {
thisTypeAnd = false
}
else if (c.getOccur == Occur.MUST_NOT) {
// searchMapResponse ++= queryMap
}
else {
logger.error(s"Unexpected Occur <${c.getOccur.name()}>")
throw new NotSupportedException(s"${c.getOccur.name()} currently not supported")
}

if (nextTypeAnd && thisTypeAnd) {
listOfAnd += queryMap
}
else {
listOfOr += queryMap
}
nextTypeAnd = thisTypeAnd
})
if (nextTypeAnd && thisTypeAnd) {
listOfAnd += queryMap
}
else {
listOfOr += queryMap
}
nextTypeAnd = thisTypeAnd
})

if (listOfAnd.nonEmpty) {
searchMapResponse.put("$and", listOfAnd.toList)
Expand All @@ -91,25 +91,77 @@ object LuceneQueryConverter extends LazyLogging {
searchMapResponse.put("$or", listOfOr.toList)
}
}
/**
 * Adds the MongoDB filter for a Lucene term-range query to `searchMapResponse`.
 *
 * Each bound is run through `checkAndConvertValue`. When `searchWithValueAndString` is set and at least one
 * converted bound differs from its raw text, the range is emitted twice — once with the converted bounds and
 * once with the raw strings — joined by `$or` (or by `$and` of two `$not` clauses when negated). Otherwise a
 * single clause keyed by the field name is written.
 *
 * @param negated                  emit the range wrapped in `$not`
 * @param searchMapResponse        mutable filter map that receives the clause
 * @param termRangeQuery           the range query to translate
 * @param searchWithValueAndString also match the raw string bounds when conversion changed them
 */
private def appendTermRangeQueryToSearchMap(
    negated: Boolean,
    searchMapResponse: mutable.Map[String, Any],
    termRangeQuery: TermRangeQuery,
    searchWithValueAndString: Boolean
): Unit = {
  val field = termRangeQuery.getField

  // utf8ToString decodes exactly the referenced slice as UTF-8, unlike new String(bytes), which reads the
  // whole backing array with the platform charset. A null term denotes an open-ended bound.
  val lowerBoundString = Option(termRangeQuery.getLowerTerm).map(_.utf8ToString())
  val upperBoundString = Option(termRangeQuery.getUpperTerm).map(_.utf8ToString())
  val lowerBound       = lowerBoundString.map(checkAndConvertValue)
  val upperBound       = upperBoundString.map(checkAndConvertValue)

  // NOTE(review): bounds are always emitted as inclusive ($gte/$lte); includesLower/includesUpper are not consulted.
  def rangeOperators(lower: Option[Any], upper: Option[Any]): Map[String, Any] = {
    val bounds: Map[String, Any] =
      List(upper.map(value => "$lte" -> value), lower.map(value => "$gte" -> value)).flatten.toMap
    // Both bounds open: fall back to an existence check so the operator document is never empty.
    if (bounds.isEmpty) Map("$exists" -> true) else bounds
  }

  def clause(range: Map[String, Any]): (String, Map[String, Any]) =
    field -> (if (negated) Map("$not" -> range) else range)

  val inRangeSearch       = rangeOperators(lowerBound, upperBound)
  val inRangeStringSearch = rangeOperators(lowerBoundString, upperBoundString)
  val searchWithStringValue =
    searchWithValueAndString && (lowerBoundString != lowerBound || upperBoundString != upperBound)

  if (searchWithStringValue) {
    // Negation of "typed OR string" is "NOT typed AND NOT string".
    val combinator = if (negated) "$and" else "$or"
    searchMapResponse.put(combinator, List(Map(clause(inRangeSearch)), Map(clause(inRangeStringSearch))))
  }
  else {
    searchMapResponse += clause(inRangeSearch)
  }
}
/**
 * Adds the MongoDB filter for a single Lucene term to `searchMapResponse`.
 *
 * The term text is run through `checkAndConvertValue`. If `searchWithValueAndString` is set and the converted
 * value differs from the raw text, both are matched: `$or` of two `$eq` clauses, or `$and` of two `$ne`
 * clauses when negated. Otherwise a single `$eq`/`$ne` clause keyed by the field name is written.
 *
 * @param negated                  emit `$ne` instead of `$eq`
 * @param searchMapResponse        mutable filter map that receives the clause
 * @param termQuery                the term query to translate
 * @param searchWithValueAndString also match the raw term text when conversion changed it
 */
private def appendTermQueryToSearchMap(
    negated: Boolean,
    searchMapResponse: mutable.Map[String, Any],
    termQuery: TermQuery,
    searchWithValueAndString: Boolean
): Unit = {
  val field          = termQuery.getTerm.field()
  val rawText        = termQuery.getTerm.text()
  val convertedValue = checkAndConvertValue(rawText)
  val operator       = if (negated) "$ne" else "$eq"

  if (searchWithValueAndString && convertedValue != rawText) {
    // Negation of "typed OR string" is "NOT typed AND NOT string".
    val combinator = if (negated) "$and" else "$or"
    searchMapResponse.put(
      combinator,
      List(Map(field -> Map(operator -> convertedValue)), Map(field -> Map(operator -> rawText)))
    )
  }
  else {
    searchMapResponse.put(field, Map(operator -> convertedValue))
  }
}

private def appendPrefixQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], query: PrefixQuery): Unit = {
val searchValue = s"${checkAndConvertValue(query.getPrefix.text())}(.*?)"
val listOfSearches: List[Bson] = List(Map(query.getField -> generateRegexQuery(s"$searchValue", "i")))
Expand All @@ -120,6 +172,7 @@ object LuceneQueryConverter extends LazyLogging {
searchMapResponse ++= Map("$and" -> listOfSearches)
}
}

private def appendWildCardQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], query: WildcardQuery): Unit = {
val searchValue = checkAndConvertValue(query.getTerm.text().replace("*", "(.*?)"))
if (negated) {
Expand All @@ -129,20 +182,21 @@ object LuceneQueryConverter extends LazyLogging {
searchMapResponse.put(query.getField, generateRegexQuery(s"$searchValue", "i"))
}
}

/**
 * Adds the MongoDB filter for a Lucene phrase query to `searchMapResponse`.
 *
 * Every term of the phrase becomes a case-insensitive regex clause of the form `(.*?)term(.*?)` on the
 * term's field. The clauses are combined with `$and`, or with `$nor` when the phrase is negated, so each
 * term is matched on its own and term order is not enforced.
 *
 * @param negated           combine the per-term clauses with `$nor` instead of `$and`
 * @param searchMapResponse mutable filter map that receives the clause
 * @param query             the phrase query to translate
 */
private def appendPhraseQueryToSearchMap(negated: Boolean, searchMapResponse: mutable.Map[String, Any], query: PhraseQuery): Unit = {
  // NOTE(review): the term text is interpolated into the pattern without regex quoting — confirm that is intended.
  val listOfSearches = query.getTerms
    .map(term => Map(term.field() -> generateRegexQuery(s"(.*?)${checkAndConvertValue(term.text())}(.*?)", "i")))
    .toList
  val combinator = if (negated) "$nor" else "$and"
  searchMapResponse.put(combinator, listOfSearches)
}

/**
 * Builds the operator map for a regular-expression match.
 *
 * @param pattern the regular expression, stored under `$regex`
 * @param options the option flags, stored under `$options`
 * @return a two-entry map holding the pattern and its options
 */
private def generateRegexQuery(pattern: String, options: String): Map[String, String] =
  Map(
    "$regex"   -> pattern,
    "$options" -> options
  )

private def checkAndConvertValue(s: String): Any = {

def checkOrReturn[A <: Any](f: () => A): Option[A] = {
Expand All @@ -156,7 +210,7 @@ object LuceneQueryConverter extends LazyLogging {
}
}
catch {
case e: Exception => None
case _: Exception => None
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,11 @@ class MongoCampLuceneAnalyzer(stopWords: CharArraySet = CharArraySet.EMPTY_SET,
override protected def createComponents(fieldName: String): Analyzer.TokenStreamComponents = {
val src = new StandardTokenizer
src.setMaxTokenLength(maxTokenLength)
val tok: TokenStream = new StopFilter(src, stopwords)
val tok: TokenStream = new StopFilter(src, stopWords)
new Analyzer.TokenStreamComponents(
(r: Reader) => {
src.setMaxTokenLength(maxTokenLength)
src.setReader(r)

},
tok
)
Expand Down
2 changes: 1 addition & 1 deletion src/test/resources/json/people.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{ "_id" : { "$oid" : "5e9ef66185c0145fa5d3c447" }, "id" : { "$numberLong" : "0" }, "guid" : "a17be99a-8913-4bb6-8f14-16d4fa1b3559", "isActive" : true, "balance" : 3349.0, "picture" : "http://placehold.it/32x32", "age" : 25, "name" : "Cheryl Hoffman", "gender" : "female", "email" : "cherylhoffman@melbacor.com", "phone" : "+1 (942) 477-2284", "address" : "308 Just Court, Rose, Maine, 4477", "about" : "Adipisicing aliquip deserunt mollit veniam. Quis mollit cupidatat laboris dolor incididunt esse voluptate amet aute. Sit labore magna minim ex aliquip do labore ullamco labore labore.\r\n", "registered" : { "$date" : "2014-02-08T02:10:36.000+0000" }, "tags" : [ "esse", "sunt", "exercitation", "nostrud", "aliqua", "voluptate", "ullamco" ], "friends" : [ { "id" : { "$numberLong" : "0" }, "name" : "Castaneda Mccullough" }, { "id" : { "$numberLong" : "1" }, "name" : "Black Whitaker" }, { "id" : { "$numberLong" : "2" }, "name" : "Wendy Strong" }, { "id" : { "$numberLong" : "3" }, "name" : "Dixie Reilly" }, { "id" : { "$numberLong" : "4" }, "name" : "Deleon Turner" } ], "greeting" : "Hello, Cheryl Hoffman! You have 9 unread messages.", "favoriteFruit" : "apple" }
{ "_id" : { "$oid" : "5e9ef66185c0145fa5d3c447" }, "id" : { "$numberLong" : "0" }, "guid" : "a17be99a-8913-4bb6-8f14-16d4fa1b3559", "isActive" : true, "balance" : 3349.0, "picture" : "http://placehold.it/32x32", "age" : 25, "name" : "Cheryl Hoffman", "gender" : "female", "email" : "cherylhoffman@melbacor.com", "phone" : "+1 (942) 477-2284", "address" : "308 Just Court, Rose, Maine, 4477", "about" : "Adipisicing aliquip deserunt mollit veniam. Quis mollit cupidatat laboris dolor incididunt esse voluptate amet aute. Sit labore magna minim ex aliquip do labore ullamco labore labore.\r\n", "registered" : { "$date" : "2014-02-08T02:10:36.000+0000" }, "tags" : [ "esse", "sunt", "exercitation", "nostrud", "aliqua", "voluptate", "ullamco" ], "friends" : [ { "id" : { "$numberLong" : "0" }, "name" : "Castaneda Mccullough" }, { "id" : { "$numberLong" : "1" }, "name" : "Black Whitaker" }, { "id" : { "$numberLong" : "2" }, "name" : "Wendy Strong" }, { "id" : { "$numberLong" : "3" }, "name" : "Dixie Reilly" }, { "id" : { "$numberLong" : "4" }, "name" : "Deleon Turner" } ], "greeting" : "Hello, Cheryl Hoffman! You have 9 unread messages.", "favoriteFruit" : "apple", "stringNumber": "123" }
{ "_id" : { "$oid" : "5e9ef66185c0145fa5d3c448" }, "id" : { "$numberLong" : "1" }, "guid" : "19ebe4fe-f860-4cbc-ac0a-664a418e2173", "isActive" : true, "balance" : 2316.0, "picture" : "http://placehold.it/32x32", "age" : 25, "name" : "Bowen Leon", "gender" : "male", "email" : "bowenleon@inrt.com", "phone" : "+1 (904) 457-2017", "address" : "138 Miami Court, Urbana, Kansas, 1034", "about" : "Commodo in mollit laboris incididunt excepteur nulla cillum sunt do occaecat Lorem. Excepteur esse id magna pariatur irure anim officia exercitation veniam anim dolor. Sunt irure est dolore nisi nulla nulla. Nostrud aliquip exercitation ut adipisicing esse ullamco incididunt mollit laborum duis exercitation. Ipsum commodo excepteur nulla sit irure laboris magna ipsum Lorem.\r\n", "registered" : { "$date" : "2014-01-26T16:08:40.000+0000" }, "tags" : [ "ipsum", "qui", "proident", "sunt", "cillum", "veniam", "laboris" ], "friends" : [ { "id" : { "$numberLong" : "0" }, "name" : "Reyes Velasquez" }, { "id" : { "$numberLong" : "1" }, "name" : "Rosalie Hooper" }, { "id" : { "$numberLong" : "2" }, "name" : "Alyssa David" } ], "greeting" : "Hello, Bowen Leon! You have 9 unread messages.", "favoriteFruit" : "apple" }
{ "_id" : { "$oid" : "5e9ef66185c0145fa5d3c449" }, "id" : { "$numberLong" : "2" }, "guid" : "6ee53e07-2e61-48cd-9bc9-b3505a0438f3", "isActive" : false, "balance" : 1527.0, "picture" : "http://placehold.it/32x32", "age" : 40, "name" : "Cecilia Lynn", "gender" : "female", "email" : "cecilialynn@medicroix.com", "phone" : "+1 (875) 525-3138", "address" : "124 Herzl Street, Greenwich, Arkansas, 5309", "about" : "Esse adipisicing ipsum esse consectetur eu ad sunt sit culpa enim velit elit velit deserunt. Aliqua nulla et laboris nulla aute excepteur Lorem. Ut aliquip non excepteur exercitation consectetur anim est ex irure dolore ut. Consequat enim enim dolor excepteur mollit consectetur. Magna sunt reprehenderit est quis.\r\n", "registered" : { "$date" : "2014-02-21T23:13:05.000+0000" }, "tags" : [ "eiusmod", "minim", "magna", "est", "laborum", "nisi", "qui" ], "friends" : [ { "id" : { "$numberLong" : "0" }, "name" : "Erika Harmon" }, { "id" : { "$numberLong" : "1" }, "name" : "Horn Larsen" }, { "id" : { "$numberLong" : "2" }, "name" : "Gertrude Fuller" }, { "id" : { "$numberLong" : "3" }, "name" : "Spencer Hutchinson" }, { "id" : { "$numberLong" : "4" }, "name" : "Beryl Buckley" } ], "greeting" : "Hello, Cecilia Lynn! You have 7 unread messages.", "favoriteFruit" : "strawberry" }
{ "_id" : { "$oid" : "5e9ef66185c0145fa5d3c44a" }, "id" : { "$numberLong" : "3" }, "guid" : "a01c8bb6-95ac-4235-b6b3-475734f0dd92", "isActive" : false, "balance" : 2682.0, "picture" : "http://placehold.it/32x32", "age" : 24, "name" : "Sylvia Ortega", "gender" : "female", "email" : "sylviaortega@viagrand.com", "phone" : "+1 (983) 470-3157", "address" : "617 Vernon Avenue, Advance, Connecticut, 7787", "about" : "Tempor aliquip dolor excepteur proident ex magna commodo laboris. Ullamco ex esse excepteur nostrud. Duis ex anim pariatur dolore ut irure. Consequat non Lorem laborum esse anim magna consequat voluptate dolor elit. Mollit sint consequat ipsum minim id anim aute reprehenderit eu velit voluptate commodo.\r\n", "registered" : { "$date" : "2014-01-13T07:33:15.000+0000" }, "tags" : [ "ut", "culpa", "reprehenderit", "ad", "amet", "officia", "nostrud" ], "friends" : [ { "id" : { "$numberLong" : "0" }, "name" : "Ferrell Rhodes" }, { "id" : { "$numberLong" : "1" }, "name" : "Ana Guy" }, { "id" : { "$numberLong" : "2" }, "name" : "Rosanne Griffin" }, { "id" : { "$numberLong" : "3" }, "name" : "Morrow Adams" }, { "id" : { "$numberLong" : "4" }, "name" : "Keri White" }, { "id" : { "$numberLong" : "5" }, "name" : "Tracey Sykes" } ], "greeting" : "Hello, Sylvia Ortega! You have 9 unread messages.", "favoriteFruit" : "apple" }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ class PersonDAOSpec extends PersonSpecification with MongoImplicits {
}

"support columnNames" in {
  // 19 distinct columns are expected when sampling with 200 — the fixture's first record carries an
  // extra "stringNumber" field.
  val columnNames = PersonDAO.columnNames(200)
  columnNames.size mustEqual 19
}

"support results" in {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,12 @@ package dev.mongocamp.driver.mongodb.gridfs

import better.files.File
import dev.mongocamp.driver.mongodb._
import dev.mongocamp.driver.mongodb.test.TestDatabase._
import dev.mongocamp.driver.mongodb.model.ImageMetadata
import dev.mongocamp.driver.mongodb.test.TestDatabase._
import org.bson.types.ObjectId
import org.specs2.mutable.Specification
import org.specs2.specification.BeforeAll

import scala.io.Source

class GridFSDatabaseSpec extends Specification with GridfsDatabaseFunctions with BeforeAll {

"GridFSDatabase" should {
Expand Down Expand Up @@ -58,7 +56,7 @@ class GridFSDatabaseSpec extends Specification with GridfsDatabaseFunctions with
files.head.getMetadata.get("name").toString must be equalTo "logo2"

// update complete metadata for one file
updateMetadata(files.head, ImageMetadata("logo22", group = "logos"))
updateMetadata(files.head, ImageMetadata("logo22"))
// update metadata entry for all files
updateMetadataElements(Map(), Map("group" -> "logos3", "newKey" -> "newEntryValue"))

Expand All @@ -80,7 +78,7 @@ class GridFSDatabaseSpec extends Specification with GridfsDatabaseFunctions with
}

override def beforeAll(): Unit = {
dropImages
dropImages()
insertImage(ImageDAOSourcePath + "scala-logo.jpg", ImageMetadata("logo2", indexSet = Set(5, 6, 7)))
imagesCount must be equalTo 1

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ class LuceneSearchSpec extends PersonSpecification {

"LuceneSearch" should {

"search with number in string" in {
  val luceneQuery = LuceneQueryConverter.parse("stringNumber: 123", "id")

  // Without the flag the query is expected to miss the fixture record whose stringNumber is stored as the string "123".
  val valueOnlyResult = PersonDAO.find(LuceneQueryConverter.toDocument(luceneQuery), sortByBalance).resultList()
  valueOnlyResult must haveSize(0)

  // With the flag the raw term text is matched as well, so that record is found.
  val valueAndStringResult =
    PersonDAO.find(LuceneQueryConverter.toDocument(luceneQuery, searchWithValueAndString = true), sortByBalance).resultList()
  valueAndStringResult must haveSize(1)
  valueAndStringResult.head.age mustEqual 25
  valueAndStringResult.head.name mustEqual "Cheryl Hoffman"
}

"search with extended query" in {
val luceneQuery = LuceneQueryConverter.parse("(favoriteFruit:\"apple\" AND age:\"25\") OR name:*Cecile* AND -active:false AND 123", "id")
// #region lucene-parser-with-explicit
Expand Down
Loading

0 comments on commit 9e73ce1

Please sign in to comment.