From 39a58cfbaa531abc60a5c21ff4305b10de544470 Mon Sep 17 00:00:00 2001 From: Sam Pillsworth Date: Fri, 15 Sep 2023 14:53:45 -0400 Subject: [PATCH 1/5] doc strings for Parser.scala --- .../scala/pink/cozydev/lucille/Parser.scala | 103 +++++++++++------- 1 file changed, 63 insertions(+), 40 deletions(-) diff --git a/core/src/main/scala/pink/cozydev/lucille/Parser.scala b/core/src/main/scala/pink/cozydev/lucille/Parser.scala index ffecca3..607ae97 100644 --- a/core/src/main/scala/pink/cozydev/lucille/Parser.scala +++ b/core/src/main/scala/pink/cozydev/lucille/Parser.scala @@ -38,85 +38,100 @@ object Parser { P.charIn(baseRange -- special) val reserved = Set("OR", "||", "AND", "&&", "NOT", "+", "-", "/") - // Term query - // e.g. 'cat', 'catch22' val term: P[String] = P.not(P.stringIn(reserved)).with1 *> allowed.rep.string + + /** Parse a term query + * e.g. 'cat', 'catch22' + */ val termQ: P[Term] = term.map(Term.apply) - // Phrase query - // e.g. 'the cat jumped' val phrase: P[String] = (term ~ sp.?).rep.string.surroundedBy(dquote) + + /** Parse a phrase query + * e.g. 'the cat jumped' + */ val phraseQ: P[Phrase] = phrase.map(Phrase.apply) - // Proximity query - // e.g. '"cat jumped"~3', '"one two three"~2' val proxSoft: P[String] = phrase.soft <* pchar('~') + + /** Parse a proximity query + * e.g. '"cat jumped"~3', '"one two three"~2' + */ val proximityQ: P[Proximity] = (proxSoft ~ int).map { case (p, n) => Proximity(p, n) } - // Fuzzy term - // e.g. 'cat~', 'cat~2' val fuzzySoft: P[String] = term.soft <* pchar('~') + + /** Parse a fuzzy term query + * e.g. 'cat~', 'cat~t' + */ val fuzzyT: P[Fuzzy] = (fuzzySoft ~ int.?).map { case (q, n) => Fuzzy(q, n) } - // Prefix term - // e.g. 'jump*' + /** Parse a prefix term query + * e.g. 'jump*' + */ val prefixT: P[Prefix] = (term.soft <* P.char('*')) .map(Prefix.apply) - // TermRegex query - // e.g. 
'/.ump(s|ing)/' private val regex: P[String] = { val notEscape = P.charIn(baseRange - '\\' - '/').void val rStr = notEscape.orElse(P.char('\\') *> P.char('/')).rep.string rStr.surroundedBy(pchar('/')) } + + /** Parse a regex query + * e.g. '/.ump(s|ing)/' + */ val regexQ: P[TermRegex] = regex.map(TermRegex.apply) val or = (P.string("OR") | P.string("||")).as(Op.OR) val and = (P.string("AND") | P.string("&&")).as(Op.AND) val infixOp = (or | and).withContext("infixOp") - // given " OR term1 OR term2$" - // parses completely - // given " OR term1 OR term2 extra$" - // parses until the end of 'term2', with 'extra' being left + /** Parse a suffix op query + * e.g. 'OR term1 OR term2$' parses completely + * however 'OR term1 OR term2 extra$' parses until the end of 'term2', with 'extra' being left + */ def suffixOps(query: P[Query]): Parser0[List[(Op, Query)]] = ((maybeSpace.with1 *> infixOp <* sp.rep) ~ query) .repUntil0(maybeSpace *> (P.end | query)) - // parse simple queries followed by suffix op queries - // "q0 q1 OR q2" + /** Parse simple queries followed by suffix op queries + * e.g. 'q0 q1 OR q2' + */ def qWithSuffixOps(query: P[Query]): P[NonEmptyList[Query]] = (query.repSep(sp.rep) ~ suffixOps(query)) .map { case (h, t) => Op.associateOps(h, t) } - // parse a whole list of queries - // "q0 q1 OR q2 q3" - // we repeat so that we can parse q3 - // the first iteration only gets "qp q1 OR q2" + /** Parse a whole list of queries + * e.g. 'q0 q1 OR q2 q3' + * Parsing repeats so that we can parse q3, the first iteration only gets 'q0 q1 OR q2' + */ def nonGrouped(query: P[Query]): P[NonEmptyList[Query]] = (maybeSpace.with1 *> qWithSuffixOps(query)).repUntil(maybeSpace ~ P.end).map(_.flatten) - // Not query - // e.g. 'animals NOT (cats AND dogs)' + /** Parse a not query + * e.g. 'animals NOT (cats AND dogs)' + */ def notQ(query: P[Query]): P[Query] = ((P.string("NOT").soft ~ maybeSpace) *> query).map(Not.apply) - // Minimum match query - // e.g. 
'(one two three)@2' + /** Parse a minimum match query + * e.g. '(one two three)@2' + */ def minimumMatchQ(query: P[Query]): P[MinimumMatch] = { val matchNum = P.char('@') *> int val grouped = nonGrouped(query).between(P.char('('), P.char(')')) (grouped.soft ~ matchNum).map { case (qs, n) => MinimumMatch(qs, n) } } - // Group query - // e.g. '(cats AND dogs)' + /** Parse a group query + * e.g. '(cats AND dogs)' + */ def groupQ(query: P[Query]): P[Group] = { val g = nonGrouped(query) .between(P.char('('), P.char(')')) @@ -124,24 +139,28 @@ object Parser { (g <* P.not(endOfGroupSpecial)).map(Group.apply) } - // Field query - // e.g. 'title:cats', 'author:"Silly Goose"', 'title:(cats AND dogs)' + /** Parse a field query + * e.g. 'title:cats', 'author:"Silly Goose"', 'title:(cats AND dogs)' + */ val fieldValueSoft: P[String] = term.soft <* pchar(':') def fieldQuery(query: P[Query]): P[Field] = (fieldValueSoft ~ query).map { case (f, q) => Field(f, q) } - // Unary Plus query - // e.g. '+cat', '+(cats AND dogs)' + /** Parse a unary plus query + * e.g. '+cat', '+(cats AND dogs)' + */ def unaryPlus(query: P[Query]): P[UnaryPlus] = P.char('+') *> query.map(UnaryPlus.apply) - // Unary Minus query - // e.g. 'cat', '-(cats AND dogs)' + /** Parse a unary minus query + * e.g. '-cat', '-(cats AND dogs)' + */ def unaryMinus(query: P[Query]): P[UnaryMinus] = P.char('-') *> query.map(UnaryMinus.apply) - // TermRange query - // e.g. '{cats TO dogs}', '[1 TO *}' + /** Parse a term range query + * e.g. 
'{cats TO dogs}', '[1 TO *}' + */ def rangeQuery: P[TermRange] = { val inclLower = P.charIn('{', '[').map(lowerBound => lowerBound == '[') <* maybeSpace val inclUpper = maybeSpace *> P.charIn('}', ']').map(upperBound => upperBound == ']') @@ -155,9 +174,10 @@ object Parser { } } - // Tie compound queries together recursively - // Order is very important here - // prefixT before termQ + /** Recursively parse compound queries + * The order is very important: + * - prefixT must come before termQ + */ val recursiveQ: P[Query] = P.recursive[Query](r => P.oneOf( List( @@ -178,9 +198,12 @@ object Parser { ) ) - // One or more queries implicitly grouped together in a list + /** Parse one or more queries implicitly grouped together in a list + */ val fullQuery = nonGrouped(recursiveQ) <* maybeSpace + /** Attempt to parse a whole string representing a Lucene query + */ def parseQ(s: String): Either[cats.parse.Parser.Error, MultiQuery] = fullQuery.parseAll(s).map(MultiQuery.apply) } From e75f1d3206145570733832488ddec2f558fc1a97 Mon Sep 17 00:00:00 2001 From: Sam Pillsworth Date: Fri, 22 Sep 2023 11:49:11 -0400 Subject: [PATCH 2/5] query doc strings --- .../scala/pink/cozydev/lucille/Parser.scala | 2 +- .../scala/pink/cozydev/lucille/Query.scala | 73 +++++++++++++++++++ 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/pink/cozydev/lucille/Parser.scala b/core/src/main/scala/pink/cozydev/lucille/Parser.scala index 607ae97..30bb03f 100644 --- a/core/src/main/scala/pink/cozydev/lucille/Parser.scala +++ b/core/src/main/scala/pink/cozydev/lucille/Parser.scala @@ -64,7 +64,7 @@ object Parser { val fuzzySoft: P[String] = term.soft <* pchar('~') /** Parse a fuzzy term query - * e.g. 'cat~', 'cat~t' + * e.g. 
'cat~', 'cat~1' */ val fuzzyT: P[Fuzzy] = (fuzzySoft ~ int.?).map { case (q, n) => Fuzzy(q, n) diff --git a/core/src/main/scala/pink/cozydev/lucille/Query.scala b/core/src/main/scala/pink/cozydev/lucille/Query.scala index baefdc5..54d550d 100644 --- a/core/src/main/scala/pink/cozydev/lucille/Query.scala +++ b/core/src/main/scala/pink/cozydev/lucille/Query.scala @@ -17,7 +17,9 @@ package pink.cozydev.lucille import cats.data.NonEmptyList +import scala.annotation.meta.field +/** A trait for all queries */ sealed trait Query extends Product with Serializable { /** Builds a new query by applying a `Term => Query` function to a Term if it is in the last position. @@ -28,11 +30,13 @@ sealed trait Query extends Product with Serializable { def mapLastTerm(f: Query.Term => Query): Query } +/** A trait for all leaf node queries (meaning that they do not contain queries) */ sealed trait TermQuery extends Query { // noop for everything except Query.Term def mapLastTerm(f: Query.Term => Query): Query = this } +/** A trait for a list of one or more queries */ final case class MultiQuery(qs: NonEmptyList[Query]) extends Query { def mapLastTerm(f: Query.Term => Query): MultiQuery = { @@ -50,15 +54,49 @@ object MultiQuery { } object Query { + + /** A term query + * e.g. 'cat', 'catch22' + */ final case class Term(str: String) extends TermQuery { override def mapLastTerm(f: Query.Term => Query): Query = f(this) } + + /** A phrase query + * e.g. 'the cat jumped' + */ final case class Phrase(str: String) extends TermQuery + + /** A prefix query + * Search for words starting with the given prefix + * e.g. 'jump*' + */ final case class Prefix(str: String) extends TermQuery + + /** A proximity query + * Search for words within a specified word distance + * e.g. 
'"cat jumped"~3', '"one two three"~2' + */ final case class Proximity(str: String, num: Int) extends TermQuery + + /** A fuzzy query + * Search for words within a Damerau-Levenshtein Distance + * The additional parameter, between 0 and 2, specifies the number of edits allowed (defaults to 2) + * e.g. 'cat~', 'cat~1' + */ final case class Fuzzy(str: String, num: Option[Int]) extends TermQuery + + /** A regex query + * Search with a regular expression, the pattern is given between forward slashes, `/`. + * e.g. '/.ump(s|ing)' + */ final case class TermRegex(str: String) extends TermQuery + + /** A range query + * Search for terms that fall between some upper and lower bounds. The bounds can be inclusive or exclusive. + * e.g. '{cats TO dogs}', '[1 TO *]' + */ final case class TermRange( lower: Option[String], upper: Option[String], @@ -66,6 +104,10 @@ object Query { upperInc: Boolean, ) extends TermQuery + /** An Or operator + * Join the given queries with OR, the equivalent of taking the union of the results of each query + * e.g. 'q1 OR q2' + */ final case class Or(qs: NonEmptyList[Query]) extends Query { def mapLastTerm(f: Query.Term => Query): Or = Or(rewriteLastTerm(qs, f)) @@ -75,6 +117,10 @@ object Query { Or(NonEmptyList(head, tail.toList)) } + /** An And operator + * Join the given queries with AND, the equivalent of taking the intersection of the results of each query + * e.g. 'q1 AND q2' + */ final case class And(qs: NonEmptyList[Query]) extends Query { def mapLastTerm(f: Query.Term => Query): And = And(rewriteLastTerm(qs, f)) @@ -84,11 +130,19 @@ object Query { And(NonEmptyList(head, tail.toList)) } + /** A Not operator + * Exclude terms that would match the given query + * e.g. 'NOT cats' + */ final case class Not(q: Query) extends Query { def mapLastTerm(f: Query.Term => Query): Not = Not(q.mapLastTerm(f)) } + /** A group query + * Queries grouped together with parentheses + * e.g. 
'(cats AND dogs)' + */ final case class Group(qs: NonEmptyList[Query]) extends Query { def mapLastTerm(f: Query.Term => Query): Group = this } @@ -97,17 +151,36 @@ object Query { Group(NonEmptyList(head, tail.toList)) } + /** A unary plus query + * Search for documents which must contain the given query + * e.g. '+cat', '+(cats AND dogs)' + */ final case class UnaryPlus(q: Query) extends Query { def mapLastTerm(f: Query.Term => Query): UnaryPlus = UnaryPlus(q.mapLastTerm(f)) } + + /** A unary minus query + * Search for documents which must not contain the given query + * e.g. '-cat', '-(cats AND dogs)' + */ final case class UnaryMinus(q: Query) extends Query { def mapLastTerm(f: Query.Term => Query): UnaryMinus = UnaryMinus(q.mapLastTerm(f)) } + + /** A minimum match query + * Search for documents that match at least `num` of the given queries + * e.g. '(one two three)@2' + */ final case class MinimumMatch(qs: NonEmptyList[Query], num: Int) extends Query { def mapLastTerm(f: Query.Term => Query): MinimumMatch = this } + + /** A field query + * Search for documents by applying the given query only to the named field + * e.g. 'author:"Silly Goose"', 'title:(cats AND dogs)' + */ final case class Field(field: String, q: Query) extends Query { def mapLastTerm(f: Query.Term => Query): Field = Field(field, q.mapLastTerm(f)) From 48608d1c28ab034b36fe1aea4aebba66cbcd59fa Mon Sep 17 00:00:00 2001 From: Sam Pillsworth Date: Fri, 22 Sep 2023 11:53:47 -0400 Subject: [PATCH 3/5] wait I actually don't know what this was for? 
--- core/src/main/scala/pink/cozydev/lucille/Query.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/scala/pink/cozydev/lucille/Query.scala b/core/src/main/scala/pink/cozydev/lucille/Query.scala index 54d550d..54ed2b7 100644 --- a/core/src/main/scala/pink/cozydev/lucille/Query.scala +++ b/core/src/main/scala/pink/cozydev/lucille/Query.scala @@ -17,7 +17,6 @@ package pink.cozydev.lucille import cats.data.NonEmptyList -import scala.annotation.meta.field /** A trait for all queries */ sealed trait Query extends Product with Serializable { From 7342edfe1897aec8f04736628bcd9fdb784cacd2 Mon Sep 17 00:00:00 2001 From: Sam Pillsworth Date: Fri, 22 Sep 2023 12:07:35 -0400 Subject: [PATCH 4/5] explicit param docs --- .../scala/pink/cozydev/lucille/Query.scala | 42 ++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/pink/cozydev/lucille/Query.scala b/core/src/main/scala/pink/cozydev/lucille/Query.scala index 54ed2b7..07526c2 100644 --- a/core/src/main/scala/pink/cozydev/lucille/Query.scala +++ b/core/src/main/scala/pink/cozydev/lucille/Query.scala @@ -35,7 +35,10 @@ sealed trait TermQuery extends Query { def mapLastTerm(f: Query.Term => Query): Query = this } -/** A trait for a list of one or more queries */ +/** A list of one or more queries + * + * @param qs the queries + */ final case class MultiQuery(qs: NonEmptyList[Query]) extends Query { def mapLastTerm(f: Query.Term => Query): MultiQuery = { @@ -56,6 +59,8 @@ object Query { /** A term query * e.g. 'cat', 'catch22' + * + * @param str the term */ final case class Term(str: String) extends TermQuery { override def mapLastTerm(f: Query.Term => Query): Query = @@ -64,18 +69,25 @@ object Query { /** A phrase query * e.g. 'the cat jumped' + * + * @param str the phrase */ final case class Phrase(str: String) extends TermQuery /** A prefix query * Search for words starting with the given prefix * e.g. 
'jump*' + * + * @param str the prefix */ final case class Prefix(str: String) extends TermQuery /** A proximity query * Search for words within a specified word distance * e.g. '"cat jumped"~3', '"one two three"~2' + * + * @param str the words + * @param num the word distance */ final case class Proximity(str: String, num: Int) extends TermQuery @@ -83,18 +95,28 @@ object Query { * Search for words within a Damerau-Levenshtein Distance * The additional parameter, between 0 and 2, specifies the number of edits allowed (defaults to 2) * e.g. 'cat~', 'cat~1' + * + * @param str the string + * @param num the number of edits allowed */ final case class Fuzzy(str: String, num: Option[Int]) extends TermQuery /** A regex query * Search with a regular expression, the pattern is given between forward slashes, `/`. * e.g. '/.ump(s|ing)' + * + * @param str the regular expression query */ final case class TermRegex(str: String) extends TermQuery /** A range query * Search for terms that fall between some upper and lower bounds. The bounds can be inclusive or exclusive. * e.g. '{cats TO dogs}', '[1 TO *]' + * + * @param lower the lower bound + * @param upper the upper bound + * @param lowerInc whether the lower bound is inclusive + * @param upperInc whether the upper bound is inclusive */ final case class TermRange( lower: Option[String], @@ -106,6 +128,8 @@ object Query { /** An Or operator * Join the given queries with OR, the equivalent of taking the union of the results of each query * e.g. 'q1 OR q2' + * + * @param qs the queries to union */ final case class Or(qs: NonEmptyList[Query]) extends Query { def mapLastTerm(f: Query.Term => Query): Or = @@ -119,6 +143,8 @@ object Query { /** An And operator * Join the given queries with AND, the equivalent of taking the intersection of the results of each query * e.g. 
'q1 AND q2' + * + * @param qs the queries to intersect */ final case class And(qs: NonEmptyList[Query]) extends Query { def mapLastTerm(f: Query.Term => Query): And = @@ -132,6 +158,8 @@ object Query { /** A Not operator * Exclude terms that would match the given query * e.g. 'NOT cats' + * + * @param q the query to exclude */ final case class Not(q: Query) extends Query { def mapLastTerm(f: Query.Term => Query): Not = @@ -141,6 +169,8 @@ object Query { /** A group query * Queries grouped together with parentheses * e.g. '(cats AND dogs)' + * + * @param qs the queries to group */ final case class Group(qs: NonEmptyList[Query]) extends Query { def mapLastTerm(f: Query.Term => Query): Group = this @@ -153,6 +183,8 @@ object Query { /** A unary plus query * Search for documents which must contain the given query * e.g. '+cat', '+(cats AND dogs)' + * + * @param q the query */ final case class UnaryPlus(q: Query) extends Query { def mapLastTerm(f: Query.Term => Query): UnaryPlus = @@ -162,6 +194,8 @@ object Query { /** A unary minus query * Search for documents which must not contain the given query * e.g. '-cat', '-(cats AND dogs)' + * + * @param q the query */ final case class UnaryMinus(q: Query) extends Query { def mapLastTerm(f: Query.Term => Query): UnaryMinus = @@ -171,6 +205,9 @@ object Query { /** A minimum match query * Search for documents that match at least `num` of the given queries * e.g. '(one two three)@2' + * + * @param qs the queries + * @param num the number of queries that must match */ final case class MinimumMatch(qs: NonEmptyList[Query], num: Int) extends Query { def mapLastTerm(f: Query.Term => Query): MinimumMatch = this @@ -179,6 +216,9 @@ object Query { /** A field query * Search for documents by applying the given query only to the named field * e.g. 
'author:"Silly Goose"', 'title:(cats AND dogs)' + * + * @param field the field name + * @param q the query */ final case class Field(field: String, q: Query) extends Query { def mapLastTerm(f: Query.Term => Query): Field = From 5f4f66606273bb234cc2f803b0258ba6c62f73fa Mon Sep 17 00:00:00 2001 From: Sam Pillsworth Date: Thu, 19 Oct 2023 17:48:26 -0400 Subject: [PATCH 5/5] suggested change Co-authored-by: Andrew Valencik --- core/src/main/scala/pink/cozydev/lucille/Query.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/core/src/main/scala/pink/cozydev/lucille/Query.scala b/core/src/main/scala/pink/cozydev/lucille/Query.scala index 07526c2..c2620bc 100644 --- a/core/src/main/scala/pink/cozydev/lucille/Query.scala +++ b/core/src/main/scala/pink/cozydev/lucille/Query.scala @@ -91,9 +91,7 @@ object Query { */ final case class Proximity(str: String, num: Int) extends TermQuery - /** A fuzzy query - * Search for words within a Damerau-Levenshtein Distance - * The additional parameter, between 0 and 2, specifies the number of edits allowed (defaults to 2) + /** A fuzzy query with an optional distance value * e.g. 'cat~', 'cat~1' * * @param str the string