Skip to content

Commit

Permalink
Merge pull request #85 from cozydev-pink/sam/31/docs
Browse files Browse the repository at this point in the history
doc strings
  • Loading branch information
samspills authored Oct 19, 2023
2 parents 3158f47 + 5f4f666 commit 748f2d6
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 40 deletions.
103 changes: 63 additions & 40 deletions core/src/main/scala/pink/cozydev/lucille/Parser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -38,110 +38,129 @@ object Parser {
P.charIn(baseRange -- special)
val reserved = Set("OR", "||", "AND", "&&", "NOT", "+", "-", "/")

// Term query
// e.g. 'cat', 'catch22'
val term: P[String] = P.not(P.stringIn(reserved)).with1 *> allowed.rep.string

/** Parse a term query
* e.g. 'cat', 'catch22'
*/
val termQ: P[Term] = term.map(Term.apply)

// Phrase query
// e.g. 'the cat jumped'
val phrase: P[String] = (term ~ sp.?).rep.string.surroundedBy(dquote)

/** Parse a phrase query
* e.g. '"the cat jumped"' (the phrase must be wrapped in double quotes)
*/
val phraseQ: P[Phrase] = phrase.map(Phrase.apply)

// Proximity query
// e.g. '"cat jumped"~3', '"one two three"~2'
val proxSoft: P[String] = phrase.soft <* pchar('~')

/** Parse a proximity query
* e.g. '"cat jumped"~3', '"one two three"~2'
*/
val proximityQ: P[Proximity] = (proxSoft ~ int).map { case (p, n) =>
Proximity(p, n)
}

// Fuzzy term
// e.g. 'cat~', 'cat~2'
val fuzzySoft: P[String] = term.soft <* pchar('~')

/** Parse a fuzzy term query
* e.g. 'cat~', 'cat~1'
*/
val fuzzyT: P[Fuzzy] = (fuzzySoft ~ int.?).map { case (q, n) =>
Fuzzy(q, n)
}

// Prefix term
// e.g. 'jump*'
/** Parse a prefix term query
* e.g. 'jump*'
*/
val prefixT: P[Prefix] =
(term.soft <* P.char('*'))
.map(Prefix.apply)

// TermRegex query
// e.g. '/.ump(s|ing)/'
private val regex: P[String] = {
val notEscape = P.charIn(baseRange - '\\' - '/').void
val rStr = notEscape.orElse(P.char('\\') *> P.char('/')).rep.string
rStr.surroundedBy(pchar('/'))
}

/** Parse a regex query
* e.g. '/.ump(s|ing)/'
*/
val regexQ: P[TermRegex] = regex.map(TermRegex.apply)

val or = (P.string("OR") | P.string("||")).as(Op.OR)
val and = (P.string("AND") | P.string("&&")).as(Op.AND)
val infixOp = (or | and).withContext("infixOp")

// given " OR term1 OR term2$"
// parses completely
// given " OR term1 OR term2 extra$"
// parses until the end of 'term2', with 'extra' being left
/** Parse a suffix op query
* e.g. ' OR term1 OR term2$' parses completely,
* whereas ' OR term1 OR term2 extra$' parses only up to the end of 'term2', leaving 'extra' unconsumed
*/
def suffixOps(query: P[Query]): Parser0[List[(Op, Query)]] =
((maybeSpace.with1 *> infixOp <* sp.rep) ~ query)
.repUntil0(maybeSpace *> (P.end | query))

// parse simple queries followed by suffix op queries
// "q0 q1 OR q2"
/** Parse simple queries followed by suffix op queries
* e.g. 'q0 q1 OR q2'
*/
def qWithSuffixOps(query: P[Query]): P[NonEmptyList[Query]] =
(query.repSep(sp.rep) ~ suffixOps(query))
.map { case (h, t) => Op.associateOps(h, t) }

// parse a whole list of queries
// "q0 q1 OR q2 q3"
// we repeat so that we can parse q3
// the first iteration only gets "qp q1 OR q2"
/** Parse a whole list of queries
* e.g. 'q0 q1 OR q2 q3'
* Parsing repeats so that we can parse q3; the first iteration only gets 'q0 q1 OR q2'
*/
def nonGrouped(query: P[Query]): P[NonEmptyList[Query]] =
(maybeSpace.with1 *> qWithSuffixOps(query)).repUntil(maybeSpace ~ P.end).map(_.flatten)

// Not query
// e.g. 'animals NOT (cats AND dogs)'
/** Parse a not query
* e.g. 'animals NOT (cats AND dogs)'
*/
def notQ(query: P[Query]): P[Query] =
((P.string("NOT").soft ~ maybeSpace) *> query).map(Not.apply)

// Minimum match query
// e.g. '(one two three)@2'
/** Parse a minimum match query
* e.g. '(one two three)@2'
*/
def minimumMatchQ(query: P[Query]): P[MinimumMatch] = {
val matchNum = P.char('@') *> int
val grouped = nonGrouped(query).between(P.char('('), P.char(')'))
(grouped.soft ~ matchNum).map { case (qs, n) => MinimumMatch(qs, n) }
}

// Group query
// e.g. '(cats AND dogs)'
/** Parse a group query
* e.g. '(cats AND dogs)'
*/
def groupQ(query: P[Query]): P[Group] = {
val g = nonGrouped(query)
.between(P.char('('), P.char(')'))
val endOfGroupSpecial = P.char('@')
(g <* P.not(endOfGroupSpecial)).map(Group.apply)
}

// Field query
// e.g. 'title:cats', 'author:"Silly Goose"', 'title:(cats AND dogs)'
/** Parse a field query
* e.g. 'title:cats', 'author:"Silly Goose"', 'title:(cats AND dogs)'
*/
val fieldValueSoft: P[String] = term.soft <* pchar(':')
def fieldQuery(query: P[Query]): P[Field] =
(fieldValueSoft ~ query).map { case (f, q) => Field(f, q) }

// Unary Plus query
// e.g. '+cat', '+(cats AND dogs)'
/** Parse a unary plus query
* e.g. '+cat', '+(cats AND dogs)'
*/
def unaryPlus(query: P[Query]): P[UnaryPlus] =
P.char('+') *> query.map(UnaryPlus.apply)

// Unary Minus query
// e.g. 'cat', '-(cats AND dogs)'
/** Parse a unary minus query
* e.g. '-cat', '-(cats AND dogs)'
*/
def unaryMinus(query: P[Query]): P[UnaryMinus] =
P.char('-') *> query.map(UnaryMinus.apply)

// TermRange query
// e.g. '{cats TO dogs}', '[1 TO *}'
/** Parse a term range query
* e.g. '{cats TO dogs}', '[1 TO *}'
*/
def rangeQuery: P[TermRange] = {
val inclLower = P.charIn('{', '[').map(lowerBound => lowerBound == '[') <* maybeSpace
val inclUpper = maybeSpace *> P.charIn('}', ']').map(upperBound => upperBound == ']')
Expand All @@ -155,9 +174,10 @@ object Parser {
}
}

// Tie compound queries together recursively
// Order is very important here
// prefixT before termQ
/** Recursively parse compound queries
* The order is very important:
* - prefixT must come before termQ
*/
val recursiveQ: P[Query] = P.recursive[Query](r =>
P.oneOf(
List(
Expand All @@ -178,9 +198,12 @@ object Parser {
)
)

// One or more queries implicitly grouped together in a list
/** Parse one or more queries implicitly grouped together in a list
*/
val fullQuery = nonGrouped(recursiveQ) <* maybeSpace

/** Attempt to parse a whole string representing a Lucene query
*/
def parseQ(s: String): Either[cats.parse.Parser.Error, MultiQuery] =
fullQuery.parseAll(s).map(MultiQuery.apply)
}
110 changes: 110 additions & 0 deletions core/src/main/scala/pink/cozydev/lucille/Query.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package pink.cozydev.lucille

import cats.data.NonEmptyList

/** A trait for all queries */
sealed trait Query extends Product with Serializable {

/** Builds a new query by applying a `Term => Query` function to a Term if it is in the last position.
Expand All @@ -28,11 +29,16 @@ sealed trait Query extends Product with Serializable {
def mapLastTerm(f: Query.Term => Query): Query
}

/** A trait for all leaf node queries (meaning that they do not contain queries) */
sealed trait TermQuery extends Query {
// noop for everything except Query.Term
def mapLastTerm(f: Query.Term => Query): Query = this
}

/** A query made up of a list of one or more queries
*
* @param qs the queries
*/
final case class MultiQuery(qs: NonEmptyList[Query]) extends Query {

def mapLastTerm(f: Query.Term => Query): MultiQuery = {
Expand All @@ -50,22 +56,79 @@ object MultiQuery {
}

object Query {

/** A term query
* e.g. 'cat', 'catch22'
*
* @param str the term
*/
final case class Term(str: String) extends TermQuery {
override def mapLastTerm(f: Query.Term => Query): Query =
f(this)
}

/** A phrase query
* e.g. '"the cat jumped"' (the phrase is written between double quotes)
*
* @param str the phrase
*/
final case class Phrase(str: String) extends TermQuery

/** A prefix query
* Search for words starting with the given prefix
* e.g. 'jump*'
*
* @param str the prefix
*/
final case class Prefix(str: String) extends TermQuery

/** A proximity query
* Search for words within a specified word distance
* e.g. '"cat jumped"~3', '"one two three"~2'
*
* @param str the words
* @param num the word distance
*/
final case class Proximity(str: String, num: Int) extends TermQuery

/** A fuzzy query with an optional distance value
* e.g. 'cat~', 'cat~1'
*
* @param str the string
* @param num the number of edits allowed
*/
final case class Fuzzy(str: String, num: Option[Int]) extends TermQuery

/** A regex query
* Search with a regular expression, the pattern is given between forward slashes, `/`.
* e.g. '/.ump(s|ing)/'
*
* @param str the regular expression query
*/
final case class TermRegex(str: String) extends TermQuery

/** A range query
* Search for terms that fall between some upper and lower bounds. The bounds can be inclusive or exclusive.
* e.g. '{cats TO dogs}', '[1 TO *]'
*
* @param lower the lower bound
* @param upper the upper bound
* @param lowerInc whether the lower bound is inclusive
* @param upperInc whether the upper bound is inclusive
*/
final case class TermRange(
lower: Option[String],
upper: Option[String],
lowerInc: Boolean,
upperInc: Boolean,
) extends TermQuery

/** An Or operator
* Join the given queries with OR, the equivalent of taking the union of the results of each query
* e.g. 'q1 OR q2'
*
* @param qs the queries to union
*/
final case class Or(qs: NonEmptyList[Query]) extends Query {
def mapLastTerm(f: Query.Term => Query): Or =
Or(rewriteLastTerm(qs, f))
Expand All @@ -75,6 +138,12 @@ object Query {
Or(NonEmptyList(head, tail.toList))
}

/** An And operator
* Join the given queries with AND, the equivalent of taking the intersection of the results of each query
* e.g. 'q1 AND q2'
*
* @param qs the queries to intersect
*/
final case class And(qs: NonEmptyList[Query]) extends Query {
def mapLastTerm(f: Query.Term => Query): And =
And(rewriteLastTerm(qs, f))
Expand All @@ -84,11 +153,23 @@ object Query {
And(NonEmptyList(head, tail.toList))
}

/** A Not operator
* Exclude terms that would match the given query
* e.g. 'NOT cats'
*
* @param q the query to exclude
*/
final case class Not(q: Query) extends Query {
def mapLastTerm(f: Query.Term => Query): Not =
Not(q.mapLastTerm(f))
}

/** A group query
* Queries grouped together with parentheses
* e.g. '(cats AND dogs)'
*
* @param qs the queries to group
*/
final case class Group(qs: NonEmptyList[Query]) extends Query {
def mapLastTerm(f: Query.Term => Query): Group = this
}
Expand All @@ -97,17 +178,46 @@ object Query {
Group(NonEmptyList(head, tail.toList))
}

/** A unary plus query
* Search for documents which must contain the given query
* e.g. '+cat', '+(cats AND dogs)'
*
* @param q the query
*/
final case class UnaryPlus(q: Query) extends Query {
def mapLastTerm(f: Query.Term => Query): UnaryPlus =
UnaryPlus(q.mapLastTerm(f))
}

/** A unary minus query
* Search for documents which must not contain the given query
* e.g. '-cat', '-(cats AND dogs)'
*
* @param q the query
*/
final case class UnaryMinus(q: Query) extends Query {
def mapLastTerm(f: Query.Term => Query): UnaryMinus =
UnaryMinus(q.mapLastTerm(f))
}

/** A minimum match query
* Search for documents that match at least `num` of the given queries
* e.g. '(one two three)@2'
*
* @param qs the queries
* @param num the number of queries that must match
*/
final case class MinimumMatch(qs: NonEmptyList[Query], num: Int) extends Query {
def mapLastTerm(f: Query.Term => Query): MinimumMatch = this
}

/** A field query
* Search for documents by applying the given query only to the named field
* e.g. 'author:"Silly Goose"', 'title:(cats AND dogs)'
*
* @param field the field name
* @param q the query
*/
final case class Field(field: String, q: Query) extends Query {
def mapLastTerm(f: Query.Term => Query): Field =
Field(field, q.mapLastTerm(f))
Expand Down

0 comments on commit 748f2d6

Please sign in to comment.