Skip to content

Commit

Permalink
Fast LIKE pattern compilation (#286)
Browse files Browse the repository at this point in the history
Implements #284 

Replaces previous LIKE implementation which is slow when compiling large patterns with wildcard characters `%` with another implementation that compiles the patterns in linear time and has similar performance characteristics at evaluation time.
  • Loading branch information
dlurton authored Sep 24, 2020
1 parent e4a5663 commit c5d72c0
Show file tree
Hide file tree
Showing 8 changed files with 414 additions and 533 deletions.
72 changes: 37 additions & 35 deletions lang/src/org/partiql/lang/eval/EvaluatingCompiler.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ import org.partiql.lang.ast.passes.*
import org.partiql.lang.domains.PartiqlAst
import org.partiql.lang.errors.*
import org.partiql.lang.eval.binding.*
import org.partiql.lang.eval.like.PatternPart
import org.partiql.lang.eval.like.executePattern
import org.partiql.lang.eval.like.parsePattern
import org.partiql.lang.syntax.SqlParser
import org.partiql.lang.util.*
import java.math.*
Expand Down Expand Up @@ -1640,9 +1643,9 @@ internal class EvaluatingCompiler(
*
* Three cases
*
* 1. All arguments are literals, then compile and run the DFA
* 1. Search pattern and escape pattern are literals, compile the DFA. Running the DFA is deferred to evaluation time.
* 1. Pattern or escape (or both) are *not* literals, compile and running of DFA deferred to evaluation time.
* 1. All arguments are literals, then compile and run the pattern
* 1. Search pattern and escape pattern are literals, compile the pattern. Running the pattern deferred to evaluation time.
* 1. Pattern or escape (or both) are *not* literals, compile and running of pattern deferred to evaluation time.
*
* ```
* <valueExpr> LIKE <patternExpr> [ESCAPE <escapeExpr>]
Expand All @@ -1661,14 +1664,14 @@ internal class EvaluatingCompiler(
val patternLocationMeta = patternExpr.metas.sourceLocationMeta
val escapeLocationMeta = escapeExpr?.metas?.sourceLocationMeta


// Note that the return value is a nullable and deferred.
// This is so that null short-circuits can be supported.
// The effective type is Either<Null, Either<Error, IDFA>>
fun getDfa(pattern: ExprValue, escape: ExprValue?): (() -> IDFAState)? {
val dfaArgs = listOfNotNull(pattern, escape)
fun getPatternParts(pattern: ExprValue, escape: ExprValue?): (() -> List<PatternPart>)? {
val patternArgs = listOfNotNull(pattern, escape)
when {
dfaArgs.any { it.type.isUnknown } -> return null
dfaArgs.any { !it.type.isText } -> return {
patternArgs.any { it.type.isUnknown } -> return null
patternArgs.any { !it.type.isText } -> return {
err("LIKE expression must be given non-null strings as input",
ErrorCode.EVALUATOR_LIKE_INVALID_INPUTS,
errorContextFrom(operatorMetas).also {
Expand All @@ -1678,77 +1681,77 @@ internal class EvaluatingCompiler(
internal = false)
}
else -> {
val (patternString: String, escapeChar: Int?, patternSize) =
val (patternString: String, escapeChar: Int?) =
checkPattern(pattern.ionValue, patternLocationMeta, escape?.ionValue, escapeLocationMeta)

val dfa =
if (patternString.isEmpty()) DFAEmptyPattern
else buildDfaFromPattern(patternString, escapeChar, patternSize)
val patternParts = when {
patternString.isEmpty() -> emptyList()
else -> parsePattern(patternString, escapeChar)
}

return { dfa }
return { patternParts }
}
}
}

/** See getDfa for more info on the DFA's odd type. */
fun runDfa(value: ExprValue, dfa: (() -> IDFAState)?): ExprValue {
fun runPatternParts(value: ExprValue, patternParts: (() -> List<PatternPart>)?): ExprValue {
return when {
dfa == null || value.type.isUnknown -> valueFactory.nullValue
!value.type.isText -> err(
patternParts == null || value.type.isUnknown -> valueFactory.nullValue
!value.type.isText -> err(
"LIKE expression must be given non-null strings as input",
ErrorCode.EVALUATOR_LIKE_INVALID_INPUTS,
errorContextFrom(operatorMetas).also {
it[Property.LIKE_VALUE] = value.ionValue.toString()
},
internal = false)
else -> dfa().run(value.stringValue()).exprValue()
else -> valueFactory.newBoolean(executePattern(patternParts(), value.stringValue()))
}
}

val valueThunk = argThunks[0]

// If the pattern and escape expressions are literals then we can can compile the DFA now and
// re-use it with every execution. Otherwise we must re-compile the DFA every time.
// If the pattern and escape expressions are literals then we can can compile the pattern now and
// re-use it with every execution. Otherwise we must re-compile the pattern every time.

return when {
patternExpr is Literal && (escapeExpr == null || escapeExpr is Literal) -> {
val dfa = getDfa(
val patternParts = getPatternParts(
valueFactory.newFromIonValue(patternExpr.ionValue),
(escapeExpr as? Literal)?.ionValue?.let { valueFactory.newFromIonValue(it) })

// If valueExpr is also a literal then we can evaluate this at compile time and return a constant.
if (valueExpr is Literal) {
val resultValue = runDfa(valueFactory.newFromIonValue(valueExpr.ionValue), dfa)
val resultValue = runPatternParts(valueFactory.newFromIonValue(valueExpr.ionValue), patternParts)
return thunkFactory.thunkEnv(operatorMetas) { resultValue }
}
else {
thunkFactory.thunkEnv(operatorMetas) { env ->
val value = valueThunk(env)
runDfa(value, dfa)
runPatternParts(value, patternParts)
}
}
}
else -> {
val patternThunk = argThunks[1]
when {
argThunks.size == 2 -> {
//thunk that re-compiles the DFA every evaluation without a custom escape sequence
//thunk that re-compiles the pattern every evaluation without a custom escape sequence
thunkFactory.thunkEnv(operatorMetas) { env ->
val value = valueThunk(env)
val pattern = patternThunk(env)
val dfa = getDfa(pattern, null)
runDfa(value, dfa)
val pps = getPatternParts(pattern, null)
runPatternParts(value, pps)
}
}
else -> {
//thunk that re-compiles the DFA every evaluation but *with* a custom escape sequence
//thunk that re-compiles the pattern every evaluation but *with* a custom escape sequence
val escapeThunk = argThunks[2]
thunkFactory.thunkEnv(operatorMetas) { env ->
val value = valueThunk(env)
val pattern = patternThunk(env)
val escape = escapeThunk(env)
val dfa = getDfa(pattern, escape)
runDfa(value, dfa)
val pps = getPatternParts(pattern, escape)
runPatternParts(value, pps)
}
}
}
Expand Down Expand Up @@ -1785,8 +1788,9 @@ internal class EvaluatingCompiler(
patternLocationMeta: SourceLocationMeta?,
escape: IonValue?,
escapeLocationMeta: SourceLocationMeta?
): Triple<String, Int?, Int> {
val patternString = pattern.stringValue()?.let { it }
): Pair<String, Int?> {

val patternString = pattern.stringValue()
?: err("Must provide a non-null value for PATTERN in a LIKE predicate: $pattern",
errorContextFrom(patternLocationMeta),
internal = false)
Expand All @@ -1796,7 +1800,6 @@ internal class EvaluatingCompiler(
val escapeCharCodePoint = escapeCharString.codePointAt(0) // escape is a string of length 1
val validEscapedChars = setOf('_'.toInt(), '%'.toInt(), escapeCharCodePoint)
val iter = patternString.codePointSequence().iterator()
var count = 0

while (iter.hasNext()) {
val current = iter.next()
Expand All @@ -1809,11 +1812,10 @@ internal class EvaluatingCompiler(
},
internal = false)
}
count++
}
return Triple(patternString, escapeCharCodePoint, count)
return Pair(patternString, escapeCharCodePoint)
}
return Triple(patternString, null, patternString.length)
return Pair(patternString, null)
}

/**
Expand Down
Loading

0 comments on commit c5d72c0

Please sign in to comment.