Skip to content

Commit

Permalink
Fast LIKE pattern compilation (#286)
Browse files Browse the repository at this point in the history
Implements #284

Replaces the previous LIKE implementation, which was slow when compiling
large patterns containing the `%` wildcard character, with a new
implementation that compiles patterns in linear time and has similar
performance characteristics at evaluation time.
  • Loading branch information
dlurton committed Sep 30, 2020
1 parent 32d7820 commit e80967f
Show file tree
Hide file tree
Showing 8 changed files with 424 additions and 543 deletions.
92 changes: 47 additions & 45 deletions lang/src/org/partiql/lang/eval/EvaluatingCompiler.kt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ import org.partiql.lang.ast.*
import org.partiql.lang.ast.passes.*
import org.partiql.lang.errors.*
import org.partiql.lang.eval.binding.*
import org.partiql.lang.eval.like.PatternPart
import org.partiql.lang.eval.like.executePattern
import org.partiql.lang.eval.like.parsePattern
import org.partiql.lang.syntax.SqlParser
import org.partiql.lang.util.*
import java.math.*
Expand Down Expand Up @@ -244,9 +247,9 @@ internal class EvaluatingCompiler(

private fun compileNAry(expr: NAry): ThunkEnv {
val (op, args, metas: MetaContainer) = expr

fun argThunks() = args.map { compileExprNode(it) }

return when (op) {
NAryOp.ADD -> compileNAryAdd(argThunks(), metas)
NAryOp.SUB -> compileNArySub(argThunks(), metas)
Expand All @@ -261,7 +264,7 @@ internal class EvaluatingCompiler(
NAryOp.GTE -> compileNAryGte(argThunks(), metas)
NAryOp.BETWEEN -> compileNAryBetween(argThunks(), metas)
NAryOp.LIKE -> compileNAryLike(args, argThunks(), metas)
NAryOp.IN -> compileNAryIn(args, metas)
NAryOp.IN -> compileNAryIn(args, metas)
NAryOp.NOT -> compileNAryNot(argThunks(), metas)
NAryOp.AND -> compileNAryAnd(argThunks(), metas)
NAryOp.OR -> compileNAryOr(argThunks(), metas)
Expand Down Expand Up @@ -440,7 +443,7 @@ internal class EvaluatingCompiler(
metas: MetaContainer): ThunkEnv {
val leftArg = compileExprNode(args[0])
val rightArg = args[1]

return when {
// When the right arg is a list of literals we use a Set to speed up the predicate
rightArg is ListExprNode && rightArg.values.all { it is Literal } -> {
Expand All @@ -454,7 +457,7 @@ internal class EvaluatingCompiler(
inSet.contains(value).exprValue()
}
}

else -> {
val rightArgThunk = compileExprNode(rightArg)

Expand All @@ -466,7 +469,7 @@ internal class EvaluatingCompiler(
}
}
}

private fun compileNAryNot(
argThunks: List<ThunkEnv>,
metas: MetaContainer): ThunkEnv {
Expand Down Expand Up @@ -536,20 +539,20 @@ internal class EvaluatingCompiler(
private fun compileNAryStringConcat(
argThunks: List<ThunkEnv>,
metas: MetaContainer): ThunkEnv {

return thunkFold(valueFactory.nullValue, metas, argThunks) { lValue, rValue ->
val lType = lValue.type
val rType = rValue.type

if(lType.isText && rType.isText) {
// null/missing propagation is handled before getting here
(lValue.stringValue() + rValue.stringValue()).exprValue()
(lValue.stringValue() + rValue.stringValue()).exprValue()
}
else {
err(
"Wrong argument type for ||",
ErrorCode.EVALUATOR_CONCAT_FAILED_DUE_TO_INCOMPATIBLE_TYPE,
errorContextFrom(metas).also {
errorContextFrom(metas).also {
it[Property.ACTUAL_ARGUMENT_TYPES] = listOf(lType, rType).toString()
},
internal = false)
Expand Down Expand Up @@ -1486,9 +1489,9 @@ internal class EvaluatingCompiler(
*
* Three cases
*
* 1. All arguments are literals, then compile and run the DFA
* 1. Search pattern and escape pattern are literals, compile the DFA. Running the DFA is deferred to evaluation time.
* 1. Pattern or escape (or both) are *not* literals, compile and running of DFA deferred to evaluation time.
* 1. All arguments are literals: compile and run the pattern at compile time.
* 1. Search pattern and escape pattern are literals: compile the pattern now. Running the pattern is deferred to evaluation time.
* 1. Pattern or escape (or both) are *not* literals: both compiling and running the pattern are deferred to evaluation time.
*
* ```
* <valueExpr> LIKE <patternExpr> [ESCAPE <escapeExpr>]
Expand All @@ -1507,14 +1510,14 @@ internal class EvaluatingCompiler(
val patternLocationMeta = patternExpr.metas.sourceLocationMeta
val escapeLocationMeta = escapeExpr?.metas?.sourceLocationMeta


// Note that the return value is a nullable and deferred.
// This is so that null short-circuits can be supported.
// The effective type is Either<Null, Either<Error, List<PatternPart>>>
fun getDfa(pattern: ExprValue, escape: ExprValue?): (() -> IDFAState)? {
val dfaArgs = listOfNotNull(pattern, escape)
fun getPatternParts(pattern: ExprValue, escape: ExprValue?): (() -> List<PatternPart>)? {
val patternArgs = listOfNotNull(pattern, escape)
when {
dfaArgs.any { it.type.isUnknown } -> return null
dfaArgs.any { !it.type.isText } -> return {
patternArgs.any { it.type.isUnknown } -> return null
patternArgs.any { !it.type.isText } -> return {
err("LIKE expression must be given non-null strings as input",
ErrorCode.EVALUATOR_LIKE_INVALID_INPUTS,
errorContextFrom(operatorMetas).also {
Expand All @@ -1524,77 +1527,77 @@ internal class EvaluatingCompiler(
internal = false)
}
else -> {
val (patternString: String, escapeChar: Int?, patternSize) =
val (patternString: String, escapeChar: Int?) =
checkPattern(pattern.ionValue, patternLocationMeta, escape?.ionValue, escapeLocationMeta)

val dfa =
if (patternString.isEmpty()) DFAEmptyPattern
else buildDfaFromPattern(patternString, escapeChar, patternSize)
val patternParts = when {
patternString.isEmpty() -> emptyList()
else -> parsePattern(patternString, escapeChar)
}

return { dfa }
return { patternParts }
}
}
}

/** See getDfa for more info on the DFA's odd type. */
fun runDfa(value: ExprValue, dfa: (() -> IDFAState)?): ExprValue {
fun runPatternParts(value: ExprValue, patternParts: (() -> List<PatternPart>)?): ExprValue {
return when {
dfa == null || value.type.isUnknown -> valueFactory.nullValue
!value.type.isText -> err(
patternParts == null || value.type.isUnknown -> valueFactory.nullValue
!value.type.isText -> err(
"LIKE expression must be given non-null strings as input",
ErrorCode.EVALUATOR_LIKE_INVALID_INPUTS,
errorContextFrom(operatorMetas).also {
it[Property.LIKE_VALUE] = value.ionValue.toString()
},
internal = false)
else -> dfa().run(value.stringValue()).exprValue()
else -> valueFactory.newBoolean(executePattern(patternParts(), value.stringValue()))
}
}

val valueThunk = argThunks[0]

// If the pattern and escape expressions are literals then we can can compile the DFA now and
// re-use it with every execution. Otherwise we must re-compile the DFA every time.
// If the pattern and escape expressions are literals then we can compile the pattern now and
// re-use it with every execution. Otherwise we must re-compile the pattern every time.

return when {
patternExpr is Literal && (escapeExpr == null || escapeExpr is Literal) -> {
val dfa = getDfa(
val patternParts = getPatternParts(
valueFactory.newFromIonValue(patternExpr.ionValue),
(escapeExpr as? Literal)?.ionValue?.let { valueFactory.newFromIonValue(it) })

// If valueExpr is also a literal then we can evaluate this at compile time and return a constant.
if (valueExpr is Literal) {
val resultValue = runDfa(valueFactory.newFromIonValue(valueExpr.ionValue), dfa)
val resultValue = runPatternParts(valueFactory.newFromIonValue(valueExpr.ionValue), patternParts)
return thunkEnv(operatorMetas) { resultValue }
}
else {
thunkEnv(operatorMetas) { env ->
val value = valueThunk(env)
runDfa(value, dfa)
runPatternParts(value, patternParts)
}
}
}
else -> {
val patternThunk = argThunks[1]
when {
argThunks.size == 2 -> {
//thunk that re-compiles the DFA every evaluation without a custom escape sequence
//thunk that re-compiles the pattern every evaluation without a custom escape sequence
thunkEnv(operatorMetas) { env ->
val value = valueThunk(env)
val pattern = patternThunk(env)
val dfa = getDfa(pattern, null)
runDfa(value, dfa)
val pps = getPatternParts(pattern, null)
runPatternParts(value, pps)
}
}
else -> {
//thunk that re-compiles the DFA every evaluation but *with* a custom escape sequence
//thunk that re-compiles the pattern every evaluation but *with* a custom escape sequence
val escapeThunk = argThunks[2]
thunkEnv(operatorMetas) { env ->
val value = valueThunk(env)
val pattern = patternThunk(env)
val escape = escapeThunk(env)
val dfa = getDfa(pattern, escape)
runDfa(value, dfa)
val pps = getPatternParts(pattern, escape)
runPatternParts(value, pps)
}
}
}
Expand Down Expand Up @@ -1631,8 +1634,9 @@ internal class EvaluatingCompiler(
patternLocationMeta: SourceLocationMeta?,
escape: IonValue?,
escapeLocationMeta: SourceLocationMeta?
): Triple<String, Int?, Int> {
val patternString = pattern.stringValue()?.let { it }
): Pair<String, Int?> {

val patternString = pattern.stringValue()
?: err("Must provide a non-null value for PATTERN in a LIKE predicate: $pattern",
errorContextFrom(patternLocationMeta),
internal = false)
Expand All @@ -1642,7 +1646,6 @@ internal class EvaluatingCompiler(
val escapeCharCodePoint = escapeCharString.codePointAt(0) // escape is a string of length 1
val validEscapedChars = setOf('_'.toInt(), '%'.toInt(), escapeCharCodePoint)
val iter = patternString.codePointSequence().iterator()
var count = 0

while (iter.hasNext()) {
val current = iter.next()
Expand All @@ -1655,11 +1658,10 @@ internal class EvaluatingCompiler(
},
internal = false)
}
count++
}
return Triple(patternString, escapeCharCodePoint, count)
return Pair(patternString, escapeCharCodePoint)
}
return Triple(patternString, null, patternString.length)
return Pair(patternString, null)
}

/**
Expand Down
Loading

0 comments on commit e80967f

Please sign in to comment.