Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Appropriately fast LIKE pattern compilation #286

Merged
merged 9 commits into from
Sep 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 37 additions & 35 deletions lang/src/org/partiql/lang/eval/EvaluatingCompiler.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ import org.partiql.lang.ast.passes.*
import org.partiql.lang.domains.PartiqlAst
import org.partiql.lang.errors.*
import org.partiql.lang.eval.binding.*
import org.partiql.lang.eval.like.PatternPart
import org.partiql.lang.eval.like.executePattern
import org.partiql.lang.eval.like.parsePattern
import org.partiql.lang.syntax.SqlParser
import org.partiql.lang.util.*
import java.math.*
Expand Down Expand Up @@ -1640,9 +1643,9 @@ internal class EvaluatingCompiler(
*
* Three cases
*
* 1. All arguments are literals, then compile and run the DFA
* 1. Search pattern and escape pattern are literals, compile the DFA. Running the DFA is deferred to evaluation time.
* 1. Pattern or escape (or both) are *not* literals, compile and running of DFA deferred to evaluation time.
* 1. All arguments are literals, then compile and run the pattern
* 1. Search pattern and escape pattern are literals, compile the pattern. Running the pattern deferred to evaluation time.
* 1. Pattern or escape (or both) are *not* literals, compile and running of pattern deferred to evaluation time.
*
* ```
* <valueExpr> LIKE <patternExpr> [ESCAPE <escapeExpr>]
Expand All @@ -1661,14 +1664,14 @@ internal class EvaluatingCompiler(
val patternLocationMeta = patternExpr.metas.sourceLocationMeta
val escapeLocationMeta = escapeExpr?.metas?.sourceLocationMeta


// Note that the return value is a nullable and deferred.
// This is so that null short-circuits can be supported.
// The effective type is Either<Null, Either<Error, IDFA>>
fun getDfa(pattern: ExprValue, escape: ExprValue?): (() -> IDFAState)? {
val dfaArgs = listOfNotNull(pattern, escape)
fun getPatternParts(pattern: ExprValue, escape: ExprValue?): (() -> List<PatternPart>)? {
val patternArgs = listOfNotNull(pattern, escape)
when {
dfaArgs.any { it.type.isUnknown } -> return null
dfaArgs.any { !it.type.isText } -> return {
patternArgs.any { it.type.isUnknown } -> return null
patternArgs.any { !it.type.isText } -> return {
err("LIKE expression must be given non-null strings as input",
ErrorCode.EVALUATOR_LIKE_INVALID_INPUTS,
errorContextFrom(operatorMetas).also {
Expand All @@ -1678,77 +1681,77 @@ internal class EvaluatingCompiler(
internal = false)
}
else -> {
val (patternString: String, escapeChar: Int?, patternSize) =
val (patternString: String, escapeChar: Int?) =
checkPattern(pattern.ionValue, patternLocationMeta, escape?.ionValue, escapeLocationMeta)

val dfa =
if (patternString.isEmpty()) DFAEmptyPattern
else buildDfaFromPattern(patternString, escapeChar, patternSize)
val patternParts = when {
patternString.isEmpty() -> emptyList()
else -> parsePattern(patternString, escapeChar)
}

return { dfa }
return { patternParts }
}
}
}

/** See getDfa for more info on the DFA's odd type. */
fun runDfa(value: ExprValue, dfa: (() -> IDFAState)?): ExprValue {
fun runPatternParts(value: ExprValue, patternParts: (() -> List<PatternPart>)?): ExprValue {
return when {
dfa == null || value.type.isUnknown -> valueFactory.nullValue
!value.type.isText -> err(
patternParts == null || value.type.isUnknown -> valueFactory.nullValue
!value.type.isText -> err(
"LIKE expression must be given non-null strings as input",
ErrorCode.EVALUATOR_LIKE_INVALID_INPUTS,
errorContextFrom(operatorMetas).also {
it[Property.LIKE_VALUE] = value.ionValue.toString()
},
internal = false)
else -> dfa().run(value.stringValue()).exprValue()
else -> valueFactory.newBoolean(executePattern(patternParts(), value.stringValue()))
}
}

val valueThunk = argThunks[0]

// If the pattern and escape expressions are literals then we can can compile the DFA now and
// re-use it with every execution. Otherwise we must re-compile the DFA every time.
// If the pattern and escape expressions are literals then we can can compile the pattern now and
// re-use it with every execution. Otherwise we must re-compile the pattern every time.

return when {
patternExpr is Literal && (escapeExpr == null || escapeExpr is Literal) -> {
val dfa = getDfa(
val patternParts = getPatternParts(
valueFactory.newFromIonValue(patternExpr.ionValue),
(escapeExpr as? Literal)?.ionValue?.let { valueFactory.newFromIonValue(it) })

// If valueExpr is also a literal then we can evaluate this at compile time and return a constant.
if (valueExpr is Literal) {
val resultValue = runDfa(valueFactory.newFromIonValue(valueExpr.ionValue), dfa)
val resultValue = runPatternParts(valueFactory.newFromIonValue(valueExpr.ionValue), patternParts)
return thunkFactory.thunkEnv(operatorMetas) { resultValue }
}
else {
thunkFactory.thunkEnv(operatorMetas) { env ->
val value = valueThunk(env)
runDfa(value, dfa)
runPatternParts(value, patternParts)
}
}
}
else -> {
val patternThunk = argThunks[1]
when {
argThunks.size == 2 -> {
//thunk that re-compiles the DFA every evaluation without a custom escape sequence
//thunk that re-compiles the pattern every evaluation without a custom escape sequence
thunkFactory.thunkEnv(operatorMetas) { env ->
val value = valueThunk(env)
val pattern = patternThunk(env)
val dfa = getDfa(pattern, null)
runDfa(value, dfa)
val pps = getPatternParts(pattern, null)
runPatternParts(value, pps)
}
}
else -> {
//thunk that re-compiles the DFA every evaluation but *with* a custom escape sequence
//thunk that re-compiles the pattern every evaluation but *with* a custom escape sequence
val escapeThunk = argThunks[2]
thunkFactory.thunkEnv(operatorMetas) { env ->
val value = valueThunk(env)
val pattern = patternThunk(env)
val escape = escapeThunk(env)
val dfa = getDfa(pattern, escape)
runDfa(value, dfa)
val pps = getPatternParts(pattern, escape)
runPatternParts(value, pps)
}
}
}
Expand Down Expand Up @@ -1785,8 +1788,9 @@ internal class EvaluatingCompiler(
patternLocationMeta: SourceLocationMeta?,
escape: IonValue?,
escapeLocationMeta: SourceLocationMeta?
): Triple<String, Int?, Int> {
val patternString = pattern.stringValue()?.let { it }
): Pair<String, Int?> {

val patternString = pattern.stringValue()
?: err("Must provide a non-null value for PATTERN in a LIKE predicate: $pattern",
errorContextFrom(patternLocationMeta),
internal = false)
Expand All @@ -1796,7 +1800,6 @@ internal class EvaluatingCompiler(
val escapeCharCodePoint = escapeCharString.codePointAt(0) // escape is a string of length 1
val validEscapedChars = setOf('_'.toInt(), '%'.toInt(), escapeCharCodePoint)
val iter = patternString.codePointSequence().iterator()
var count = 0

while (iter.hasNext()) {
val current = iter.next()
Expand All @@ -1809,11 +1812,10 @@ internal class EvaluatingCompiler(
},
internal = false)
}
count++
}
return Triple(patternString, escapeCharCodePoint, count)
return Pair(patternString, escapeCharCodePoint)
}
return Triple(patternString, null, patternString.length)
return Pair(patternString, null)
}

/**
Expand Down
Loading