Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add EXCLUDE to the parser, ast, plan, and plan schema inferencer #1226

Merged
merged 12 commits into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,16 @@ Thank you to all who have contributed!
- Support parsing, planning, and evaluation of Bitwise AND operator (&).
- The Bitwise And Operator only works for integer operands.
- The operator precedence may change based on the pending operator precedence [RFC](https://github.com/partiql/partiql-docs/issues/50).
- **EXPERIMENTAL** Adds `EXCLUDE` to parser, ast, plan, and plan schema inferencer
alancai98 marked this conversation as resolved.
Show resolved Hide resolved
- This feature is marked as experimental until an RFC is added https://github.com/partiql/partiql-spec/issues/39
- NOTE: this feature is not currently implemented in the evaluator

### Changed

### Deprecated

### Fixed

- Fixes schema inferencer behavior for ORDER BY

### Removed

### Security
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import com.amazon.ionelement.api.ionSymbol
import com.amazon.ionelement.api.metaContainerOf
import org.partiql.ast.AstNode
import org.partiql.ast.DatetimeField
import org.partiql.ast.Exclude
import org.partiql.ast.Expr
import org.partiql.ast.From
import org.partiql.ast.GraphMatch
Expand Down Expand Up @@ -665,14 +666,15 @@ private class AstTranslator(val metas: Map<String, MetaContainer>) : AstBaseVisi
}
val project = visitSelect(node.select, ctx)
val from = visitFrom(node.from, ctx)
val exclude = node.exclude?.let { visitExclude(it, ctx) }
val fromLet = node.let?.let { visitLet(it, ctx) }
val where = node.where?.let { visitExpr(it, ctx) }
val groupBy = node.groupBy?.let { visitGroupBy(it, ctx) }
val having = node.having?.let { visitExpr(it, ctx) }
val orderBy = node.orderBy?.let { visitOrderBy(it, ctx) }
val limit = node.limit?.let { visitExpr(it, ctx) }
val offset = node.offset?.let { visitExpr(it, ctx) }
select(setq, project, from, fromLet, where, groupBy, having, orderBy, limit, offset, metas)
select(setq, project, exclude, from, fromLet, where, groupBy, having, orderBy, limit, offset, metas)
}

/**
Expand Down Expand Up @@ -750,6 +752,48 @@ private class AstTranslator(val metas: Map<String, MetaContainer>) : AstBaseVisi
join(type, lhs, rhs, condition, metas)
}

/**
 * Translates an `EXCLUDE` clause into the legacy [PartiqlAst.ExcludeOp] node, carrying over
 * the translated exclude expressions and the metas supplied by `translate`.
 */
override fun visitExclude(node: Exclude, ctx: Ctx): PartiqlAst.ExcludeOp = translate(node) { metas ->
    excludeOp(node.exprs.translate<PartiqlAst.ExcludeExpr>(ctx), metas)
}

/**
 * Translates a single exclude expression: its root identifier is visited first, then its
 * steps are translated, and both are combined into the legacy exclude-expression node.
 */
override fun visitExcludeExcludeExpr(node: Exclude.ExcludeExpr, ctx: Ctx) = translate(node) { metas ->
    excludeExpr(
        root = visitIdentifierSymbol(node.root, ctx),
        steps = node.steps.translate<PartiqlAst.ExcludeStep>(ctx),
        metas
    )
}

/**
 * Delegates to the superclass implementation and narrows its result to the legacy
 * [PartiqlAst.ExcludeStep] type.
 */
override fun visitExcludeStep(node: Exclude.Step, ctx: Ctx): PartiqlAst.ExcludeStep {
    val translated = super.visitExcludeStep(node, ctx)
    return translated as PartiqlAst.ExcludeStep
}

/**
 * Translates a tuple-attribute exclude step. The attribute's symbol text and its case
 * sensitivity (converted to the legacy representation) are wrapped in a legacy identifier.
 */
override fun visitExcludeStepExcludeTupleAttr(node: Exclude.Step.ExcludeTupleAttr, ctx: Ctx) = translate(node) { metas ->
    val attrSymbol = node.symbol
    // Note: the identifier itself is built without metas; only the step node receives them.
    val legacyAttr = identifier(attrSymbol.symbol, attrSymbol.caseSensitivity.toLegacyCaseSensitivity())
    excludeTupleAttr(legacyAttr, metas)
}

/**
 * Translates a collection-index exclude step; the index is widened to `Long` for the legacy node.
 */
override fun visitExcludeStepExcludeCollectionIndex(node: Exclude.Step.ExcludeCollectionIndex, ctx: Ctx) =
    translate(node) { metas -> excludeCollectionIndex(node.index.toLong(), metas) }

/**
 * Translates a tuple-wildcard exclude step; the legacy node carries no data besides its metas.
 */
override fun visitExcludeStepExcludeTupleWildcard(node: Exclude.Step.ExcludeTupleWildcard, ctx: Ctx) =
    translate(node) { metas -> excludeTupleWildcard(metas) }

/**
 * Translates a collection-wildcard exclude step; the legacy node carries no data besides its metas.
 */
override fun visitExcludeStepExcludeCollectionWildcard(node: Exclude.Step.ExcludeCollectionWildcard, ctx: Ctx) =
    translate(node) { metas -> excludeCollectionWildcard(metas) }

override fun visitLet(node: Let, ctx: Ctx) = translate(node) { metas ->
val bindings = node.bindings.translate<PartiqlAst.LetBinding>(ctx)
let(bindings, metas)
Expand Down
17 changes: 17 additions & 0 deletions partiql-ast/src/main/pig/partiql.ion
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ may then be further optimized by selecting better implementations of each operat
(select
(setq (? set_quantifier))
(project projection)
(exclude_clause (? exclude_op))
(from from_source)
(from_let (? let))
(where (? expr))
Expand Down Expand Up @@ -220,6 +221,22 @@ may then be further optimized by selecting better implementations of each operat
// For `<expr> [AS <id>]`
(project_expr expr::expr as_alias::(? symbol)))

// EXCLUDE clause
(product exclude_op exprs::(* exclude_expr 1))

(product exclude_expr root::identifier steps::(* exclude_step 1))

(sum exclude_step
// `someRoot.someField` case sensitivity depends on if `someField` is quoted
// `someRoot[<string literal>]` is equivalent to `someRoot."<string literal>"` (case-sensitive)
(exclude_tuple_attr attr::identifier)
// `someRoot[<int literal>]`
(exclude_collection_index index::int)
// `someRoot.*`
alancai98 marked this conversation as resolved.
Show resolved Hide resolved
(exclude_tuple_wildcard)
// `someRoot[*]`
(exclude_collection_wildcard))

// A list of LET bindings
(product let let_bindings::(* let_binding 1))

Expand Down
17 changes: 17 additions & 0 deletions partiql-ast/src/main/resources/partiql_ast.ion
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,7 @@ expr::[
// The PartiQL `<sfw>` query expression, think SQL `<query specification>`
s_f_w::{
select: select, // oneof SELECT / SELECT VALUE / PIVOT
exclude: optional::exclude,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we allow PIVOT x.v AT x.a EXCLUDE ...?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes that will be allowed. There's no tests yet since PIVOT is not yet fully implemented in the plan schema inferencer.

I will add a parser test showing that it's allowed though.

from: from,
let: optional::let,
where: optional::expr,
Expand Down Expand Up @@ -561,6 +562,22 @@ select::[
},
]

exclude::{
exprs: list::[exclude_expr],
_: [
exclude_expr::{
root: '.identifier.symbol',
steps: list::[step],
},
step::[
exclude_tuple_attr::{ symbol: '.identifier.symbol' },
exclude_collection_index::{ index: int },
exclude_tuple_wildcard::{},
exclude_collection_wildcard::{},
]
alancai98 marked this conversation as resolved.
Show resolved Hide resolved
]
}

// PartiQL FROM Clause Variants — https://partiql.org/dql/from.html
from::[

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ class GroupByPathExpressionVisitorTransform(
val projection = currentAndUnshadowedTransformer.transformExprSelect_project(node)

// The scope of the expressions in the FROM clause is the same as that of the parent scope.
val exclude = unshadowedTransformer.transformExprSelect_excludeClause(node)
val from = this.transformExprSelect_from(node)
val fromLet = unshadowedTransformer.transformExprSelect_fromLet(node)
val where = unshadowedTransformer.transformExprSelect_where(node)
Expand All @@ -116,6 +117,7 @@ class GroupByPathExpressionVisitorTransform(
PartiqlAst.Expr.Select(
setq = node.setq,
project = projection,
excludeClause = exclude,
from = from,
fromLet = fromLet,
where = where,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ abstract class VisitorTransformBase : PartiqlAst.VisitorTransform() {
* infinite recursion.
*/
fun transformExprSelectEvaluationOrder(node: PartiqlAst.Expr.Select): PartiqlAst.Expr {
val exclude = transformExprSelect_excludeClause(node)
val from = transformExprSelect_from(node)
val fromLet = transformExprSelect_fromLet(node)
val where = transformExprSelect_where(node)
Expand All @@ -51,6 +52,7 @@ abstract class VisitorTransformBase : PartiqlAst.VisitorTransform() {
PartiqlAst.Expr.Select(
setq = setq,
project = project,
excludeClause = exclude,
from = from,
fromLet = fromLet,
where = where,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,11 @@ import org.partiql.lang.types.TypedOpParameter
import org.partiql.lang.types.UnknownArguments
import org.partiql.lang.util.cartesianProduct
import org.partiql.plan.Arg
import org.partiql.plan.Attribute
import org.partiql.plan.Binding
import org.partiql.plan.Case
import org.partiql.plan.ExcludeExpr
import org.partiql.plan.ExcludeStep
import org.partiql.plan.Plan
import org.partiql.plan.PlanNode
import org.partiql.plan.Property
Expand Down Expand Up @@ -155,6 +158,150 @@ internal object PlanTyper : PlanRewriter<PlanTyper.Context>() {
)
}

/**
 * Types an `EXCLUDE` relation operator. This is the initial implementation of `EXCLUDE` schema inference.
 *
 * The input relation is typed first. Each exclude expression is then applied, one after another, to the input's
 * type environment. The returned node carries the typed input, the resulting type environment, and the input's
 * properties.
 *
 * Until an RFC is finalized for `EXCLUDE` (https://github.com/partiql/partiql-spec/issues/39), this behavior is
 * considered experimental and subject to change.
 *
 * Edge cases still under discussion, with the current behavior for each:
 * - EXCLUDE on a tuple attribute that doesn't exist -- give an error/warning?
 *      - currently no error
 * - EXCLUDE on a tuple attribute that has duplicates -- give an error/warning? exclude one? exclude both?
 *      - currently excludes both w/ no error
 * - EXCLUDE on a collection index as the last step -- mark element type as optional?
 *      - currently element type as-is
 * - EXCLUDE on a collection index w/ remaining path steps -- mark last step's type as optional?
 *      - currently marks last step's type as optional
 * - EXCLUDE on a binding tuple variable (e.g. SELECT ... EXCLUDE t FROM t) -- error?
 *      - currently a parser error
 * - EXCLUDE on a union type -- give an error/warning? no-op? exclude on each type in union?
 *      - currently exclude on each union type
 * - If SELECT list includes an attribute that is excluded, we could consider giving an error in PlanTyper or
 * some other semantic pass
 *      - currently does not give an error
 */
override fun visitRelExclude(node: Rel.Exclude, ctx: Context): Rel.Exclude {
    val typedInput = visitRel(node.input, ctx)

    // Thread the type environment through every exclude expression in declaration order.
    var prunedEnv: List<Attribute> = typedInput.getTypeEnv()
    for (path in node.exprs) {
        prunedEnv = excludeExpr(prunedEnv, path, ctx)
    }

    val typedCommon = node.common.copy(
        typeEnv = prunedEnv,
        properties = typedInput.getProperties()
    )
    return node.copy(input = typedInput, common = typedCommon)
}

private fun excludeExpr(attrs: List<Attribute>, expr: ExcludeExpr, ctx: Context): List<Attribute> {
val resultAttrs = mutableListOf<Attribute>()
attrs.forEach { attr ->
val rootId = expr.root
if (attr.name == rootId || (expr.rootCase == Case.INSENSITIVE && attr.name.equals(expr.root, ignoreCase = true))) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if this would be required as part of the RFC, but if we wanted to make sure we don't end up in a situation such as:

SELECT * EXCLUDE nonsense.a
FROM t

we could:

val attrsExist = attrs.find { attr -> attr.name == .. } != null
if (attrsExist.not()) { handleUnresolvedDescriptor(..) }
attrs.forEach { ->
  if (attr.name == ..) {
    ..

It should make sure that we don't accidentally break future queries. It's IMO better to error now even though we aren't sure what the RFC will say. Then, if we allow nonexistent bindings after the RFC, we can always remove the error? Then you can add the second test that @yliuuuu mentioned above.

@yliuuuu A couple more test cases that can be beneficial:

LMK what you think.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, it would surely help users who write:

SELECT a
EXCLUDE a.b
FROM <<
  { 'a': { 'b': 1, 'c': 2 } }
>> AS t

Since we are specifically looking in the bindings coming from the SCAN, when we look for the EXCLUDE a.b (and specifically looking for a), we are silently ignoring the fact that a isn't in the EXCLUDE's input bindings. So, even though the projection list can use a, the EXCLUDE is silently letting this data pass through. The current implementation, if I'm not mistaken, will return:

<<
  { 'a': { 'b': 1, 'c': 2 } }
>>

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since resolution of the EXCLUDE expressions is not defined, I think it's perfectly fine to mandate that users write ... EXCLUDE t.a.b explicitly. Though, if we silently let them write a.b without notifying them that a isn't a binding (and therefore they think that their query will strip b), we'll be leaking data.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fair. I had originally wanted to punt on the erroring behavior to form a consistent mental model for when to give an error in the RFC. The more I think about it though, if the exclude expr root is not in the binding tuples, then we should give some error. I'll adjust to at least give an error in this case (i.e. when the root doesn't exist)

if (expr.steps.isEmpty()) {
throw IllegalStateException("Empty `ExcludeExpr.steps` encountered. This should have been caught by the parser.")
} else {
val newType = excludeExprSteps(attr.type, expr.steps, lastStepAsOptional = false, ctx)
resultAttrs.add(
attr.copy(
type = newType
)
)
}
} else {
resultAttrs.add(
attr
)
}
}
return resultAttrs
}

private fun excludeExprSteps(type: StaticType, steps: List<ExcludeStep>, lastStepAsOptional: Boolean, ctx: Context): StaticType {
/**
 * Applies the first of [steps] to every field of the struct type [s] and returns a copy of [s] holding the
 * resulting fields.
 *
 * A field is targeted when the first step is a tuple attribute matching the field's key (case-insensitively if
 * the step's case is [Case.INSENSITIVE]) or when the first step is a tuple wildcard. For a targeted field:
 * - if further steps remain, they are applied recursively to the field's type;
 * - otherwise the field is dropped, unless [lastStepAsOptional] is set, in which case it is kept with its type
 *   made optional.
 *
 * Fields that are not targeted, and all fields when the first step is any other kind of step, are kept with
 * their type unchanged (no error is raised).
 */
fun excludeExprStepsStruct(s: StructType, steps: List<ExcludeStep>, lastStepAsOptional: Boolean): StaticType {
    val head = steps.first()
    val rest = steps.drop(1)

    // Result for a field targeted by `head`; `null` means the field is removed from the struct.
    fun applyTo(field: StructType.Field): StructType.Field? = when {
        rest.isNotEmpty() -> StructType.Field(field.key, excludeExprSteps(field.value, rest, lastStepAsOptional, ctx))
        lastStepAsOptional -> StructType.Field(field.key, field.value.asOptional())
        else -> null
    }

    val keptFields = s.fields.mapNotNull { field ->
        when (head) {
            is ExcludeStep.TupleAttr -> {
                val isTarget = field.key == head.attr ||
                    (head.case == Case.INSENSITIVE && field.key.equals(head.attr, ignoreCase = true))
                if (isTarget) applyTo(field) else field
            }
            is ExcludeStep.TupleWildcard -> applyTo(field)
            // currently no change to field.value and no error thrown; could consider an error/warning in
            // the future
            else -> StructType.Field(field.key, field.value)
        }
    }
    return s.copy(fields = keptFields)
}

fun excludeExprStepsCollection(c: CollectionType, steps: List<ExcludeStep>, lastStepAsOptional: Boolean): StaticType {
var elementType = c.elementType
when (steps.first()) {
is ExcludeStep.CollectionIndex -> {
if (steps.size > 1) {
elementType = excludeExprSteps(elementType, steps.drop(1), lastStepAsOptional = true, ctx)
}
Comment on lines +281 to +283
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sanity check here.
So if we have {'a' : [0, 1, 2]} and exclude a[0], the result will be {'a' : [1,2]}, hence optional, i.e., unionOf(missing, int).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you elaborate on this?

I would assume that the static type of a would be a ListType(INT). IMO, even if we had:

SELECT * EXCLUDE t.a[0]
FROM << { 'a': [0] } >> AS t

I would assume the static type of t.a would still be ListType(INT) because we can't know how long the list actually is (especially if t is part of the catalog). If t.a started without any elements, I also think it would still be ListType(INT). That being said @alancai98 , do we expect this to be a runtime error when we attempt to exclude an index that doesn't exist (essentially OutOfBoundsException)? How about when we try to exclude an attribute in a tuple that doesn't exist? This one we can sometimes know statically, but what happens at runtime if it's a union type?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sanity check here. So if we have {'a' : [0, 1, 2]} and exclude a[0], the result will be {'a' : [1,2]}, hence optional, i.e., unionOf(missing, int).

Not quite. In this case, a's type would be ListType(INT). The lastStepAsOptional flag is currently only used when there's a subsequent field to exclude after the collection index (such as EXCLUDE t.a[0].field -- in this case, field's type would be unionOf(missing, <field's original type>)).

This is demonstrated by the below test case:

SuccessTestCase(
name = "EXCLUDE SELECT star list index and list index field",
query = """SELECT *
EXCLUDE
t.a.b.c[0],
t.a.b.c[1].field
FROM [{
'a': {
'b': {
'c': [
{
'field': 0 -- c[0]
},
{
'field': 1 -- c[1]
},
{
'field': 2 -- c[2]
}
]
}
},
'foo': 'bar'
}] AS t""",
expected = BagType(
StructType(
fields = mapOf(
"a" to StructType(
fields = mapOf(
"b" to StructType(
fields = mapOf(
"c" to ListType(
elementType = StructType(
fields = mapOf(
"field" to AnyOfType(
setOf(
StaticType.INT,
StaticType.MISSING // c[1]'s `field` was excluded
)
)
),
contentClosed = true,
constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true))
)
)
),
contentClosed = true,
constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true))
)
),
contentClosed = true,
constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true))
),
"foo" to StaticType.STRING
),
contentClosed = true,
constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true), TupleConstraint.Ordered)
)
)
),
.

^ In above, c's element type is not a union of StructType(...) and MISSING.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding runtime behavior, this hasn't yet been decided and will be part of the upcoming RFC. The schema inferencer doesn't currently give an error if an attribute doesn't exist. We can choose to add errors in the future. I can add this note to the EXCLUDE code.

It currently gives a data type mismatch if the types don't match up:

// assuming a is a list
EXCLUDE t.a.*

^ will give an error

}
is ExcludeStep.CollectionWildcard -> {
if (steps.size > 1) {
elementType =
excludeExprSteps(elementType, steps.drop(1), lastStepAsOptional = lastStepAsOptional, ctx)
}
// currently no change to elementType if collection wildcard is last element; this behavior could
// change based on RFC definition
}
else -> {
// currently no change to elementType and no error thrown; could consider an error/warning in
// the future
}
}
return when (c) {
is BagType -> c.copy(elementType)
is ListType -> c.copy(elementType)
is SexpType -> c.copy(elementType)
}
}

return when (type) {
is StructType -> excludeExprStepsStruct(type, steps, lastStepAsOptional)
is CollectionType -> excludeExprStepsCollection(type, steps, lastStepAsOptional)
is AnyOfType -> {
StaticType.unionOf(
type.types.map {
excludeExprSteps(it, steps, lastStepAsOptional, ctx)
}.toSet()
)
}
else -> type
}.flatten()
}

override fun visitRelUnpivot(node: Rel.Unpivot, ctx: Context): Rel.Unpivot {
val from = node

Expand Down Expand Up @@ -227,7 +374,8 @@ internal object PlanTyper : PlanRewriter<PlanTyper.Context>() {
return node.copy(
input = input,
common = node.common.copy(
typeEnv = typeEnv
typeEnv = typeEnv,
properties = input.getProperties()
Comment on lines +391 to +392
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change was for fixing the ORDER BY schema inference behavior.

Tests with corrected behavior:

SuccessTestCase(
name = "ORDER BY int",
catalog = CATALOG_AWS,
catalogPath = listOf("ddb"),
query = "SELECT * FROM pets ORDER BY id",
expected = TABLE_AWS_DDB_PETS_LIST
),
SuccessTestCase(
name = "ORDER BY str",
catalog = CATALOG_AWS,
catalogPath = listOf("ddb"),
query = "SELECT * FROM pets ORDER BY breed",
expected = TABLE_AWS_DDB_PETS_LIST
),
SuccessTestCase(
name = "ORDER BY str",
catalog = CATALOG_AWS,
catalogPath = listOf("ddb"),
query = "SELECT * FROM pets ORDER BY unknown_col",
expected = TABLE_AWS_DDB_PETS_LIST
),
.

Previously these tests would output a bag rather than a list.

)
)
}
Expand Down Expand Up @@ -1006,6 +1154,7 @@ internal object PlanTyper : PlanRewriter<PlanTyper.Context>() {
is Rel.Scan -> this.common
is Rel.Sort -> this.common
is Rel.Unpivot -> this.common
is Rel.Exclude -> this.common
}

private fun inferPathComponentExprType(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ internal object PlanUtils {
is Rel.Scan -> input.common.typeEnv
is Rel.Sort -> input.common.typeEnv
is Rel.Unpivot -> input.common.typeEnv
is Rel.Exclude -> input.common.typeEnv
}

internal fun Rex.addType(type: StaticType): Rex = when (this) {
Expand Down
Loading