Skip to content

Commit

Permalink
Updates to ideally support the majority of TPC-H queries
Browse files Browse the repository at this point in the history
- Remove aggregate expressions type from generalized expressions. (only allow aggregate expressions as root expressions for aggregation)
- Update function mapping to support options
- Remove named structs from type unions (should only be used in special places as root, not in arbitrary hierarchy)
- Add project, join, fetch, aggregate, sort, set logical relational operations.
- Introduce key scalar and aggregate functions in functions yaml.
- Remove old extensions docs

Address substrait-io#42, substrait-io#43, substrait-io#44
  • Loading branch information
jacques-n committed Oct 3, 2021
1 parent eb2cc62 commit 1ca0bfb
Show file tree
Hide file tree
Showing 11 changed files with 500 additions and 68 deletions.
15 changes: 14 additions & 1 deletion binary/expression.proto
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ message Expression {
Literal literal = 1;
FieldReference selection = 2;
ScalarFunction scalar_function = 3;
AggregateFunction aggregate_function = 4;
WindowFunction window_function = 5;
IfThen if_then = 6;
SwitchExpression switch_expression = 7;
Expand Down Expand Up @@ -75,20 +74,34 @@ message Expression {
// Invocation of a scalar function inside an expression tree.
message ScalarFunction {
  // Reference to the function being invoked.
  Extensions.FunctionId id = 1;

  // Expressions passed to the function as its arguments.
  repeated Expression args = 2;

  // Type of the value this invocation yields.
  Type output_type = 3;
}

// Invocation of an aggregate function. It is referenced from
// AggregateRel.Measure rather than from the general Expression oneof.
message AggregateFunction {
  // Reference to the aggregate function being invoked.
  Extensions.FunctionId id = 1;

  // Expressions passed to the function as its arguments.
  repeated Expression args = 2;

  // Sort fields attached to this invocation.
  // NOTE(review): presumably the ordering applied to the aggregated input
  // before the function consumes it; confirm.
  repeated SortField sorts = 3;

  // Aggregation phase this invocation represents.
  AggregationPhase phase = 4;

  // Type of the value this invocation yields.
  Type output_type = 5;
}

// Phase of an aggregation. Each non-zero value is named
// <what is consumed>_TO_<what is produced>.
enum AggregationPhase {
  // Zero value: no phase specified.
  UNKNOWN = 0;

  // Initial input in, intermediate state out.
  INITIAL_TO_INTERMEDIATE = 1;

  // Intermediate state in, intermediate state out.
  INTERMEDIATE_TO_INTERMEDIATE = 2;

  // Initial input in, final result out.
  INITIAL_TO_RESULT = 3;

  // Intermediate state in, final result out.
  INTERMEDIATE_TO_RESULT = 4;
}


message WindowFunction {
Extensions.FunctionId id = 1;
repeated Expression partitions = 2;
repeated SortField sorts = 3;
Bound upper_bound = 4;
Bound lower_bound = 5;
AggregationPhase phase = 6;
Type output_type = 7;

message Bound {

Expand Down
5 changes: 5 additions & 0 deletions binary/extensions.proto
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ message Extensions {
ExtensionId extension_id = 2;
string name = 3;
uint32 index = 4;
repeated Option options = 5;
// One key/value option entry; the element type of the sibling repeated
// `options` field.
message Option {
  // Option name.
  string key = 1;

  // Option setting, carried as a string.
  string value = 2;
}
}
}

Expand Down
1 change: 0 additions & 1 deletion binary/parameterized_types.proto
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ message ParameterizedType {
ParameterizedDecimal decimal = 24;

ParameterizedStruct struct = 25;
ParameterizedNamedStruct named_struct = 26;
ParameterizedList list = 27;
ParameterizedMap map = 28;

Expand Down
78 changes: 77 additions & 1 deletion binary/relations.proto
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,14 @@ message ReadRel {
Expression filter = 3;
MaskExpression projection = 4;


oneof read_type {
VirtualTable virtual_table = 5;
LocalFiles local_files = 6;
NamedTable named_table = 7;
}

// Read source that identifies a table by name; one of the `read_type`
// alternatives.
message NamedTable {
  // Name components of the table.
  // NOTE(review): presumably an ordered path such as catalog/schema/table;
  // confirm.
  repeated string names = 1;
}


Expand Down Expand Up @@ -83,17 +88,88 @@ message ReadRel {

}

// Project relational operation: a list of expressions over one input.
message ProjectRel {
  // Properties shared by all relation kinds.
  RelCommon common = 1;

  // Relation whose records are projected.
  Rel input = 2;

  // Expressions of the projection.
  // NOTE(review): whether these replace or extend the input's fields is not
  // stated here; confirm against the specification.
  repeated Expression expressions = 3;
}

// Join relational operation combining a left and a right input relation.
message JoinRel {
  // Kinds of join that can be requested.
  enum JoinType {
    // Zero value: join kind not specified.
    UNKNOWN = 0;
    INNER = 1;
    OUTER = 2;
    LEFT = 3;
    RIGHT = 4;
  }

  // Properties shared by all relation kinds.
  RelCommon common = 1;

  // Left-hand input relation.
  Rel left = 2;

  // Right-hand input relation.
  Rel right = 3;

  // Join expression.
  // NOTE(review): presumably the boolean join condition; confirm.
  Expression expression = 4;

  // Additional filter expression.
  // NOTE(review): presumably evaluated on the joined output; confirm.
  Expression post_join_filter = 5;

  // Kind of join to perform. Previously JoinType was declared but no field
  // carried it, so a plan could not say which join it wanted. Uses a fresh
  // field number so existing serialized messages remain readable.
  JoinType type = 6;
}

// Fetch relational operation over one input, parameterised by an offset and
// a count.
message FetchRel {
  // Properties shared by all relation kinds.
  RelCommon common = 1;

  // Relation being fetched from.
  Rel input = 2;

  // NOTE(review): presumably the number of leading records to skip; confirm.
  int64 offset = 3;

  // NOTE(review): presumably the maximum number of records to return;
  // confirm.
  int64 count = 4;
}

// Aggregate relational operation: groupings and measures over one input.
message AggregateRel {
  // One grouping, expressed as a list of integers referring to input fields.
  message Grouping {
    // NOTE(review): presumably ordinal positions of fields in `input`;
    // confirm.
    repeated int32 input_fields = 1;
  }

  // One measure: a wrapper around a single aggregate function invocation.
  message Measure {
    Expression.AggregateFunction measure = 1;
  }

  // Properties shared by all relation kinds.
  RelCommon common = 1;

  // Relation whose records are aggregated.
  Rel input = 2;

  // Groupings of this aggregation.
  repeated Grouping groupings = 3;

  // Aggregate functions of this aggregation.
  repeated Measure measures = 4;

  // Aggregation phase of this relation as a whole.
  Expression.AggregationPhase phase = 5;
}

// Sort relational operation over one input.
message SortRel {
  // Properties shared by all relation kinds.
  RelCommon common = 1;

  // Relation whose records are sorted.
  Rel input = 2;

  // Sort fields.
  // NOTE(review): presumably applied in list order, first entry most
  // significant; confirm.
  repeated Expression.SortField sorts = 3;
}

// Filter relational operation: one input plus a condition expression.
message FilterRel {
  // Properties shared by all relation kinds.
  RelCommon common = 1;

  // Relation whose records are filtered.
  Rel input = 2;

  // Filter condition.
  // NOTE(review): presumably records are kept when this evaluates to true;
  // confirm.
  Expression condition = 3;
}

// Set relational operation combining any number of input relations.
message SetRel {
  // Set operations that can be requested.
  enum SetOp {
    // Zero value: operation not specified.
    UNKNOWN = 0;

    // Difference variants.
    MINUS_PRIMARY = 1;
    MINUS_MULTISET = 2;

    // Intersection variants.
    INTERSECTION_PRIMARY = 3;
    INTERSECTION_MULTISET = 4;

    // Union variants.
    UNION_DISTINCT = 5;
    UNION_ALL = 6;
  }

  // Properties shared by all relation kinds.
  RelCommon common = 1;

  // Relations the set operation is applied to.
  repeated Rel inputs = 2;

  // Which set operation to apply.
  SetOp op = 3;
}

// Union of every relational operation; exactly one alternative is set.
message Rel {
  // NOTE(review): oneof names are conventionally lower_snake_case; the name
  // is left as is because renaming changes generated code.
  oneof RelType {
    // Leaf relation.
    ReadRel read = 1;

    // Relations over a single input.
    FilterRel filter = 2;
    FetchRel fetch = 3;
    AggregateRel aggregate = 4;
    SortRel sort = 5;

    // Relation over a left and a right input.
    JoinRel join = 6;

    // Relation over a single input.
    ProjectRel project = 7;

    // Relation over a list of inputs.
    SetRel set = 8;
  }
}
1 change: 0 additions & 1 deletion binary/type.proto
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ message Type {
Decimal decimal = 24;

Struct struct = 25;
NamedStruct named_struct = 26;
List list = 27;
Map map = 28;

Expand Down
1 change: 0 additions & 1 deletion binary/type_expressions.proto
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ message DerivationExpression {
ExpressionDecimal decimal = 24;

ExpressionStruct struct = 25;
ExpressionNamedStruct named_struct = 26;
ExpressionList list = 27;
ExpressionMap map = 28;

Expand Down
74 changes: 57 additions & 17 deletions extensions/aggregate_functions.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,58 @@
functions:
# Function `add` (id 0): i32 argument marked variadic 2..N, returns i32.
# NOTE(review): indentation was lost in this view; `variadic` presumably
# belongs to the preceding argument entry. Confirm before re-indenting.
- name: add
id: 0
arguments:
- type: i32
variadic: 2..N
return: i32
# Function `lt` (id 1): takes two i32 arguments and returns boolean.
- name: lt
id: 1
arguments:
- type: i32
- type: i32
return: boolean
# Function `and` (id 3): boolean argument marked variadic 2..N, returns
# boolean. The `arguments:` key and its first list item used to share one
# line, which is not valid YAML; they are split here to match `add` and `lt`.
# NOTE(review): indentation was lost in this view; `variadic` presumably
# belongs to the preceding argument entry. Confirm before re-indenting.
- name: and
id: 3
arguments:
- type: boolean
variadic: 2..N
return: boolean
# Aggregate function `sum`. Declares an `overflow` option listing SILENT,
# SATURATE and ERROR, and two variants: `scalar` and `decimal`.
# NOTE(review): indentation was lost in this view; the nesting described in
# these comments is inferred from key order. Confirm before re-indenting.
- name: 'sum'
description: "Sum a set of values."
options:
overflow: [SILENT, SATURATE, ERROR]
variants:
# Scalar variant: type parameter K ranges over the listed integer and
# floating-point types; argument, intermediate and return are all K.
- name: scalar
decomposable: MANY
parameters:
- K: [i8,i16,i32,i64,fp32,fp64]
arguments:
- type: K
intermediate: K
return: K
# Decimal variant: integer parameters P and S, each declared as 1..38.
# Input is DECIMAL<P,S>; intermediate and return use precision 38, scale S.
- name: decimal
decomposable: MANY
parameters:
integer:
- P: "1..38"
- S: "1..38"
arguments:
- type: "DECIMAL<P,S>"
intermediate: "DECIMAL<38,S>"
return: "DECIMAL<38,S>"
# Aggregate function `avg`. Declares an `overflow` option listing SILENT,
# SATURATE and ERROR, and two variants: `scalar` and `decimal`.
# NOTE(review): indentation was lost in this view; the nesting described in
# these comments is inferred from key order. Confirm before re-indenting.
- name: 'avg'
description: "Average a set of values."
options:
overflow: [SILENT, SATURATE, ERROR]
variants:
# Scalar variant: argument and return are K; the intermediate state is a
# struct with a `sum` field of type K and a `count` field of type i64.
- name: scalar
decomposable: MANY
parameters:
- K: [i8,i16,i32,i64,fp32,fp64]
arguments:
- type: K
intermediate: "STRUCT<sum:K,count:i64>"
return: K
# Decimal variant: integer parameters P and S, each declared as 1..38.
# The first field of the intermediate struct is now named `sum`, matching
# the scalar variant; it was previously the only unnamed field.
- name: decimal
decomposable: MANY
parameters:
integer:
- P: "1..38"
- S: "1..38"
arguments:
- type: "DECIMAL<P,S>"
intermediate: "STRUCT<sum:DECIMAL<38,S>,count:i64>"
return: "DECIMAL<38,S>"
# Aggregate function `count`: takes one argument of type parameter K and
# uses i64 for both the intermediate state and the result.
# The description previously repeated the `avg` text.
# NOTE(review): unlike `sum` and `avg`, this entry has no `variants:` list
# and leaves K unconstrained; confirm that consumers accept this shape.
- name: 'count'
description: "Count a set of values."
options:
overflow: [SILENT, SATURATE, ERROR]
decomposable: MANY
parameters:
- K
arguments:
- type: K
intermediate: i64
return: i64
4 changes: 0 additions & 4 deletions extensions/organizations.yaml

This file was deleted.

Loading

0 comments on commit 1ca0bfb

Please sign in to comment.