Skip to content

Commit

Permalink
[query] Add semhash support for Table-to-Table Aggregations (hail-is#13922)
Browse files Browse the repository at this point in the history

Namely, the TableKeyByAndAggregate and TableAggregateByKey IR nodes.
  • Loading branch information
ehigham authored Oct 31, 2023
1 parent 5d42835 commit a950ede
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,13 @@ case object SemanticHash extends Logging {
val getFieldIndex = table.typ.rowType.fieldIdx
keys.map(getFieldIndex).foreach(buffer ++= Bytes.fromInt(_))


case TableKeyByAndAggregate(_, _, _, nPartitions, bufferSize) =>
nPartitions.foreach {
buffer ++= Bytes.fromInt(_)
}
buffer ++= Bytes.fromInt(bufferSize)

case TableJoin(_, _, joinop, key) =>
buffer ++= joinop.getBytes ++= Bytes.fromInt(key)

Expand Down Expand Up @@ -339,6 +346,7 @@ case object SemanticHash extends Logging {
_: StreamTakeWhile |
_: TableGetGlobals |
_: TableAggregate |
_: TableAggregateByKey |
_: TableCollect |
_: TableCount |
_: TableDistinct |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ class SemanticHashSuite extends HailSuite {
)

def isTableIRSemanticallyEquivalent: Array[Array[Any]] = {
val ttype = TableType(TStruct("a" -> TInt32, "b" -> TStruct()), IndexedSeq(), TStruct())
val ttype = TableType(TStruct("a" -> TInt32, "b" -> TStruct()), IndexedSeq("a"), TStruct())
val ttypeb = TableType(TStruct("c" -> TInt32, "d" -> TStruct()), IndexedSeq(), TStruct())

def mkTableRead(reader: TableReader): TableIR =
Expand Down Expand Up @@ -205,12 +205,14 @@ class SemanticHashSuite extends HailSuite {
Array(
TableGetGlobals,
TableAggregate(_, Void()),
TableAggregateByKey(_, MakeStruct(FastSeq())),
TableKeyByAndAggregate(_, MakeStruct(FastSeq()), MakeStruct(FastSeq("idx" -> I32(0))), None, 256),
TableCollect,
TableCount,
TableDistinct,
TableFilter(_, Void()),
TableMapGlobals(_, MakeStruct(IndexedSeq.empty)),
TableMapRows(_, MakeStruct(IndexedSeq.empty)),
TableMapRows(_, MakeStruct(FastSeq("a" -> I32(0)))),
TableRename(_, Map.empty, Map.empty),
).map { wrap =>
Array(wrap(tir), wrap(tir), true, "")
Expand Down

0 comments on commit a950ede

Please sign in to comment.