From d019e45ddcecf55e4eef1dcff844b3cfe804514d Mon Sep 17 00:00:00 2001 From: krynju Date: Thu, 16 Sep 2021 21:29:39 +0200 Subject: [PATCH] add nicer reduce for grouped dtable --- src/table/gdtable.jl | 2 ++ src/table/operations.jl | 12 +++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/table/gdtable.jl b/src/table/gdtable.jl index 424a01516..adf447c14 100644 --- a/src/table/gdtable.jl +++ b/src/table/gdtable.jl @@ -6,6 +6,8 @@ mutable struct GDTable index::Dict end +grouped_cols(gd::GDTable) = gd.cols === nothing ? [:keys] : gd.cols + keys(gd::GDTable) = keys(gd.index) fetch(gd::GDTable) = fetch(gd.dtable) diff --git a/src/table/operations.jl b/src/table/operations.jl index 2cb93d547..0db938999 100644 --- a/src/table/operations.jl +++ b/src/table/operations.jl @@ -98,8 +98,8 @@ function reduce(f, d::DTable; cols=nothing::Union{Nothing, Vector{Symbol}}, init construct_single_column = (_col, _chunk_results...) -> getindex.(_chunk_results, _col) result_columns = [Dagger.@spawn construct_single_column(c, chunk_reduce_results...) for c in columns] - reduce_result_column = (_f, _c, _init) -> reduce(_f, _c; init=_init) - reduce_chunks = [Dagger.@spawn reduce_result_column(f, c, deepcopy(init)) for c in result_columns] + reduce_result_column = (_f, _c) -> reduce(_f, _c) # removed init from here as it's not needed (couldn't do (x,y)->x+1 for example) + reduce_chunks = [Dagger.@spawn reduce_result_column(f, c) for c in result_columns] construct_result = (_cols, _vals...) -> (; zip(_cols, _vals)...) Dagger.@spawn construct_result(columns, reduce_chunks...) @@ -107,7 +107,13 @@ end function reduce(f, gd::GDTable; cols=nothing::Union{Nothing, Vector{Symbol}}, init=Base._InitialValue()) - Dict([d[1] => reduce(f, d[2]; cols=cols, init=init) for d in gd]) + construct_result = (_keys, _results...) -> begin + result_cols = keys(first(_results)) + k = [col => getindex.(_keys, i) for (i, col) in enumerate(grouped_cols(gd))] + r = [Symbol("result_" * string(r)) => collect(getindex.(_results, r)) for r in result_cols] + (;k...,r...) + end + Dagger.@spawn construct_result(keys(gd), [reduce(f, d[2]; cols=cols, init=deepcopy(init)) for d in gd]...) end """