From 886ebad6b36d32c1af462bf6602ec06bad6708ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 9 Jun 2021 08:22:40 +0200 Subject: [PATCH] allow :col => AsTable and :col => cols (#2780) --- NEWS.md | 3 +++ docs/src/man/split_apply_combine.md | 2 +- src/abstractdataframe/selection.jl | 18 ++++++++++++++++-- test/select.jl | 23 ++++++++++++++++++++--- 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index 20ed277ce7..243e5da8c4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,6 +8,9 @@ but preserved in right; in `innerjoin` missings are dropped in both data frames; in `outerjoin` this value of keyword argument is not supported ([#2724](https://github.com/JuliaData/DataFrames.jl/pull/2724)) +* correctly handle selectors of the form `:col => AsTable` and `:col => cols` + by expanding a single column into multiple columns + ([#2780](https://github.com/JuliaData/DataFrames.jl/pull/2780)) ## Bug fixes diff --git a/docs/src/man/split_apply_combine.md b/docs/src/man/split_apply_combine.md index fb8e9df5d4..c156ea7d77 100644 --- a/docs/src/man/split_apply_combine.md +++ b/docs/src/man/split_apply_combine.md @@ -53,7 +53,7 @@ each subset of the `DataFrame`. This specification can be of the following forms 3. a `cols => function => target_cols` form additionally explicitly specifying the target column or columns. 4. a `col => target_cols` pair, which renames the column `col` to `target_cols`, which - must be single name (as a `Symbol` or a string). + must be a single name (as a `Symbol` or a string), a vector of names or `AsTable`. 5. a `nrow` or `nrow => target_cols` form which efficiently computes the number of rows in a group; without `target_cols` the new column is called `:nrow`, otherwise it must be single name (as a `Symbol` or a string). 
diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index 2aaf48a28e..755e81a7d6 100755 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -47,7 +47,7 @@ const TRANSFORMATION_COMMON_RULES = 3. a `cols => function => target_cols` form additionally explicitly specifying the target column or columns. 4. a `col => target_cols` pair, which renames the column `col` to `target_cols`, which - must be single name (as a `Symbol` or a string). + must be single name (as a `Symbol` or a string), a vector of names or `AsTable`. 5. a `nrow` or `nrow => target_cols` form which efficiently computes the number of rows in a group; without `target_cols` the new column is called `:nrow`, otherwise it must be single name (as a `Symbol` or a string). @@ -212,7 +212,13 @@ end normalize_selection(idx::AbstractIndex, sel::Pair{<:ColumnIndex, <:AbstractString}, renamecols::Bool) = - normalize_selection(idx, first(sel) => Symbol(last(sel)), renamecols::Bool) + normalize_selection(idx, first(sel) => Symbol(last(sel)), renamecols) + +normalize_selection(idx::AbstractIndex, sel::Pair{<:ColumnIndex, + <:Union{AbstractVector{Symbol}, + AbstractVector{<:AbstractString}}}, + renamecols::Bool) = + normalize_selection(idx, first(sel) => identity => last(sel), renamecols) function normalize_selection(idx::AbstractIndex, @nospecialize(sel::Pair{<:ColumnIndex, @@ -287,6 +293,11 @@ function normalize_selection(idx::AbstractIndex, @nospecialize(sel::Pair{<:ColumnIndex, <:Base.Callable}), renamecols::Bool) c = idx[first(sel)] fun = last(sel) + + if fun === AsTable + return normalize_selection(idx, first(sel) => identity => AsTable, renamecols) + end + if renamecols newcol = Symbol(_names(idx)[c], "_", funname(fun)) else @@ -320,6 +331,9 @@ function normalize_selection(idx::AbstractIndex, end end fun = last(sel) + + fun === AsTable && throw(ArgumentError("Passing AsTable in $sel is not supported")) + if length(c) > 3 prefix = 
join(@views(_names(idx)[c[1:2]]), '_') if renamecols diff --git a/test/select.jl b/test/select.jl index 67fbc162fd..64e2b893e7 100644 --- a/test/select.jl +++ b/test/select.jl @@ -1616,11 +1616,11 @@ end @test DataFrames.normalize_selection(DataFrames.Index(Dict(:a => 1, :b => 2), [:a, :b]), [:a] => sum, false) == (1 => (sum => :a)) - @test DataFrames.normalize_selection(DataFrames.Index(Dict(:a => 1, :b => 2), [:a, :b]), + @test DataFrames.normalize_selection(DataFrames.Index(Dict(:a => 1, :b => 2), [:a, :b]), [:a] => sum => [:new], false) == (1 => (sum => [:new])) # Test that target col strings are converted to Symbols - @test DataFrames.normalize_selection(DataFrames.Index(Dict(:a => 1, :b => 2), [:a, :b]), + @test DataFrames.normalize_selection(DataFrames.Index(Dict(:a => 1, :b => 2), [:a, :b]), [:a] => sum => ["new"], false) == (1 => (sum => [:new])) end @@ -1647,5 +1647,22 @@ end @test df.a === a end end - + +@testset ":col => AsTable and :col => cols" begin + df = DataFrame(id=1:2, c1=[(a=1, b=2), (a=3, b=4)], c2=[(11, 12), (13, 14)]) + @test select(df, :c1 => AsTable) == DataFrame(a=[1, 3], b=[2, 4]) + @test select(df, :c1 => [:p, :q]) == DataFrame(p=[1, 3], q=[2, 4]) + @test select(df, :c2 => AsTable) == DataFrame(x1=[11, 13], x2=[12, 14]) + @test select(df, :c2 => [:p, :q]) == DataFrame(p=[11, 13], q=[12, 14]) + @test_throws ArgumentError select(df, [:c1, :c2] => AsTable) + @test_throws ArgumentError select(df, [:c1, :c2] => AsTable) + gdf = groupby(df, :id) + @test select(gdf, :c1 => AsTable) == DataFrame(id=1:2, a=[1, 3], b=[2, 4]) + @test select(gdf, :c1 => [:p, :q]) == DataFrame(id=1:2, p=[1, 3], q=[2, 4]) + @test select(gdf, :c2 => AsTable) == DataFrame(id=1:2, x1=[11, 13], x2=[12, 14]) + @test select(gdf, :c2 => [:p, :q]) == DataFrame(id=1:2, p=[11, 13], q=[12, 14]) + @test_throws ArgumentError select(gdf, [:c1, :c2] => AsTable) + @test_throws ArgumentError select(gdf, [:c1, :c2] => AsTable) +end + end # module