Skip to content

Commit

Permalink
allow :col => AsTable and :col => cols (#2780)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored Jun 9, 2021
1 parent eddc3e7 commit 886ebad
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 6 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
but preserved in right; in `innerjoin` missings are dropped in both data frames;
in `outerjoin` this value of keyword argument is not supported
([#2724](https://github.com/JuliaData/DataFrames.jl/pull/2724))
* correctly handle selectors of the form `:col => AsTable` and `:col => cols`
by expanding a single column into multiple columns
([#2780](https://github.com/JuliaData/DataFrames.jl/pull/2780))

## Bug fixes

Expand Down
2 changes: 1 addition & 1 deletion docs/src/man/split_apply_combine.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ each subset of the `DataFrame`. This specification can be of the following forms
3. a `cols => function => target_cols` form additionally explicitly specifying
the target column or columns.
4. a `col => target_cols` pair, which renames the column `col` to `target_cols`, which
must be single name (as a `Symbol` or a string).
must be a single name (as a `Symbol` or a string), a vector of names, or `AsTable`.
5. a `nrow` or `nrow => target_cols` form which efficiently computes the number of rows
in a group; without `target_cols` the new column is called `:nrow`, otherwise
it must be a single name (as a `Symbol` or a string).
Expand Down
18 changes: 16 additions & 2 deletions src/abstractdataframe/selection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ const TRANSFORMATION_COMMON_RULES =
3. a `cols => function => target_cols` form additionally explicitly specifying
the target column or columns.
4. a `col => target_cols` pair, which renames the column `col` to `target_cols`, which
must be single name (as a `Symbol` or a string).
must be a single name (as a `Symbol` or a string), a vector of names, or `AsTable`.
5. a `nrow` or `nrow => target_cols` form which efficiently computes the number of rows
in a group; without `target_cols` the new column is called `:nrow`, otherwise
it must be a single name (as a `Symbol` or a string).
Expand Down Expand Up @@ -212,7 +212,13 @@ end

# `col => "name"` selector: the string target name is converted to a `Symbol`
# and the call is re-dispatched on the resulting `col => Symbol` pair.
# NOTE(review): the two forwarding lines below differ only in the `::Bool`
# annotation on the forwarded `renamecols` argument; they look like the
# before/after versions of one line as shown in a diff view — confirm that
# only the second one is meant to remain.
normalize_selection(idx::AbstractIndex, sel::Pair{<:ColumnIndex, <:AbstractString},
renamecols::Bool) =
normalize_selection(idx, first(sel) => Symbol(last(sel)), renamecols::Bool)
normalize_selection(idx, first(sel) => Symbol(last(sel)), renamecols)

# `col => [names...]` selector: a single source column paired with a vector of
# target names (symbols or strings). The pair is rewritten into the
# three-element `col => identity => names` form and re-dispatched, so the
# column is passed through unchanged and expanded into the named columns.
function normalize_selection(idx::AbstractIndex,
                             sel::Pair{<:ColumnIndex,
                                       <:Union{AbstractVector{Symbol},
                                               AbstractVector{<:AbstractString}}},
                             renamecols::Bool)
    source, targets = sel
    return normalize_selection(idx, source => identity => targets, renamecols)
end

function normalize_selection(idx::AbstractIndex,
@nospecialize(sel::Pair{<:ColumnIndex,
Expand Down Expand Up @@ -287,6 +293,11 @@ function normalize_selection(idx::AbstractIndex,
@nospecialize(sel::Pair{<:ColumnIndex, <:Base.Callable}), renamecols::Bool)
c = idx[first(sel)]
fun = last(sel)

if fun === AsTable
return normalize_selection(idx, first(sel) => identity => AsTable, renamecols)
end

if renamecols
newcol = Symbol(_names(idx)[c], "_", funname(fun))
else
Expand Down Expand Up @@ -320,6 +331,9 @@ function normalize_selection(idx::AbstractIndex,
end
end
fun = last(sel)

fun === AsTable && throw(ArgumentError("Passing AsTable in $sel is not supported"))

if length(c) > 3
prefix = join(@views(_names(idx)[c[1:2]]), '_')
if renamecols
Expand Down
23 changes: 20 additions & 3 deletions test/select.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1616,11 +1616,11 @@ end
@test DataFrames.normalize_selection(DataFrames.Index(Dict(:a => 1, :b => 2), [:a, :b]),
[:a] => sum, false) == (1 => (sum => :a))

@test DataFrames.normalize_selection(DataFrames.Index(Dict(:a => 1, :b => 2), [:a, :b]),
@test DataFrames.normalize_selection(DataFrames.Index(Dict(:a => 1, :b => 2), [:a, :b]),
[:a] => sum => [:new], false) == (1 => (sum => [:new]))

# Test that target col strings are converted to Symbols
@test DataFrames.normalize_selection(DataFrames.Index(Dict(:a => 1, :b => 2), [:a, :b]),
@test DataFrames.normalize_selection(DataFrames.Index(Dict(:a => 1, :b => 2), [:a, :b]),
[:a] => sum => ["new"], false) == (1 => (sum => [:new]))
end

Expand All @@ -1647,5 +1647,22 @@ end
@test df.a === a
end
end


# Selectors of the form `:col => AsTable` and `:col => [names...]` expand one
# column holding NamedTuples or Tuples into several columns. The same cases are
# checked for a plain data frame and for a grouped data frame.
@testset ":col => AsTable and :col => cols" begin
    df = DataFrame(id=1:2, c1=[(a=1, b=2), (a=3, b=4)], c2=[(11, 12), (13, 14)])

    # NamedTuple column: field names become column names unless overridden.
    @test select(df, :c1 => AsTable) == DataFrame(a=[1, 3], b=[2, 4])
    @test select(df, :c1 => [:p, :q]) == DataFrame(p=[1, 3], q=[2, 4])
    # Tuple column: columns are auto-named x1, x2 unless names are given.
    @test select(df, :c2 => AsTable) == DataFrame(x1=[11, 13], x2=[12, 14])
    @test select(df, :c2 => [:p, :q]) == DataFrame(p=[11, 13], q=[12, 14])
    # Several source columns paired directly with AsTable must be rejected.
    # NOTE(review): this assertion used to appear twice verbatim; if a second
    # selector form was meant to be covered, add it here.
    @test_throws ArgumentError select(df, [:c1, :c2] => AsTable)

    gdf = groupby(df, :id)
    @test select(gdf, :c1 => AsTable) == DataFrame(id=1:2, a=[1, 3], b=[2, 4])
    @test select(gdf, :c1 => [:p, :q]) == DataFrame(id=1:2, p=[1, 3], q=[2, 4])
    @test select(gdf, :c2 => AsTable) == DataFrame(id=1:2, x1=[11, 13], x2=[12, 14])
    @test select(gdf, :c2 => [:p, :q]) == DataFrame(id=1:2, p=[11, 13], q=[12, 14])
    # NOTE(review): same verbatim duplicate removed for the grouped case.
    @test_throws ArgumentError select(gdf, [:c1, :c2] => AsTable)
end

end # module

0 comments on commit 886ebad

Please sign in to comment.