Skip to content

Commit

Permalink
deprecate map on GroupedDataFrame (#2662)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored Mar 25, 2021
1 parent ff965d5 commit 777afbb
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 21 deletions.
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,11 @@
`convert(::Type{NamedTuple}, key::GroupKey)`, and
`convert(::Type{DataFrame}, sdf::SubDataFrame)`; the deprecated methods will be
removed in 1.0 release
* as a bug fix, the `eltype` of the vector returned by `eachrow` is now `DataFrameRow`
([#2662](https://github.com/JuliaData/DataFrames.jl/pull/2662))
* applying `map` to a `GroupedDataFrame` is now deprecated. It will
be an error in the 1.0 release.
([#2662](https://github.com/JuliaData/DataFrames.jl/pull/2662))

# DataFrames v0.22 Release Notes

Expand Down
6 changes: 3 additions & 3 deletions src/abstractdataframe/iteration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@

# Iteration by rows
"""
DataFrameRows{D<:AbstractDataFrame} <: AbstractVector{DataFrameRow{D, S}}
DataFrameRows{D<:AbstractDataFrame} <: AbstractVector{DataFrameRow}
Iterator over rows of an `AbstractDataFrame`,
with each row represented as a `DataFrameRow`.
A value of this type is returned by the [`eachrow`](@ref) function.
"""
struct DataFrameRows{D<:AbstractDataFrame, S} <: AbstractVector{DataFrameRow{D, S}}
struct DataFrameRows{D<:AbstractDataFrame} <: AbstractVector{DataFrameRow}
df::D
end

Expand Down Expand Up @@ -72,7 +72,7 @@ julia> eachrow(view(df, [4, 3], [2, 1]))
2 │ 13 3
```
"""
eachrow(df::AbstractDataFrame) = DataFrameRows{typeof(df), typeof(index(df))}(df)
eachrow(df::AbstractDataFrame) = DataFrameRows(df)

# `DataFrameRows` is indexed with a single linear index.
function Base.IndexStyle(::Type{<:DataFrameRows})
    return Base.IndexLinear()
end

# One-dimensional size: the extent of `parent(itr)` along its first dimension.
function Base.size(itr::DataFrameRows)
    nrows = size(parent(itr), 1)
    return (nrows,)
end
Expand Down
10 changes: 8 additions & 2 deletions src/groupeddataframe/groupeddataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -374,9 +374,9 @@ Base.length(gd::GroupedDataFrame) = gd.ngroups

# Iteration protocol for `GroupedDataFrame`, with the integer state `i` starting at 1.
# Once `i` exceeds `length(gd)` the result is `nothing`; otherwise it is a tuple of
# a view of `gd.parent` restricted to rows `gd.idx[gd.starts[i]:gd.ends[i]]` (all
# columns) and the next state `i+1`.
# NOTE(review): the bare `nothing` and the bare tuple expression directly above each
# `return` look like the pre-change lines of the diff shown next to their
# replacements — confirm against the actual file before relying on this text.
function Base.iterate(gd::GroupedDataFrame, i=1)
if i > length(gd)
nothing
return nothing
else
(view(gd.parent, gd.idx[gd.starts[i]:gd.ends[i]], :), i+1)
return (view(gd.parent, gd.idx[gd.starts[i]:gd.ends[i]], :), i+1)
end
end

Expand Down Expand Up @@ -973,3 +973,9 @@ function _filter_helper_astable(gdf::GroupedDataFrame, nt::NamedTuple, f,

return gdf[[f(mapper(i))::Bool for i in 1:length(gdf)]]
end

# Deprecated: `map` over a `GroupedDataFrame`. Emits a deprecation warning and then
# applies `f` to every element produced by iterating `gdf`, collecting the results.
function Base.map(f, gdf::GroupedDataFrame)
    msg = "Use of the map function on GroupedDataFrame is deprecated. " *
          "Use `[f(sdf) for sdf in gdf]` instead."
    Base.depwarn(msg, :map)
    results = Base.Generator(f, gdf)
    return collect(results)
end
10 changes: 0 additions & 10 deletions src/other/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,6 @@ function precompile(all=false)
Base.precompile(fbody, (Bool,Bool,typeof(select),DataFrame,Any,))
end
end
Base.precompile(Tuple{typeof(map),Function,DataFrames.DataFrameRows{DataFrame,DataFrames.Index}})
Base.precompile(Tuple{typeof(show),Base.GenericIOBuffer{Array{UInt8,1}},MIME{Symbol("text/html")},DataFrameRow{DataFrame,DataFrames.Index}})
Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(sin),Tuple{Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(+),Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(+),Tuple{SubArray{Float64,0,Array{Float64,1},Tuple{Int},true},Array{Float64,1}}},Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(/),Tuple{SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},DataFrame}}}}}}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:Key1, :Key2, :Value),Tuple{Array{String,1},Array{Union{Missing, String},1},UnitRange{Int}}},Type{DataFrame}})
Expand Down Expand Up @@ -764,7 +763,6 @@ function precompile(all=false)
Base.precompile(Tuple{DataFrames.Reduce{typeof(Base.mul_prod),Nothing,Nothing},Array{Union{Missing, Int},1},GroupedDataFrame{DataFrame}})
Base.precompile(Tuple{typeof(getindex),GroupedDataFrame{DataFrame},InvertedIndex{Array{Bool,1}}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.insertcols!)),NamedTuple{(:makeunique,),Tuple{Bool}},typeof(insertcols!),DataFrame,Int,Pair{Symbol,Int}})
Base.precompile(Tuple{Type{DataFrame},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.push!)),NamedTuple{(:cols,),Tuple{Symbol}},typeof(push!),DataFrame,NamedTuple{(:a,),Tuple{Float64}}})
Base.precompile(Tuple{typeof(transform),GroupedDataFrame{DataFrame},Function})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:Key1, :Key2, :Value),Tuple{Array{Union{Missing, String},1},Array{Union{Missing, String},1},UnitRange{Int}}},Type{DataFrame}})
Expand Down Expand Up @@ -806,7 +804,6 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true}}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Bool,1}},Val{true},Nothing})
Base.precompile(Tuple{typeof(DataFrames._combine_multicol),NamedTuple{(:y, :z),Tuple{Array{Float64,1},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},Function,GroupedDataFrame{DataFrame},Nothing})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b2, :v2),Tuple{Array{Union{Missing, Int},1},Array{Union{Missing, Symbol},1},Array{Union{Missing, Float64},1}}},Type{DataFrame}})
Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(sum),Tuple{DataFrames.DataFrameRows{SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}}}}}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:x1,),Tuple{Array{Bool,1}}},Type{DataFrame}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id1, :id2, :x_left, :x_right, :ind),Tuple{Array{Int,1},Array{Int,1},Array{Union{Missing, Int},1},Array{Union{Missing, Int},1},Array{String,1}}},Type{DataFrame}})
let fbody = try __lookup_kwbody__(which(DataFrames.stack, (DataFrame,Array{Symbol,1},Array{Any,1},))) catch missing end
Expand Down Expand Up @@ -1380,7 +1377,6 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(DataFrames._combine_process_pair_symbol),Bool,GroupedDataFrame{DataFrame},Dict{Symbol,Tuple{Bool,Int}},Array{DataFrames.TransformationResult,1},Nothing,Symbol,Bool,String,Union{Function, Type},Tuple{Array{String,1}}})
Base.precompile(Tuple{typeof(sort),DataFrame,Function})
Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Array{Pair{Symbol,typeof(sum)},1}})
Base.precompile(Tuple{typeof(repr),MIME{Symbol("text/latex")},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id, :fid, :id_1),Tuple{Array{Int,1},Array{Int,1},Array{Union{Missing, Int},1}}},Type{DataFrame}})
Base.precompile(Tuple{typeof(view),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},InvertedIndex{Int},Between{Int,Int}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:p, :q),Tuple{SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false},SubArray{Int,1,Array{Int,1},Tuple{Array{Int,1}},false}}},Type{DataFrame}})
Expand Down Expand Up @@ -1623,7 +1619,6 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Array{Symbol,1}})
Base.precompile(Tuple{typeof(Base.Broadcast.materialize),Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(+),Tuple{SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},Base.OneTo{Int}},Array{Int,1}}}})
isdefined(DataFrames, Symbol("#632#635")) && Base.precompile(Tuple{getfield(DataFrames, Symbol("#632#635")),Array{Bool,1}})
Base.precompile(Tuple{typeof(iterate),Base.Iterators.Zip{Tuple{Array{NamedTuple{(:a, :b),Tuple{Int,Symbol}},1},Tables.NamedTupleIterator{Tables.Schema{(:a, :b),Tuple{Int,Symbol}},Tables.RowIterator{NamedTuple{(:a, :b),Tuple{Array{Int,1},Array{Symbol,1}}}}},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}}}})
Base.precompile(Tuple{typeof(view),SubDataFrame{DataFrame,DataFrames.Index,Base.OneTo{Int}},typeof(!),Between{Int,Int}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:x1,),Tuple{Array{String,1}}},Type{DataFrame}})
isdefined(DataFrames, Symbol("#67#74")) && Base.precompile(Tuple{getfield(DataFrames, Symbol("#67#74")),Array{Int,1}})
Expand Down Expand Up @@ -1834,7 +1829,6 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Symbol,Pair{typeof(prod),Symbol}}})
Base.precompile(Tuple{typeof(completecases),DataFrame,Regex})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:g, :col),Tuple{Array{Any,1},Array{Any,1}}},Type{DataFrame}})
Base.precompile(Tuple{typeof(iterate),Base.Iterators.Zip{Tuple{Array{NamedTuple{(:a, :b),Tuple{Int,Symbol}},1},Tables.NamedTupleIterator{Tables.Schema{(:a, :b),Tuple{Int,Symbol}},Tables.RowIterator{NamedTuple{(:a, :b),Tuple{Array{Int,1},Array{Symbol,1}}}}},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}}},Tuple{Int,Tuple{Int},Tuple{Base.OneTo{Int},Int}}})
Base.precompile(Tuple{typeof(DataFrames._sortperm),SubDataFrame{DataFrame,DataFrames.Index,Array{Int,1}},Base.Sort.MergeSortAlg,DataFrames.DFPerm{Base.Order.ForwardOrdering,Tuple{SubArray{String,1,PooledArrays.PooledArray{String,UInt8,1,Array{UInt8,1}},Tuple{Array{Int,1}},false},SubArray{Union{Missing, String},1,Array{Union{Missing, String},1},Tuple{Array{Int,1}},false}}}})
Base.precompile(Tuple{DataFrames.Reduce{typeof(Base.add_sum),Nothing,typeof(/)},Array{Union{Missing, Number},1},GroupedDataFrame{DataFrame}})
Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Int}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Float64,1}},Val{false},Array{Int,1}})
Expand Down Expand Up @@ -2539,7 +2533,6 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1,),Tuple{Missing}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Union{Missing, DataFrame},1}},Val{false},Array{Int,1}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames._combine_prepare)),NamedTuple{(:copycols, :keepkeys, :ungroup, :keeprows, :renamecols),NTuple{5,Bool}},typeof(DataFrames._combine_prepare),GroupedDataFrame{DataFrame},Colon,Vararg{Union{Regex, AbstractString, Function, Signed, Symbol, Unsigned, Pair, AbstractArray{T,1} where T, Type, All, Between, Cols, InvertedIndex},N} where N})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id, :fid),Tuple{Array{Int,1},Array{Int,1}}},Type{DataFrame}})
Base.precompile(Tuple{typeof(Base.collect_similar),DataFrames.DataFrameRows{DataFrame,DataFrames.Index},Base.Generator{DataFrames.DataFrameRows{DataFrame,DataFrames.Index},typeof(sum)}})
Base.precompile(Tuple{Type{SubDataFrame},SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},Colon,Cols{Tuple{Array{Int,1}}}})
Base.precompile(Tuple{typeof(getindex),DataFrame,Colon,Cols{Tuple{Regex,InvertedIndex{Regex}}}})
Base.precompile(Tuple{typeof(repr),MIME{Symbol("text/latex")},DataFrames.DataFrameColumns{DataFrame}})
Expand Down Expand Up @@ -2700,7 +2693,6 @@ function precompile(all=false)
Base.precompile(Tuple{typeof(DataFrames.do_call),typeof(sum),Array{Int,1},Array{Int,1},Array{Int,1},GroupedDataFrame{DataFrame},Tuple{Array{Union{Missing, Array{Float64,2}},1}},Int})
Base.precompile(Tuple{typeof(Base.Broadcast.broadcasted),Function,Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(+),Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(+),Tuple{SubArray{Float64,0,Array{Float64,1},Tuple{Int},true},Array{Float64,1}}},Base.Broadcast.Broadcasted{DataFrames.DataFrameStyle,Nothing,typeof(/),Tuple{DataFrame,DataFrame}}}}})
Base.precompile(Tuple{typeof(DataFrames._transformation_helper),DataFrame,AsTable,ByRow{typeof(first)}})
Base.precompile(Tuple{typeof(DataFrames.escapedprint),Base.GenericIOBuffer{Array{UInt8,1}},DataFrames.DataFrameRows{DataFrame,DataFrames.Index},String})
Base.precompile(Tuple{typeof(DataFrames._combine_with_first),NamedTuple{(:x1, :x2),Tuple{SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true},SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int}},Int},true}}},Function,GroupedDataFrame{DataFrame},Tuple{Array{Int,1},Array{Int,1}},Val{true},Nothing})
Base.precompile(Tuple{typeof(show),GroupedDataFrame{DataFrame}})
Base.precompile(Tuple{typeof(getindex),SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,UnitRange{Int},UnitRange{Int}},UnitRange{Int}},Colon,Array{Symbol,1}})
Expand Down Expand Up @@ -2851,7 +2843,6 @@ function precompile(all=false)
isdefined(DataFrames, Symbol("#627#628")) && Base.precompile(Tuple{getfield(DataFrames, Symbol("#627#628")),SubArray{Union{Missing, String},1,Array{Union{Missing, String},1},Tuple{Base.OneTo{Int}},true}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:id1, :id2_left, :x_left, :ID2_right, :x_right),Tuple{Array{Int,1},Array{Union{Missing, Int},1},Array{Union{Missing, Int},1},Array{Int,1},Array{Int,1}}},Type{DataFrame}})
Base.precompile(Tuple{Core.kwftype(typeof(DataFrames.Type)),NamedTuple{(:a, :b),Tuple{Array{Int,1},Array{Any,1}}},Type{DataFrame}})
Base.precompile(Tuple{typeof(Base.collect_similar),DataFrames.DataFrameRows{DataFrame,DataFrames.Index},Base.Generator{DataFrames.DataFrameRows{DataFrame,DataFrames.Index},Type{Array{T,1} where T}}})
Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Symbol,Pair{typeof(sum),Symbol}},Pair{Symbol,Pair{typeof(length),Symbol}}})
Base.precompile(Tuple{typeof(DataFrames._combine_rows_with_first!),NamedTuple{(:x1,),Tuple{BigFloat}},Tuple{Array{BigFloat,1}},Int,Int,Function,GroupedDataFrame{DataFrame},Tuple{Array{BigInt,1}},Tuple{Symbol},Val{false}})
let fbody = try __lookup_kwbody__(which(issorted, (DataFrame,Array{Any,1},))) catch missing end
Expand All @@ -2861,7 +2852,6 @@ function precompile(all=false)
end
Base.precompile(Tuple{typeof(append!),Array{Any,1},Array{Pair{String,ByRow{typeof(-)}},1}})
Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Symbol,Pair{ByRow{typeof(sin)},Symbol}},InvertedIndex{Symbol},Vararg{Union{Regex, AbstractString, Function, Signed, Symbol, Unsigned, Pair, AbstractArray{T,1} where T, Type, All, Between, Cols, InvertedIndex},N} where N})
Base.precompile(Tuple{typeof(==),DataFrames.DataFrameRows{SubDataFrame{DataFrame,DataFrames.SubIndex{DataFrames.Index,Array{Int,1},Array{Int,1}},Array{Int,1}},DataFrames.SubIndex{DataFrames.Index,Array{Int,1},Array{Int,1}}},DataFrames.DataFrameRows{DataFrame,DataFrames.Index}})
Base.precompile(Tuple{typeof(DataFrames._combine_multicol),NamedTuple{(:x2,),Tuple{Array{Bool,1}}},Function,GroupedDataFrame{DataFrame},Nothing})
Base.precompile(Tuple{typeof(combine),GroupedDataFrame{DataFrame},Pair{Int,typeof(sum)}})
Base.precompile(Tuple{typeof(allowmissing!),DataFrame,InvertedIndex{InvertedIndex{Array{Int,1}}}})
Expand Down
17 changes: 17 additions & 0 deletions test/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,23 @@ end
indicator=:source, source=:source)
end

# The deprecated `map` on a grouped data frame must give the same result as
# `collect`, both for a plain data frame and for a view of it.
@testset "map on GroupedDataFrame" begin
    df = DataFrame(a=1:3, b=4:6, c=7:9)
    gdf = groupby(df, :a)
    gdfv = groupby(view(df, 1:3, 1:3), :a)

    for grouped in (gdf, gdfv)
        @test collect(grouped) == map(identity, grouped)
    end
end

# `map` over three single-row groups yields one result per group.
@testset "new map behavior" begin
    gdf = groupby(DataFrame(g=[1, 2, 3]), :g)
    @test map(nrow, gdf) == [1, 1, 1]
end

@testset "Conversion tests" begin
df = DataFrame()
df[!, :A] = 1:5
Expand Down
19 changes: 13 additions & 6 deletions test/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3018,12 +3018,6 @@ end
@test_throws ArgumentError combine(gdf, (:g, :g) => identity)
end

# `map` over three single-row groups yields one result per group.
@testset "new map behavior" begin
    gdf = groupby(DataFrame(g=[1, 2, 3]), :g)
    @test map(nrow, gdf) == [1, 1, 1]
end

@testset "check isagg correctly uses fast path only when it should" begin
for fun in (sum, prod, mean, var, std, sum∘skipmissing, prod∘skipmissing,
mean∘skipmissing, var∘skipmissing, std∘skipmissing),
Expand Down Expand Up @@ -3825,6 +3819,19 @@ end
((x, y, z) -> x[1] <= 5 ? unwrap(y[1]) : unwrap(z[1])) => :res)
end

# Iterating a grouped data frame must agree with indexing it group by group, and
# reductions over its groups must match the same computation on the parent.
@testset "grouped data frame iteration" begin
    df = DataFrame(a=1:3, b=4:6, c=7:9)
    gdf = groupby(df, :a)
    gdfv = groupby(view(df, 1:3, 1:3), :a)

    for grouped in (gdf, gdfv)
        @test collect(grouped) == [sdf for sdf in grouped] == [grouped[k] for k in 1:3]
        @test reduce(vcat, grouped) == parent(grouped)
        @test mapreduce(sdf -> sum(Matrix(sdf)), +, grouped) ==
              sum(Matrix(parent(grouped)))
    end
end

@testset "groupby multithreading" begin
for x in (PooledArray(rand(1:10, 1_100_000)),
PooledArray(rand([1:9; missing], 1_100_000))),
Expand Down

0 comments on commit 777afbb

Please sign in to comment.