Skip to content

Commit

Permalink
Enforce correct eltype of arrays used to index GroupedDataFrame
Browse files Browse the repository at this point in the history
  • Loading branch information
jlumpe committed Dec 9, 2019
1 parent b97ec42 commit 4039ee4
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 9 deletions.
2 changes: 1 addition & 1 deletion docs/src/lib/indexing.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ The elements of a `GroupedDataFrame` are [`SubDataFrame`](@ref)s of its parent.
array of `Integer`s or `Bool`s, similar to a standard array. Alternatively the
array may contain keys of any of the types supported for dictionary-like
indexing (`GroupKey`, `Tuple`, or `NamedTuple`). Selected groups must be
unique.
unique, and different types of indices cannot be mixed.
* `gd[n::Not]` -> Any of the above types wrapped in [`Not`](@ref). The result
will be a new `GroupedDataFrame` containing all groups in `gd` *not* selected
by the wrapped index.
19 changes: 13 additions & 6 deletions src/groupeddataframe/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,8 @@ Base.keys(gd::GroupedDataFrame) = GroupKeys(gd)

# Key types accepted for dictionary-like indexing of a GroupedDataFrame
const GroupKeyTypes = Union{GroupKey, Tuple, NamedTuple}
# Every allowed scalar index type: any Integer plus the key types in GroupKeyTypes
const GroupIndexTypes = Union{Integer, GroupKeyTypes}


# Find the integer index of a group given any supported scalar index type
Expand Down Expand Up @@ -445,13 +447,18 @@ Base.getindex(gd::GroupedDataFrame, key::GroupKeyTypes) = gd[_findgroup(gd, key)
# Indexing with a vector of dictionary-like keys: look up the group index of
# each key with _findgroup, then index with the collected results
function Base.getindex(gd::GroupedDataFrame, idxs::AbstractVector{T}) where {T<:GroupKeyTypes}
    positions = [_findgroup(gd, key) for key in idxs]
    return gd[positions]
end

# InvertedIndex
function Base.getindex(gd::GroupedDataFrame, idx::InvertedIndex{T}) where T
# InvertedIndex wrapping scalar index
function Base.getindex(gd::GroupedDataFrame, idx::InvertedIndex{T}) where {T<:GroupIndexTypes}
selected = trues(length(gd))
if T <: AbstractVector
selected[[_findgroup(gd, k) for k in idx.skip]] .= false
else
selected[_findgroup(gd, idx.skip)] = false
selected[_findgroup(gd, idx.skip)] = false
return gd[selected]
end

# InvertedIndex wrapping an array of scalar indices: start with every group
# selected, then deselect each group the wrapped array refers to
function Base.getindex(gd::GroupedDataFrame, idx::InvertedIndex{T}) where {E<:GroupIndexTypes,T<:AbstractVector{E}}
    keep = trues(length(gd))
    foreach(idx.skip) do excluded
        keep[_findgroup(gd, excluded)] = false
    end
    return gd[keep]
end
Expand Down
24 changes: 22 additions & 2 deletions test/grouping.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1477,17 +1477,37 @@ end
expected = gd[[i != skip_i for i in 1:length(gd)]]

for skip in [skip_i, skip_key, Tuple(skip_key), NamedTuple(skip_key)]
@test gd[InvertedIndex(skip)] ≅ expected
@test gd[Not(skip)] ≅ expected
end

@test_throws ArgumentError gd[Not(true)] # Bool <: Integer, but should fail

# Inverted array index
skipped = [3, 5, 2]
skipped_bool = [i ∈ skipped for i in 1:length(gd)]
skipped_keys = keys(gd)[skipped]
expected2 = gd[.!skipped_bool]

for skip in [skipped, skipped_bool, skipped_keys, Tuple.(skipped_keys), NamedTuple.(skipped_keys)]
@test gd[InvertedIndex(skip)] ≅ expected2
@test gd[Not(skip)] ≅ expected2
end
end

@testset "GroupedDataFrame array index homogeneity" begin
    df = DataFrame(
        a = repeat([:A, :B, missing], outer=4),
        b = repeat(1:2, inner=6),
        c = 1:12,
    )
    gd = groupby_checked(df, [:a, :b])

    # One collection of indices per scalar index type
    idxsets = [1:length(gd), keys(gd), Tuple.(keys(gd)), NamedTuple.(keys(gd))]

    # Pair the first index of each type with the second index of every later
    # type; an array holding both must be rejected
    for (i, idxset) in enumerate(idxsets)
        idx1 = idxset[1]
        for other in idxsets[(i + 1):end]
            mixed = [idx1, other[2]]
            @test_throws MethodError gd[mixed]
        end
    end
end

Expand Down

0 comments on commit 4039ee4

Please sign in to comment.