Enforce correct eltype of arrays used to index GroupedDataFrame

JuliaData · Dec 9, 2019 · 4039ee4
1 parent b97ec42
commit 4039ee4
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 9 deletions.
diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md
@@ -226,7 +226,7 @@ The elements of a `GroupedDataFrame` are [`SubDataFrame`](@ref)s of its parent.
   array of `Integer`s or `Bool`s, similar to a standard array. Alternatively the
   array may contain keys of any of the types supported for dictionary-like
   indexing (`GroupKey`, `Tuple`, or `NamedTuple`). Selected groups must be
-  unique.
+  unique, and different types of indices cannot be mixed.
 * `gd[n::Not]` -> Any of the above types wrapped in [`Not`](@ref). The result
    will be a new `GroupedDataFrame` containing all groups in `gd` *not* selected
    by the wrapped index.
diff --git a/src/groupeddataframe/grouping.jl b/src/groupeddataframe/grouping.jl
@@ -415,6 +415,8 @@ Base.keys(gd::GroupedDataFrame) = GroupKeys(gd)
 
 # The allowed key types for dictionary-like indexing
 const GroupKeyTypes = Union{GroupKey, Tuple, NamedTuple}
+# All allowed scalar index types
+const GroupIndexTypes = Union{Integer, GroupKeyTypes}
 
 
 # Find the integer index of a group given any supported scalar index type
@@ -445,13 +447,18 @@ Base.getindex(gd::GroupedDataFrame, key::GroupKeyTypes) = gd[_findgroup(gd, key)
 # Array indexing with dictionary-like keys
 Base.getindex(gd::GroupedDataFrame, idxs::AbstractVector{T}) where {T<:GroupKeyTypes} = gd[[_findgroup(gd, k) for k in idxs]]
 
-# InvertedIndex
-function Base.getindex(gd::GroupedDataFrame, idx::InvertedIndex{T}) where T
+# InvertedIndex wrapping scalar index
+function Base.getindex(gd::GroupedDataFrame, idx::InvertedIndex{T}) where {T<:GroupIndexTypes}
     selected = trues(length(gd))
-    if T <: AbstractVector
-        selected[[_findgroup(gd, k) for k in idx.skip]] .= false
-    else
-        selected[_findgroup(gd, idx.skip)] = false
+    selected[_findgroup(gd, idx.skip)] = false
+    return gd[selected]
+end
+
+# InvertedIndex wrapping array
+function Base.getindex(gd::GroupedDataFrame, idx::InvertedIndex{T}) where {E<:GroupIndexTypes,T<:AbstractVector{E}}
+    selected = trues(length(gd))
+    for i in idx.skip
+        selected[_findgroup(gd, i)] = false
     end
     return gd[selected]
 end

diff --git a/test/grouping.jl b/test/grouping.jl
@@ -1477,17 +1477,37 @@ end
     expected = gd[[i != skip_i for i in 1:length(gd)]]
 
     for skip in [skip_i, skip_key, Tuple(skip_key), NamedTuple(skip_key)]
-        @test gd[InvertedIndex(skip)] ≅ expected
+        @test gd[Not(skip)] ≅ expected
     end
 
+    @test_throws ArgumentError gd[Not(true)]  # Bool <: Integer, but should fail
+
     # Inverted array index
     skipped = [3, 5, 2]
     skipped_bool = [i ∈ skipped for i in 1:length(gd)]
     skipped_keys = keys(gd)[skipped]
     expected2 = gd[.!skipped_bool]
 
     for skip in [skipped, skipped_bool, skipped_keys, Tuple.(skipped_keys), NamedTuple.(skipped_keys)]
-        @test gd[InvertedIndex(skip)] ≅ expected2
+        @test gd[Not(skip)] ≅ expected2
+    end
+end
+
+@testset "GroupedDataFrame array index homogeneity" begin
+    df = DataFrame(a = repeat([:A, :B, missing], outer=4), b = repeat(1:2, inner=6), c = 1:12)
+    gd = groupby_checked(df, [:a, :b])
+
+    # All scalar index types
+    idxsets = [1:length(gd), keys(gd), Tuple.(keys(gd)), NamedTuple.(keys(gd))]
+
+    # Mixing index types should fail
+    for i in 1:length(idxsets)
+        idx1 = idxsets[i][1]
+        for j in (i+1):length(idxsets)
+            idx2 = idxsets[j][2]
+            a = [idx1, idx2]
+            @test_throws MethodError gd[a]
+        end
     end
 end