diff --git a/src/array.jl b/src/array.jl
index 8580ef0..7a98cb5 100644
--- a/src/array.jl
+++ b/src/array.jl
@@ -1,6 +1,6 @@
 ## Common code for CategoricalArray and NullableCategoricalArray
 
-import Base: convert, copy, getindex, setindex!, similar, size, linearindexing
+import Base: convert, copy, getindex, setindex!, similar, size, linearindexing, vcat
 
 # Used for keyword argument default value
 _ordered(x::AbstractCategoricalArray) = ordered(x)
@@ -215,6 +215,25 @@ end
             R = reftype(length(index(A.pool)))
             convert($A{T, N, R}, A)
         end
+
+        # Concatenate categorical arrays, merging their levels with `mergelevels`.
+        # The ordered flag handed to the new pool is true only when the levels merged
+        # consistently and `ordered` holds for every input.
+        function vcat(A::$A...)
+            newlevels, isordered = mergelevels(map(levels, A)...)
+
+            # Re-express each array's refs (positions in its own pool index) as
+            # positions in `newlevels`. A ref of 0 is kept as 0 instead of being used
+            # as an index, which would throw a BoundsError.
+            # NOTE(review): presumably 0 marks a null entry in
+            # NullableCategoricalArray -- confirm.
+            refs = map(A) do a
+                ii = indexin(index(a.pool), newlevels)
+                map(r -> r == 0 ? 0 : ii[r], a.refs)
+            end
+            return $A(DefaultRefType[refs...;],
+                      CategoricalPool(newlevels, isordered && all(ordered, A)))
+        end
     end
 end
 
@@ -351,3 +370,45 @@ function getindex(A::CategoricalArray, i::Int)
 end
 
 levels!(A::CategoricalArray, newlevels::Vector) = _levels!(A, newlevels)
+
+"""
+    mergelevels(levels...)
+
+Merge the level vectors `levels` into one vector containing each distinct level once.
+Return that vector together with a `Bool` telling whether the orders of all inputs
+were compatible.
+
+As long as every input lists the levels already collected in the same relative order,
+its new levels are inserted right before the next already-collected level that follows
+them (or at the end). Once an input contradicts the collected order, the flag turns
+`false` for good and new levels from that input and all later ones are appended.
+"""
+function mergelevels(levels...)
+    T = Base.promote_eltype(levels...)
+    res = T[]
+    isordered = true
+
+    for l in levels
+        # Position in `res` of each element of `l`; 0 means not in `res` yet
+        levelsmap = indexin(l, res)
+
+        isordered &= issorted(levelsmap[levelsmap .!= 0])
+        if !isordered
+            # Give up attempt to order res
+            append!(res, l[levelsmap .== 0])
+        else
+            # Go through `l` backwards: an insertion only shifts entries at or after
+            # `i`, so the positions in `levelsmap` still to be read stay valid
+            i = length(res) + 1
+            for j in length(l):-1:1
+                if levelsmap[j] == 0
+                    insert!(res, i, l[j])
+                else
+                    i = levelsmap[j]
+                end
+            end
+        end
+    end
+
+    return res, isordered
+end
diff --git a/test/11_array.jl b/test/11_array.jl
index 8511181..279167d 100644
--- a/test/11_array.jl
+++ b/test/11_array.jl
@@ -576,6 +576,67 @@ for isordered in (false, true)
             @test x[1] === x.pool.valindex[3]
             @test x[2] === x.pool.valindex[1]
             @test levels(x) == ["c", "a", "b"]
+
+
+            # Tests of vcat of CategoricalArray
+            # Test that vcat of compact arrays use a reftype that doesn't overflow
+            a1 = 3:200
+            a2 = 300:-1:100
+            ca1 = CategoricalArray(a1)
+            ca2 = CategoricalArray(a2)
+            cca1 = compact(ca1)
+            cca2 = compact(ca2)
+            r = vcat(cca1, cca2)
+            @test r == vcat(a1, a2)
+            @test isa(cca1, CategoricalArray{Int,1,UInt8})
+            @test isa(cca2, CategoricalArray{Int,1,UInt8})
+            @test isa(r, CategoricalArray{Int,1,CategoricalArrays.DefaultRefType})
+            @test isa(vcat(cca1, ca2), CategoricalArray{Int,1,CategoricalArrays.DefaultRefType})
+            @test ordered(r) == false
+            @test levels(r) == collect(3:300)
+
+            # Test vcat of multidimensional arrays
+            a1 = Array{Int}(2,3,4,5)
+            a2 = Array{Int}(3,3,4,5)
+            a1[1:end] = (length(a1):-1:1) + 2
+            a2[1:end] = (1:length(a2)) + 10
+            ca1 = CategoricalArray(a1)
+            ca2 = CategoricalArray(a2)
+            cca1 = compact(ca1)
+            cca2 = compact(ca2)
+            r = vcat(cca1, cca2)
+            @test r == vcat(a1, a2)
+            @test isa(r, CategoricalArray{Int,4,CategoricalArrays.DefaultRefType})
+            @test ordered(r) == false
+            @test levels(r) == collect(3:length(a2)+10)
+
+            # Test that mergelevels handles mutually compatible ordering
+            @test CategoricalArrays.mergelevels([6,3,4,7],[2,3,5,4],[2,4,8]) == ([6,2,3,5,4,7,8],true)
+            @test CategoricalArrays.mergelevels([6,3,4,7],[2,3,6,5,4],[2,4,8]) == ([6,3,4,7,2,5,8],false)
+
+            # Test concatenation of mutually compatible levels
+            a1 = ["Young", "Middle"]
+            a2 = ["Middle", "Old"]
+            ca1 = CategoricalArray(a1, ordered=true)
+            ca2 = CategoricalArray(a2, ordered=true)
+            levels!(ca1, ["Young", "Middle"])
+            levels!(ca2, ["Middle", "Old"])
+            r = vcat(ca1, ca2)
+            @test r == vcat(a1, a2)
+            @test levels(r) == ["Young", "Middle", "Old"]
+            @test ordered(r) == true
+
+            # Test concatenation of conflicting ordering. This drops the ordering
+            a1 = ["Old", "Young", "Young"]
+            a2 = ["Old", "Young", "Middle", "Young"]
+            ca1 = CategoricalArray(a1, ordered=true)
+            ca2 = CategoricalArray(a2, ordered=true)
+            levels!(ca1, ["Young", "Middle", "Old"])
+            # ca2 has another order
+            r = vcat(ca1, ca2)
+            @test r == vcat(a1, a2)
+            @test levels(r) == ["Young", "Middle", "Old"]
+            @test ordered(r) == false
         end
     end
 end