Skip to content

Commit

Permalink
sortedmerge
Browse files Browse the repository at this point in the history
  • Loading branch information
Johan Gustafsson committed Sep 26, 2016
1 parent 8654b6d commit 610b701
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 43 deletions.
56 changes: 33 additions & 23 deletions src/array.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Common code for CategoricalArray and NullableCategoricalArray

import Base: convert, copy, getindex, setindex!, similar, size, linearindexing
import Base: convert, copy, getindex, setindex!, similar, size, linearindexing, vcat

# Used for keyword argument default value
_ordered(x::AbstractCategoricalArray) = ordered(x)
Expand Down Expand Up @@ -213,29 +213,11 @@ end
convert($A{T, N, R}, A)
end

function Base.vcat{T,N,R}(A1::$A{T, N, R}, An::$A...)
As = (A1, An...)
levels = unique(T[[a.pool.levels for a in As]...;])
idx = [indexin(a.pool.index, levels) for a in As]
function vcat(A::$A...)
L, O = sortedmerge(map(levels, A)...)

ordered = A1.pool.ordered
if ordered && levels != A1.pool.levels
warn("Failed to preserve order of levels. Define all levels in the first argument.")
ordered = false
else
for (i,a) in zip(idx,As)
if a.pool.ordered
if !issorted(i[Base.invperm(a.pool.order)])
warn("Failed to preserve order of levels. The first argument defines the levels and their order.")
ordered = false
break
end
end
end
end

refs = DefaultRefType[[i[a.refs] for (i,a) in zip(idx,As)]...;]
$A(refs, CategoricalPool(levels, ordered))
refs = DefaultRefType[[indexin(index(a.pool), L)[a.refs] for a in A]...;]
$A(refs, CategoricalPool(L, O && all(ordered, A)))
end
end
end
Expand Down Expand Up @@ -350,3 +332,31 @@ function getindex(A::CategoricalArray, i::Int)
end

levels!(A::CategoricalArray, newlevels::Vector) = _levels!(A, newlevels)


"""
    sortedmerge(A...) -> (merged, ordered)

Merge the indexable sequences `A` into a single vector while trying to respect the
element order of every input.

Returns a tuple `(merged, ordered)`:

- `merged` is a `Vector` whose element type is the promoted element type of all inputs.
  Inputs are processed left to right. Each element that is not yet in `merged` is
  inserted directly before the next element of the same input that is already in
  `merged`, or at the end when no such element follows.
- `ordered` is `true` only if, for every input, the elements already present in
  `merged` appeared in the same relative order as in `merged`. As soon as one input
  contradicts the established order, `ordered` becomes `false` and the unseen
  elements of that input and of all later inputs are simply appended at the end.

Elements repeated within one input that are not yet in `merged` are inserted once per
occurrence, so callers should pass duplicate-free sequences.
"""
function sortedmerge(A...)
    T = Base.promote_eltype(A...)
    # `T[]` rather than `Array{T}(0)`: the latter constructor was removed in Julia 1.0.
    m = T[]
    ordered = true

    for a in A
        # Position in `m` of every element of `a`, with 0 meaning "absent".
        # Computed by hand instead of calling `indexin`, whose "absent" marker changed
        # from 0 to `nothing` in Julia 0.7; this keeps the 0 sentinel on every version.
        lookup = Dict{T,Int}()
        for k in 1:length(m)
            lookup[m[k]] = k
        end
        i = Int[get(lookup, v, 0) for v in a]

        # The already-known elements of `a` must appear in the same order as in `m`.
        ordered &= issorted(i[i .!= 0])
        if !ordered
            # Order is lost (now or earlier): just collect the unseen elements.
            append!(m, a[i .== 0])
            continue
        end

        # Walk `a` backwards so that every insertion happens at or after the positions
        # still to be visited, which keeps the remaining entries of `i` valid.
        x = length(m) + 1
        for j in length(i):-1:1
            if i[j] == 0
                insert!(m, x, a[j])
            else
                x = i[j]
            end
        end
    end

    return m, ordered
end
37 changes: 17 additions & 20 deletions test/11_array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -546,35 +546,32 @@ for isordered in (false, true)
@test r == vcat(a1, a2)
@test isa(r, CategoricalArray{Int,4,CategoricalArrays.DefaultRefType})

# All levels have to be present in the first argument to vcat to preserve ordering
a1 = ["Old", "Young", "Young"]
a2 = ["Old", "Young", "Middle", "Young"]
# Test that sortedmerge handles mutually compatible ordering
@test CategoricalArrays.sortedmerge([6,3,4,7],[2,3,5,4]) == ([6,2,3,5,4,7],true)

# Test concatenation of mutually compatible levels
a1 = ["Young", "Middle"]
a2 = ["Middle", "Old"]
ca1 = CategoricalArray(a1, ordered=true)
ca2 = CategoricalArray(a2)
levels!(ca1, ["Young", "Middle", "Old"])
ca2 = CategoricalArray(a2, ordered=true)
levels!(ca1, ["Young", "Middle"])
levels!(ca2, ["Middle", "Old"])
r = vcat(ca1, ca2)
@test r == vcat(a1, a2)
@test isa(r, CategoricalArray{ASCIIString,1,CategoricalArrays.DefaultRefType})
@test levels(r) == ["Young", "Middle", "Old"]
@test ordered(r) == true

#=
# Test concatenation of ambiguous ordering. This prints a warning about
# mixing ordering and returns a categorical array with ordered=false.
levels!(ca1, ["Young", "Old"])
levels!(ca2, ["Old", "Young", "Middle"])
ordered!(ca1,true)
ordered!(ca2,true)
println("Expect warning: Failed to preserve order of levels. Define all levels in the first argument.")
# Test concatenation of ambiguous ordering. This drops the ordering
a1 = ["Old", "Young", "Young"]
a2 = ["Old", "Young", "Middle", "Young"]
ca1 = CategoricalArray(a1, ordered=true)
ca2 = CategoricalArray(a2, ordered=true)
levels!(ca1, ["Young", "Middle", "Old"])
# ca2 has another order
r = vcat(ca1, ca2)
@test r == vcat(a1, a2)
@test levels(r) == ["Young", "Middle", "Old"]
@test ordered(r) == false
println("Expect warning: Failed to preserve order of levels. The first argument defines the levels and their order.")
r = vcat(ca2, ca1)
@test r == vcat(a2, a1)
@test ordered(r) == false
=#
end
end
end
Expand Down

0 comments on commit 610b701

Please sign in to comment.