Skip to content

Commit

Permalink
Add unique! (#20619)
Browse files Browse the repository at this point in the history
  • Loading branch information
JackDevine authored and StefanKarpinski committed Jun 16, 2017
1 parent ae342ac commit ce3f853
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 0 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ Library improvements
* `logspace` now accepts a `base` keyword argument to specify the base of the logarithmic
range. The base defaults to 10 ([#22310]).

* Added `unique!`, an in-place version of `unique` ([#20549]).

Compiler/Runtime improvements
-----------------------------

Expand Down
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,7 @@ export
symdiff,
union!,
union,
unique!,
unique,
values,
valtype,
Expand Down
88 changes: 88 additions & 0 deletions base/set.jl
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,94 @@ function unique(f::Callable, C)
out
end

# If A is not grouped, then we will need to keep track of all of the elements that we have
# seen so far.
#
# Compacts `A` in place so that it holds each distinct element (as decided by membership in
# a `Set{eltype(A)}`) once, in order of first occurrence, then shrinks `A` to that length.
# Returns the value of `resize!(A, n)`, or `A` itself when `A` is empty.
function _unique!(A::AbstractVector)
    # Nothing to deduplicate; returning early also avoids resizing an empty vector.
    isempty(A) && return A
    seen = Set{eltype(A)}()
    idxs = eachindex(A)
    # `w` is the index of the most recently written unique element. It never runs ahead
    # of the element currently being read, so writing into `A` while iterating is safe.
    w = first(idxs) - 1
    for x in A
        if !(x in seen)
            push!(seen, x)
            w += 1
            A[w] = x
        end
    end
    return resize!(A, w - first(idxs) + 1)
end

# If A is grouped, so that each unique element is in a contiguous group, then we only
# need to keep track of one element at a time. We replace the elements of A with the
# unique elements that we see in the order that we see them. Once we have iterated
# through A, we resize A based on the number of unique elements that we see.
#
# Elements are compared with `isequal`. Returns `A` unchanged when it is empty, otherwise
# the value of `resize!(A, n)` where `n` is the number of groups found.
function _groupedunique!(A::AbstractVector)
    isempty(A) && return A
    idxs = eachindex(A)
    # `y` is the representative of the group currently being scanned.
    y = first(A)
    # `w` is the index of the last kept element; the first element is always kept.
    w = first(idxs)
    for x in A
        if !isequal(x, y)
            # A new group starts here: store its first element right after the last one.
            w += 1
            y = A[w] = x
        end
    end
    return resize!(A, w - first(idxs) + 1)
end

"""
    unique!(A::AbstractVector)

Remove duplicate items as determined by [`isequal`](@ref), then return the modified `A`.
`unique!` will return the elements of `A` in the order that they occur. If you do not care
about the order of the returned data, then calling `(sort!(A); unique!(A))` will be much
more efficient as long as the elements of `A` can be sorted.

```jldoctest
julia> unique!([1, 1, 1])
1-element Array{Int64,1}:
 1

julia> A = [7, 3, 2, 3, 7, 5];

julia> unique!(A)
4-element Array{Int64,1}:
 7
 3
 2
 5

julia> B = [7, 6, 42, 6, 7, 42];

julia> sort!(B);  # unique! is able to process sorted data much more efficiently.

julia> unique!(B)
3-element Array{Int64,1}:
  6
  7
 42
```
"""
function unique!(A::Union{AbstractVector{<:Real}, AbstractVector{<:AbstractString},
                          AbstractVector{<:Symbol}})
    # An empty vector is already free of duplicates.
    isempty(A) && return A
    # Input sorted in either direction has its equal elements next to each other, so the
    # grouped helper can be used; anything else goes through the Set-based helper.
    grouped = issorted(A) || issorted(A, rev=true)
    return grouped ? _groupedunique!(A) : _unique!(A)
end
# issorted fails for some element types, so the method above has to be restricted to
# elements with isless/< defined. Every other input lands here: an empty collection is
# handed back as is, anything else is deduplicated by `_unique!`.
unique!(A) = isempty(A) ? A : _unique!(A)

"""
allunique(itr) -> Bool
Expand Down
1 change: 1 addition & 0 deletions doc/src/stdlib/collections.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ Base.eltype
Base.indexin
Base.findin
Base.unique
Base.unique!
Base.allunique
Base.reduce(::Any, ::Any, ::Any)
Base.reduce(::Any, ::Any)
Expand Down
2 changes: 2 additions & 0 deletions test/TestHelpers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ _offset(out, ::Tuple{}, ::Tuple{}) = out
# Offset for a range: one less than its first element.
indsoffset(r::Range) = first(r) - 1
# Offset for a plain integer: always zero.
indsoffset(i::Integer) = 0

# Resize the wrapped parent vector to `nl` elements and return the wrapper itself, so that
# `resize!` on an OffsetVector yields the same object it was given.
function Base.resize!(A::OffsetVector, nl::Integer)
    resize!(A.parent, nl)
    return A
end

end

end
38 changes: 38 additions & 0 deletions test/sets.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

# Set tests
isdefined(Main, :TestHelpers) || @eval Main include("TestHelpers.jl")
using TestHelpers.OAs

# Construction, collect
@test ===(typeof(Set([1,2,3])), Set{Int})
Expand Down Expand Up @@ -221,6 +223,42 @@ u = unique([1,1,2])
@test @inferred(unique(x for x in 1:1)) == [1]
@test unique(x for x in Any[1,1.0])::Vector{Real} == [1]

# unique!
@testset "unique!" begin
    # Unsorted input keeps the first occurrence of each element, in order.
    u = [1,1,3,2,1]
    unique!(u)
    @test u == [1,3,2]
    # Empty inputs.
    @test unique!([]) == []
    @test unique!(Float64[]) == Float64[]
    # Ascending and descending sorted input.
    u = [1,2,2,3,5,5]
    @test unique!(u) === u
    @test u == [1,2,3,5]
    u = [6,5,5,3,3,2,1]
    @test unique!(u) === u
    @test u == [6,5,3,2,1]
    # Vectors whose indices do not start at 1.
    u = OffsetArray([1,2,2,3,5,5], -1)
    @test unique!(u) === u
    @test u == OffsetArray([1,2,3,5], -1)
    u = OffsetArray([5,5,4,4,2,2,0,-1,-1], -1)
    @test unique!(u) === u
    @test u == OffsetArray([5,4,2,0,-1], -1)
    u = OffsetArray(["w","we","w",5,"r",5,5], -1)
    @test unique!(u) === u
    @test u == OffsetArray(["w","we",5,"r"], -1)
    # Signed zeros are distinct under isequal; NaNs are equal to each other.
    u = [0.0,-0.0,1.0,2]
    @test unique!(u) === u
    @test u == [0.0,-0.0,1.0,2.0]
    u = [1,NaN,NaN,3]
    @test unique!(u) === u
    @test length(u) == 3
    @test u[1] == 1
    @test isnan(u[2])
    @test u[3] == 3
    # Mixed element types.
    u = [5,"w","we","w","r",5,"w"]
    unique!(u)
    @test u == [5,"w","we","r"]
    # Unsorted integers with several repeated values.
    u = [1,2,5,1,3,2]
    @test unique!(u) === u
    @test u == [1,2,5,3]
end

# allunique
@test allunique([])
@test allunique(Set())
Expand Down

0 comments on commit ce3f853

Please sign in to comment.