From 2580d5687ca4930c1043a552b3ed94d330ae09c4 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Sat, 25 Nov 2023 02:11:40 +0100 Subject: [PATCH] Document and export `Base.in!` (#51636) I think `in!` is a useful general function for users, and it would be good to have as official API. Its semantics are clear and unambiguous, and it provides a clear performance advantage over the naive implementation. For more evidence that this functionality is useful, consider: * Rust's `HashSet::insert` works much like this implementation of `in!` (with the returned boolean inverted) * This function was already used in the implementation of `Base.unique`, precisely for its performance advantage over the naive approach This follows from #45156, which has some initial discussion. --- NEWS.md | 1 + base/exports.jl | 1 + base/set.jl | 42 +++++++++++++++++++++++++++++++++---- doc/src/base/collections.md | 1 + test/sets.jl | 25 ++++++++++++++++++++++ 5 files changed, 66 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index c9b968da3c1e0..30a4c1af21d89 100644 --- a/NEWS.md +++ b/NEWS.md @@ -54,6 +54,7 @@ Build system changes New library functions --------------------- +* `in!(x, s::AbstractSet)` will return whether `x` is in `s`, and insert `x` in `s` if not. * The new `Libc.mkfifo` function wraps the `mkfifo` C function on Unix platforms ([#34587]). * `hardlink(src, dst)` can be used to create hard links ([#41639]). * `diskstat(path=pwd())` can be used to return statistics about the disk ([#42248]). 
diff --git a/base/exports.jl b/base/exports.jl index b6f7ea0d6ad35..398d828f9cf19 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -532,6 +532,7 @@ export getkey, haskey, in, + in!, intersect!, intersect, isdisjoint, diff --git a/base/set.jl b/base/set.jl index 489673945ea1f..07c857fab3213 100644 --- a/base/set.jl +++ b/base/set.jl @@ -91,12 +91,46 @@ isempty(s::Set) = isempty(s.dict) length(s::Set) = length(s.dict) in(x, s::Set) = haskey(s.dict, x) -# This avoids hashing and probing twice and it works the same as -# in!(x, s::Set) = in(x, s) ? true : (push!(s, x); false) +""" + in!(x, s::AbstractSet) -> Bool + +If `x` is in `s`, return `true`. If not, push `x` into `s` and return `false`. +This is equivalent to `in(x, s) ? true : (push!(s, x); false)`, but may have a +more efficient implementation. + +See also: [`in`](@ref), [`push!`](@ref), [`Set`](@ref) + +!!! compat "Julia 1.11" + This function requires at least Julia 1.11. + +# Examples +```jldoctest; filter = r"^ [1234]\$" +julia> s = Set{Any}([1, 2, 3]); in!(4, s) +false + +julia> length(s) +4 + +julia> in!(0x04, s) +true + +julia> s +Set{Any} with 4 elements: + 4 + 2 + 3 + 1 +``` +""" +function in!(x, s::AbstractSet) + x ∈ s ? true : (push!(s, x); false) +end + function in!(x, s::Set) - idx, sh = ht_keyindex2_shorthash!(s.dict, x) + xT = convert(eltype(s), x) + idx, sh = ht_keyindex2_shorthash!(s.dict, xT) idx > 0 && return true - _setindex!(s.dict, nothing, x, -idx, sh) + _setindex!(s.dict, nothing, xT, -idx, sh) return false end diff --git a/doc/src/base/collections.md b/doc/src/base/collections.md index a8f2bdc6b7d7d..23cb8d6dab7c2 100644 --- a/doc/src/base/collections.md +++ b/doc/src/base/collections.md @@ -255,6 +255,7 @@ Base.symdiff Base.symdiff! Base.intersect! Base.issubset +Base.in! 
Base.:⊈ Base.:⊊ Base.issetequal diff --git a/test/sets.jl b/test/sets.jl index 19ccf3bf3701e..2e7e19c5c1cdf 100644 --- a/test/sets.jl +++ b/test/sets.jl @@ -133,6 +133,31 @@ end @test pop!(s, 4) === 4.0 @test_throws KeyError pop!(s, 5) end + +@testset "in!" begin + s = Set() + @test !(in!(0x01, s)) + @test !(in!(Int32(2), s)) + @test in!(1, s) + @test in!(2.0, s) + (a, b, c...) = sort!(collect(s)) + @test a === 0x01 + @test b === Int32(2) + @test isempty(c) + + # in! will convert to the right type automatically + s = Set{Int32}() + @test !(in!(1, s)) + @test only(s) === Int32(1) + @test_throws Exception in!("hello", s) + + # Other set types + s = BitSet() + @test !(in!(13, s)) + @test in!(UInt16(13), s) + @test only(s) === 13 +end + @testset "copy" begin data_in = (1,2,9,8,4) s = Set(data_in)