Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve performance of searchsorted(range, lt=<) #50365

Merged
merged 10 commits into from
Sep 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 9 additions & 12 deletions base/ordering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -125,18 +125,15 @@ lt(o::Lt, a, b) = o.lt(a,b)
(lt(p.order, da, db)::Bool) | (!(lt(p.order, db, da)::Bool) & (a < b))
end

_ord(lt::typeof(isless), by::typeof(identity), order::Ordering) = order
_ord(lt::typeof(isless), by, order::Ordering) = By(by, order)

function _ord(lt, by, order::Ordering)
if order === Forward
return Lt((x, y) -> lt(by(x), by(y)))
elseif order === Reverse
return Lt((x, y) -> lt(by(y), by(x)))
else
error("Passing both lt= and order= arguments is ambiguous; please pass order=Forward or order=Reverse (or leave default)")
end
end

_ord(lt::typeof(isless), by, order::Ordering) = _by(by, order)
_ord(lt::typeof(isless), by, order::ForwardOrdering) = _by(by, order) # disambiguation
_ord(lt::typeof(isless), by, order::ReverseOrdering{ForwardOrdering}) = _by(by, order) # disambiguation
_ord(lt, by, order::ForwardOrdering) = _by(by, Lt(lt))
_ord(lt, by, order::ReverseOrdering{ForwardOrdering}) = reverse(_by(by, Lt(lt)))
_ord(lt, by, order::Ordering) = error("Passing both lt= and order= arguments is ambiguous; please pass order=Forward or order=Reverse (or leave default)")
_by(by, order::Ordering) = By(by, order)
_by(::typeof(identity), order::Ordering) = order

"""
ord(lt, by, rev::Union{Bool, Nothing}, order::Ordering=Forward)
Expand Down
17 changes: 10 additions & 7 deletions base/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,10 @@ function searchsorted(v::AbstractVector, x, ilo::T, ihi::T, o::Ordering)::UnitRa
return (lo + 1) : (hi - 1)
end

function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)::keytype(a)

const FastRangeOrderings = Union{DirectOrdering,Lt{typeof(<)},ReverseOrdering{Lt{typeof(<)}}}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you need a Union like DirectOrdering, usually that is a sign that something is designed poorly, and a bit buggy in edge case. But it appears that all of these types in the Union are concrete, so it is not huge and much less likely to be buggy, but still a little bit concerning design-wise. Is this Union likely to need to grow in the future, and if so, is there something better?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DirectOrdering already exists and was used in this function signature before. Here, I extended it with two more concrete types.
DirectOrdering is also user for fastpath in sort, with missings and with floats/integers. Sorting missings doesn't play nicely with < anyway, and < on floats is complicated by NaNs, so I guess these two unions will remain different.

I don't foresee any extension to this union. In principle, stuff like isgreater and > could also be supported, but there's no need given the rev argument.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This type gates dispatch to an optimization that relies on a monotonic order over the real numbers. It could be extended to be arbitrarily large to e.g. include By{typeof(atan), ForwardOrdering}.

This pr optimizes
searchsorted(range, lt=<)
That further extension would optimize
searchsorted(range, by=atan)

A more general solution would be an ismonotonic trait.

All this is a tradeoff between runtime performance on easy edge cases vs. compile times and sysimg size. It's unfortunate that it is so difficult to instrument compile time and sysimg size regressions.


function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::FastRangeOrderings)::keytype(a)
require_one_based_indexing(a)
f, h, l = first(a), step(a), last(a)
if lt(o, x, f)
Expand All @@ -238,7 +241,7 @@ function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering):
end
end

function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)::keytype(a)
function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::FastRangeOrderings)::keytype(a)
require_one_based_indexing(a)
f, h, l = first(a), step(a), last(a)
if !lt(o, f, x)
Expand All @@ -251,39 +254,39 @@ function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)
end
end

function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::DirectOrdering)::keytype(a)
function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::FastRangeOrderings)::keytype(a)
require_one_based_indexing(a)
f, h, l = first(a), step(a), last(a)
if lt(o, x, f)
0
elseif h == 0 || !lt(o, x, l)
length(a)
else
if o isa ForwardOrdering
if !(o isa ReverseOrdering)
fld(floor(Integer, x) - f, h) + 1
else
fld(ceil(Integer, x) - f, h) + 1
end
end
end

function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::DirectOrdering)::keytype(a)
function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::FastRangeOrderings)::keytype(a)
require_one_based_indexing(a)
f, h, l = first(a), step(a), last(a)
if !lt(o, f, x)
1
elseif h == 0 || lt(o, l, x)
length(a) + 1
else
if o isa ForwardOrdering
if !(o isa ReverseOrdering)
cld(ceil(Integer, x) - f, h) + 1
else
cld(floor(Integer, x) - f, h) + 1
end
end
end

searchsorted(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering) =
searchsorted(a::AbstractRange{<:Real}, x::Real, o::FastRangeOrderings) =
searchsortedfirst(a, x, o) : searchsortedlast(a, x, o)

for s in [:searchsortedfirst, :searchsortedlast, :searchsorted]
Expand Down
25 changes: 18 additions & 7 deletions test/ordering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,24 @@

using Test

import Base.Order: Forward, Reverse
import Base.Order: Forward, Reverse, ord, Lt, By, ReverseOrdering

# every argument can flip the integer order by passing the right value. Here,
# we enumerate a few of these combinations and check that all these flips
# compound so that in total we either have an increasing or decreasing sort.
for (s1, rev) in enumerate([true, false])
for (s2, lt) in enumerate([>, <, (a, b) -> a - b > 0, (a, b) -> a - b < 0])
for (s2, lt) in enumerate([(a, b)->isless(b, a), isless, >, <, (a, b) -> a - b > 0, (a, b) -> a - b < 0])
for (s3, by) in enumerate([-, +])
for (s4, order) in enumerate([Reverse, Forward])
if iseven(s1 + s2 + s3 + s4)
target = [1, 2, 3]
else
target = [3, 2, 1]
end
is_fwd = iseven(s1 + s2 + s3 + s4)
target = is_fwd ? (1:3) : (3:-1:1)
# arrays, integer and float ranges sometimes have different code paths
@test target == sort([2, 3, 1], rev=rev, lt=lt, by=by, order=order)

@test target == sort(1:3, rev=rev, lt=lt, by=by, order=order)
@test target == sort(3:-1:1, rev=rev, lt=lt, by=by, order=order)
@test float(target) == sort(1.0:3, rev=rev, lt=lt, by=by, order=order)
@test float(target) == sort(3.0:-1:1, rev=rev, lt=lt, by=by, order=order)
end
end
end
Expand All @@ -40,3 +43,11 @@ struct SomeOtherOrder <: Base.Order.Ordering end

@test reverse(Forward) === Reverse
@test reverse(Reverse) === Forward

@test ord(isless, identity, false, Forward) === Forward
@test ord(isless, identity, true, Forward) === Reverse
@test ord(<, identity, false, Forward) === Lt(<)
@test ord(isless, abs, false, Forward) === By(abs)
@test ord(<, abs, false, Forward) === By(abs, Lt(<))
@test ord(<, abs, true, Forward) === ReverseOrdering(By(abs, Lt(<)))
@test ord(<, abs, true, Reverse) === By(abs, Lt(<))
10 changes: 10 additions & 0 deletions test/sorting.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1225,6 +1225,16 @@ end
@test searchsorted(v, 0.1, rev=true) === 4:3
end
end

@testset "ranges issue #44102, PR #50365" begin
# range sorting test for different Ordering parameter combinations
@test searchsorted(-1000.0:1:1000, -0.0) === 1001:1000
@test searchsorted(-1000.0:1:1000, -0.0; lt=<) === 1001:1001
@test searchsorted(-1000.0:1:1000, -0.0; lt=<, by=x->x) === 1001:1001
@test searchsorted(reverse(-1000.0:1:1000), -0.0; lt=<, by=-) === 1001:1001
@test searchsorted(reverse(-1000.0:1:1000), -0.0, rev=true) === 1002:1001
@test searchsorted(reverse(-1000.0:1:1000), -0.0; lt=<, rev=true) === 1001:1001
end
end
# The "searchsorted" testset is at the end of the file because it is slow.

Expand Down