Skip to content

Commit

Permalink
Merge pull request #24673 from JuliaLang/nl/search
Browse files Browse the repository at this point in the history
Clean up search and find API
  • Loading branch information
nalimilan committed Jan 4, 2018
2 parents 236d190 + ff8b17d commit 1950086
Show file tree
Hide file tree
Showing 63 changed files with 808 additions and 803 deletions.
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,14 @@ Deprecated or removed
in favor of dot overloading (`getproperty`) so factors should now be accessed as e.g.
`F.Q` instead of `F[:Q]` ([#25184]).

* `search` and `rsearch` have been deprecated in favor of `findfirst`/`findnext` and
`findlast`/`findprev` respectively, in combination with the new `equalto` and `occursin`
predicates for some methods ([#24673]).

* `ismatch(regex, str)` has been deprecated in favor of `contains(str, regex)` ([#24673]).

* `findin(a, b)` has been deprecated in favor of `find(occursin(b), a)` ([#24673]).

Command-line option changes
---------------------------

Expand Down
6 changes: 3 additions & 3 deletions base/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,7 @@ get(A::AbstractArray, I::Dims, default) = checkbounds(Bool, A, I...) ? A[I...] :

function get!(X::AbstractVector{T}, A::AbstractVector, I::Union{AbstractRange,AbstractVector{Int}}, default::T) where T
# 1d is not linear indexing
ind = findin(I, indices1(A))
ind = find(occursin(indices1(A)), I)
X[ind] = A[I[ind]]
Xind = indices1(X)
X[first(Xind):first(ind)-1] = default
Expand All @@ -1064,7 +1064,7 @@ function get!(X::AbstractVector{T}, A::AbstractVector, I::Union{AbstractRange,Ab
end
function get!(X::AbstractArray{T}, A::AbstractArray, I::Union{AbstractRange,AbstractVector{Int}}, default::T) where T
# Linear indexing
ind = findin(I, 1:length(A))
ind = find(occursin(1:length(A)), I)
X[ind] = A[I[ind]]
X[1:first(ind)-1] = default
X[last(ind)+1:length(X)] = default
Expand Down Expand Up @@ -1237,7 +1237,7 @@ _cs(d, a, b) = (a == b ? a : throw(DimensionMismatch(
"mismatch in dimension $d (expected $a got $b)")))

dims2cat(::Val{n}) where {n} = ntuple(i -> (i == n), Val(n))
dims2cat(dims) = ntuple(i -> (i in dims), maximum(dims))
dims2cat(dims) = ntuple(occursin(dims), maximum(dims))

cat(dims, X...) = cat_t(dims, promote_eltypeof(X...), X...)

Expand Down
47 changes: 11 additions & 36 deletions base/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1799,6 +1799,7 @@ end

find(x::Bool) = x ? [1] : Vector{Int}()
find(testf::Function, x::Number) = !testf(x) ? Vector{Int}() : [1]
# A number is treated as a one-element collection: return `[1]` when `x` occurs in
# `p.x` and an empty index vector otherwise, matching `find(testf::Function, x::Number)`.
find(p::OccursIn, x::Number) = x in p.x ? [1] : Vector{Int}()

"""
findnz(A)
Expand Down Expand Up @@ -2008,7 +2009,7 @@ function _findin(a, b)
ind
end

# If two collections are already sorted, findin can be computed with
# If two collections are already sorted, _findin can be computed with
# a single traversal of the two collections. This is much faster than
# using a hash table (although it has the same complexity).
function _sortedfindin(v, w)
Expand Down Expand Up @@ -2050,42 +2051,16 @@ function _sortedfindin(v, w)
return out
end

"""
findin(a, b)
Return the indices of elements in collection `a` that appear in collection `b`.
# Examples
```jldoctest
julia> a = collect(1:3:15)
5-element Array{Int64,1}:
1
4
7
10
13
julia> b = collect(2:4:10)
3-element Array{Int64,1}:
2
6
10
julia> findin(a,b) # 10 is the only common element
1-element Array{Int64,1}:
4
```
"""
function findin(a::Array{<:Real}, b::Union{Array{<:Real},Real})
if issorted(a, Sort.Forward) && issorted(b, Sort.Forward)
return _sortedfindin(a, b)
function find(pred::OccursIn{<:Union{Array{<:Real},Real}}, x::Array{<:Real})
if issorted(x, Sort.Forward) && issorted(pred.x, Sort.Forward)
return _sortedfindin(x, pred.x)
else
return _findin(a, b)
return _findin(x, pred.x)
end
end
# issorted fails for some element types so the method above has to be restricted
# to element types with isless/< defined.
findin(a, b) = _findin(a, b)
find(pred::OccursIn, x::Union{AbstractArray, Tuple}) = _findin(x, pred.x)

# Copying subregions
function indcopy(sz::Dims, I::Vector)
Expand All @@ -2094,8 +2069,8 @@ function indcopy(sz::Dims, I::Vector)
for i = n+1:length(sz)
s *= sz[i]
end
dst = eltype(I)[findin(I[i], i < n ? (1:sz[i]) : (1:s)) for i = 1:n]
src = eltype(I)[I[i][findin(I[i], i < n ? (1:sz[i]) : (1:s))] for i = 1:n]
dst = eltype(I)[_findin(I[i], i < n ? (1:sz[i]) : (1:s)) for i = 1:n]
src = eltype(I)[I[i][_findin(I[i], i < n ? (1:sz[i]) : (1:s))] for i = 1:n]
dst, src
end

Expand All @@ -2105,8 +2080,8 @@ function indcopy(sz::Dims, I::Tuple{Vararg{RangeIndex}})
for i = n+1:length(sz)
s *= sz[i]
end
dst::typeof(I) = ntuple(i-> findin(I[i], i < n ? (1:sz[i]) : (1:s)), n)::typeof(I)
src::typeof(I) = ntuple(i-> I[i][findin(I[i], i < n ? (1:sz[i]) : (1:s))], n)::typeof(I)
dst::typeof(I) = ntuple(i-> _findin(I[i], i < n ? (1:sz[i]) : (1:s)), n)::typeof(I)
src::typeof(I) = ntuple(i-> I[i][_findin(I[i], i < n ? (1:sz[i]) : (1:s))], n)::typeof(I)
dst, src
end

Expand Down
53 changes: 51 additions & 2 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3813,16 +3813,65 @@ end
@deprecate getq(F::Factorization) F.Q
end

# issue #5290
@deprecate lexcmp(x::AbstractArray, y::AbstractArray) cmp(x, y)
@deprecate lexcmp(x::Real, y::Real) cmp(isless, x, y)
@deprecate lexcmp(x::Complex, y::Complex) cmp((real(x),imag(x)), (real(y),imag(y)))
@deprecate lexcmp(x, y) cmp(x, y)

@deprecate lexless isless

# END 0.7 deprecations
@deprecate search(str::Union{String,SubString}, re::Regex, idx::Integer) findnext(re, str, idx)
@deprecate search(s::AbstractString, r::Regex, idx::Integer) findnext(r, s, idx)
@deprecate search(s::AbstractString, r::Regex) findfirst(r, s)
@deprecate search(s::AbstractString, c::Char, i::Integer) findnext(equalto(c), s, i)
@deprecate search(s::AbstractString, c::Char) findfirst(equalto(c), s)
@deprecate search(a::ByteArray, b::Union{Int8,UInt8}, i::Integer) findnext(equalto(b), a, i)
@deprecate search(a::ByteArray, b::Union{Int8,UInt8}) findfirst(equalto(b), a)
@deprecate search(a::String, b::Union{Int8,UInt8}, i::Integer) findnext(equalto(b), unsafe_wrap(Vector{UInt8}, a), i)
@deprecate search(a::String, b::Union{Int8,UInt8}) findfirst(equalto(b), unsafe_wrap(Vector{UInt8}, a))
@deprecate search(a::ByteArray, b::Char, i::Integer) findnext(equalto(UInt8(b)), a, i)
@deprecate search(a::ByteArray, b::Char) findfirst(equalto(UInt8(b)), a)

@deprecate search(s::AbstractString, c::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}, i::Integer) findnext(occursin(c), s, i)
@deprecate search(s::AbstractString, c::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}) findfirst(occursin(c), s)
@deprecate search(s::AbstractString, t::AbstractString, i::Integer) findnext(t, s, i)
@deprecate search(s::AbstractString, t::AbstractString) findfirst(t, s)

@deprecate search(buf::IOBuffer, delim::UInt8) findfirst(equalto(delim), buf)
@deprecate search(buf::Base.GenericIOBuffer, delim::UInt8) findfirst(equalto(delim), buf)

@deprecate rsearch(s::AbstractString, c::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}, i::Integer) findprev(occursin(c), s, i)
@deprecate rsearch(s::AbstractString, c::Union{Tuple{Vararg{Char}},AbstractVector{Char},Set{Char}}) findlast(occursin(c), s)
@deprecate rsearch(s::AbstractString, t::AbstractString, i::Integer) findprev(t, s, i)
@deprecate rsearch(s::AbstractString, t::AbstractString) findlast(t, s)
@deprecate rsearch(s::ByteArray, t::ByteArray, i::Integer) findprev(t, s, i)
@deprecate rsearch(s::ByteArray, t::ByteArray) findlast(t, s)

@deprecate rsearch(str::Union{String,SubString}, re::Regex, idx::Integer) findprev(re, str, idx)
@deprecate rsearch(str::Union{String,SubString}, re::Regex) findlast(re, str)
@deprecate rsearch(s::AbstractString, r::Regex, idx::Integer) findprev(r, s, idx)
@deprecate rsearch(s::AbstractString, r::Regex) findlast(r, s)
@deprecate rsearch(s::AbstractString, c::Char, i::Integer) findprev(equalto(c), s, i)
@deprecate rsearch(s::AbstractString, c::Char) findlast(equalto(c), s)
@deprecate rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = endof(a)) findprev(equalto(b), a, i)
@deprecate rsearch(a::String, b::Union{Int8,UInt8}, i::Integer = endof(a)) findprev(equalto(Char(b)), a, i)
@deprecate rsearch(a::ByteArray, b::Char, i::Integer = endof(a)) findprev(equalto(UInt8(b)), a, i)

@deprecate searchindex(s::AbstractString, t::AbstractString) first(findfirst(t, s))
@deprecate searchindex(s::AbstractString, t::AbstractString, i::Integer) first(findnext(t, s, i))
@deprecate rsearchindex(s::AbstractString, t::AbstractString) first(findlast(t, s))
@deprecate rsearchindex(s::AbstractString, t::AbstractString, i::Integer) first(findprev(t, s, i))

@deprecate searchindex(s::AbstractString, c::Char) first(findfirst(equalto(c), s))
@deprecate searchindex(s::AbstractString, c::Char, i::Integer) first(findnext(equalto(c), s, i))
@deprecate rsearchindex(s::AbstractString, c::Char) first(findlast(equalto(c), s))
@deprecate rsearchindex(s::AbstractString, c::Char, i::Integer) first(findprev(equalto(c), s, i))

@deprecate ismatch(r::Regex, s::AbstractString) contains(s, r)

@deprecate findin(a, b) find(occursin(b), a)

# END 0.7 deprecations
# BEGIN 1.0 deprecations

# END 1.0 deprecations
4 changes: 2 additions & 2 deletions base/docs/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ const builtins = ["abstract type", "baremodule", "begin", "break",

moduleusings(mod) = ccall(:jl_module_usings, Any, (Any,), mod)

filtervalid(names) = filter(x->!ismatch(r"#", x), map(string, names))
# Convert every entry of `names` to a string and drop those containing a `#` character.
function filtervalid(names)
    strs = map(string, names)
    return filter(s -> !contains(s, r"#"), strs)
end

accessible(mod::Module) =
[filter!(s -> !Base.isdeprecated(mod, s), names(mod, true, true));
Expand All @@ -363,7 +363,7 @@ completions(name::Symbol) = completions(string(name))
# Searching and apropos

# docsearch simply returns `true` if an object contains the given needle and `false` otherwise
docsearch(haystack::AbstractString, needle) = !isempty(search(haystack, needle))
docsearch(haystack::AbstractString, needle) = !isempty(findfirst(needle, haystack))
docsearch(haystack::Symbol, needle) = docsearch(string(haystack), needle)
docsearch(::Nothing, needle) = false
function docsearch(haystack::Array, needle)
Expand Down
51 changes: 24 additions & 27 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -409,18 +409,6 @@ export
extrema,
fill!,
fill,
find,
findfirst,
findlast,
findin,
findmax,
findmin,
findmin!,
findmax!,
findn,
findnext,
findprev,
findnz,
first,
flipdim,
hcat,
Expand Down Expand Up @@ -476,9 +464,6 @@ export
rot180,
rotl90,
rotr90,
searchsorted,
searchsortedfirst,
searchsortedlast,
shuffle,
shuffle!,
size,
Expand All @@ -501,6 +486,30 @@ export
view,
zeros,

# search, find, match and related functions
contains,
eachmatch,
endswith,
equalto,
find,
findfirst,
findlast,
findmax,
findmin,
findmin!,
findmax!,
findn,
findnext,
findprev,
findnz,
occursin,
match,
matchall,
searchsorted,
searchsortedfirst,
searchsortedlast,
startswith,

# linear algebra
bkfact!,
bkfact,
Expand Down Expand Up @@ -611,7 +620,6 @@ export
any!,
any,
collect,
contains,
count,
delete!,
deleteat!,
Expand Down Expand Up @@ -679,7 +687,6 @@ export
# strings and text output
ascii,
base,
startswith,
bin,
bitstring,
bytes2hex,
Expand All @@ -691,22 +698,17 @@ export
digits,
digits!,
dump,
eachmatch,
endswith,
escape_string,
hex,
hex2bytes,
hex2bytes!,
info,
isascii,
ismatch,
isvalid,
join,
logging,
lpad,
lstrip,
match,
matchall,
ncodeunits,
ndigits,
nextind,
Expand All @@ -723,12 +725,8 @@ export
repr,
reverseind,
rpad,
rsearch,
rsearchindex,
rsplit,
rstrip,
search,
searchindex,
show,
showcompact,
showerror,
Expand Down Expand Up @@ -800,7 +798,6 @@ export
identity,
isbits,
isequal,
equalto,
isimmutable,
isless,
ifelse,
Expand Down
6 changes: 3 additions & 3 deletions base/interactiveutil.jl
Original file line number Diff line number Diff line change
Expand Up @@ -334,13 +334,13 @@ function versioninfo(io::IO=STDOUT; verbose::Bool=false, packages::Bool=false)

println(io, "Environment:")
for (k,v) in ENV
if ismatch(r"JULIA", String(k))
if contains(String(k), r"JULIA")
println(io, " $(k) = $(v)")
end
end
if verbose
for (k,v) in ENV
if ismatch(r"PATH|FLAG|^TERM$|HOME", String(k))
if contains(String(k), r"PATH|FLAG|^TERM$|HOME")
println(io, " $(k) = $(v)")
end
end
Expand Down Expand Up @@ -743,7 +743,7 @@ function varinfo(m::Module=Main, pattern::Regex=r"")
(value (Base, Main, Core) ? "" : format_bytes(summarysize(value))),
summary(value)]
end
for v in sort!(names(m)) if isdefined(m, v) && ismatch(pattern, string(v)) ]
for v in sort!(names(m)) if isdefined(m, v) && contains(string(v), pattern) ]

pushfirst!(rows, Any["name", "size", "summary"])

Expand Down
8 changes: 4 additions & 4 deletions base/iobuffer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -426,18 +426,18 @@ read(io::GenericIOBuffer) = read!(io,StringVector(nb_available(io)))
readavailable(io::GenericIOBuffer) = read(io)
read(io::GenericIOBuffer, nb::Integer) = read!(io,StringVector(min(nb, nb_available(io))))

function search(buf::IOBuffer, delim::UInt8)
function findfirst(delim::EqualTo{UInt8}, buf::IOBuffer)
p = pointer(buf.data, buf.ptr)
q = @gc_preserve buf ccall(:memchr,Ptr{UInt8},(Ptr{UInt8},Int32,Csize_t),p,delim,nb_available(buf))
q = @gc_preserve buf ccall(:memchr,Ptr{UInt8},(Ptr{UInt8},Int32,Csize_t),p,delim.x,nb_available(buf))
nb::Int = (q == C_NULL ? 0 : q-p+1)
return nb
end

function search(buf::GenericIOBuffer, delim::UInt8)
function findfirst(delim::EqualTo{UInt8}, buf::GenericIOBuffer)
data = buf.data
for i = buf.ptr : buf.size
@inbounds b = data[i]
if b == delim
if b == delim.x
return i - buf.ptr + 1
end
end
Expand Down
2 changes: 1 addition & 1 deletion base/libc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ function strptime(fmt::AbstractString, timestr::AbstractString)
@static if Sys.isapple()
# if we didn't explicitly parse the weekday or year day, use mktime
# to fill them in automatically.
if !ismatch(r"([^%]|^)%(a|A|j|w|Ow)", fmt)
if !contains(fmt, r"([^%]|^)%(a|A|j|w|Ow)")
ccall(:mktime, Int, (Ref{TmStruct},), tm)
end
end
Expand Down
Loading

0 comments on commit 1950086

Please sign in to comment.