Skip to content

Commit

Permalink
add eachsplit (#761)
Browse files Browse the repository at this point in the history
  • Loading branch information
longemen3000 authored Dec 14, 2021
1 parent 88f0817 commit a2de107
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 1 deletion.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "Compat"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "3.40.0"
version = "3.41.0"

[deps]
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ changes in `julia`.

## Supported features

* `eachsplit` for iteratively performing `split(str)` ([#39245]) (since Compat 3.41.0)

* `ismutabletype(t::Type)` checks whether a type is mutable (the field `mutable` of `DataType` was removed, [#39037]) (since Compat 3.40)

* `convert(::Type{<:Period}, ::CompoundPeriod)` can convert `CompoundPeriod`s into the specified `Period` type ([#40803]) (since Compat 3.38.0)
Expand Down
77 changes: 77 additions & 0 deletions src/Compat.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1237,6 +1237,83 @@ if VERSION < v"1.8.0-DEV.300"
end
end

# https://github.com/JuliaLang/julia/pull/39245
if VERSION < v"1.8.0-DEV.487"
export eachsplit

"""
    eachsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
    eachsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false)

Split `str` on occurrences of the delimiter(s) `dlm` and return an iterator over the
substrings. `dlm` can be any of the formats allowed by [`findnext`](@ref)'s first argument
(i.e. as a string, regular expression or a function), or as a single character or collection
of characters.

If `dlm` is omitted, it defaults to [`isspace`](@ref).

The optional keyword arguments are:
- `limit`: the maximum number of results the iterator will return. The default of
  `limit=0` implies no maximum.
- `keepempty`: whether empty fields should be kept in the result. The default is `false`
  without a `dlm` argument and `true` with a `dlm` argument.

See also [`split`](@ref).

# Examples
```julia
julia> a = "Ma.rch"
"Ma.rch"

julia> collect(eachsplit(a, "."))
2-element Vector{SubString}:
 "Ma"
 "rch"
```
"""
function eachsplit end

# Lazy iterator returned by `eachsplit`: it records what to split and how, and the
# substrings themselves are produced on demand by `Base.iterate`.
struct SplitIterator{S<:AbstractString,F}
    # String being split.
    str::S
    # Delimiter specification; passed as the first argument to `findnext`.
    splitter::F
    # Maximum number of substrings to produce; 0 means no maximum.
    limit::Int
    # Whether empty substrings are produced.
    keepempty::Bool
end

# Every element is some `SubString`.
function Base.eltype(::Type{<:SplitIterator})
    return SubString
end

# The number of pieces is not known without performing the split.
function Base.IteratorSize(::Type{<:SplitIterator})
    return Base.SizeUnknown()
end

# Iteration protocol for `SplitIterator`.
#
# The state is a tuple `(i, k, n)`:
#   - `i`: index in `iter.str` at which the next substring starts
#   - `k`: index from which the next search for `iter.splitter` begins
#   - `n`: number of substrings produced so far
# Returns `(substring, new_state)`, or `nothing` once the iterator is exhausted.
function Base.iterate(iter::SplitIterator, (i, k, n)=(firstindex(iter.str), firstindex(iter.str), 0))
# The final substring is returned with `i` set to `ncodeunits(iter.str) + 2`
# (last line of this function), so this check detects that it was already emitted.
i - 1 > ncodeunits(iter.str)::Int && return nothing
r = findnext(iter.splitter, iter.str, k)::Union{Nothing,Int,UnitRange{Int}}
# Continue while there is a match starting inside the string and `n` has not
# reached `iter.limit - 1`. With `limit == 0` that bound is -1, which `n` never
# equals when iteration starts from the default state.
while r !== nothing && n != iter.limit - 1 && first(r) <= ncodeunits(iter.str)
r = r::Union{Int,UnitRange{Int}} # see JuliaLang/julia commit dcc2182db228935fe97d03a44ae3b6889e40c542
# Follow-up to #39245 that improves inferrability of iterate(::SplitIterator):
# the type constraints from the complex `while` condition do not propagate
# to the `while` body, so the type of `r` is re-asserted here.
# `j` is where the match starts; `k` becomes the index following the match.
j, k = first(r), nextind(iter.str, last(r))::Int
# Position for the next search. When the match consumed nothing (`k <= j`),
# step past `j` so that the search makes progress.
k_ = k <= j ? nextind(iter.str, j) : k
# False only when the match does not extend past `i` (e.g. an empty match
# located at `i`); no substring is produced for such a match.
if i < k
substr = @inbounds SubString(iter.str, i, prevind(iter.str, j)::Int)
# Produce the piece preceding the match, unless it is empty (`i == j`) and
# empty pieces are being dropped.
(iter.keepempty || i < j) && return (substr, (k, k_, n + 1))
# The empty piece was dropped: the next substring starts after the match.
i = k
end
k = k_
r = findnext(iter.splitter, iter.str, k)::Union{Nothing,Int,UnitRange{Int}}
end
# The loop ended (no match, limit reached, or match beyond the end): the rest of
# the string is the last piece. It is skipped only if it is empty and
# `keepempty` is false.
iter.keepempty || i <= ncodeunits(iter.str) || return nothing
# Using `ncodeunits(iter.str) + 2` as the new `i` makes the next call return `nothing`.
@inbounds SubString(iter.str, i), (ncodeunits(iter.str) + 2, k, n + 1)
end

# Generic method: `splitter` is stored as given and later used as the first argument
# of `findnext`. `limit` is the maximum number of substrings (0 means no maximum) and
# `keepempty` controls whether empty substrings are produced.
function eachsplit(str::AbstractString, splitter;
                   limit::Integer=0, keepempty::Bool=true)
    return SplitIterator(str, splitter, limit, keepempty)
end

# Collection of characters: split wherever a character belongs to `splitter`.
function eachsplit(
    str::AbstractString,
    splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},
                    Set{<:AbstractChar}};
    limit::Integer=0,
    keepempty::Bool=true
)
    return eachsplit(str, in(splitter); limit=limit, keepempty=keepempty)
end

# Single character: split wherever a character is equal to `splitter`.
function eachsplit(str::AbstractString, splitter::AbstractChar;
                   limit::Integer=0, keepempty::Bool=true)
    return eachsplit(str, isequal(splitter); limit=limit, keepempty=keepempty)
end

# No delimiter given: split on whitespace. Unlike the methods taking a delimiter,
# empty substrings are dropped by default here.
function eachsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
    return eachsplit(str, isspace; limit=limit, keepempty=keepempty)
end
end
include("iterators.jl")
include("deprecated.jl")

Expand Down
73 changes: 73 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1298,3 +1298,76 @@ end
@test ismutabletype(Array)
@test !ismutabletype(Tuple)
end

# https://github.com/JuliaLang/julia/pull/39245

#=
These are the Base `split` tests, with `split(...)` replaced by `eachsplit(...) |> collect`.

Rationale, from cmcaine's comment of Sep 8, 2021 on the upstream PR:
    "This PR implements split with eachsplit and uses eachsplit in a few other places in
    Base, so it's kind of already covered by the existing tests. Not sure it needs any more?"
=#
@testset "eachsplit" begin
    # single delimiter given as a character, string or regex, with and without `limit`
    @test eachsplit("foo,bar,baz", 'x') |> collect == ["foo,bar,baz"]
    @test eachsplit("foo,bar,baz", ',') |> collect == ["foo","bar","baz"]
    @test eachsplit("foo,bar,baz", ",") |> collect == ["foo","bar","baz"]
    @test eachsplit("foo,bar,baz", r",") |> collect == ["foo","bar","baz"]
    @test eachsplit("foo,bar,baz", ','; limit=0) |> collect == ["foo","bar","baz"]
    @test eachsplit("foo,bar,baz", ','; limit=1) |> collect == ["foo,bar,baz"]
    @test eachsplit("foo,bar,baz", ','; limit=2) |> collect == ["foo","bar,baz"]
    @test eachsplit("foo,bar,baz", ','; limit=3) |> collect == ["foo","bar","baz"]
    @test eachsplit("foo,bar", "o,b") |> collect == ["fo","ar"]

    # empty fields
    @test eachsplit("", ',') |> collect == [""]
    @test eachsplit(",", ',') |> collect == ["",""]
    @test eachsplit(",,", ',') |> collect == ["","",""]
    @test eachsplit("", ',' ; keepempty=false) |> collect == SubString[]
    @test eachsplit(",", ',' ; keepempty=false) |> collect == SubString[]
    @test eachsplit(",,", ','; keepempty=false) |> collect == SubString[]

    # default delimiter (whitespace)
    @test eachsplit("a b c") |> collect == ["a","b","c"]
    @test eachsplit("a b \t c\n") |> collect == ["a","b","c"]
    @test eachsplit("α β \u2009 γ\n") |> collect == ["α","β","γ"]

    @test eachsplit("a b c"; limit=2) |> collect == ["a","b c"]
    @test eachsplit("a b \t c\n"; limit=3) |> collect == ["a","b","\t c\n"]
    @test eachsplit("a b c"; keepempty=true) |> collect == ["a","b","c"]
    @test eachsplit("a b \t c\n"; keepempty=true) |> collect == ["a","","b","","","c",""]

    let str = "a.:.ba..:..cba.:.:.dcba.:."
        @test eachsplit(str, ".:.") |> collect == ["a","ba.",".cba",":.dcba",""]
        @test eachsplit(str, ".:."; keepempty=false) |> collect == ["a","ba.",".cba",":.dcba"]
        # explicit `keepempty=true` must agree with the default used two lines above
        @test eachsplit(str, ".:."; keepempty=true) |> collect == ["a","ba.",".cba",":.dcba",""]
        @test eachsplit(str, r"\.(:\.)+") |> collect == ["a","ba.",".cba","dcba",""]
        @test eachsplit(str, r"\.(:\.)+"; keepempty=false) |> collect == ["a","ba.",".cba","dcba"]
        @test eachsplit(str, r"\.+:\.+") |> collect == ["a","ba","cba",":.dcba",""]
        @test eachsplit(str, r"\.+:\.+"; keepempty=false) |> collect == ["a","ba","cba",":.dcba"]
    end

    # collections of characters as delimiters (Tuple, Vector and Set methods)
    @test eachsplit("a,b;c", (',', ';')) |> collect == ["a","b","c"]
    @test eachsplit("a,b;c", [',', ';']) |> collect == ["a","b","c"]
    @test eachsplit("a,b;c", Set([',', ';'])) |> collect == ["a","b","c"]
    @test eachsplit("a,b;c", (',', ';'); limit=2) |> collect == ["a","b;c"]
    @test eachsplit(",a;", [',', ';']; keepempty=false) |> collect == ["a"]

    # iterator traits
    @test eltype(eachsplit("a b c")) <: SubString
    @test Base.IteratorSize(eachsplit("a b c")) isa Base.SizeUnknown

    # zero-width splits
    @test eachsplit("", "") |> collect == rsplit("", "") == [""]
    @test eachsplit("abc", "") |> collect == rsplit("abc", "") == ["a","b","c"]
    @test eachsplit("abc", "", limit=2) |> collect == ["a","bc"]

    @test eachsplit("", r"") |> collect == [""]
    @test eachsplit("abc", r"") |> collect == ["a","b","c"]
    @test eachsplit("abcd", r"b?") |> collect == ["a","c","d"]
    @test eachsplit("abcd", r"b*") |> collect == ["a","c","d"]
    @test eachsplit("abcd", r"b+") |> collect == ["a","cd"]
    @test eachsplit("abcd", r"b?c?") |> collect == ["a","d"]
    @test eachsplit("abcd", r"[bc]?") |> collect == ["a","","d"]
    @test eachsplit("abcd", r"a*") |> collect == ["","b","c","d"]
    @test eachsplit("abcd", r"a+") |> collect == ["","bcd"]
    @test eachsplit("abcd", r"d*") |> collect == ["a","b","c",""]
    @test eachsplit("abcd", r"d+") |> collect == ["abc",""]
    @test eachsplit("abcd", r"[ad]?") |> collect == ["","b","c",""]

    # multi-byte unicode characters (issue #26225)
    @test eachsplit("α β γ", " ") |> collect == rsplit("α β γ", " ") ==
        eachsplit("α β γ", isspace) |> collect == rsplit("α β γ", isspace) == ["α","β","γ"]
    @test eachsplit("ö.", ".") |> collect == rsplit("ö.", ".") == ["ö",""]
    @test eachsplit("α β γ", "β") |> collect == rsplit("α β γ", "β") == ["α "," γ"]
end

2 comments on commit a2de107

@martinholters
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/50510

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v3.41.0 -m "<description of version>" a2de10749ab9d8155c3ae88a408e68f3497a2c33
git push origin v3.41.0

Please sign in to comment.