From 2cd78559562b89cf14bb7d3298880af33885988f Mon Sep 17 00:00:00 2001 From: Adam Beckmeyer Date: Thu, 4 Jun 2020 12:59:43 -0400 Subject: [PATCH 1/8] Add eachof function for iterating while reading from io This saves boilerplate versus a while !eof(io); read(io, T) loop. The function signature is patterned after the read(io, T) function signature. Prompted by #36132 but does not close #36132 I am open to better/clearer names for this function. --- base/exports.jl | 1 + base/io.jl | 40 ++++++++++++++++++++++++++++++++++++++ doc/src/base/io-network.md | 1 + test/read.jl | 6 ++++++ 4 files changed, 48 insertions(+) diff --git a/base/exports.jl b/base/exports.jl index f47e1f719cbf2..31d442fd5e28c 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -782,6 +782,7 @@ export close, countlines, eachline, + eachof, eof, fd, fdio, diff --git a/base/io.jl b/base/io.jl index 61c686a38529b..fce5e83728d0f 100644 --- a/base/io.jl +++ b/base/io.jl @@ -1003,6 +1003,46 @@ eltype(::Type{<:EachLine}) = String IteratorSize(::Type{<:EachLine}) = SizeUnknown() +struct EachOfIO{T, IOT <: IO} + stream::IOT +end + +""" + eachof(io::IO, T) + +Return an iterable object yielding [`read(io, T)`](@ref). + +See also: [`skipchars`](@ref), [`eachline`](@ref), [`readuntil`](@ref) + +!!! compat "Julia 1.6" + `eachof` requires Julia 1.6 or later. + +# Examples +```jldoctest +julia> open("my_file.txt", "w") do io + write(io, "JuliaLang is a GitHub organization.\\n It has many members.\\n"); + end; + +julia> open("my_file.txt") do io + for c in eachof(io, Char) + c == '\\n' && break + print(c) + end + end +JuliaLang is a GitHub organization. + +julia> rm("my_file.txt"); +``` +""" +eachof(stream::IOT, T::Type) where IOT<:IO = EachOfIO{T,IOT}(stream) + +iterate(itr::EachOfIO{T}, state=nothing) where T = + eof(itr.stream) ? 
nothing : (read(itr.stream, T), nothing) + +eltype(::Type{EachOfIO{T}}) where T = T + +IteratorSize(::Type{<:EachOfIO}) = SizeUnknown() + # IOStream Marking # Note that these functions expect that io.mark exists for # the concrete IO type. This may not be true for IO types diff --git a/doc/src/base/io-network.md b/doc/src/base/io-network.md index 101cdc890f9a9..cd58e4c8311bd 100644 --- a/doc/src/base/io-network.md +++ b/doc/src/base/io-network.md @@ -19,6 +19,7 @@ Base.read! Base.readbytes! Base.unsafe_read Base.unsafe_write +Base.eachof Base.peek Base.position Base.seek diff --git a/test/read.jl b/test/read.jl index 6961dc29d2af6..eaf0035842684 100644 --- a/test/read.jl +++ b/test/read.jl @@ -300,6 +300,12 @@ for (name, f) in l cleanup() + verbose && println("$name eachof...") + @test collect(eachof(io(), Char)) == Vector{Char}(text) + @test collect(eachof(io(), UInt8)) == Vector{UInt8}(text) + + cleanup() + verbose && println("$name countlines...") @test countlines(io()) == countlines(IOBuffer(text)) From 28d66b1939021e21d0f4c35948602fd0abf9f576 Mon Sep 17 00:00:00 2001 From: Adam Beckmeyer Date: Thu, 4 Jun 2020 16:45:18 -0400 Subject: [PATCH 2/8] Keep skipchars from allocating unnecessary String Remove usage of readline from skipchars definition. 
--- base/io.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/io.jl b/base/io.jl index fce5e83728d0f..559d0a3a9cd43 100644 --- a/base/io.jl +++ b/base/io.jl @@ -1127,10 +1127,10 @@ julia> String(readavailable(buf)) ``` """ function skipchars(predicate, io::IO; linecomment=nothing) - while !eof(io) - c = read(io, Char) + for c in eachof(io, Char) if c === linecomment - readline(io) + skipchars(c -> c !== '\n', io) + read(io, Char) elseif !predicate(c) skip(io, -ncodeunits(c)) break From b0fac8340a00e70c1b628328feaf59acbc79fa65 Mon Sep 17 00:00:00 2001 From: Adam Beckmeyer Date: Thu, 4 Jun 2020 16:53:22 -0400 Subject: [PATCH 3/8] Add NEWS entry for eachof --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 9966811f8e1c7..733a92a665115 100644 --- a/NEWS.md +++ b/NEWS.md @@ -35,6 +35,7 @@ New library functions * New function `Base.kron!` and corresponding overloads for various matrix types for performing Kronecker product in-place. ([#31069]). * New function `Base.Threads.foreach(f, channel::Channel)` for multithreaded `Channel` consumption. ([#34543]). +* New function `Base.eachof(io, T)` for iteratively performing `read(io, T)`. ([#36150]) New library features -------------------- From 96a3a52355c2fd28c05732cdc92dde4d1a881f48 Mon Sep 17 00:00:00 2001 From: Adam Beckmeyer Date: Thu, 4 Jun 2020 17:30:30 -0400 Subject: [PATCH 4/8] Refactor to make use of eachof function These are all the places in base/stdlib where I saw the pattern used: while !eof(io); read(io, T) I think they make a good case study on this function's ability to make code dealing with IO cleaner.
--- base/io.jl | 9 +++------ stdlib/Markdown/src/Common/block.jl | 6 ++---- stdlib/Markdown/src/GitHub/GitHub.jl | 3 +-- stdlib/Markdown/src/parse/util.jl | 6 ++---- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/base/io.jl b/base/io.jl index 559d0a3a9cd43..b5a7d734805f0 100644 --- a/base/io.jl +++ b/base/io.jl @@ -754,8 +754,7 @@ function readuntil(s::IO, delim::AbstractChar; keep::Bool=false) return readuntil_string(s, delim % UInt8, keep) end out = IOBuffer() - while !eof(s) - c = read(s, Char) + for c in eachof(s, Char) if c == delim keep && write(out, c) break @@ -767,8 +766,7 @@ end function readuntil(s::IO, delim::T; keep::Bool=false) where T out = (T === UInt8 ? StringVector(0) : Vector{T}()) - while !eof(s) - c = read(s, T) + for c in eachof(s, T) if c == delim keep && push!(out, c) break @@ -804,8 +802,7 @@ function readuntil_vector!(io::IO, target::AbstractVector{T}, keep::Bool, out) w max_pos = 1 # array-offset in cache local cache # will be lazy initialized when needed output! = (isa(out, IO) ? write : push!) 
- while !eof(io) - c = read(io, T) + for c in eachof(io, T) # Backtrack until the next target character matches what was found while true c1 = target[pos + first] diff --git a/stdlib/Markdown/src/Common/block.jl b/stdlib/Markdown/src/Common/block.jl index ba290ba1e1eb9..257eee3b35257 100644 --- a/stdlib/Markdown/src/Common/block.jl +++ b/stdlib/Markdown/src/Common/block.jl @@ -16,8 +16,7 @@ function paragraph(stream::IO, md::MD) push!(md, p) skipwhitespace(stream) prev_char = '\n' - while !eof(stream) - char = read(stream, Char) + for char in eachof(stream, Char) if char == '\n' || char == '\r' char == '\r' && !eof(stream) && peek(stream, Char) == '\n' && read(stream, Char) if prev_char == '\\' @@ -339,8 +338,7 @@ end function horizontalrule(stream::IO, block::MD) withstream(stream) do n, rule = 0, ' ' - while !eof(stream) - char = read(stream, Char) + for char in eachof(stream, Char) char == '\n' && break isspace(char) && continue if n==0 || char==rule diff --git a/stdlib/Markdown/src/GitHub/GitHub.jl b/stdlib/Markdown/src/GitHub/GitHub.jl index 8718055eae1fa..27e9d8d8c6998 100644 --- a/stdlib/Markdown/src/GitHub/GitHub.jl +++ b/stdlib/Markdown/src/GitHub/GitHub.jl @@ -41,8 +41,7 @@ function github_paragraph(stream::IO, md::MD) buffer = IOBuffer() p = Paragraph() push!(md, p) - while !eof(stream) - char = read(stream, Char) + for char in eachof(stream, Char) if char == '\n' eof(stream) && break if blankline(stream) || parse(stream, md, breaking = true) diff --git a/stdlib/Markdown/src/parse/util.jl b/stdlib/Markdown/src/parse/util.jl index 7b1e9c1c003ff..2ff4248465be1 100644 --- a/stdlib/Markdown/src/parse/util.jl +++ b/stdlib/Markdown/src/parse/util.jl @@ -26,8 +26,7 @@ Skip any leading blank lines. Returns the number skipped. 
function skipblank(io::IO) start = position(io) i = 0 - while !eof(io) - c = read(io, Char) + for c in eachof(io, Char) c == '\n' && (start = position(io); i+=1; continue) c == '\r' && (start = position(io); i+=1; continue) c in whitespace || break @@ -183,8 +182,7 @@ function parse_inline_wrapper(stream::IO, delimiter::AbstractString; rep = false !eof(stream) && peek(stream, Char) in whitespace && return nothing buffer = IOBuffer() - while !eof(stream) - char = read(stream, Char) + for char in eachof(stream, Char) write(buffer, char) if !(char in whitespace || char == '\n' || char in delimiter) && startswith(stream, delimiter^n) trailing = 0 From db328745769174fa507c5603c3a55f538509f981 Mon Sep 17 00:00:00 2001 From: Adam Beckmeyer Date: Thu, 18 Jun 2020 12:34:08 -0400 Subject: [PATCH 5/8] Rename eachof to readeach --- NEWS.md | 2 +- base/exports.jl | 2 +- base/io.jl | 16 ++++++++-------- doc/src/base/io-network.md | 2 +- stdlib/Markdown/src/Common/block.jl | 4 ++-- stdlib/Markdown/src/GitHub/GitHub.jl | 2 +- stdlib/Markdown/src/parse/util.jl | 4 ++-- test/read.jl | 6 +++--- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/NEWS.md b/NEWS.md index 733a92a665115..171de48aa5d3c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -35,7 +35,7 @@ New library functions * New function `Base.kron!` and corresponding overloads for various matrix types for performing Kronecker product in-place. ([#31069]). * New function `Base.Threads.foreach(f, channel::Channel)` for multithreaded `Channel` consumption. ([#34543]). -* New function `Base.eachof(io, T)` for iteratively performing `read(io, T)`. ([#36150]) +* New function `Base.readeach(io, T)` for iteratively performing `read(io, T)`. 
([#36150]) New library features -------------------- diff --git a/base/exports.jl b/base/exports.jl index 31d442fd5e28c..76e050d3968ee 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -782,7 +782,7 @@ export close, countlines, eachline, - eachof, + readeach, eof, fd, fdio, diff --git a/base/io.jl b/base/io.jl index b5a7d734805f0..e713774b72880 100644 --- a/base/io.jl +++ b/base/io.jl @@ -754,7 +754,7 @@ function readuntil(s::IO, delim::AbstractChar; keep::Bool=false) return readuntil_string(s, delim % UInt8, keep) end out = IOBuffer() - for c in eachof(s, Char) + for c in readeach(s, Char) if c == delim keep && write(out, c) break @@ -766,7 +766,7 @@ end function readuntil(s::IO, delim::T; keep::Bool=false) where T out = (T === UInt8 ? StringVector(0) : Vector{T}()) - for c in eachof(s, T) + for c in readeach(s, T) if c == delim keep && push!(out, c) break @@ -802,7 +802,7 @@ function readuntil_vector!(io::IO, target::AbstractVector{T}, keep::Bool, out) w max_pos = 1 # array-offset in cache local cache # will be lazy initialized when needed output! = (isa(out, IO) ? write : push!) - for c in eachof(io, T) + for c in readeach(io, T) # Backtrack until the next target character matches what was found while true c1 = target[pos + first] @@ -1005,14 +1005,14 @@ struct EachOfIO{T, IOT <: IO} end """ - eachof(io::IO, T) + readeach(io::IO, T) Return an iterable object yielding [`read(io, T)`](@ref). See also: [`skipchars`](@ref), [`eachline`](@ref), [`readuntil`](@ref) !!! compat "Julia 1.6" - `eachof` requires Julia 1.6 or later. + `readeach` requires Julia 1.6 or later. # Examples ```jldoctest @@ -1021,7 +1021,7 @@ julia> open("my_file.txt", "w") do io end; julia> open("my_file.txt") do io - for c in eachof(io, Char) + for c in readeach(io, Char) c == '\\n' && break print(c) end @@ -1031,7 +1031,7 @@ JuliaLang is a GitHub organization. 
julia> rm("my_file.txt"); ``` """ -eachof(stream::IOT, T::Type) where IOT<:IO = EachOfIO{T,IOT}(stream) +readeach(stream::IOT, T::Type) where IOT<:IO = EachOfIO{T,IOT}(stream) iterate(itr::EachOfIO{T}, state=nothing) where T = eof(itr.stream) ? nothing : (read(itr.stream, T), nothing) @@ -1124,7 +1124,7 @@ julia> String(readavailable(buf)) ``` """ function skipchars(predicate, io::IO; linecomment=nothing) - for c in eachof(io, Char) + for c in readeach(io, Char) if c === linecomment skipchars(c -> c !== '\n', io) read(io, Char) diff --git a/doc/src/base/io-network.md b/doc/src/base/io-network.md index cd58e4c8311bd..b798a708f22b2 100644 --- a/doc/src/base/io-network.md +++ b/doc/src/base/io-network.md @@ -19,7 +19,7 @@ Base.read! Base.readbytes! Base.unsafe_read Base.unsafe_write -Base.eachof +Base.readeach Base.peek Base.position Base.seek diff --git a/stdlib/Markdown/src/Common/block.jl b/stdlib/Markdown/src/Common/block.jl index 257eee3b35257..64f8d58efcb4f 100644 --- a/stdlib/Markdown/src/Common/block.jl +++ b/stdlib/Markdown/src/Common/block.jl @@ -16,7 +16,7 @@ function paragraph(stream::IO, md::MD) push!(md, p) skipwhitespace(stream) prev_char = '\n' - for char in eachof(stream, Char) + for char in readeach(stream, Char) if char == '\n' || char == '\r' char == '\r' && !eof(stream) && peek(stream, Char) == '\n' && read(stream, Char) if prev_char == '\\' @@ -338,7 +338,7 @@ end function horizontalrule(stream::IO, block::MD) withstream(stream) do n, rule = 0, ' ' - for char in eachof(stream, Char) + for char in readeach(stream, Char) char == '\n' && break isspace(char) && continue if n==0 || char==rule diff --git a/stdlib/Markdown/src/GitHub/GitHub.jl b/stdlib/Markdown/src/GitHub/GitHub.jl index 27e9d8d8c6998..493e01b085258 100644 --- a/stdlib/Markdown/src/GitHub/GitHub.jl +++ b/stdlib/Markdown/src/GitHub/GitHub.jl @@ -41,7 +41,7 @@ function github_paragraph(stream::IO, md::MD) buffer = IOBuffer() p = Paragraph() push!(md, p) - for char in eachof(stream, Char) 
+ for char in readeach(stream, Char) if char == '\n' eof(stream) && break if blankline(stream) || parse(stream, md, breaking = true) diff --git a/stdlib/Markdown/src/parse/util.jl b/stdlib/Markdown/src/parse/util.jl index 2ff4248465be1..7be845c96a9fc 100644 --- a/stdlib/Markdown/src/parse/util.jl +++ b/stdlib/Markdown/src/parse/util.jl @@ -26,7 +26,7 @@ Skip any leading blank lines. Returns the number skipped. function skipblank(io::IO) start = position(io) i = 0 - for c in eachof(io, Char) + for c in readeach(io, Char) c == '\n' && (start = position(io); i+=1; continue) c == '\r' && (start = position(io); i+=1; continue) c in whitespace || break @@ -182,7 +182,7 @@ function parse_inline_wrapper(stream::IO, delimiter::AbstractString; rep = false !eof(stream) && peek(stream, Char) in whitespace && return nothing buffer = IOBuffer() - for char in eachof(stream, Char) + for char in readeach(stream, Char) write(buffer, char) if !(char in whitespace || char == '\n' || char in delimiter) && startswith(stream, delimiter^n) trailing = 0 diff --git a/test/read.jl b/test/read.jl index eaf0035842684..b31216811b75c 100644 --- a/test/read.jl +++ b/test/read.jl @@ -300,9 +300,9 @@ for (name, f) in l cleanup() - verbose && println("$name eachof...") - @test collect(eachof(io(), Char)) == Vector{Char}(text) - @test collect(eachof(io(), UInt8)) == Vector{UInt8}(text) + verbose && println("$name readeach...") + @test collect(readeach(io(), Char)) == Vector{Char}(text) + @test collect(readeach(io(), UInt8)) == Vector{UInt8}(text) cleanup() From 9db00310b07274b1b67a2ffd88d2f03bde4d422c Mon Sep 17 00:00:00 2001 From: Adam Beckmeyer Date: Thu, 18 Jun 2020 16:25:43 -0400 Subject: [PATCH 6/8] Rename EachOfIO to ReadEachIterator --- base/io.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/base/io.jl b/base/io.jl index e713774b72880..17aa2fff8e5cd 100644 --- a/base/io.jl +++ b/base/io.jl @@ -1000,7 +1000,7 @@ eltype(::Type{<:EachLine}) = String 
IteratorSize(::Type{<:EachLine}) = SizeUnknown() -struct EachOfIO{T, IOT <: IO} +struct ReadEachIterator{T, IOT <: IO} stream::IOT end @@ -1031,14 +1031,14 @@ JuliaLang is a GitHub organization. julia> rm("my_file.txt"); ``` """ -readeach(stream::IOT, T::Type) where IOT<:IO = EachOfIO{T,IOT}(stream) +readeach(stream::IOT, T::Type) where IOT<:IO = ReadEachIterator{T,IOT}(stream) -iterate(itr::EachOfIO{T}, state=nothing) where T = +iterate(itr::ReadEachIterator{T}, state=nothing) where T = eof(itr.stream) ? nothing : (read(itr.stream, T), nothing) -eltype(::Type{EachOfIO{T}}) where T = T +eltype(::Type{ReadEachIterator{T}}) where T = T -IteratorSize(::Type{<:EachOfIO}) = SizeUnknown() +IteratorSize(::Type{<:ReadEachIterator}) = SizeUnknown() # IOStream Marking # Note that these functions expect that io.mark exists for From 7ed76b2284c74c1126191e5f3f59c795f8262863 Mon Sep 17 00:00:00 2001 From: Adam Beckmeyer Date: Fri, 17 Jul 2020 10:42:14 -0400 Subject: [PATCH 7/8] Revert substituting skipchars for readline Although using skipchars reduced allocations, it overall slows down the function. --- base/io.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/base/io.jl b/base/io.jl index 17aa2fff8e5cd..7bc6f216e3d50 100644 --- a/base/io.jl +++ b/base/io.jl @@ -1126,8 +1126,7 @@ julia> String(readavailable(buf)) function skipchars(predicate, io::IO; linecomment=nothing) for c in readeach(io, Char) if c === linecomment - skipchars(c -> c !== '\n', io) - read(io, Char) + readline(io) elseif !predicate(c) skip(io, -ncodeunits(c)) break From e1ce5aa13d63bf112d200a63865875ba7e1d99c7 Mon Sep 17 00:00:00 2001 From: Adam Beckmeyer Date: Fri, 17 Jul 2020 10:46:01 -0400 Subject: [PATCH 8/8] Use IOBuffer for readeach example This is more succinct and focused than writing a file to disk and using open. 
--- base/io.jl | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/base/io.jl b/base/io.jl index 7bc6f216e3d50..fb4d0ef6e6b95 100644 --- a/base/io.jl +++ b/base/io.jl @@ -1016,19 +1016,13 @@ See also: [`skipchars`](@ref), [`eachline`](@ref), [`readuntil`](@ref) # Examples ```jldoctest -julia> open("my_file.txt", "w") do io - write(io, "JuliaLang is a GitHub organization.\\n It has many members.\\n"); - end; +julia> io = IOBuffer("JuliaLang is a GitHub organization.\\n It has many members.\\n"); -julia> open("my_file.txt") do io - for c in readeach(io, Char) - c == '\\n' && break - print(c) - end +julia> for c in readeach(io, Char) + c == '\\n' && break + print(c) end JuliaLang is a GitHub organization. - -julia> rm("my_file.txt"); ``` """ readeach(stream::IOT, T::Type) where IOT<:IO = ReadEachIterator{T,IOT}(stream)