Deprecate readbytes!()

- There were only a handful of uses of readbytes!() in Base, mostly in implementations of other I/O functions. - Most existing read!() methods were already resizing the result array, so returning the byte count from readbytes!() was not that useful. - Added eachblock() to deal with the countlines() use case in datafmt.jl. (eachblock() is an iterator like eachline().)
samoconnor · Jan 15, 2016 · e757e5c · kmsquire · Jan 15, 2016 · kmsquire
1 parent fd6b27f
commit e757e5c
Show file tree

Hide file tree

Showing 11 changed files with 84 additions and 70 deletions.
diff --git a/base/datafmt.jl b/base/datafmt.jl
@@ -18,11 +18,9 @@ const offs_chunk_size = 5000
 countlines(f::AbstractString, eol::Char='\n') = open(io->countlines(io,eol), f)::Int
 function countlines(io::IO, eol::Char='\n')
  isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported"))
- a = Array(UInt8, 8192)
  nl = 0
- while !eof(io)
- nb = readbytes!(io, a)
- @simd for i=1:nb
+ for a in eachblock(io)
+ @simd for i=1:length(a)
  @inbounds nl += a[i] == eol
  end
  end

diff --git a/base/deprecated.jl b/base/deprecated.jl
@@ -964,3 +964,9 @@ end
 #https://github.com/JuliaLang/julia/issues/14608
 @deprecate readall readstring
 @deprecate readbytes read
+
+export readbytes!
+@noinline function readbytes!(io, a, n=length(a))
+ depwarn("readbytes! is deprecated, use read! instead", :readbytes!)
+ return length(read!(io, a, n))
+end
diff --git a/base/docs/helpdb/Base.jl b/base/docs/helpdb/Base.jl
@@ -2462,10 +2462,18 @@ poll_file
 """
  eachline(stream or filename)
 
-Create an iterable object that will yield each line.
+Iterable that yields each line.
 """
 eachline
 
+"""
+ eachblock(stream or filename, [blocksize])
+
+
+Iterable that yields each block as `AbstractArray{UInt8}`
+"""
+eachblock
+
 """
  isposdef!(A) -> Bool
 
@@ -6430,14 +6438,13 @@ Compute the inverse secant of `x`, where the output is in degrees.
 asecd
 
 """
- readbytes!(stream, b::Vector{UInt8}, nb=length(b); all=true)
+ read!(stream, b::Vector{UInt8}, nb=length(b); all=true)
 
-Read at most `nb` bytes from the stream into `b`, returning the number of bytes read
-(increasing the size of `b` as needed).
+Read at most `nb` bytes from the stream into `b`, resizing `b` to match the number of bytes read.
 
 See `read` for a description of the `all` option.
 """
-readbytes!
+read!
 
 """
  basename(path::AbstractString) -> AbstractString

diff --git a/base/exports.jl b/base/exports.jl
@@ -1132,6 +1132,7 @@ export
  connect,
  countlines,
  deserialize,
+ eachblock,
  eachline,
  eof,
  fd,
@@ -1167,7 +1168,6 @@ export
  read!,
  readstring,
  readavailable,
- readbytes!,
  readchomp,
  readcsv,
  readdir,

diff --git a/base/filesystem.jl b/base/filesystem.jl
@@ -151,27 +151,22 @@ function read(f::File, ::Type{UInt8})
  return ret % UInt8
 end
 
-function read!(f::File, a::Vector{UInt8}, nel=length(a))
+function read!(f::File, a::Vector{UInt8})
  check_open(f)
- if nel < 0 || nel > length(a)
- throw(BoundsError())
- end
  ret = ccall(:jl_fs_read, Int32, (Int32, Ptr{Void}, Csize_t),
- f.handle, a, nel)
+ f.handle, a, length(a))
  uv_error("read",ret)
  return a
 end
 
 nb_available(f::File) = filesize(f) - position(f)
 
-function readbytes!(f::File, b::Array{UInt8}, nb=length(b))
+function read!(f::File, b::Vector{UInt8}, nb=length(b))
  nr = min(nb, nb_available(f))
- if length(b) < nr
- resize!(b, nr)
- end
- read!(f, b, nr)
- return nr
+ resize!(b, nr)
+ read!(f, b)
 end
+
 read(io::File) = read!(io, Array(UInt8, nb_available(io)))
 read(io::File, nb::Integer) = read!(io, Array(UInt8, min(nb, nb_available(io))))
 

diff --git a/base/io.jl b/base/io.jl
@@ -307,7 +307,7 @@ readline(s::IO) = readuntil(s, '\n')
 readchomp(x) = chomp!(readstring(x))
 
 # read up to nb bytes into nb, returning # bytes read
-function readbytes!(s::IO, b::AbstractArray{UInt8}, nb=length(b))
+function read!(s::IO, b::Vector{UInt8}, nb=length(b))
  olb = lb = length(b)
  nr = 0
  while nr < nb && !eof(s)
@@ -322,16 +322,15 @@ function readbytes!(s::IO, b::AbstractArray{UInt8}, nb=length(b))
  if lb > olb
  resize!(b, nr) # shrink to just contain input data if was resized
  end
- return nr
+ return b
 end
 
 # read up to nb bytes from s, returning a Vector{UInt8} of bytes read.
 function read(s::IO, nb=typemax(Int))
- # Let readbytes! grow the array progressively by default
+ # Let read! grow the array progressively by default
  # instead of taking of risk of over-allocating
  b = Array(UInt8, nb == typemax(Int) ? 1024 : nb)
- nr = readbytes!(s, b, nb)
- resize!(b, nr)
+ read!(s, b, nb)
 end
 
 function readstring(s::IO)
@@ -341,27 +340,39 @@ end
 
 ## high-level iterator interfaces ##
 
-type EachLine
+type EachChunk{T}
  stream::IO
+ f::Function
  ondone::Function
- EachLine(stream) = EachLine(stream, ()->nothing)
- EachLine(stream, ondone) = new(stream, ondone)
+ EachChunk(stream, f) = EachChunk{T}(stream, f, ()->nothing)
+ EachChunk(stream, f, ondone) = new(stream, f, ondone)
 end
-eachline(stream::IO) = EachLine(stream)
-eachline(filename::AbstractString) = EachLine(open(filename), close)
+eachline(stream::IO) = EachChunk{ByteString}(stream, readline)
+function eachline(filename::AbstractString)
+ io = open(filename)
+ EachChunk{ByteString}(io, readline, ()->close(io))
+end
+
+start{T}(::EachChunk{T}) = nothing
+done{T}(itr::EachChunk{T}, nada) = eof(itr.stream) ? (itr.ondone(); true) : false
+next{T}(itr::EachChunk{T}, nada) = (itr.f(itr.stream), nothing)
+eltype{T}(::Type{EachChunk{T}}) = T
+
+readlines(s=STDIN) = collect(eachline(s))
 
-start(itr::EachLine) = nothing
-function done(itr::EachLine, nada)
- if !eof(itr.stream)
- return false
+function eachblock(stream::IO, blocksize=0, ondone=()->nothing)
+ if blocksize == 0
+ blocksize = 8192
  end
- itr.ondone()
- true
+ a = Array(UInt8, blocksize)
+ EachChunk{Vector{UInt8}}(stream, io->read!(io, a), ondone)
+end
+
+function eachblock(filename::AbstractString, blocksize=0)
+ io=open(filename)
+ eachblock(io, blocksize, ()->close(io))
 end
-next(itr::EachLine, nada) = (readline(itr.stream), nothing)
-eltype(::Type{EachLine}) = ByteString
 
-readlines(s=STDIN) = collect(eachline(s))
 
 # IOStream Marking
 

diff --git a/base/iobuffer.jl b/base/iobuffer.jl
@@ -328,13 +328,12 @@ function write(to::AbstractIOBuffer, a::UInt8)
  sizeof(UInt8)
 end
 
-function readbytes!(io::AbstractIOBuffer, b::Array{UInt8}, nb=length(b))
+function read!(io::AbstractIOBuffer, b::Vector{UInt8}, nb=length(b))
  nr = min(nb, nb_available(io))
  if length(b) < nr
  resize!(b, nr)
  end
  read_sub(io, b, 1, nr)
- return nr
 end
 read(io::AbstractIOBuffer) = read!(io, Array(UInt8, nb_available(io)))
 read(io::AbstractIOBuffer, nb::Integer) = read!(io, Array(UInt8, min(nb, nb_available(io))))

diff --git a/base/iostream.jl b/base/iostream.jl
@@ -218,10 +218,7 @@ function readbytes_all!(s::IOStream, b::Array{UInt8}, nb)
  s.ios, pointer(b, nr+1), min(lb-nr, nb-nr)))
  eof(s) && break
  end
- if lb > olb && lb > nr
- resize!(b, nr) # shrink to just contain input data if was resized
- end
- return nr
+ resize!(b, nr)
 end
 
 function readbytes_some!(s::IOStream, b::Array{UInt8}, nb)
@@ -231,13 +228,10 @@ function readbytes_some!(s::IOStream, b::Array{UInt8}, nb)
  end
  nr = Int(ccall(:ios_read, Csize_t, (Ptr{Void}, Ptr{Void}, Csize_t),
  s.ios, pointer(b), nb))
- if lb > olb && lb > nr
- resize!(b, nr)
- end
- return nr
+ resize!(b, nr)
 end
 
-function readbytes!(s::IOStream, b::Array{UInt8}, nb=length(b); all::Bool=true)
+function read!(s::IOStream, b::Vector{UInt8}, nb=length(b); all::Bool=true)
  return all ? readbytes_all!(s, b, nb) : readbytes_some!(s, b, nb)
 end
 
@@ -251,14 +245,11 @@ function read(s::IOStream)
  end
  end
  b = Array(UInt8, sz<=0 ? 1024 : sz)
- nr = readbytes_all!(s, b, typemax(Int))
- resize!(b, nr)
+ readbytes_all!(s, b, typemax(Int))
 end
 
 function read(s::IOStream, nb::Integer; all::Bool=true)
- b = Array(UInt8, nb)
- nr = readbytes!(s, b, nb, all=all)
- resize!(b, nr)
+ read!(s, Array(UInt8, nb), nb, all)
 end
 
 ## Character streams ##

diff --git a/base/stream.jl b/base/stream.jl
@@ -892,20 +892,12 @@ function stop_reading(stream::LibuvStream)
  end
 end
 
-function readbytes!(s::LibuvStream, b::AbstractArray{UInt8}, nb=length(b))
- wait_readnb(s, nb)
- nr = nb_available(s)
- resize!(b, nr) # shrink to just contain input data if was resized
- read!(s.buffer, b)
- return nr
-end
-
 function read(stream::LibuvStream)
  wait_readnb(stream, typemax(Int))
  return takebuf_array(stream.buffer)
 end
 
-function read!(s::LibuvStream, a::Array{UInt8, 1})
+function read!(s::LibuvStream, a::Vector{UInt8})
  nb = length(a)
  sbuf = s.buffer
  @assert sbuf.seekable == false
@@ -936,6 +928,13 @@ function read!(s::LibuvStream, a::Array{UInt8, 1})
  return a
 end
 
+function read!(s::LibuvStream, b::Vector{UInt8}, nb=length(b))
+ wait_readnb(s, nb)
+ nr = nb_available(s)
+ resize!(b, nr)
+ read!(s.buffer, b)
+end
+
 function read(this::LibuvStream, ::Type{UInt8})
  wait_readnb(this, 1)
  buf = this.buffer

diff --git a/doc/stdlib/io-network.rst b/doc/stdlib/io-network.rst
@@ -152,11 +152,11 @@ General I/O
 
  Read binary data from a stream, filling in the argument ``array``\ .
 
-.. function:: readbytes!(stream, b::Vector{UInt8}, nb=length(b); all=true)
+.. function:: read!(stream, b::Vector{UInt8}, nb=length(b); all=true)
 
  .. Docstring generated from Julia source
 
- Read at most ``nb`` bytes from the stream into ``b``\ , returning the number of bytes read (increasing the size of ``b`` as needed).
+ Read at most ``nb`` bytes from the stream into ``b``\ , resizing ``b`` to match the number of bytes read.
 
  See ``read`` for a description of the ``all`` option.
 
@@ -521,7 +521,13 @@ Text I/O
 
  .. Docstring generated from Julia source
 
- Create an iterable object that will yield each line.
+ Iterable that yields each line.
+
+.. function:: eachblock(stream or filename, [blocksize])
+
+ .. Docstring generated from Julia source
+
+ Iterable that yields each block as `AbstractArray{UInt8}`
 
 .. function:: readdlm(source, delim::Char, T::Type, eol::Char; header=false, skipstart=0, skipblanks=true, use_mmap, ignore_invalid_chars=false, quotes=true, dims, comments=true, comment_char='#')
 

diff --git a/test/file.jl b/test/file.jl
@@ -1096,9 +1096,11 @@ let s = "qwerty"
 
  # Test growing output array
  x = UInt8[]
- n = readbytes!(IOBuffer(s), x, 10)
+ a = read!(IOBuffer(s), x, 10)
  @test x == s.data
- @test n == length(x)
+ @test a == s.data
+ @test length(a) == length(x)
+ @test length(s) == length(x)
 end
 
 # DevNull