diff --git a/src/bufferedinputstream.jl b/src/bufferedinputstream.jl index cbf347d..2283799 100644 --- a/src/bufferedinputstream.jl +++ b/src/bufferedinputstream.jl @@ -449,3 +449,36 @@ end end return true end + +if isdefined(Base, :copyuntil) # julia#48273 in Julia 1.11 + # optimized copyuntil: locate delim with findnext on the buffered bytes and write whole slices to out: + function Base.copyuntil(out::IO, stream::BufferedInputStream, delim::UInt8; keep::Bool=false) + checkopen(stream) + @views @inbounds while ensurebuffered!(stream, 1) + p = findnext(==(delim), stream.buffer[1:stream.available], stream.position) + if isnothing(p) + # delim not found: copy the rest of the buffer, mark it consumed & keep reading + write(out, stream.buffer[stream.position:stream.available]) + stream.position = stream.available + 1 + else + # delim found: consume through delim, copy up to delim (including it only if keep) & stop + oldp = stream.position + stream.position = p + 1 + p -= !keep + write(out, stream.buffer[oldp:p]) + break + end + end + return out + end + + # in principle, we could also similarly optimize Base.copyline, + # but this is used mainly for readline, and there is already + # an optimized copyline(out::IOBuffer, in::IO) method used there + # that calls our optimized copyuntil above. + # + # For copyline(out::IO, in::IO), it only calls copyuntil for keep=true, + # whereas the keep=false logic is more complicated to handle CRLF. + # So in principle we could have a faster in::BufferedInputStream method + # for this case, but I'm not sure how many people care. +end diff --git a/test/runtests.jl b/test/runtests.jl index e1a4f74..1633fc7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -421,6 +421,17 @@ end @test readavailable(stream) == b"some data" end + @testset "copyuntil" begin + # note: readlines calls readline, which calls copyline, + # which calls copyuntil for keep=true, in Julia 1.11 + data = join(randstring(rand(0:32))*(rand(Bool) ? 
"\n" : "\r\n") + for n=0:100) * "\n\r\n\r\r\r\r\nfooooooobar" + for bufsize in (1, 3, 7, 128), keep in (true, false) + s = BufferedInputStream(IOBuffer(data), bufsize) + @test readlines(s; keep) == readlines(IOBuffer(data); keep) + end + end + @testset "read/peek/skipchars" begin ascii = randstring(100) unicode = randstring("xα∆🐨", 100) * 'β' # mix of 1/2/3/4-byte chars