Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit cc7456f

Browse files
committed Jun 17, 2018
Support Julia 0.7, drop 0.6 and LegacyStrings
1 parent 17d7c89 commit cc7456f

File tree

7 files changed

+95
-143
lines changed

7 files changed

+95
-143
lines changed
 

‎.travis.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ os:
44
- linux
55
- osx
66
julia:
7-
- 0.6
87
- 0.7
98
- nightly
109
notifications:

‎README.md

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
# StringEncodings
22

3-
[![Travis CI Build Status](https://travis-ci.org/nalimilan/StringEncodings.jl.svg?branch=master)](https://travis-ci.org/nalimilan/StringEncodings.jl)
3+
[![Travis CI Build Status](https://travis-ci.org/JuliaStrings/StringEncodings.jl.svg?branch=master)](https://travis-ci.org/JuliaStrings/StringEncodings.jl)
44
[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/3gslhfg91isldnvq?svg=true)](https://ci.appveyor.com/project/nalimilan/stringencodings-jl)
5-
[![Coveralls Coverage Status](https://coveralls.io/repos/nalimilan/StringEncodings.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/nalimilan/StringEncodings.jl?branch=master)
6-
[![Codecov Coverage Status](http://codecov.io/github/nalimilan/StringEncodings.jl/coverage.svg?branch=master)](http://codecov.io/github/nalimilan/StringEncodings.jl?branch=master)
5+
[![Coveralls Coverage Status](https://coveralls.io/repos/JuliaStrings/StringEncodings.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/JuliaStrings/StringEncodings.jl?branch=master)
6+
[![Codecov Coverage Status](http://codecov.io/github/JuliaStrings/StringEncodings.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaStrings/StringEncodings.jl?branch=master)
77

8-
[![Julia 0.6 Status](http://pkg.julialang.org/badges/StringEncodings_0.6.svg)](http://pkg.julialang.org/?pkg=StringEncodings&ver=0.6)
98
[![Julia 0.7 Status](http://pkg.julialang.org/badges/StringEncodings_0.7.svg)](http://pkg.julialang.org/?pkg=StringEncodings&ver=0.7)
109

1110
This Julia package provides support for decoding and encoding texts between multiple character encodings. It is currently based on the iconv interface, and supports all major platforms using either the native iconv support or [GNU libiconv](https://www.gnu.org/software/libiconv/). In the future, native Julia support for major encodings will be added.
@@ -82,12 +81,12 @@ julia> write(f, "café\nnoël")
8281
julia> close(f); # Essential to complete encoding
8382
```
8483

85-
The contents of the file can then be read back using `readstring`:
84+
The contents of the file can then be read back using `read(path, String)`:
8685
```julia
87-
julia> readstring(path) # Standard function expects UTF-8
86+
julia> read(path, String) # Standard function expects UTF-8
8887
"\U3d83f7c0f\0\0n\0o\0\0"
8988

90-
julia> readstring(path, enc"UTF-16") # Works when passing the correct encoding
89+
julia> read(path, String, enc"UTF-16") # Works when passing the correct encoding
9190
"café\nnoël"
9291
```
9392

@@ -115,7 +114,7 @@ When performing more complex operations on an encoded text file, it will often b
115114
```julia
116115
julia> io = open(path, enc"UTF-16");
117116

118-
julia> readstring(io)
117+
julia> read(io, String)
119118
"café\nnoël"
120119
```
121120

@@ -142,7 +141,7 @@ julia> seek(b, 0); # Move to start of buffer
142141

143142
julia> s = StringDecoder(b, "UTF-16");
144143

145-
julia> readstring(s) # Decoding happens automatically here
144+
julia> read(s, String) # Decoding happens automatically here
146145
"café"
147146
```
148147

‎REQUIRE

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
1-
julia 0.6
2-
BinaryProvider 0.3.0
3-
Compat 0.17.0
4-
LegacyStrings 0.1.1
1+
julia 0.7
2+
BinaryProvider 0.3.0

‎appveyor.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
environment:
22
matrix:
3-
- JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.6/julia-0.6-latest-win32.exe"
4-
- JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.6/julia-0.6-latest-win64.exe"
53
- JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x86/0.7/julia-0.7-latest-win32.exe"
64
- JULIA_URL: "https://julialang-s3.julialang.org/bin/winnt/x64/0.7/julia-0.7-latest-win64.exe"
75
- JULIA_URL: "https://julialangnightlies-s3.julialang.org/bin/winnt/x86/julia-latest-win32.exe"

‎src/StringEncodings.jl

Lines changed: 61 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
module StringEncodings
44

5+
using Libdl
6+
57
# Load in `deps.jl`, complaining if it does not exist
68
const depsjl_path = joinpath(dirname(@__FILE__), "..", "deps", "deps.jl")
79
if !isfile(depsjl_path)
@@ -16,11 +18,9 @@ function __init__()
1618
end
1719

1820
using Base.Libc: errno, strerror, E2BIG, EINVAL, EILSEQ
19-
using Compat: @compat
2021

2122
import Base: close, eachline, eof, flush, isreadable, iswritable,
22-
open, readline, readlines, readuntil, show, write
23-
import Compat: read
23+
open, readline, readlines, readuntil, show, write, read
2424

2525
export StringEncoder, StringDecoder, encode, decode, encodings
2626
export StringEncodingError, OutputBufferError, IConvError
@@ -30,59 +30,59 @@ include("encodings.jl")
3030
using StringEncodings.Encodings
3131
export encoding, encodings_list, Encoding, @enc_str
3232

33-
@compat abstract type StringEncodingError end
33+
abstract type StringEncodingError end
3434

3535
# Specified encodings or the combination are not supported by iconv
36-
type InvalidEncodingError <: StringEncodingError
36+
struct InvalidEncodingError <: StringEncodingError
3737
args::Tuple{String, String}
3838
end
3939
InvalidEncodingError(from, to) = InvalidEncodingError((from, to))
4040
message(::Type{InvalidEncodingError}) = "Conversion from <<1>> to <<2>> not supported by iconv implementation, check that specified encodings are correct"
4141

4242
# Encountered invalid byte sequence
43-
type InvalidSequenceError <: StringEncodingError
43+
struct InvalidSequenceError <: StringEncodingError
4444
args::Tuple{String}
4545
end
4646
InvalidSequenceError(seq::Vector{UInt8}) = InvalidSequenceError((bytes2hex(seq),))
4747
message(::Type{InvalidSequenceError}) = "Byte sequence 0x<<1>> is invalid in source encoding or cannot be represented in target encoding"
4848

49-
type IConvError <: StringEncodingError
49+
struct IConvError <: StringEncodingError
5050
args::Tuple{String, Int, String}
5151
end
5252
IConvError(func::String) = IConvError((func, errno(), strerror(errno())))
5353
message(::Type{IConvError}) = "<<1>>: <<2>> (<<3>>)"
5454

5555
# Input ended with incomplete byte sequence
56-
type IncompleteSequenceError <: StringEncodingError ; end
56+
struct IncompleteSequenceError <: StringEncodingError ; end
5757
message(::Type{IncompleteSequenceError}) = "Incomplete byte sequence at end of input"
5858

59-
type OutputBufferError <: StringEncodingError ; end
59+
struct OutputBufferError <: StringEncodingError ; end
6060
message(::Type{OutputBufferError}) = "Ran out of space in the output buffer"
6161

6262
function show(io::IO, exc::StringEncodingError)
6363
str = message(typeof(exc))
6464
for i = 1:length(exc.args)
65-
str = replace(str, "<<$i>>", exc.args[i])
65+
str = replace(str, "<<$i>>" => exc.args[i])
6666
end
6767
print(io, str)
6868
end
6969

70-
show{T<:Union{IncompleteSequenceError,OutputBufferError}}(io::IO, exc::T) =
70+
show(io::IO, exc::T) where {T<:Union{IncompleteSequenceError,OutputBufferError}} =
7171
print(io, message(T))
7272

7373

7474
## iconv wrappers
7575

76-
function iconv_close(cd::Ptr{Void})
76+
function iconv_close(cd::Ptr{Nothing})
7777
if cd != C_NULL
78-
ccall((iconv_close_s, libiconv), Cint, (Ptr{Void},), cd) == 0 ||
78+
ccall((iconv_close_s, libiconv), Cint, (Ptr{Nothing},), cd) == 0 ||
7979
throw(IConvError("iconv_close"))
8080
end
8181
end
8282

8383
function iconv_open(tocode::String, fromcode::String)
84-
p = ccall((iconv_open_s, libiconv), Ptr{Void}, (Cstring, Cstring), tocode, fromcode)
85-
if p != Ptr{Void}(-1)
84+
p = ccall((iconv_open_s, libiconv), Ptr{Nothing}, (Cstring, Cstring), tocode, fromcode)
85+
if p != Ptr{Nothing}(-1)
8686
return p
8787
elseif errno() == EINVAL
8888
throw(InvalidEncodingError(fromcode, tocode))
@@ -96,10 +96,10 @@ end
9696

9797
const BUFSIZE = 100
9898

99-
type StringEncoder{F<:Encoding, T<:Encoding, S<:IO} <: IO
99+
mutable struct StringEncoder{F<:Encoding, T<:Encoding, S<:IO} <: IO
100100
stream::S
101101
closestream::Bool
102-
cd::Ptr{Void}
102+
cd::Ptr{Nothing}
103103
inbuf::Vector{UInt8}
104104
outbuf::Vector{UInt8}
105105
inbufptr::Ref{Ptr{UInt8}}
@@ -108,10 +108,10 @@ type StringEncoder{F<:Encoding, T<:Encoding, S<:IO} <: IO
108108
outbytesleft::Ref{Csize_t}
109109
end
110110

111-
type StringDecoder{F<:Encoding, T<:Encoding, S<:IO} <: IO
111+
mutable struct StringDecoder{F<:Encoding, T<:Encoding, S<:IO} <: IO
112112
stream::S
113113
closestream::Bool
114-
cd::Ptr{Void}
114+
cd::Ptr{Nothing}
115115
inbuf::Vector{UInt8}
116116
outbuf::Vector{UInt8}
117117
inbufptr::Ref{Ptr{UInt8}}
@@ -135,7 +135,7 @@ function finalize(s::Union{StringEncoder, StringDecoder})
135135
nothing
136136
end
137137

138-
function iconv!(cd::Ptr{Void}, inbuf::Vector{UInt8}, outbuf::Vector{UInt8},
138+
function iconv!(cd::Ptr{Nothing}, inbuf::Vector{UInt8}, outbuf::Vector{UInt8},
139139
inbufptr::Ref{Ptr{UInt8}}, outbufptr::Ref{Ptr{UInt8}},
140140
inbytesleft::Ref{Csize_t}, outbytesleft::Ref{Csize_t})
141141
inbufptr[] = pointer(inbuf)
@@ -145,7 +145,7 @@ function iconv!(cd::Ptr{Void}, inbuf::Vector{UInt8}, outbuf::Vector{UInt8},
145145
outbytesleft[] = BUFSIZE
146146

147147
ret = ccall((iconv_s, libiconv), Csize_t,
148-
(Ptr{Void}, Ptr{Ptr{UInt8}}, Ref{Csize_t}, Ptr{Ptr{UInt8}}, Ref{Csize_t}),
148+
(Ptr{Nothing}, Ptr{Ptr{UInt8}}, Ref{Csize_t}, Ptr{Ptr{UInt8}}, Ref{Csize_t}),
149149
cd, inbufptr, inbytesleft, outbufptr, outbytesleft)
150150

151151
if ret == -1 % Csize_t
@@ -157,7 +157,7 @@ function iconv!(cd::Ptr{Void}, inbuf::Vector{UInt8}, outbuf::Vector{UInt8},
157157
# Output buffer is full, or sequence is incomplete:
158158
# copy remaining bytes to the start of the input buffer for next time
159159
elseif err == E2BIG || err == EINVAL
160-
copy!(inbuf, 1, inbuf, inbytesleft_orig-inbytesleft[]+1, inbytesleft[])
160+
copyto!(inbuf, 1, inbuf, inbytesleft_orig-inbytesleft[]+1, inbytesleft[])
161161
elseif err == EILSEQ
162162
seq = inbuf[(inbytesleft_orig-inbytesleft[]+1):inbytesleft_orig]
163163
throw(InvalidSequenceError(seq))
@@ -177,7 +177,7 @@ function iconv_reset!(s::Union{StringEncoder, StringDecoder})
177177
s.outbufptr[] = pointer(s.outbuf)
178178
s.outbytesleft[] = BUFSIZE
179179
ret = ccall((iconv_s, libiconv), Csize_t,
180-
(Ptr{Void}, Ptr{Ptr{UInt8}}, Ref{Csize_t}, Ptr{Ptr{UInt8}}, Ref{Csize_t}),
180+
(Ptr{Nothing}, Ptr{Ptr{UInt8}}, Ref{Csize_t}, Ptr{Ptr{UInt8}}, Ref{Csize_t}),
181181
s.cd, C_NULL, C_NULL, s.outbufptr, s.outbytesleft)
182182

183183
if ret == -1 % Csize_t
@@ -208,13 +208,13 @@ stream is necessary to complete the encoding (but does not close `stream`).
208208
"""
209209
function StringEncoder(stream::IO, to::Encoding, from::Encoding=enc"UTF-8")
210210
cd = iconv_open(String(to), String(from))
211-
inbuf = Vector{UInt8}(BUFSIZE)
212-
outbuf = Vector{UInt8}(BUFSIZE)
211+
inbuf = Vector{UInt8}(undef, BUFSIZE)
212+
outbuf = Vector{UInt8}(undef, BUFSIZE)
213213
s = StringEncoder{typeof(from), typeof(to), typeof(stream)}(stream, false,
214214
cd, inbuf, outbuf,
215215
Ref{Ptr{UInt8}}(pointer(inbuf)), Ref{Ptr{UInt8}}(pointer(outbuf)),
216216
Ref{Csize_t}(0), Ref{Csize_t}(BUFSIZE))
217-
finalizer(s, finalize)
217+
finalizer(finalize, s)
218218
s
219219
end
220220

@@ -223,7 +223,7 @@ StringEncoder(stream::IO, to::AbstractString, from::Encoding=enc"UTF-8") =
223223
StringEncoder(stream::IO, to::AbstractString, from::AbstractString) =
224224
StringEncoder(stream, Encoding(to), Encoding(from))
225225

226-
function show{F, T, S}(io::IO, s::StringEncoder{F, T, S})
226+
function show(io::IO, s::StringEncoder{F, T}) where {F, T}
227227
from = F()
228228
to = T()
229229
print(io, "StringEncoder{$from, $to}($(s.stream))")
@@ -284,13 +284,13 @@ in the input data without raising an error.
284284
"""
285285
function StringDecoder(stream::IO, from::Encoding, to::Encoding=enc"UTF-8")
286286
cd = iconv_open(String(to), String(from))
287-
inbuf = Vector{UInt8}(BUFSIZE)
288-
outbuf = Vector{UInt8}(BUFSIZE)
287+
inbuf = Vector{UInt8}(undef, BUFSIZE)
288+
outbuf = Vector{UInt8}(undef, BUFSIZE)
289289
s = StringDecoder{typeof(from), typeof(to), typeof(stream)}(stream, false,
290290
cd, inbuf, outbuf,
291291
Ref{Ptr{UInt8}}(pointer(inbuf)), Ref{Ptr{UInt8}}(pointer(outbuf)),
292292
Ref{Csize_t}(0), Ref{Csize_t}(BUFSIZE), 0)
293-
finalizer(s, finalize)
293+
finalizer(finalize, s)
294294
s
295295
end
296296

@@ -299,7 +299,7 @@ StringDecoder(stream::IO, from::AbstractString, to::Encoding=enc"UTF-8") =
299299
StringDecoder(stream::IO, from::AbstractString, to::AbstractString) =
300300
StringDecoder(stream, Encoding(from), Encoding(to))
301301

302-
function show{F, T, S}(io::IO, s::StringDecoder{F, T, S})
302+
function show(io::IO, s::StringDecoder{F, T}) where {F, T}
303303
from = F()
304304
to = T()
305305
print(io, "StringDecoder{$from, $to}($(s.stream))")
@@ -384,11 +384,16 @@ specifying the encoding again.
384384
open(fname::AbstractString, enc::Encoding, args...) = wrap_stream(open(fname, args...), enc)
385385

386386
function open(fname::AbstractString, enc::Encoding,
387-
rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::Bool)
388-
if rd && (wr || ff)
389-
throw(ArgumentError("cannot open encoded text files in read and write/append modes at the same time"))
387+
read :: Union{Bool,Nothing} = nothing,
388+
write :: Union{Bool,Nothing} = nothing,
389+
create :: Union{Bool,Nothing} = nothing,
390+
truncate :: Union{Bool,Nothing} = nothing,
391+
append :: Union{Bool,Nothing} = nothing)
392+
if read == true && (write == true || truncate == true || append == true)
393+
throw(ArgumentError("cannot open encoded text files in read and write/truncate/append modes at the same time"))
390394
end
391-
wrap_stream(open(fname, rd, wr, cr, tr, ff), enc)
395+
wrap_stream(open(fname, read=read, write=write, create=create, truncate=truncate, append=append),
396+
enc)
392397
end
393398

394399
function open(fname::AbstractString, enc::Encoding, mode::AbstractString)
@@ -398,25 +403,14 @@ function open(fname::AbstractString, enc::Encoding, mode::AbstractString)
398403
wrap_stream(open(fname, mode), enc)
399404
end
400405

401-
if isdefined(Base, :readstring)
402-
@doc """
403-
readstring(stream::IO, enc::Encoding)
404-
readstring(filename::AbstractString, enc::Encoding)
405-
406-
Methods to read text in character encoding `enc`.
407-
""" ->
408-
Base.readstring(s::IO, enc::Encoding) = readstring(StringDecoder(s, enc))
409-
Base.readstring(filename::AbstractString, enc::Encoding) = open(io->readstring(io, enc), filename)
410-
else # Compatibility with Julia 0.4
411-
@doc """
412-
readall(stream::IO, enc::Encoding)
413-
readall(filename::AbstractString, enc::Encoding)
406+
"""
407+
read(stream::IO, ::Type{String}, enc::Encoding)
408+
read(filename::AbstractString, ::Type{String}, enc::Encoding)
414409
415-
Methods to read text in character encoding `enc`.
416-
""" ->
417-
Base.readall(s::IO, enc::Encoding) = readall(StringDecoder(s, enc))
418-
Base.readall(filename::AbstractString, enc::Encoding) = open(io->readall(io, enc), filename)
419-
end
410+
Methods to read text in character encoding `enc`.
411+
"""
412+
Base.read(s::IO, ::Type{String}, enc::Encoding) = read(StringDecoder(s, enc), String)
413+
Base.read(filename::AbstractString, ::Type{String}, enc::Encoding) = open(io->read(io, String, enc), filename)
420414

421415
"""
422416
readline(stream::IO, enc::Encoding)
@@ -445,30 +439,15 @@ Methods to read text in character encoding `enc`.
445439
readuntil(s::IO, enc::Encoding, delim) = readuntil(StringDecoder(s, enc), delim)
446440
readuntil(filename::AbstractString, enc::Encoding, delim) = open(io->readuntil(io, enc, delim), filename)
447441

448-
if VERSION >= v"0.6.0-dev.2467"
449-
"""
450-
eachline(stream::IO, enc::Encoding; chomp=true)
451-
eachline(filename::AbstractString, enc::Encoding; chomp=true)
452-
453-
Methods to read text in character encoding `enc`. Decoding is performed on the fly.
454-
"""
455-
eachline(s::IO, enc::Encoding; chomp=true) = eachline(StringDecoder(s, enc); chomp=true)
456-
function eachline(filename::AbstractString, enc::Encoding; chomp=true)
457-
s = open(filename, enc)
458-
EachLine(s, ondone=()->close(s), chomp=chomp)
459-
end
460-
else
461-
"""
462-
eachline(stream::IO, enc::Encoding)
463-
eachline(filename::AbstractString, enc::Encoding)
464-
465-
Methods to read text in character encoding `enc`. Decoding is performed on the fly.
466-
"""
467-
eachline(s::IO, enc::Encoding) = eachline(StringDecoder(s, enc))
468-
function eachline(filename::AbstractString, enc::Encoding)
469-
s = open(filename, enc)
470-
EachLine(s, ()->close(s))
471-
end
442+
"""
443+
eachline(stream::IO, enc::Encoding; keep=false)
444+
eachline(filename::AbstractString, enc::Encoding; keep=false)
445+
Methods to read text in character encoding `enc`. Decoding is performed on the fly.
446+
"""
447+
eachline(s::IO, enc::Encoding; keep=false) = eachline(StringDecoder(s, enc); keep=keep) # forward the caller's `keep`
448+
function eachline(filename::AbstractString, enc::Encoding; keep=false)
449+
s = open(filename, enc)
450+
Base.EachLine(s, ondone=()->close(s), keep=keep)
472451
end
473452

474453

@@ -481,11 +460,8 @@ Convert an array of bytes `a` representing text in encoding `enc` to a string of
481460
By default, a `String` is returned.
482461
483462
`enc` can be specified either as a string or as an `Encoding` object.
484-
485-
Note that some implementations (notably the Windows one) may accept invalid sequences
486-
in the input data without raising an error.
487463
"""
488-
function decode{T<:AbstractString}(::Type{T}, a::Vector{UInt8}, enc::Encoding)
464+
function decode(::Type{T}, a::Vector{UInt8}, enc::Encoding) where {T<:AbstractString}
489465
b = IOBuffer(a)
490466
try
491467
T(read(StringDecoder(b, enc, encoding(T))))
@@ -494,7 +470,8 @@ function decode{T<:AbstractString}(::Type{T}, a::Vector{UInt8}, enc::Encoding)
494470
end
495471
end
496472

497-
decode{T<:AbstractString}(::Type{T}, a::Vector{UInt8}, enc::AbstractString) = decode(T, a, Encoding(enc))
473+
decode(::Type{T}, a::Vector{UInt8}, enc::AbstractString) where {T<:AbstractString} =
474+
decode(T, a, Encoding(enc))
498475

499476
decode(a::Vector{UInt8}, enc::AbstractString) = decode(String, a, Encoding(enc))
500477
decode(a::Vector{UInt8}, enc::Union{AbstractString, Encoding}) = decode(String, a, enc)
@@ -517,8 +494,8 @@ encode(s::AbstractString, enc::AbstractString) = encode(s, Encoding(enc))
517494

518495
function test_encoding(enc::String)
519496
# We assume that an encoding is supported if it's possible to convert from it to UTF-8:
520-
cd = ccall((iconv_open_s, libiconv), Ptr{Void}, (Cstring, Cstring), enc, "UTF-8")
521-
if cd == Ptr{Void}(-1)
497+
cd = ccall((iconv_open_s, libiconv), Ptr{Nothing}, (Cstring, Cstring), enc, "UTF-8")
498+
if cd == Ptr{Nothing}(-1)
522499
return false
523500
else
524501
iconv_close(cd)

‎src/encodings.jl

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,33 +7,22 @@ module Encodings
77
import Base: show, print, convert
88
export encoding, encodings_list, Encoding, @enc_str
99

10-
using LegacyStrings: ASCIIString, UTF8String, UTF16String, UTF32String
11-
12-
immutable Encoding{enc} end
10+
struct Encoding{enc} end
1311

1412
Encoding(s) = Encoding{Symbol(s)}()
1513
macro enc_str(s)
1614
:(Encoding{$(Expr(:quote, Symbol(s)))}())
1715
end
1816

19-
convert{T<:AbstractString, enc}(::Type{T}, ::Encoding{enc}) = string(enc)
17+
convert(::Type{T}, ::Encoding{enc}) where {T<:AbstractString, enc} = string(enc)
18+
Base.String(::Encoding{enc}) where {enc} = string(enc)
2019

21-
show{enc}(io::IO, ::Encoding{enc}) = print(io, string(enc), " string encoding")
22-
print{enc}(io::IO, ::Encoding{enc}) = print(io, enc)
20+
show(io::IO, ::Encoding{enc}) where {enc} = print(io, string(enc), " string encoding")
21+
print(io::IO, ::Encoding{enc}) where {enc} = print(io, enc)
2322

2423

2524
## Get the encoding used by a string type
2625
encoding(::Type{String}) = enc"UTF-8"
27-
encoding(::Type{ASCIIString}) = enc"ASCII"
28-
encoding(::Type{UTF8String}) = enc"UTF-8"
29-
30-
if ENDIAN_BOM == 0x04030201
31-
encoding(::Type{UTF16String}) = enc"UTF-16LE"
32-
encoding(::Type{UTF32String}) = enc"UTF-32LE"
33-
else
34-
encoding(::Type{UTF16String}) = enc"UTF-16BE"
35-
encoding(::Type{UTF32String}) = enc"UTF-32BE"
36-
end
3726

3827
encodings_list = ["1026", "1046", "1047", "10646-1:1993", "10646-1:1993/UCS4",
3928
"437", "500", "500V1", "850", "851", "852", "855", "856", "857",

‎test/runtests.jl

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,15 @@
11
using Base.Test
2-
using Compat: readstring
3-
using LegacyStrings: UTF8String, UTF16String, UTF32String
42
using StringEncodings
53

4+
# Test round-trip to Unicode formats
65
for s in ("", "\0", "a", "café crème",
76
"a"^(StringEncodings.BUFSIZE-1) * "€ with an incomplete codepoint between two input buffer fills",
87
"a string € チャネルパートナーの選択",
9-
"a string \0€ チャネルパ\0\0トナーの選択 with embedded and trailing nuls\0")
10-
# Test round-trip to Unicode formats, checking against pure-Julia implementation
11-
for (T, nullen) in ((UTF8String, 0), (UTF16String, 2), (UTF32String, 4))
12-
enc = StringEncodings.encoding(T)
13-
a = reinterpret(UInt8, T(s).data)
14-
# Adjust for explicit \0 only for .data on UTF16String/UTF32String
15-
a = a[1:end - nullen]
16-
@test decode(a, enc) == s
17-
@test decode(UTF16String, a, enc) == s
18-
@test decode(UTF32String, a, enc) == s
19-
@test decode(encode(s, enc), enc) == s
20-
end
8+
"a string \0€ チャネルパ\0\0トナーの選択 with embedded and trailing nuls\0"),
9+
enc in (enc"UTF-8", enc"UTF-16", enc"UTF-16LE", enc"UTF-16BE", enc"UTF-32")
10+
a = encode(s, enc)
11+
@test decode(a, enc) == s
12+
@test decode(encode(s, enc), enc) == s
2113
end
2214

2315
# Test a few non-Unicode encodings
@@ -60,7 +52,7 @@ let s = "a string チャネルパートナーの選択", a = Vector{UInt8}(s)
6052
b = IOBuffer(encode(s, "UTF-16LE")[1:19])
6153
p = StringDecoder(b, "UTF-16LE")
6254
@test string(p) == "StringDecoder{UTF-16LE, UTF-8}($(string(b)))"
63-
@test readstring(p) == s[1:9]
55+
@test read(p, String) == s[1:9]
6456
@test_throws IncompleteSequenceError close(p)
6557
# Test that closed pipe behaves correctly even after an error
6658
@test eof(p)
@@ -90,9 +82,9 @@ catch err
9082
"Byte sequence 0xc3a9e282ac is invalid in source encoding or cannot be represented in target encoding"
9183
end
9284

93-
@test_throws InvalidSequenceError decode(b"qwertyé€", "ASCII")
85+
@test_throws InvalidSequenceError decode(Vector{UInt8}("qwertyé€"), "ASCII")
9486
try
95-
decode(b"qwertyé€", "ASCII")
87+
decode(Vector{UInt8}("qwertyé€"), "ASCII")
9688
catch err
9789
io = IOBuffer()
9890
showerror(io, err)
@@ -116,7 +108,7 @@ mktemp() do p, io
116108
s = "café crème"
117109
write(io, encode(s, "CP1252"))
118110
close(io)
119-
@test readstring(p, enc"CP1252") == s
111+
@test read(p, String, enc"CP1252") == s
120112
end
121113

122114
@test_throws InvalidEncodingError p = StringEncoder(IOBuffer(), "nonexistent_encoding")
@@ -149,14 +141,14 @@ mktemp() do path, io
149141
write(io, s)
150142
end
151143

152-
@test readstring(path, enc"ISO-2022-JP") == s
153-
@test open(io->readstring(io, enc"ISO-2022-JP"), path) == s
154-
@test open(readstring, path, enc"ISO-2022-JP") == s
144+
@test read(path, String, enc"ISO-2022-JP") == s
145+
@test open(io->read(io, String, enc"ISO-2022-JP"), path) == s
146+
@test open(io->read(io, String), path, enc"ISO-2022-JP") == s
155147

156-
@test readuntil(path, enc"ISO-2022-JP", '\0') == "a string \0"
157-
@test open(io->readuntil(io, enc"ISO-2022-JP", '\0'), path) == "a string \0"
158-
@test readuntil(path, enc"ISO-2022-JP", "チャ") == "a string \0チャ"
159-
@test open(io->readuntil(io, enc"ISO-2022-JP", "チャ"), path) == "a string \0チャ"
148+
@test readuntil(path, enc"ISO-2022-JP", '\0') == "a string "
149+
@test open(io->readuntil(io, enc"ISO-2022-JP", '\0'), path) == "a string "
150+
@test readuntil(path, enc"ISO-2022-JP", "チャ") == "a string \0"
151+
@test open(io->readuntil(io, enc"ISO-2022-JP", "チャ"), path) == "a string \0"
160152

161153
if VERSION >= v"0.6.0-dev.2467"
162154
@test readline(path, enc"ISO-2022-JP") == split(s, '\n')[1]
@@ -181,11 +173,11 @@ mktemp() do path, io
181173
# Test alternative syntaxes for open()
182174
open(path, enc"ISO-2022-JP", "r") do io
183175
@test isreadable(io) && !iswritable(io)
184-
@test readstring(io) == s
176+
@test read(io, String) == s
185177
end
186178
open(path, enc"ISO-2022-JP", true, false, false, false, false) do io
187179
@test isreadable(io) && !iswritable(io)
188-
@test readstring(io) == s
180+
@test read(io, String) == s
189181
end
190182
@test_throws ArgumentError open(path, enc"ISO-2022-JP", "r+")
191183
@test_throws ArgumentError open(path, enc"ISO-2022-JP", "w+")

0 commit comments

Comments
 (0)
Please sign in to comment.