2
2
3
3
module StringEncodings
4
4
5
+ using Libdl
6
+
5
7
# Load in `deps.jl`, complaining if it does not exist
6
8
const depsjl_path = joinpath (dirname (@__FILE__ ), " .." , " deps" , " deps.jl" )
7
9
if ! isfile (depsjl_path)
@@ -16,11 +18,9 @@ function __init__()
16
18
end
17
19
18
20
using Base. Libc: errno, strerror, E2BIG, EINVAL, EILSEQ
19
- using Compat: @compat
20
21
21
22
import Base: close, eachline, eof, flush, isreadable, iswritable,
22
- open, readline, readlines, readuntil, show, write
23
- import Compat: read
23
+ open, readline, readlines, readuntil, show, write, read
24
24
25
25
export StringEncoder, StringDecoder, encode, decode, encodings
26
26
export StringEncodingError, OutputBufferError, IConvError
@@ -30,59 +30,59 @@ include("encodings.jl")
30
30
using StringEncodings. Encodings
31
31
export encoding, encodings_list, Encoding, @enc_str
32
32
33
- @compat abstract type StringEncodingError end
33
+ abstract type StringEncodingError end
34
34
35
35
# Specified encodings or the combination are not supported by iconv
36
- type InvalidEncodingError <: StringEncodingError
36
+ struct InvalidEncodingError <: StringEncodingError
37
37
args:: Tuple{String, String}
38
38
end
39
39
InvalidEncodingError (from, to) = InvalidEncodingError ((from, to))
40
40
message (:: Type{InvalidEncodingError} ) = " Conversion from <<1>> to <<2>> not supported by iconv implementation, check that specified encodings are correct"
41
41
42
42
# Encountered invalid byte sequence
43
- type InvalidSequenceError <: StringEncodingError
43
+ struct InvalidSequenceError <: StringEncodingError
44
44
args:: Tuple{String}
45
45
end
46
46
InvalidSequenceError (seq:: Vector{UInt8} ) = InvalidSequenceError ((bytes2hex (seq),))
47
47
message (:: Type{InvalidSequenceError} ) = " Byte sequence 0x<<1>> is invalid in source encoding or cannot be represented in target encoding"
48
48
49
- type IConvError <: StringEncodingError
49
+ struct IConvError <: StringEncodingError
50
50
args:: Tuple{String, Int, String}
51
51
end
52
52
IConvError (func:: String ) = IConvError ((func, errno (), strerror (errno ())))
53
53
message (:: Type{IConvError} ) = " <<1>>: <<2>> (<<3>>)"
54
54
55
55
# Input ended with incomplete byte sequence
56
- type IncompleteSequenceError <: StringEncodingError ; end
56
+ struct IncompleteSequenceError <: StringEncodingError ; end
57
57
message (:: Type{IncompleteSequenceError} ) = " Incomplete byte sequence at end of input"
58
58
59
- type OutputBufferError <: StringEncodingError ; end
59
+ struct OutputBufferError <: StringEncodingError ; end
60
60
message (:: Type{OutputBufferError} ) = " Ran out of space in the output buffer"
61
61
62
62
function show (io:: IO , exc:: StringEncodingError )
63
63
str = message (typeof (exc))
64
64
for i = 1 : length (exc. args)
65
- str = replace (str, " <<$i >>" , exc. args[i])
65
+ str = replace (str, " <<$i >>" => exc. args[i])
66
66
end
67
67
print (io, str)
68
68
end
69
69
70
- show {T<:Union{IncompleteSequenceError,OutputBufferError}} (io:: IO , exc:: T ) =
70
+ show (io:: IO , exc:: T ) where {T <: Union{IncompleteSequenceError,OutputBufferError} } =
71
71
print (io, message (T))
72
72
73
73
74
74
# # iconv wrappers
75
75
76
- function iconv_close (cd:: Ptr{Void } )
76
+ function iconv_close (cd:: Ptr{Nothing } )
77
77
if cd != C_NULL
78
- ccall ((iconv_close_s, libiconv), Cint, (Ptr{Void },), cd) == 0 ||
78
+ ccall ((iconv_close_s, libiconv), Cint, (Ptr{Nothing },), cd) == 0 ||
79
79
throw (IConvError (" iconv_close" ))
80
80
end
81
81
end
82
82
83
83
function iconv_open (tocode:: String , fromcode:: String )
84
- p = ccall ((iconv_open_s, libiconv), Ptr{Void }, (Cstring, Cstring), tocode, fromcode)
85
- if p != Ptr {Void } (- 1 )
84
+ p = ccall ((iconv_open_s, libiconv), Ptr{Nothing }, (Cstring, Cstring), tocode, fromcode)
85
+ if p != Ptr {Nothing } (- 1 )
86
86
return p
87
87
elseif errno () == EINVAL
88
88
throw (InvalidEncodingError (fromcode, tocode))
96
96
97
97
const BUFSIZE = 100
98
98
99
- type StringEncoder{F<: Encoding , T<: Encoding , S<: IO } <: IO
99
+ mutable struct StringEncoder{F<: Encoding , T<: Encoding , S<: IO } <: IO
100
100
stream:: S
101
101
closestream:: Bool
102
- cd:: Ptr{Void }
102
+ cd:: Ptr{Nothing }
103
103
inbuf:: Vector{UInt8}
104
104
outbuf:: Vector{UInt8}
105
105
inbufptr:: Ref{Ptr{UInt8}}
@@ -108,10 +108,10 @@ type StringEncoder{F<:Encoding, T<:Encoding, S<:IO} <: IO
108
108
outbytesleft:: Ref{Csize_t}
109
109
end
110
110
111
- type StringDecoder{F<: Encoding , T<: Encoding , S<: IO } <: IO
111
+ mutable struct StringDecoder{F<: Encoding , T<: Encoding , S<: IO } <: IO
112
112
stream:: S
113
113
closestream:: Bool
114
- cd:: Ptr{Void }
114
+ cd:: Ptr{Nothing }
115
115
inbuf:: Vector{UInt8}
116
116
outbuf:: Vector{UInt8}
117
117
inbufptr:: Ref{Ptr{UInt8}}
@@ -135,7 +135,7 @@ function finalize(s::Union{StringEncoder, StringDecoder})
135
135
nothing
136
136
end
137
137
138
- function iconv! (cd:: Ptr{Void } , inbuf:: Vector{UInt8} , outbuf:: Vector{UInt8} ,
138
+ function iconv! (cd:: Ptr{Nothing } , inbuf:: Vector{UInt8} , outbuf:: Vector{UInt8} ,
139
139
inbufptr:: Ref{Ptr{UInt8}} , outbufptr:: Ref{Ptr{UInt8}} ,
140
140
inbytesleft:: Ref{Csize_t} , outbytesleft:: Ref{Csize_t} )
141
141
inbufptr[] = pointer (inbuf)
@@ -145,7 +145,7 @@ function iconv!(cd::Ptr{Void}, inbuf::Vector{UInt8}, outbuf::Vector{UInt8},
145
145
outbytesleft[] = BUFSIZE
146
146
147
147
ret = ccall ((iconv_s, libiconv), Csize_t,
148
- (Ptr{Void }, Ptr{Ptr{UInt8}}, Ref{Csize_t}, Ptr{Ptr{UInt8}}, Ref{Csize_t}),
148
+ (Ptr{Nothing }, Ptr{Ptr{UInt8}}, Ref{Csize_t}, Ptr{Ptr{UInt8}}, Ref{Csize_t}),
149
149
cd, inbufptr, inbytesleft, outbufptr, outbytesleft)
150
150
151
151
if ret == - 1 % Csize_t
@@ -157,7 +157,7 @@ function iconv!(cd::Ptr{Void}, inbuf::Vector{UInt8}, outbuf::Vector{UInt8},
157
157
# Output buffer is full, or sequence is incomplete:
158
158
# copy remaining bytes to the start of the input buffer for next time
159
159
elseif err == E2BIG || err == EINVAL
160
- copy ! (inbuf, 1 , inbuf, inbytesleft_orig- inbytesleft[]+ 1 , inbytesleft[])
160
+ copyto ! (inbuf, 1 , inbuf, inbytesleft_orig- inbytesleft[]+ 1 , inbytesleft[])
161
161
elseif err == EILSEQ
162
162
seq = inbuf[(inbytesleft_orig- inbytesleft[]+ 1 ): inbytesleft_orig]
163
163
throw (InvalidSequenceError (seq))
@@ -177,7 +177,7 @@ function iconv_reset!(s::Union{StringEncoder, StringDecoder})
177
177
s. outbufptr[] = pointer (s. outbuf)
178
178
s. outbytesleft[] = BUFSIZE
179
179
ret = ccall ((iconv_s, libiconv), Csize_t,
180
- (Ptr{Void }, Ptr{Ptr{UInt8}}, Ref{Csize_t}, Ptr{Ptr{UInt8}}, Ref{Csize_t}),
180
+ (Ptr{Nothing }, Ptr{Ptr{UInt8}}, Ref{Csize_t}, Ptr{Ptr{UInt8}}, Ref{Csize_t}),
181
181
s. cd, C_NULL , C_NULL , s. outbufptr, s. outbytesleft)
182
182
183
183
if ret == - 1 % Csize_t
@@ -208,13 +208,13 @@ stream is necessary to complete the encoding (but does not close `stream`).
208
208
"""
209
209
function StringEncoder (stream:: IO , to:: Encoding , from:: Encoding = enc " UTF-8" )
210
210
cd = iconv_open (String (to), String (from))
211
- inbuf = Vector {UInt8} (BUFSIZE)
212
- outbuf = Vector {UInt8} (BUFSIZE)
211
+ inbuf = Vector {UInt8} (undef, BUFSIZE)
212
+ outbuf = Vector {UInt8} (undef, BUFSIZE)
213
213
s = StringEncoder {typeof(from), typeof(to), typeof(stream)} (stream, false ,
214
214
cd, inbuf, outbuf,
215
215
Ref {Ptr{UInt8}} (pointer (inbuf)), Ref {Ptr{UInt8}} (pointer (outbuf)),
216
216
Ref {Csize_t} (0 ), Ref {Csize_t} (BUFSIZE))
217
- finalizer (s, finalize )
217
+ finalizer (finalize, s )
218
218
s
219
219
end
220
220
@@ -223,7 +223,7 @@ StringEncoder(stream::IO, to::AbstractString, from::Encoding=enc"UTF-8") =
223
223
StringEncoder (stream:: IO , to:: AbstractString , from:: AbstractString ) =
224
224
StringEncoder (stream, Encoding (to), Encoding (from))
225
225
226
- function show {F, T, S} (io:: IO , s:: StringEncoder{F, T, S} )
226
+ function show (io:: IO , s:: StringEncoder{F, T} ) where {F, T}
227
227
from = F ()
228
228
to = T ()
229
229
print (io, " StringEncoder{$from , $to }($(s. stream) )" )
@@ -284,13 +284,13 @@ in the input data without raising an error.
284
284
"""
285
285
function StringDecoder (stream:: IO , from:: Encoding , to:: Encoding = enc " UTF-8" )
286
286
cd = iconv_open (String (to), String (from))
287
- inbuf = Vector {UInt8} (BUFSIZE)
288
- outbuf = Vector {UInt8} (BUFSIZE)
287
+ inbuf = Vector {UInt8} (undef, BUFSIZE)
288
+ outbuf = Vector {UInt8} (undef, BUFSIZE)
289
289
s = StringDecoder {typeof(from), typeof(to), typeof(stream)} (stream, false ,
290
290
cd, inbuf, outbuf,
291
291
Ref {Ptr{UInt8}} (pointer (inbuf)), Ref {Ptr{UInt8}} (pointer (outbuf)),
292
292
Ref {Csize_t} (0 ), Ref {Csize_t} (BUFSIZE), 0 )
293
- finalizer (s, finalize )
293
+ finalizer (finalize, s )
294
294
s
295
295
end
296
296
@@ -299,7 +299,7 @@ StringDecoder(stream::IO, from::AbstractString, to::Encoding=enc"UTF-8") =
299
299
StringDecoder (stream:: IO , from:: AbstractString , to:: AbstractString ) =
300
300
StringDecoder (stream, Encoding (from), Encoding (to))
301
301
302
- function show {F, T, S} (io:: IO , s:: StringDecoder{F, T, S} )
302
+ function show (io:: IO , s:: StringDecoder{F, T} ) where {F, T}
303
303
from = F ()
304
304
to = T ()
305
305
print (io, " StringDecoder{$from , $to }($(s. stream) )" )
@@ -384,11 +384,16 @@ specifying the encoding again.
384
384
open (fname:: AbstractString , enc:: Encoding , args... ) = wrap_stream (open (fname, args... ), enc)
385
385
386
386
function open (fname:: AbstractString , enc:: Encoding ,
387
- rd:: Bool , wr:: Bool , cr:: Bool , tr:: Bool , ff:: Bool )
388
- if rd && (wr || ff)
389
- throw (ArgumentError (" cannot open encoded text files in read and write/append modes at the same time" ))
387
+ read :: Union{Bool,Nothing} = nothing ,
388
+ write :: Union{Bool,Nothing} = nothing ,
389
+ create :: Union{Bool,Nothing} = nothing ,
390
+ truncate :: Union{Bool,Nothing} = nothing ,
391
+ append :: Union{Bool,Nothing} = nothing )
392
+ if read == true && (write == true || truncate == true || append == true )
393
+ throw (ArgumentError (" cannot open encoded text files in read and write/truncate/append modes at the same time" ))
390
394
end
391
- wrap_stream (open (fname, rd, wr, cr, tr, ff), enc)
395
+ wrap_stream (open (fname, read= read, write= write, create= create, truncate= truncate, append= append),
396
+ enc)
392
397
end
393
398
394
399
function open (fname:: AbstractString , enc:: Encoding , mode:: AbstractString )
@@ -398,25 +403,14 @@ function open(fname::AbstractString, enc::Encoding, mode::AbstractString)
398
403
wrap_stream (open (fname, mode), enc)
399
404
end
400
405
401
- if isdefined (Base, :readstring )
402
- @doc """
403
- readstring(stream::IO, enc::Encoding)
404
- readstring(filename::AbstractString, enc::Encoding)
405
-
406
- Methods to read text in character encoding `enc`.
407
- """ ->
408
- Base. readstring (s:: IO , enc:: Encoding ) = readstring (StringDecoder (s, enc))
409
- Base. readstring (filename:: AbstractString , enc:: Encoding ) = open (io-> readstring (io, enc), filename)
410
- else # Compatibility with Julia 0.4
411
- @doc """
412
- readall(stream::IO, enc::Encoding)
413
- readall(filename::AbstractString, enc::Encoding)
406
+ """
407
+ read(stream::IO, ::Type{String}, enc::Encoding)
408
+ read(filename::AbstractString, ::Type{String}, enc::Encoding)
414
409
415
- Methods to read text in character encoding `enc`.
416
- """ ->
417
- Base. readall (s:: IO , enc:: Encoding ) = readall (StringDecoder (s, enc))
418
- Base. readall (filename:: AbstractString , enc:: Encoding ) = open (io-> readall (io, enc), filename)
419
- end
410
+ Methods to read text in character encoding `enc`.
411
+ """
412
+ Base. read (s:: IO , :: Type{String} , enc:: Encoding ) = read (StringDecoder (s, enc), String)
413
+ Base. read (filename:: AbstractString , :: Type{String} , enc:: Encoding ) = open (io-> read (io, String, enc), filename)
420
414
421
415
"""
422
416
readline(stream::IO, enc::Encoding)
@@ -445,30 +439,15 @@ Methods to read text in character encoding `enc`.
445
439
readuntil (s:: IO , enc:: Encoding , delim) = readuntil (StringDecoder (s, enc), delim)
446
440
readuntil (filename:: AbstractString , enc:: Encoding , delim) = open (io-> readuntil (io, enc, delim), filename)
447
441
448
- if VERSION >= v " 0.6.0-dev.2467"
449
- """
450
- eachline(stream::IO, enc::Encoding; chomp=true)
451
- eachline(filename::AbstractString, enc::Encoding; chomp=true)
452
-
453
- Methods to read text in character encoding `enc`. Decoding is performed on the fly.
454
- """
455
- eachline (s:: IO , enc:: Encoding ; chomp= true ) = eachline (StringDecoder (s, enc); chomp= true )
456
- function eachline (filename:: AbstractString , enc:: Encoding ; chomp= true )
457
- s = open (filename, enc)
458
- EachLine (s, ondone= ()-> close (s), chomp= chomp)
459
- end
460
- else
461
- """
462
- eachline(stream::IO, enc::Encoding)
463
- eachline(filename::AbstractString, enc::Encoding)
464
-
465
- Methods to read text in character encoding `enc`. Decoding is performed on the fly.
466
- """
467
- eachline (s:: IO , enc:: Encoding ) = eachline (StringDecoder (s, enc))
468
- function eachline (filename:: AbstractString , enc:: Encoding )
469
- s = open (filename, enc)
470
- EachLine (s, ()-> close (s))
471
- end
442
+ """
443
+ eachline(stream::IO, enc::Encoding; keep=false)
444
+ eachline(filename::AbstractString, enc::Encoding; keep=false)
445
+ Methods to read text in character encoding `enc`. Decoding is performed on the fly.
446
+ """
447
+ eachline (s:: IO , enc:: Encoding ; keep= false ) = eachline (StringDecoder (s, enc); keep= false )
448
+ function eachline (filename:: AbstractString , enc:: Encoding ; keep= false )
449
+ s = open (filename, enc)
450
+ Base. EachLine (s, ondone= ()-> close (s), keep= keep)
472
451
end
473
452
474
453
@@ -481,11 +460,8 @@ Convert an array of bytes `a` representing text in encoding `enc` to a string of
481
460
By default, a `String` is returned.
482
461
483
462
`enc` can be specified either as a string or as an `Encoding` object.
484
-
485
- Note that some implementations (notably the Windows one) may accept invalid sequences
486
- in the input data without raising an error.
487
463
"""
488
- function decode {T<:AbstractString} (:: Type{T} , a:: Vector{UInt8} , enc:: Encoding )
464
+ function decode (:: Type{T} , a:: Vector{UInt8} , enc:: Encoding ) where {T <: AbstractString }
489
465
b = IOBuffer (a)
490
466
try
491
467
T (read (StringDecoder (b, enc, encoding (T))))
@@ -494,7 +470,8 @@ function decode{T<:AbstractString}(::Type{T}, a::Vector{UInt8}, enc::Encoding)
494
470
end
495
471
end
496
472
497
- decode {T<:AbstractString} (:: Type{T} , a:: Vector{UInt8} , enc:: AbstractString ) = decode (T, a, Encoding (enc))
473
+ decode (:: Type{T} , a:: Vector{UInt8} , enc:: AbstractString ) where {T<: AbstractString } =
474
+ decode (T, a, Encoding (enc))
498
475
499
476
decode (a:: Vector{UInt8} , enc:: AbstractString ) = decode (String, a, Encoding (enc))
500
477
decode (a:: Vector{UInt8} , enc:: Union{AbstractString, Encoding} ) = decode (String, a, enc)
@@ -517,8 +494,8 @@ encode(s::AbstractString, enc::AbstractString) = encode(s, Encoding(enc))
517
494
518
495
function test_encoding (enc:: String )
519
496
# We assume that an encoding is supported if it's possible to convert from it to UTF-8:
520
- cd = ccall ((iconv_open_s, libiconv), Ptr{Void }, (Cstring, Cstring), enc, " UTF-8" )
521
- if cd == Ptr {Void } (- 1 )
497
+ cd = ccall ((iconv_open_s, libiconv), Ptr{Nothing }, (Cstring, Cstring), enc, " UTF-8" )
498
+ if cd == Ptr {Nothing } (- 1 )
522
499
return false
523
500
else
524
501
iconv_close (cd)
0 commit comments