Skip to content

Commit

Permalink
Fix unnecessary CharString use
Browse files Browse the repository at this point in the history
Instead of converting first to Vector{Char} and then to UTF-8, convert
directly to UTF-8. Also fixes future breakage by JuliaLang/julia#7016
  • Loading branch information
simonster committed May 29, 2014
1 parent 2d64b13 commit 67f2c4f
Showing 1 changed file with 17 additions and 10 deletions.
27 changes: 17 additions & 10 deletions src/MAT_v5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -244,19 +244,26 @@ function read_string(f::IO, swap_bytes::Bool, dimensions::Vector{Int32})
# However, the first 256 Unicode code points are derived from ISO-8859-1, so UCS-2
# is a superset of 2-byte ISO-8859-1.
chars = read_bswap(f, swap_bytes, Uint16, int(div(nbytes, 2)))
for i = 1:length(chars)
if chars[i] > 255
# Newer versions of MATLAB seem to write some mongrel UTF-8...
chars[i] = bytestring(reverse(reinterpret(Uint8, [chars[i]])))[1]
bufs = [IOBuffer() for i = 1:dimensions[1]]
i = 1
while i <= length(chars)
for j = 1:dimensions[1]
char = convert(Char, chars[i])
if char > 255
# Newer versions of MATLAB seem to write some mongrel UTF-8...
char = bytestring([uint8(chars[i] >> 8), uint8(chars[i])])[1]
end
write(bufs[j], char)
i += 1
end
end
if dimensions[1] <= 1
data = bytestring(CharString(chars))

if dimensions[1] == 0
data = ""
elseif dimensions[1] == 1
data = bytestring(bufs[1])
else
data = Array(ByteString, dimensions[1])
for i = 1:dimensions[1]
data[i] = rstrip(bytestring(CharString(chars[i:dimensions[1]:end])))
end
data = [rstrip(bytestring(buf)) for buf in bufs]
end
else
error("Unsupported string type")
Expand Down

0 comments on commit 67f2c4f

Please sign in to comment.