Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions base/strings/unicode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module Unicode
import Base: show, ==, hash, string, Symbol, isless, length, eltype,
convert, isvalid, ismalformed, isoverlong, iterate,
AnnotatedString, AnnotatedChar, annotated_chartransform,
@assume_effects, annotations
@assume_effects, annotations, is_overlong_enc

# whether codepoints are valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff

Expand Down Expand Up @@ -262,17 +262,15 @@ julia> textwidth('⛵')
2
```
"""
function textwidth(c::AbstractChar)
# Compute the text width of `c` and return it as an `Int`.
# A malformed character is given width 1 up front, before `codepoint` is called on it.
ismalformed(c) && return 1
i = codepoint(c)
# Code points below 0x7f skip the C call: the width is 1 when i >= 0x20 and 0 otherwise.
i < 0x7f && return Int(i >= 0x20) # ASCII fast path
# All remaining code points are delegated to utf8proc; its Cint result is widened to Int.
Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), i))
end
# Generic method: convert any `AbstractChar` to `Char` and reuse the `Char` method.
# The `::Char` assertion makes the call fail if `Char(c)` does not yield a `Char`.
# NOTE(review): this has the same signature as the multi-line
# `function textwidth(c::AbstractChar)` directly above; it looks like the
# replacement side of a diff hunk -- confirm that only one of the two is kept.
textwidth(c::AbstractChar) = textwidth(Char(c)::Char)

function textwidth(c::Char)
# Compute the text width of `c` as an `Int`, working on the raw 32-bit pattern of the `Char`.
# NOTE(review): `b` is assigned again two lines below with the same value
# (`u` is `reinterpret(UInt32, c)`); this line looks like the pre-change side
# of a diff hunk -- confirm which assignment is meant to stay.
b = bswap(reinterpret(UInt32, c)) # from isascii(c)
# `u` keeps the raw bits of `c`; it is what `is_overlong_enc` is called with below.
u = reinterpret(UInt32, c)
b = bswap(u) # from isascii(c)
# Presumably `b` equals the code point for single-byte (ASCII) chars -- confirm
# against the `Char` bit layout. Values below 0x7f get width 1 when >= 0x20, else 0.
b < 0x7f && return Int(b >= 0x20) # ASCII fast path
# NOTE(review): the combined check further down also tests `ismalformed(c)`,
# so this early return makes that half of it redundant -- likely the
# pre-change line of the diff; confirm.
ismalformed(c) && return 1
# We can't know a priori how terminals will render invalid UTF-8 chars,
# so we conservatively decide a width of 1.
# Both malformed and overlong encodings take this path and never reach utf8proc.
(ismalformed(c) || is_overlong_enc(u)) && return 1
# `c` is passed for a `UInt32` parameter, so this relies on ccall's argument
# conversion from `Char` (presumably to its code point -- confirm).
Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), c))
end

Expand Down
2 changes: 2 additions & 0 deletions test/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ SubStr(s) = SubString("abc$(s)de", firstindex(s) + 3, lastindex(s) + 3)
@test textwidth(c^3) == w*3
@test w == @invoke textwidth(c::AbstractChar)
end
@test textwidth('\xc0\xa0') == 1 # overlong
@test textwidth('\xf0\x80\x80') == 1 # malformed
for i in 0x00:0x7f # test all ASCII chars (which have fast path)
w = Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), i))
c = Char(i)
Expand Down