From cb4bc8eeb141be692a8b5801bc5aaed8f17c4fd4 Mon Sep 17 00:00:00 2001
From: Stefan Karpinski
Date: Wed, 12 Sep 2018 14:25:02 -0400
Subject: [PATCH] ncodeunits(c::Char): fast equivalent of ncodeunits(string(c))

---
 base/char.jl | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/base/char.jl b/base/char.jl
index 2245bbcbcd305..35123b2f3ebe3 100644
--- a/base/char.jl
+++ b/base/char.jl
@@ -50,6 +50,15 @@ Char
 (::Type{T})(x::AbstractChar) where {T<:Union{Number,AbstractChar}} = T(codepoint(x))
 (::Type{T})(x::T) where {T<:AbstractChar} = x
 
+"""
+    ncodeunits(c::Char) -> Int
+
+Return the number of code units required to encode a character as UTF-8.
+This is the number of bytes which will be printed if the character is written
+to an output stream, or `ncodeunits(string(c))` but computed efficiently.
+"""
+ncodeunits(c::Char) = max(1, 4 - (trailing_zeros(reinterpret(UInt32, c)) >> 3))
+
 """
     codepoint(c::AbstractChar) -> UInt32
 