JuliaLang · stevengj · Apr 24, 2015 · Apr 24, 2015
diff --git a/base/string.jl b/base/string.jl
@@ -541,8 +541,6 @@ startswith(a::Array{UInt8,1}, b::Array{UInt8,1}) =
 ## character column width function ##
 
 strwidth(s::AbstractString) = (w=0; for c in s; w += charwidth(c); end; w)
-strwidth(s::ByteString) = Int(ccall(:u8_strwidth, Csize_t, (Ptr{UInt8},), s.data))
-# TODO: implement and use u8_strnwidth that takes a length argument
 
 isascii(c::Char) = c < Char(0x80)
 isascii(s::AbstractString) = all(isascii, s)

diff --git a/base/utf8.jl b/base/utf8.jl
@@ -37,7 +37,8 @@ function endof(s::UTF8String)
     end
     i
 end
-length(s::UTF8String) = Int(ccall(:u8_strlen, Csize_t, (Ptr{UInt8},), s.data))
+length(s::UTF8String) = Int(ccall(:u8_charnum, Csize_t, (Ptr{UInt8}, Csize_t),
+                                  s.data, length(s.data)))  # byte count of s.data is passed explicitly alongside the pointer
 
 function next(s::UTF8String, i::Int)
     # potentially faster version

diff --git a/base/utf8proc.jl b/base/utf8proc.jl
@@ -46,7 +46,6 @@ const UTF8PROC_CATEGORY_CF = 27
 const UTF8PROC_CATEGORY_CS = 28
 const UTF8PROC_CATEGORY_CO = 29
 
-const UTF8PROC_NULLTERM  = (1<<0)
 const UTF8PROC_STABLE    = (1<<1)
 const UTF8PROC_COMPAT    = (1<<2)
 const UTF8PROC_COMPOSE   = (1<<3)
@@ -64,22 +63,21 @@ const UTF8PROC_STRIPMARK = (1<<13)
 
 ############################################################################
 
-let
-    const p = Array(Ptr{UInt8}, 1)
-    global utf8proc_map
-    function utf8proc_map(s::AbstractString, flags::Integer)
-        result = ccall(:utf8proc_map, Cssize_t,
-                       (Ptr{UInt8}, Cssize_t, Ptr{Ptr{UInt8}}, Cint),
-                       s, 0, p, flags | UTF8PROC_NULLTERM)
-        result < 0 && error(bytestring(ccall(:utf8proc_errmsg, Ptr{UInt8},
-                                             (Cssize_t,), result)))
-        a = ccall(:jl_ptr_to_array_1d, Vector{UInt8},
-                  (Any, Ptr{UInt8}, Csize_t, Cint),
-                  Vector{UInt8}, p[1], result, true)
-        ccall(:jl_array_to_string, Any, (Any,), a)::ByteString
-    end
+function utf8proc_map(s::ByteString, flags::Integer)  # run the C routine utf8proc_map over `s` with option bits `flags`
+    p = Ref{Ptr{UInt8}}()  # out-parameter: the C call stores the result buffer pointer here
+    result = ccall(:utf8proc_map, Cssize_t,
+                   (Ptr{UInt8}, Cssize_t, Ref{Ptr{UInt8}}, Cint),
+                   s, sizeof(s), p, flags)  # input length is given explicitly as sizeof(s)
+    result < 0 && error(bytestring(ccall(:utf8proc_errmsg, Ptr{UInt8},  # negative result => raise with utf8proc's message
+                                         (Cssize_t,), result)))
+    a = ccall(:jl_ptr_to_array_1d, Vector{UInt8},
+              (Any, Ptr{UInt8}, Csize_t, Cint),
+              Vector{UInt8}, p[], result, true)  # wrap `result` bytes at p[]; `true` presumably hands buffer ownership to Julia -- confirm
+    ccall(:jl_array_to_string, Any, (Any,), a)::ByteString  # convert the byte vector to a string, asserted to be a ByteString
 end
 
+utf8proc_map(s::AbstractString, flags::Integer) = utf8proc_map(bytestring(s), flags)  # fallback: convert with bytestring, then use the ByteString method
+
 function normalize_string(s::AbstractString; stable::Bool=false, compat::Bool=false, compose::Bool=true, decompose::Bool=false, stripignore::Bool=false, rejectna::Bool=false, newline2ls::Bool=false, newline2ps::Bool=false, newline2lf::Bool=false, stripcc::Bool=false, casefold::Bool=false, lump::Bool=false, stripmark::Bool=false)
     flags = 0
     stable && (flags = flags | UTF8PROC_STABLE)

diff --git a/test/unicode.jl b/test/unicode.jl
@@ -129,3 +129,8 @@ end
 
 # up-to-date character widths (#3721, #6939)
 @test charwidth('\U1f355') == strwidth("\U1f355") == strwidth(utf16("\U1f355")) == strwidth("\U1f355\u0302") == strwidth(utf16("\U1f355\u0302")) == 2
+
+# handling of embedded NUL chars (#10958)
+@test length("\0w") == length("\0α") == 2  # the NUL counts as one character and the character after it is counted too
+@test strwidth("\0w") == strwidth("\0α") == 1  # a width of 0 would mean measurement stopped at the NUL
+@test normalize_string("\0W", casefold=true) == "\0w"  # text after the NUL must still be case-folded