Skip to content

Commit 89061b2

Browse files
committed
switch to utf8proc's portable, up-to-date, upper/lowercase functions (fixes JuliaLang#11471)
1 parent 7d8178c commit 89061b2

File tree

4 files changed

+13
-7
lines changed

4 files changed

+13
-7
lines changed

base/string.jl

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -793,10 +793,6 @@ write(io::IO, s::RopeString) = (write(io, s.head); write(io, s.tail))
793793
sizeof(s::RopeString) = sizeof(s.head) + sizeof(s.tail)
794794

795795
## uppercase and lowercase transformations ##
796-
797-
uppercase(c::Char) = convert(Char, ccall(:towupper, Cwchar_t, (Cwchar_t,), c))
798-
lowercase(c::Char) = convert(Char, ccall(:towlower, Cwchar_t, (Cwchar_t,), c))
799-
800796
# String-level uppercase: apply the per-character `uppercase` method to every
# character of `s` via `map` and return the mapped result.
function uppercase(s::AbstractString)
    return map(uppercase, s)
end
801797
# String-level lowercase: apply the per-character `lowercase` method to every
# character of `s` via `map` and return the mapped result.
function lowercase(s::AbstractString)
    return map(lowercase, s)
end
802798

base/utf8proc.jl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Various Unicode functionality from the utf8proc library
44
module UTF8proc
55

6-
import Base: show, showcompact, ==, hash, string, symbol, isless, length, eltype, start, next, done, convert, isvalid
6+
import Base: show, showcompact, ==, hash, string, symbol, isless, length, eltype, start, next, done, convert, isvalid, lowercase, uppercase
77

88
export isgraphemebreak
99

@@ -121,6 +121,12 @@ end
121121

122122
# Ask utf8proc for the width of `c` (the C function `utf8proc_charwidth`
# returns a Cint) and hand it back as a native `Int`.
function charwidth(c::Char)
    width = ccall(:utf8proc_charwidth, Cint, (UInt32,), c)
    return Int(width)
end
123123

124+
# faster x+y that does no overflow checking
125+
# Offset a character by `y` without any overflow checking: the 32-bit pattern
# of `x` is reinterpreted as a UInt32, `y` is added with ordinary wrapping
# UInt32 arithmetic, and the sum is reinterpreted back as a Char.
function fastplus(x::Char, y::UInt32)
    bits = reinterpret(UInt32, x)
    return reinterpret(Char, bits + y)
end
126+
127+
# Lowercase a single character.
# Non-ASCII characters are delegated to utf8proc's `utf8proc_tolower`.
# ASCII characters take a fast path: 'A'..'Z' are shifted up by 0x20 onto
# 'a'..'z', and every other ASCII character is returned unchanged.
function lowercase(c::Char)
    if !isascii(c)
        return ccall(:utf8proc_tolower, Char, (UInt32,), c)
    end
    return 'A' <= c <= 'Z' ? fastplus(c, 0x00000020) : c
end
128+
# Uppercase a single character.
# Non-ASCII characters are delegated to utf8proc's `utf8proc_toupper`.
# ASCII characters take a fast path: 'a'..'z' are shifted down by 0x20 onto
# 'A'..'Z', and every other ASCII character is returned unchanged.
function uppercase(c::Char)
    if !isascii(c)
        return ccall(:utf8proc_toupper, Char, (UInt32,), c)
    end
    # 0xffffffe0 == 2^32 - 0x20, so the wrapping UInt32 addition inside
    # `fastplus` is equivalent to subtracting 0x20.
    return 'a' <= c <= 'z' ? fastplus(c, 0xffffffe0) : c
end
129+
124130
############################################################################
125131

126132
# returns UTF8PROC_CATEGORY code in 0:30 giving Unicode category

deps/utf8proc.version

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
UTF8PROC_BRANCH=v1.2
2-
UTF8PROC_SHA1=e1fdad0ca9dc518b429439b6f4eac546a1bdd0de
1+
UTF8PROC_BRANCH=master
2+
UTF8PROC_SHA1=d75985cf09fd753047952beaafa691645c47724f

test/strings.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,10 @@ end
673673
@test lowercase("AbC") == "abc"
674674
@test lowercase('A') == 'a'
675675
@test lowercase('a') == 'a'
676+
@test uppercase('α') == '\u0391'
677+
@test lowercase('Δ') == 'δ'
678+
@test lowercase('\U118bf') == '\U118df'
679+
@test uppercase('\U1044d') == '\U10425'
676680
@test ucfirst("Abc") == "Abc"
677681
@test ucfirst("abc") == "Abc"
678682
@test lcfirst("ABC") == "aBC"

0 commit comments

Comments
 (0)