Skip to content

Commit

Permalink
switch to utf8proc's portable, up-to-date, upper/lowercase functions (f…
Browse files Browse the repository at this point in the history
  • Loading branch information
stevengj committed May 31, 2015
1 parent 7d8178c commit becc6eb
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 8 deletions.
4 changes: 0 additions & 4 deletions base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -793,10 +793,6 @@ write(io::IO, s::RopeString) = (write(io, s.head); write(io, s.tail))
sizeof(s::RopeString) = sizeof(s.head) + sizeof(s.tail)

## uppercase and lowercase transformations ##

# Upper-case a single character by handing it to the C library's `towupper`
# (called with a `Cwchar_t` argument) and converting the result back to `Char`.
function uppercase(c::Char)
    wide = ccall(:towupper, Cwchar_t, (Cwchar_t,), c)
    return convert(Char, wide)
end
# Lower-case a single character by handing it to the C library's `towlower`
# (called with a `Cwchar_t` argument) and converting the result back to `Char`.
function lowercase(c::Char)
    wide = ccall(:towlower, Cwchar_t, (Cwchar_t,), c)
    return convert(Char, wide)
end

# Upper-case a string by mapping the per-character `uppercase` over it.
function uppercase(s::AbstractString)
    return map(uppercase, s)
end
# Lower-case a string by mapping the per-character `lowercase` over it.
function lowercase(s::AbstractString)
    return map(lowercase, s)
end

Expand Down
8 changes: 7 additions & 1 deletion base/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Various Unicode functionality from the utf8proc library
module UTF8proc

import Base: show, showcompact, ==, hash, string, symbol, isless, length, eltype, start, next, done, convert, isvalid
import Base: show, showcompact, ==, hash, string, symbol, isless, length, eltype, start, next, done, convert, isvalid, lowercase, uppercase

export isgraphemebreak

Expand Down Expand Up @@ -121,6 +121,12 @@ end

charwidth(c::Char) = Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), c))

# Faster `x + y` for a Char and a UInt32 offset: the sum is computed on the raw
# 32-bit value of `x` with plain UInt32 arithmetic, so it wraps around rather
# than being checked for overflow, and is then reinterpreted as a Char.
function fastplus(x::Char, y::UInt32)
    bits = reinterpret(UInt32, x)
    return reinterpret(Char, bits + y)
end

# Lower-case a single character.
# ASCII input is handled inline without a library call: 'A'..'Z' is shifted by
# 0x20 (the distance from 'A' to 'a') and every other ASCII character is
# returned unchanged. Non-ASCII input is passed to utf8proc's `utf8proc_tolower`.
function lowercase(c::Char)
    if !isascii(c)
        return ccall(:utf8proc_tolower, Char, (UInt32,), c)
    elseif 'A' <= c <= 'Z'
        return fastplus(c, 0x00000020)
    else
        return c
    end
end
# Upper-case a single character.
# ASCII input is handled inline without a library call: 'a'..'z' has
# 0xffffffe0 added, which in wrapping UInt32 arithmetic is the same as
# subtracting 0x20; every other ASCII character is returned unchanged.
# Non-ASCII input is passed to utf8proc's `utf8proc_toupper`.
function uppercase(c::Char)
    if !isascii(c)
        return ccall(:utf8proc_toupper, Char, (UInt32,), c)
    elseif 'a' <= c <= 'z'
        return fastplus(c, 0xffffffe0)
    else
        return c
    end
end

############################################################################

# returns UTF8PROC_CATEGORY code in 0:30 giving Unicode category
Expand Down
2 changes: 1 addition & 1 deletion deps/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -1436,7 +1436,7 @@ UTF8PROC_OBJ_HEADER = $(build_includedir)/utf8proc.h
UTF8PROC_OBJ_TARGET = $(UTF8PROC_OBJ_LIB) $(UTF8PROC_OBJ_HEADER)

$(UTF8PROC_SRC_TARGET): $(UTF8PROC_SRC_DIR)/Makefile
$(MAKE) -C $(UTF8PROC_SRC_DIR) cc="$(CC) -O2 -std=c99 $(fPIC) -DUTF8PROC_EXPORTS $(DEPS_CFLAGS)" AR="$(AR)" libutf8proc.a
$(MAKE) -C $(UTF8PROC_SRC_DIR) CC="$(CC)" CFLAGS="-O2 -std=c99 $(fPIC) -DUTF8PROC_EXPORTS $(DEPS_CFLAGS)" AR="$(AR)" libutf8proc.a
touch -c $@
$(UTF8PROC_SRC_DIR)/checked: $(UTF8PROC_SRC_TARGET)
ifeq ($(OS),$(BUILD_OS))
Expand Down
4 changes: 2 additions & 2 deletions deps/utf8proc.version
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
UTF8PROC_BRANCH=v1.2
UTF8PROC_SHA1=e1fdad0ca9dc518b429439b6f4eac546a1bdd0de
UTF8PROC_BRANCH=1.3-dev1
UTF8PROC_SHA1=f7219d516efe7496737d041f22b0ba567174fb16
4 changes: 4 additions & 0 deletions test/strings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,10 @@ end
@test lowercase("AbC") == "abc"
@test lowercase('A') == 'a'
@test lowercase('a') == 'a'
@test uppercase('α') == '\u0391'
@test lowercase('Δ') == 'δ'
@test lowercase('\U118bf') == '\U118df'
@test uppercase('\U1044d') == '\U10425'
@test ucfirst("Abc") == "Abc"
@test ucfirst("abc") == "Abc"
@test lcfirst("ABC") == "aBC"
Expand Down

0 comments on commit becc6eb

Please sign in to comment.