Skip to content

Commit

Permalink
switch to utf8proc's portable, up-to-date, upper/lowercase functions (f…
Browse files Browse the repository at this point in the history
  • Loading branch information
stevengj authored and tkelman committed Jun 6, 2015
1 parent 67500d5 commit 982ba88
Show file tree
Hide file tree
Showing 9 changed files with 16 additions and 10 deletions.
4 changes: 0 additions & 4 deletions base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -793,10 +793,6 @@ write(io::IO, s::RopeString) = (write(io, s.head); write(io, s.tail))
# The size of a RopeString is the sum of the sizes of its head and tail parts.
function sizeof(s::RopeString)
    headsize = sizeof(s.head)
    tailsize = sizeof(s.tail)
    return headsize + tailsize
end

## uppercase and lowercase transformations ##

# Uppercase a single character by calling the C function `towupper` and
# converting the returned wide character back to a Char.
function uppercase(c::Char)
    wide = ccall(:towupper, Cwchar_t, (Cwchar_t,), c)
    return convert(Char, wide)
end
# Lowercase a single character by calling the C function `towlower` and
# converting the returned wide character back to a Char.
function lowercase(c::Char)
    wide = ccall(:towlower, Cwchar_t, (Cwchar_t,), c)
    return convert(Char, wide)
end

# Uppercase a string by mapping the per-character `uppercase` over it.
function uppercase(s::AbstractString)
    return map(uppercase, s)
end
# Lowercase a string by mapping the per-character `lowercase` over it.
function lowercase(s::AbstractString)
    return map(lowercase, s)
end

Expand Down
8 changes: 7 additions & 1 deletion base/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Various Unicode functionality from the utf8proc library
module UTF8proc

import Base: show, showcompact, ==, hash, string, symbol, isless, length, eltype, start, next, done, convert, isvalid
import Base: show, showcompact, ==, hash, string, symbol, isless, length, eltype, start, next, done, convert, isvalid, lowercase, uppercase

export isgraphemebreak

Expand Down Expand Up @@ -121,6 +121,12 @@ end

# Width of `c` as reported by utf8proc_charwidth, converted from Cint to Int.
function charwidth(c::Char)
    width = ccall(:utf8proc_charwidth, Cint, (UInt32,), c)
    return Int(width)
end

# Faster x+y for a Char: add `y` directly to the character's raw UInt32
# bits and reinterpret the sum as a Char, with no overflow checking.
function fastplus(x::Char, y::UInt32)
    bits = reinterpret(UInt32, x)
    return reinterpret(Char, bits + y)
end

# ASCII characters are handled inline: only 'A'-'Z' change, by adding 0x20.
# Every other character is passed to utf8proc_tolower.
function lowercase(c::Char)
    if isascii(c)
        return 'A' <= c <= 'Z' ? fastplus(c, 0x00000020) : c
    end
    return ccall(:utf8proc_tolower, Char, (UInt32,), c)
end
# Non-ASCII characters are passed to utf8proc_toupper. Among ASCII characters
# only 'a'-'z' change: adding 0xffffffe0 subtracts 0x20 modulo 2^32.
function uppercase(c::Char)
    if !isascii(c)
        return ccall(:utf8proc_toupper, Char, (UInt32,), c)
    elseif 'a' <= c <= 'z'
        return fastplus(c, 0xffffffe0)
    else
        return c
    end
end

############################################################################

# returns UTF8PROC_CATEGORY code in 0:30 giving Unicode category
Expand Down
2 changes: 1 addition & 1 deletion deps/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -1436,7 +1436,7 @@ UTF8PROC_OBJ_HEADER = $(build_includedir)/utf8proc.h
UTF8PROC_OBJ_TARGET = $(UTF8PROC_OBJ_LIB) $(UTF8PROC_OBJ_HEADER)

# Build libutf8proc.a by running make inside $(UTF8PROC_SRC_DIR), then touch
# the target without creating it if it is missing (touch -c).
# NOTE(review): two $(MAKE) invocations appear below, one passing the compiler
# and flags together through cc= and one splitting them into CC= and CFLAGS= —
# confirm which one is meant to remain.
$(UTF8PROC_SRC_TARGET): $(UTF8PROC_SRC_DIR)/Makefile
$(MAKE) -C $(UTF8PROC_SRC_DIR) cc="$(CC) -O2 -std=c99 $(fPIC) -DUTF8PROC_EXPORTS $(DEPS_CFLAGS)" AR="$(AR)" libutf8proc.a
$(MAKE) -C $(UTF8PROC_SRC_DIR) CC="$(CC)" CFLAGS="-O2 -std=c99 $(fPIC) -DUTF8PROC_EXPORTS $(DEPS_CFLAGS)" AR="$(AR)" libutf8proc.a
touch -c $@
$(UTF8PROC_SRC_DIR)/checked: $(UTF8PROC_SRC_TARGET)
ifeq ($(OS),$(BUILD_OS))
Expand Down
1 change: 1 addition & 0 deletions deps/checksums/utf8proc-1.3-dev1.tar.gz/md5
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
e61de478d2a858586671dfd56477281e
1 change: 1 addition & 0 deletions deps/checksums/utf8proc-1.3-dev1.tar.gz/sha512
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
167d3a7779335d7ac0103431c4e7b1d9dc4f31836a7704524ea6c1c29d447012ec49f122247f9bf5d445500d7ddd7af5403ca260da05ffac9dba114e32afac07
1 change: 0 additions & 1 deletion deps/checksums/utf8proc-v1.2.tar.gz/md5

This file was deleted.

1 change: 0 additions & 1 deletion deps/checksums/utf8proc-v1.2.tar.gz/sha512

This file was deleted.

4 changes: 2 additions & 2 deletions deps/utf8proc.version
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
UTF8PROC_BRANCH=v1.2
UTF8PROC_SHA1=e1fdad0ca9dc518b429439b6f4eac546a1bdd0de
UTF8PROC_BRANCH=1.3-dev1
UTF8PROC_SHA1=f7219d516efe7496737d041f22b0ba567174fb16
4 changes: 4 additions & 0 deletions test/strings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,10 @@ end
# lowercase on an ASCII string and on single ASCII characters
@test lowercase("AbC") == "abc"
@test lowercase('A') == 'a'
@test lowercase('a') == 'a'
# non-ASCII characters: Greek letters first, then code points above U+FFFF
@test uppercase('α') == '\u0391'
@test lowercase('Δ') == 'δ'
@test lowercase('\U118bf') == '\U118df'
@test uppercase('\U1044d') == '\U10425'
# ucfirst / lcfirst on ASCII strings
@test ucfirst("Abc") == "Abc"
@test ucfirst("abc") == "Abc"
@test lcfirst("ABC") == "aBC"
Expand Down

0 comments on commit 982ba88

Please sign in to comment.