Skip to content

Commit

Permalink
move codelen and first_utf8_byte to Char.jl (#28894)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored and JeffBezanson committed Sep 12, 2018
1 parent 5e4ca86 commit 62de472
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 7 deletions.
3 changes: 3 additions & 0 deletions base/char.jl
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ isless(x::Char, y::Char) = reinterpret(UInt32, x) < reinterpret(UInt32, y)
# Hash a `Char` together with the seed `h`.
# The raw 32-bit representation of `x` is offset by a fixed constant and moved
# into the upper half of a 64-bit word, then combined with the seed by xor
# before being passed through `hash_uint64`.
hash(x::Char, h::UInt) =
    hash_uint64(((reinterpret(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h))

# Return, as a `UInt8`, the most significant byte of the raw 32-bit
# representation of `c` (the leading byte of the character's encoding).
function first_utf8_byte(c::Char)
    raw = reinterpret(UInt32, c)
    return (raw >> 24) % UInt8
end
# Number of bytes (1 to 4) occupied by `c` in its raw 32-bit representation,
# counted from the most significant byte down to the last non-zero byte.
function codelen(c::Char)
    # Forcing the top byte to all ones caps the trailing-zero count at 24,
    # so at most three whole bytes are ever reported as unused.
    bits = 0xff000000 | reinterpret(UInt32, c)
    unused_bytes = trailing_zeros(bits) >> 3
    return 4 - unused_bytes
end

This comment has been minimized.

Copy link
@Keno

Keno Sep 12, 2018

Member

Didn't @StefanKarpinski just add an overload to ncodeunits that does this?

This comment has been minimized.

Copy link
@StefanKarpinski

# fallbacks:
function isless(x::AbstractChar, y::AbstractChar)
    # Order generic characters by converting both operands to `Char`
    # and deferring to the `Char` comparison.
    cx = Char(x)
    cy = Char(y)
    return isless(cx, cy)
end
function ==(x::AbstractChar, y::AbstractChar)
    # Equality of generic characters is decided after converting both
    # operands to `Char`.
    cx = Char(x)
    cy = Char(y)
    return cx == cy
end
Expand Down
7 changes: 0 additions & 7 deletions base/strings/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -293,17 +293,10 @@ length(s::String) = length(s, 1, ncodeunits(s), ncodeunits(s))
end
end

# TODO: delete or move to char.jl
# Return, as a `UInt8`, the most significant byte of the raw 32-bit
# representation of `c` (the leading byte of the character's encoding).
function first_utf8_byte(c::Char)
    raw = reinterpret(UInt32, c)
    return (raw >> 24) % UInt8
end

## overload methods for efficiency ##

# Return `true` when `i` is an in-bounds index of `s` and `thisind(s, i)`
# maps `i` to itself; return `false` otherwise (including out-of-bounds `i`).
function isvalid(s::String, i::Int)
    # Out-of-range indices are rejected up front, so `thisind` is only
    # consulted for indices inside the string.
    checkbounds(Bool, s, i) || return false
    return thisind(s, i) == i
end

# UTF-8 encoding length of a character
# TODO: delete or move to char.jl
function codelen(c::Char)
    # Forcing the top byte to all ones caps the trailing-zero count at 24,
    # so the result always lies between 1 and 4.
    bits = 0xff000000 | reinterpret(UInt32, c)
    unused_bytes = trailing_zeros(bits) >> 3
    return 4 - unused_bytes
end

"""
repeat(c::AbstractChar, r::Integer) -> String
Expand Down

0 comments on commit 62de472

Please sign in to comment.