Skip to content

Commit

Permalink
Merge pull request #20342 from JuliaLang/jb/unliftcharpredicates
Browse files Browse the repository at this point in the history
deprecate string methods of character predicates, e.g. `isnumber("")`
  • Loading branch information
JeffBezanson authored Feb 1, 2017
2 parents e0ab6a8 + 38715b8 commit 567b7c2
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 80 deletions.
6 changes: 6 additions & 0 deletions base/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1857,4 +1857,10 @@ end)

@deprecate FloatRange{T}(start::T, step, len, den) Base.floatrange(T, start, step, len, den)

# Deprecate the string methods of the character predicates: for every listed
# name, `isNAME(s::AbstractString)` is deprecated in favor of `all(isNAME, s)`.
for name in ("alnum", "alpha", "cntrl", "digit", "number", "graph",
"lower", "print", "punct", "space", "upper", "xdigit")
# Build the predicate's function name, e.g. "alnum" -> :isalnum.
f = Symbol("is",name)
@eval @deprecate ($f)(s::AbstractString) all($f, s)
end

# End deprecations scheduled for 0.6
2 changes: 1 addition & 1 deletion base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
end
# TODO: avoid this allocation
groupname = SubString(repl, groupstart, prevind(repl, i))
if isnumber(groupname)
if all(isnumber,groupname)
_write_capture(io, re, parse(Int, groupname))
else
group = PCRE.substring_number_from_name(re.regex, groupname)
Expand Down
10 changes: 5 additions & 5 deletions base/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -347,20 +347,20 @@ isascii(s::AbstractString) = all(isascii, s)
# Promotion rule for strings: any pair of `AbstractString` subtypes promotes to `String`.
promote_rule{S<:AbstractString,T<:AbstractString}(::Type{S}, ::Type{T}) = String

"""
isxdigit(c::Union{Char,AbstractString}) -> Bool
isxdigit(c::Char) -> Bool
Tests whether a character is a valid hexadecimal digit, or whether this is true for all elements of a string.
Tests whether a character is a valid hexadecimal digit. Note that this does not
include `x` (as in the standard `0x` prefix).
```jldoctest
julia> isxdigit("abc")
julia> isxdigit('a')
true
julia> isxdigit("0x9")
julia> isxdigit('x')
false
```
"""
function isxdigit(c::Char)
    # Accept the decimal digits plus the letters a-f in either case.
    return ('0' <= c <= '9') || ('a' <= c <= 'f') || ('A' <= c <= 'F')
end
isxdigit(s::AbstractString) = all(isxdigit, s)

## uppercase, lowercase, and titlecase transformations ##

Expand Down
74 changes: 28 additions & 46 deletions base/strings/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -219,21 +219,21 @@ is_assigned_char(c) = category_code(c) != UTF8PROC_CATEGORY_CN
## libc character class predicates ##

"""
islower(c::Union{Char,AbstractString}) -> Bool
islower(c::Char) -> Bool
Tests whether a character is a lowercase letter, or whether this is true for all elements of
a string. A character is classified as lowercase if it belongs to Unicode category Ll,
Tests whether a character is a lowercase letter.
A character is classified as lowercase if it belongs to Unicode category Ll,
Letter: Lowercase.
"""
function islower(c::Char)
    # Lowercase means exactly the "Ll" general category.
    return category_code(c) == UTF8PROC_CATEGORY_LL
end

# true for Unicode upper and mixed case

"""
isupper(c::Union{Char,AbstractString}) -> Bool
isupper(c::Char) -> Bool
Tests whether a character is an uppercase letter, or whether this is true for all elements
of a string. A character is classified as uppercase if it belongs to Unicode category Lu,
Tests whether a character is an uppercase letter.
A character is classified as uppercase if it belongs to Unicode category Lu,
Letter: Uppercase, or Lt, Letter: Titlecase.
"""
function isupper(c::Char)
Expand All @@ -242,36 +242,35 @@ function isupper(c::Char)
end

"""
isdigit(c::Union{Char,AbstractString}) -> Bool
isdigit(c::Char) -> Bool
Tests whether a character is a numeric digit (0-9), or whether this is true for all elements
of a string.
Tests whether a character is a numeric digit (0-9).
"""
function isdigit(c::Char)
    # Only the ASCII digits '0' through '9' qualify.
    return c >= '0' && c <= '9'
end

"""
isalpha(c::Union{Char,AbstractString}) -> Bool
isalpha(c::Char) -> Bool
Tests whether a character is alphabetic, or whether this is true for all elements of a
string. A character is classified as alphabetic if it belongs to the Unicode general
Tests whether a character is alphabetic.
A character is classified as alphabetic if it belongs to the Unicode general
category Letter, i.e. a character whose category code begins with 'L'.
"""
function isalpha(c::Char)
    # The category code must lie in the range LU..LO (inclusive).
    code = category_code(c)
    return UTF8PROC_CATEGORY_LU <= code <= UTF8PROC_CATEGORY_LO
end

"""
isnumber(c::Union{Char,AbstractString}) -> Bool
isnumber(c::Char) -> Bool
Tests whether a character is numeric, or whether this is true for all elements of a string.
Tests whether a character is numeric.
A character is classified as numeric if it belongs to the Unicode general category Number,
i.e. a character whose category code begins with 'N'.
"""
function isnumber(c::Char)
    # The category code must lie in the range ND..NO (inclusive).
    code = category_code(c)
    return UTF8PROC_CATEGORY_ND <= code <= UTF8PROC_CATEGORY_NO
end

"""
isalnum(c::Union{Char,AbstractString}) -> Bool
isalnum(c::Char) -> Bool
Tests whether a character is alphanumeric, or whether this is true for all elements of a
string. A character is classified as alphanumeric if it belongs to the Unicode general
Tests whether a character is alphanumeric.
A character is classified as alphanumeric if it belongs to the Unicode general
category Letter or Number, i.e. a character whose category code begins with 'L' or 'N'.
"""
function isalnum(c::Char)
Expand All @@ -283,67 +282,50 @@ end
# following C++, only control characters from the Latin-1 subset return true

"""
iscntrl(c::Union{Char,AbstractString}) -> Bool
iscntrl(c::Char) -> Bool
Tests whether a character is a control character, or whether this is true for all elements
of a string. Control characters are the non-printing characters of the Latin-1 subset of Unicode.
Tests whether a character is a control character.
Control characters are the non-printing characters of the Latin-1 subset of Unicode.
"""
function iscntrl(c::Char)
    # Everything up to and including U+001F counts as a control character.
    c <= Char(0x1f) && return true
    # So does the range U+007F through U+009F.
    return Char(0x7f) <= c <= Char(0x9f)
end

"""
ispunct(c::Union{Char,AbstractString}) -> Bool
ispunct(c::Char) -> Bool
Tests whether a character belongs to the Unicode general category Punctuation, i.e. a
character whose category code begins with 'P'. For strings, tests whether this is true for
all elements of the string.
character whose category code begins with 'P'.
"""
function ispunct(c::Char)
    # The category code must lie in the range PC..PO (inclusive).
    code = category_code(c)
    return UTF8PROC_CATEGORY_PC <= code <= UTF8PROC_CATEGORY_PO
end

# \u85 is the Unicode Next Line (NEL) character

"""
isspace(c::Union{Char,AbstractString}) -> Bool
isspace(c::Char) -> Bool
Tests whether a character is any whitespace character. Includes ASCII characters '\\t',
'\\n', '\\v', '\\f', '\\r', and ' ', Latin-1 character U+0085, and characters in Unicode
category Zs. For strings, tests whether this is true for all elements of the string.
category Zs.
"""
@inline function isspace(c::Char)
    # Plain space.
    c == ' ' && return true
    # '\t' through '\r' covers tab, newline, vertical tab, form feed, carriage return.
    '\t' <= c <= '\r' && return true
    # U+0085.
    c == '\u85' && return true
    # The category lookup is only performed for characters at or above U+00A0.
    return '\ua0' <= c && category_code(c) == UTF8PROC_CATEGORY_ZS
end

"""
isprint(c::Union{Char,AbstractString}) -> Bool
isprint(c::Char) -> Bool
Tests whether a character is printable, including spaces, but not a control character. For
strings, tests whether this is true for all elements of the string.
Tests whether a character is printable, including spaces, but not a control character.
"""
function isprint(c::Char)
    # The category code must lie in the range LU..ZS (inclusive).
    code = category_code(c)
    return UTF8PROC_CATEGORY_LU <= code <= UTF8PROC_CATEGORY_ZS
end

# true in principle if a printer would use ink

"""
isgraph(c::Union{Char,AbstractString}) -> Bool
isgraph(c::Char) -> Bool
Tests whether a character is printable, and not a space, or whether this is true for all
elements of a string. Any character that would cause a printer to use ink should be
Tests whether a character is printable, and not a space.
Any character that would cause a printer to use ink should be
classified with `isgraph(c)==true`.
"""
function isgraph(c::Char)
    # The category code must lie in the range LU..SO (inclusive).
    code = category_code(c)
    return UTF8PROC_CATEGORY_LU <= code <= UTF8PROC_CATEGORY_SO
end

# Generate an `AbstractString` method for each character predicate; the string
# method is true exactly when the predicate holds for every character of the
# string (and therefore true for the empty string).
for name = ("alnum", "alpha", "cntrl", "digit", "number", "graph",
            "lower", "print", "punct", "space", "upper")
    f = Symbol("is", name)
    @eval $f(s::AbstractString) = all($f, s)
end

############################################################################
# iterators for grapheme segmentation

Expand Down
56 changes: 28 additions & 28 deletions test/unicode/utf8proc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -187,34 +187,34 @@ let

end

@test isspace(" \t \n \r ")==true
@test isgraph(" \t \n \r ")==false
@test isprint(" \t \n \r ")==false
@test isalpha(" \t \n \r ")==false
@test isnumber(" \t \n \r ")==false
@test ispunct(" \t \n \r ")==false

@test isspace("ΣβΣβ")==false
@test isalpha("ΣβΣβ")==true
@test isgraph("ΣβΣβ")==true
@test isprint("ΣβΣβ")==true
@test isupper("ΣβΣβ")==false
@test islower("ΣβΣβ")==false
@test isnumber("ΣβΣβ")==false
@test iscntrl("ΣβΣβ")==false
@test ispunct("ΣβΣβ")==false

@test isnumber("23435")==true
@test isdigit("23435")==true
@test isalnum("23435")==true
@test isalpha("23435")==false
@test iscntrl( string(Char(0x0080))) == true
@test ispunct( "‡؟჻") ==true

@test isxdigit('0') == true
@test isxdigit("0") == true
@test isxdigit("a") == true
@test isxdigit("g") == false
@test all(isspace," \t \n \r ")
@test !all(isgraph," \t \n \r ")
@test !all(isprint," \t \n \r ")
@test !all(isalpha," \t \n \r ")
@test !all(isnumber," \t \n \r ")
@test !all(ispunct," \t \n \r ")

@test !all(isspace,"ΣβΣβ")
@test all(isalpha,"ΣβΣβ")
@test all(isgraph,"ΣβΣβ")
@test all(isprint,"ΣβΣβ")
@test !all(isupper,"ΣβΣβ")
@test !all(islower,"ΣβΣβ")
@test !all(isnumber,"ΣβΣβ")
@test !all(iscntrl,"ΣβΣβ")
@test !all(ispunct,"ΣβΣβ")

@test all(isnumber,"23435")
@test all(isdigit,"23435")
@test all(isalnum,"23435")
@test !all(isalpha,"23435")
@test all(iscntrl,string(Char(0x0080)))
@test all(ispunct, "‡؟჻")

@test isxdigit('0')
@test isxdigit('a')
@test !isxdigit('x')
@test !isxdigit('g')

# check utf8proc handling of CN category constants
let c_ll = 'β', c_cn = '\u038B'
Expand Down

0 comments on commit 567b7c2

Please sign in to comment.