Skip to content

Commit

Permalink
Merge branch 'master' of github.com:JuliaLang/julia
Browse files Browse the repository at this point in the history
  • Loading branch information
JeffBezanson committed Sep 3, 2012
2 parents b4d359e + 4fd0783 commit 656e0d3
Show file tree
Hide file tree
Showing 9 changed files with 124 additions and 27 deletions.
2 changes: 1 addition & 1 deletion base/ascii.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ end
## outputting ASCII strings ##

print(io::IO, s::ASCIIString) = (write(io, s.data);nothing)
write(io, s::ASCIIString) = write(io, s.data)
write(io::IO, s::ASCIIString) = write(io, s.data)

## transcoding to ASCII ##

Expand Down
6 changes: 3 additions & 3 deletions base/printf.jl
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,9 @@ function _special_handler(flags::ASCIIString, width::Int)
pos = contains(flags,'+') ? "+" :
contains(flags,' ') ? " " : ""
abn = quote
isnan($x) ? $(bytestring(pad("NaN", width))) :
$x < 0 ? $(bytestring(pad("-Inf", width))) :
$(bytestring(pad("$(pos)Inf", width)))
isnan($x) ? $(pad("NaN", width)) :
$x < 0 ? $(pad("-Inf", width)) :
$(pad("$(pos)Inf", width))
end
ex = :(isfinite($x) ? $blk : write(out, $abn))
x, ex, blk
Expand Down
3 changes: 2 additions & 1 deletion base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ function search(str::ByteString, re::Regex, idx::Integer)
m, n = PCRE.exec(re.regex, re.extra, str, idx-1, opts, true)
isempty(m) ? (0,0) : (m[1]+1,m[2]+1)
end
search(s::ByteString, r::Regex) = search(s,r,start(s))
search(s::String, r::Regex, idx::Integer) = error("regex search is only available for bytestrings; use bytestring(s) to convert")
search(s::String, r::Regex) = search(s,r,start(s))

type RegexMatchIterator
regex::Regex
Expand Down
1 change: 1 addition & 0 deletions base/set.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ elements(s::Set) = keys(s.hash)
eltype{T}(s::Set{T}) = T

has(s::Set, x) = has(s.hash, x)
contains(s::Set, x) = has(s, x)
get(s::Set, x, deflt) = get(s.hash, x, false)

add(s::Set, x) = (s.hash[x] = true; s)
Expand Down
69 changes: 60 additions & 9 deletions base/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ ref(s::String, v::AbstractVector) =
symbol(s::String) = symbol(bytestring(s))

print(io::IO, s::String) = for c in s write(io, c) end
write(io::IO, s::String) = print(io, s)
show(io::IO, s::String) = print_quoted(io, s)

(*)(s::String...) = strcat(s...)
Expand Down Expand Up @@ -156,7 +157,7 @@ function chr2ind(s::String, i::Integer)
end
end

typealias Chars Union(Char,AbstractVector{Char})
typealias Chars Union(Char,AbstractVector{Char},Set{Char})

function strchr(s::String, c::Chars, i::Integer)
if i < 1 error("index out of range") end
Expand All @@ -174,7 +175,15 @@ strchr(s::String, c::Chars) = strchr(s,c,start(s))

contains(s::String, c::Char) = (strchr(s,c)!=0)

search(s::String, c::Chars, i::Integer) = (i=strchr(s,c,i); (i,nextind(s,i)))
# Search `s` for the character(s) `c`, starting at index `i`.
#
# `c` may be anything covered by `Chars`. The result is a tuple of two
# integers: the index of the match and the index following it.
#
#   * If `c` is empty, the result is `(i,i)` for 1 <= i <= length(s)+1,
#     `(0,0)` for i == length(s)+2, and an error for any other `i`.
#   * If no match is found, the result is `(0,0)`.
function search(s::String, c::Chars, i::Integer)
    if isempty(c)
        return 1 <= i <= length(s)+1 ? (i,i) :
               i == length(s)+2      ? (0,0) :
               error("index out of range")
    end
    j = strchr(s,c,i)
    # strchr reports "not found" as 0; return (0,0) in that case instead of
    # computing nextind(s,0), so the second element is 0 as well.
    j == 0 ? (0,0) : (j, nextind(s,j))
end
search(s::String, c::Chars) = search(s,c,start(s))

function search(s::String, t::String, i::Integer)
Expand Down Expand Up @@ -447,6 +456,8 @@ strcat(xs...) = string(xs...) # backwards compat

print(io::IO, s::RopeString) = print(io, s.head, s.tail)

write(io::IO, s::RopeString) = (write(io, s.head); write(io, s.tail))

## transformed strings ##

type TransformedString <: String
Expand All @@ -465,17 +476,57 @@ end

## uppercase and lowercase transformations ##

# Named character transforms used by the case-conversion functions. Each one
# takes a character `c` and an index `i` (presumably the position of `c` in
# the string being transformed -- confirm against TransformedString) and
# returns the transformed character. Binding them to constants lets
# _transfunc_compose recognize them by comparison.
#
#   _TF_U : uppercase every character
#   _TF_L : lowercase every character
#   _TF_u : uppercase the character at i==1, leave the rest unchanged
#   _TF_l : lowercase the character at i==1, leave the rest unchanged
#   _TF_C : uppercase the character at i==1, lowercase the rest
#   _TF_c : lowercase the character at i==1, uppercase the rest
const _TF_U = (c,i)->uppercase(c)
const _TF_L = (c,i)->lowercase(c)
const _TF_u = (c,i)->i==1 ? uppercase(c) : c
const _TF_l = (c,i)->i==1 ? lowercase(c) : c
const _TF_C = (c,i)->i==1 ? uppercase(c) : lowercase(c)
const _TF_c = (c,i)->i==1 ? lowercase(c) : uppercase(c)

uppercase(c::Char) = ccall(:towupper, Char, (Char,), c)
lowercase(c::Char) = ccall(:towlower, Char, (Char,), c)

uppercase(c::Uint8) = ccall(:toupper, Uint8, (Uint8,), c)
lowercase(c::Uint8) = ccall(:tolower, Uint8, (Uint8,), c)

uppercase(s::String) = TransformedString((c,i)->uppercase(c), s)
lowercase(s::String) = TransformedString((c,i)->lowercase(c), s)
uppercase(s::String) = TransformedString(_TF_U, s)
lowercase(s::String) = TransformedString(_TF_L, s)

ucfirst(s::String) = TransformedString((c,i)->i==1 ? uppercase(c) : c, s)
lcfirst(s::String) = TransformedString((c,i)->i==1 ? lowercase(c) : c, s)
ucfirst(s::String) = TransformedString(_TF_u, s)
lcfirst(s::String) = TransformedString(_TF_l, s)

# Collapse "apply f1, then f2" into a single known case transform.
#
# Returns one of the _TF_* constants equivalent to applying f1 followed by
# f2, or `nothing` when either argument is not one of those constants.
function _transfunc_compose(f2::Function, f1::Function)
    known = [_TF_U, _TF_L, _TF_u, _TF_l, _TF_C, _TF_c]
    if !(contains(known, f2) && contains(known, f1))
        return nothing
    end

    # Transforms that rewrite every character discard whatever f1 did.
    if f2 == _TF_U || f2 == _TF_L || f2 == _TF_C || f2 == _TF_c
        return f2
    end

    # Re-applying the same transform is a no-op.
    if f2 == f1
        return f2
    end

    if f2 == _TF_u
        # f2 only uppercases the first character.
        if f1 == _TF_l
            return f2
        elseif f1 == _TF_U || f1 == _TF_C
            return f1
        elseif f1 == _TF_L
            return _TF_C
        elseif f1 == _TF_c
            return _TF_U
        end
    elseif f2 == _TF_l
        # f2 only lowercases the first character.
        if f1 == _TF_u
            return f2
        elseif f1 == _TF_L || f1 == _TF_c
            return f1
        elseif f1 == _TF_U
            return _TF_c
        elseif f1 == _TF_C
            return _TF_L
        end
    end

    error("this is a bug")
end

# Wrap an already-transformed string in another transform.
#
# When the new transform and the existing one can be merged by
# _transfunc_compose, build a single TransformedString over the underlying
# string; otherwise fall back to the generic (Function, String) method and
# nest the wrappers.
function TransformedString(transform::Function, s::TransformedString)
    merged = _transfunc_compose(transform, s.transform)
    merged === nothing ?
        invoke(TransformedString, (Function, String), transform, s) :
        TransformedString(merged, s.string)
end

const uc = uppercase
const lc = lowercase
Expand All @@ -500,7 +551,7 @@ function filter(f::Function, s::String)
takebuf_string(out)
end

has(s::String, c::Char) = has(Set(s...), c)
has(s::String, c::Char) = contains(s, c)

## string promotion rules ##

Expand Down Expand Up @@ -864,7 +915,7 @@ function lpad(s::String, n::Integer, p::String)
if m <= 0; return s; end
l = strlen(p)
if l==1
return p^m * s
return bytestring(p^m * s)
end
q = div(m,l)
r = m - q*l
Expand All @@ -876,7 +927,7 @@ function rpad(s::String, n::Integer, p::String)
if m <= 0; return s; end
l = strlen(p)
if l==1
return s * p^m
return bytestring(s * p^m)
end
q = div(m,l)
r = m - q*l
Expand Down
2 changes: 1 addition & 1 deletion base/utf8.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ lcfirst(s::UTF8String) = string(lowercase(s[1]), s[2:])
## outputting UTF-8 strings ##

print(io::IO, s::UTF8String) = (write(io, s.data);nothing)
write(io, s::UTF8String) = write(io, s.data)
write(io::IO, s::UTF8String) = write(io, s.data)

## transcoding to UTF-8 ##

Expand Down
44 changes: 38 additions & 6 deletions doc/helpdb.jl
Original file line number Diff line number Diff line change
Expand Up @@ -549,21 +549,36 @@ collection[key...] = value
"),

(E"Strings",E"ASCIIString",E"ASCIIString(::Array{Uint8, 1})
(E"Strings",E"ascii",E"ascii(::Array{Uint8, 1})
Create an ASCII string from a byte array.
"),

(E"Strings",E"UTF8String",E"UTF8String(::Array{Uint8, 1})
(E"Strings",E"ascii",E"ascii(s)
Convert a string to a contiguous ASCII string (all characters must
be valid ASCII characters).
"),

(E"Strings",E"utf8",E"utf8(::Array{Uint8, 1})
Create a UTF-8 string from a byte array.
"),

(E"Strings",E"utf8",E"utf8(s)
Convert a string to a contiguous UTF-8 string (all characters must
be valid UTF-8 characters).
"),

(E"Strings",E"strchr",E"strchr(string, char[, i])
Return the index of 'char' in 'string', giving 0 if not found. The
second argument may also be a vector or a set of characters. The
third argument optionally specifies a starting index.
"),
Expand All @@ -582,12 +597,29 @@ collection[key...] = value
"),

(E"Strings",E"split",E"split(string, char, include_empty)
(E"Strings",E"search",E"search(string, chars[, start])
Search for the given characters within the given string. The second
argument may be a single character, a vector or a set of
characters, a string, or a regular expression (but regular
expressions are only allowed on contiguous strings, such as ASCII
or UTF-8 strings). The third argument optionally specifies a
starting index. The return value is a tuple with 2 integers: the
index of the match and the first valid index past the match (or an
index beyond the end of the string if the match is at the end); it
returns '(0,0)' if no match was found, and '(start,start)' if
'chars' is empty.
"),

(E"Strings",E"split",E"split(string, chars[, limit][, include_empty])
Return an array of strings by splitting the given string on
occurrences of the given character delimiter. The second argument
may also be a set of character delimiters to use. The third
argument specifies whether empty fields should be included.
occurrences of the given character delimiters, which may be
specified in any of the formats allowed by 'search''s second
argument. The last two arguments are optional; they are a
maximum size for the result and a flag determining whether empty
fields should be included in the result.
"),

Expand Down
2 changes: 1 addition & 1 deletion doc/manual/metaprogramming.rst
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ cause a compile-time error:
::

julia> $a + b
not supported
unsupported or misplaced expression $

.. _man-macros:

Expand Down
22 changes: 17 additions & 5 deletions doc/stdlib/base.rst
Original file line number Diff line number Diff line change
Expand Up @@ -401,17 +401,25 @@ Strings

Convert a string to a contiguous byte array representation appropriate for passing it to C functions.

.. function:: ASCIIString(::Array{Uint8,1})
.. function:: ascii(::Array{Uint8,1})

Create an ASCII string from a byte array.

.. function:: UTF8String(::Array{Uint8,1})
.. function:: ascii(s)

Convert a string to a contiguous ASCII string (all characters must be valid ASCII characters).

.. function:: utf8(::Array{Uint8,1})

Create a UTF-8 string from a byte array.

.. function:: utf8(s)

Convert a string to a contiguous UTF-8 string (all characters must be valid UTF-8 characters).

.. function:: strchr(string, char[, i])

Return the index of ``char`` in ``string``, giving 0 if not found. The third argument optionally specifies a starting index.
Return the index of ``char`` in ``string``, giving 0 if not found. The second argument may also be a vector or a set of characters. The third argument optionally specifies a starting index.

.. function:: lpad(string, n, p)

Expand All @@ -421,9 +429,13 @@ Strings

Make a string at least ``n`` characters long by padding on the right with copies of ``p``.

.. function:: split(string, char, include_empty)
.. function:: search(string, chars[, start])

Search for the given characters within the given string. The second argument may be a single character, a vector or a set of characters, a string, or a regular expression (but regular expressions are only allowed on contiguous strings, such as ASCII or UTF-8 strings). The third argument optionally specifies a starting index. The return value is a tuple with 2 integers: the index of the match and the first valid index past the match (or an index beyond the end of the string if the match is at the end); it returns ``(0,0)`` if no match was found, and ``(start,start)`` if ``chars`` is empty.

.. function:: split(string, chars[, limit][, include_empty])

Return an array of strings by splitting the given string on occurrences of the given character delimiter. The second argument may also be a set of character delimiters to use. The third argument specifies whether empty fields should be included.
Return an array of strings by splitting the given string on occurrences of the given character delimiters, which may be specified in any of the formats allowed by ``search``'s second argument. The last two arguments are optional; they are a maximum size for the result and a flag determining whether empty fields should be included in the result.

.. function:: strip(string)

Expand Down

0 comments on commit 656e0d3

Please sign in to comment.