diff --git a/src/cargo/cargo.rs b/src/cargo/cargo.rs index 8536bf7d830ab..ac1727ffdb7c8 100644 --- a/src/cargo/cargo.rs +++ b/src/cargo/cargo.rs @@ -651,25 +651,27 @@ fn cmd_install(c: cargo) unsafe { if str::starts_with(target, "uuid:") { let uuid = rest(target, 5u); - let idx = str::index(uuid, '/' as u8); - if idx != -1 { - let source = str::unsafe::slice_bytes(uuid, 0u, idx as uint); - uuid = str::unsafe::slice_bytes(uuid, idx as uint + 1u, - str::byte_len(uuid)); - install_uuid_specific(c, wd, source, uuid); - } else { - install_uuid(c, wd, uuid); + alt str::index(uuid, '/') { + option::some(idx) { + let source = str::slice(uuid, 0u, idx); + uuid = str::slice(uuid, idx + 1u, str::char_len(uuid)); + install_uuid_specific(c, wd, source, uuid); + } + option::none { + install_uuid(c, wd, uuid); + } } } else { let name = target; - let idx = str::index(name, '/' as u8); - if idx != -1 { - let source = str::unsafe::slice_bytes(name, 0u, idx as uint); - name = str::unsafe::slice_bytes(name, idx as uint + 1u, - str::byte_len(name)); - install_named_specific(c, wd, source, name); - } else { - install_named(c, wd, name); + alt str::index(name, '/') { + option::some(idx) { + let source = str::slice(name, 0u, idx); + name = str::slice(name, idx + 1u, str::char_len(name)); + install_named_specific(c, wd, source, name); + } + option::none { + install_named(c, wd, name); + } } } } diff --git a/src/comp/back/link.rs b/src/comp/back/link.rs index cb2070c124c5a..5fa7841c97246 100644 --- a/src/comp/back/link.rs +++ b/src/comp/back/link.rs @@ -109,14 +109,16 @@ mod write { // Decides what to call an intermediate file, given the name of the output // and the extension to use. fn mk_intermediate_name(output_path: str, extension: str) -> str unsafe { - let dot_pos = str::index(output_path, '.' 
as u8); - let stem; - if dot_pos < 0 { - stem = output_path; - } else { stem = str::unsafe::slice_bytes(output_path, 0u, - dot_pos as uint); } + let stem = alt str::index(output_path, '.') { + option::some(dot_pos) { + str::slice(output_path, 0u, dot_pos) + } + option::none { output_path } + }; + ret stem + "." + extension; } + fn run_passes(sess: session, llmod: ModuleRef, output: str) { let opts = sess.opts; if opts.time_llvm_passes { llvm::LLVMRustEnableTimePasses(); } diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs index 5d8d7ff563332..cb2590e195188 100644 --- a/src/comp/syntax/codemap.rs +++ b/src/comp/syntax/codemap.rs @@ -119,16 +119,13 @@ fn get_line(fm: filemap, line: int) -> str unsafe { let end: uint; if line as uint < vec::len(fm.lines) - 1u { end = fm.lines[line + 1].byte - fm.start_pos.byte; + ret str::unsafe::slice_bytes(*fm.src, begin, end); } else { // If we're not done parsing the file, we're at the limit of what's // parsed. If we just slice the rest of the string, we'll print out // the remainder of the file, which is undesirable. 
- end = str::byte_len(*fm.src); - let rest = str::unsafe::slice_bytes(*fm.src, begin, end); - let newline = str::index(rest, '\n' as u8); - if newline != -1 { end = begin + (newline as uint); } + ret str::splitn_char(*fm.src, '\n', 1u)[0]; } - ret str::unsafe::slice_bytes(*fm.src, begin, end); } fn lookup_byte_offset(cm: codemap::codemap, chpos: uint) diff --git a/src/fuzzer/fuzzer.rs b/src/fuzzer/fuzzer.rs index a5cfb8db3f7cf..9790ec02ff45b 100644 --- a/src/fuzzer/fuzzer.rs +++ b/src/fuzzer/fuzzer.rs @@ -283,10 +283,9 @@ fn check_variants_T( } } -fn last_part(filename: str) -> str unsafe { - let ix = str::rindex(filename, 47u8 /* '/' */); - assert ix >= 0; - str::unsafe::slice_bytes(filename, ix as uint + 1u, str::byte_len(filename) - 3u) +fn last_part(filename: str) -> str { + let ix = option::get(str::rindex(filename, '/')); + str::slice(filename, ix + 1u, str::char_len(filename) - 3u) } enum happiness { passed, cleanly_rejected(str), known_bug(str), failed(str), } diff --git a/src/libcore/str.rs b/src/libcore/str.rs index b98d96bb901a0..ca7bb819443b6 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -253,15 +253,12 @@ Function: pop_char Remove the final character from a string and return it. Failure: - If the string does not contain any characters. 
*/ fn pop_char(&s: str) -> char unsafe { let end = byte_len(s); - while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; } - assert (end > 0u); - let ch = char_at(s, end - 1u); - s = unsafe::slice_bytes(s, 0u, end - 1u); + let {ch:ch, prev:end} = char_range_at_reverse(s, end); + s = unsafe::slice_bytes(s, 0u, end); ret ch; } @@ -868,32 +865,50 @@ fn lines_iter(ss: str, ff: fn(&&str)) { Section: Searching */ -/* -Function: index +// Function: index +// +// Returns the index of the first matching char +// (as option some/none) +fn index(ss: str, cc: char) -> option { + let bii = 0u; + let cii = 0u; + let len = byte_len(ss); + while bii < len { + let {ch, next} = char_range_at(ss, bii); -Returns the index of the first matching byte. Returns -1 if -no match is found. + // found here? + if ch == cc { + ret option::some(cii); + } -FIXME: UTF-8 -*/ -fn index(s: str, c: u8) -> int { - let i: int = 0; - for k: u8 in s { if k == c { ret i; } i += 1; } - ret -1; -} + cii += 1u; + bii = next; + } -/* -Function: rindex + // wasn't found + ret option::none; +} -Returns the index of the last matching byte. Returns -1 -if no match is found. +// Function: rindex +// +// Returns the index of the last matching char +// (as option some/none) +fn rindex(ss: str, cc: char) -> option { + let bii = byte_len(ss); + let cii = char_len(ss); + while bii > 0u { + let {ch, prev} = char_range_at_reverse(ss, bii); + cii -= 1u; + bii = prev; + + // found here? 
+ if ch == cc { + ret option::some(cii); + } + } -FIXME: UTF-8 -*/ -fn rindex(s: str, c: u8) -> int { - let n: int = byte_len(s) as int; - while n >= 0 { if s[n] == c { ret n; } n -= 1; } - ret n; + // wasn't found + ret option::none; } /* @@ -1233,6 +1248,25 @@ Pluck a character out of a string */ fn char_at(s: str, i: uint) -> char { ret char_range_at(s, i).ch; } +// Function: char_range_at_reverse +// +// Given a byte position and a str, return the previous char and its position +// This function can be used to iterate over a unicode string in reverse. +fn char_range_at_reverse(ss: str, start: uint) -> {ch: char, prev: uint} { + let prev = start; + + // while there is a previous byte == 10...... + while prev > 0u && ss[prev - 1u] & 192u8 == tag_cont_u8 { + prev -= 1u; + } + + // now refer to the initial byte of previous char + prev -= 1u; + + let ch = char_at(ss, prev); + ret {ch:ch, prev:prev}; +} + /* Function: substr_all @@ -1442,13 +1476,42 @@ mod tests { } #[test] - fn test_index_and_rindex() { - assert (index("hello", 'e' as u8) == 1); - assert (index("hello", 'o' as u8) == 4); - assert (index("hello", 'z' as u8) == -1); - assert (rindex("hello", 'l' as u8) == 3); - assert (rindex("hello", 'h' as u8) == 0); - assert (rindex("hello", 'z' as u8) == -1); + fn test_index() { + assert ( index("hello", 'h') == option::some(0u)); + assert ( index("hello", 'e') == option::some(1u)); + assert ( index("hello", 'o') == option::some(4u)); + assert ( index("hello", 'z') == option::none); + } + + #[test] + fn test_rindex() { + assert (rindex("hello", 'l') == option::some(3u)); + assert (rindex("hello", 'o') == option::some(4u)); + assert (rindex("hello", 'h') == option::some(0u)); + assert (rindex("hello", 'z') == option::none); + } + + #[test] + fn test_pop_char() { + let data = "ประเทศไทย中华"; + let cc = pop_char(data); + assert "ประเทศไทย中" == data; + assert '华' == cc; + } + + #[test] + fn test_pop_char_2() { + let data2 = "华"; + let cc2 = pop_char(data2); + assert "" 
== data2; + assert '华' == cc2; + } + + #[test] + #[should_fail] + fn test_pop_char_fail() { + let data = ""; + let _cc3 = pop_char(data); } #[test] diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs index 2304445b95b18..de4789fdd3996 100644 --- a/src/libstd/fs.rs +++ b/src/libstd/fs.rs @@ -32,6 +32,22 @@ A path or fragment of a filesystem path */ type path = str; +fn splitDirnameBasename (pp: path) -> {dirname: str, basename: str} { + let ii; + alt str::rindex(pp, os_fs::path_sep) { + option::some(xx) { ii = xx; } + option::none { + alt str::rindex(pp, os_fs::alt_path_sep) { + option::some(xx) { ii = xx; } + option::none { ret {dirname: ".", basename: pp}; } + } + } + } + + ret {dirname: str::slice(pp, 0u, ii), + basename: str::slice(pp, ii + 1u, str::char_len(pp))}; +} + /* Function: dirname @@ -43,13 +59,8 @@ The dirname of "/usr/share" will be "/usr", but the dirname of If the path is not prefixed with a directory, then "." is returned. */ -fn dirname(p: path) -> path unsafe { - let i: int = str::rindex(p, os_fs::path_sep as u8); - if i == -1 { - i = str::rindex(p, os_fs::alt_path_sep as u8); - if i == -1 { ret "."; } - } - ret str::unsafe::slice_bytes(p, 0u, i as uint); +fn dirname(pp: path) -> path { + ret splitDirnameBasename(pp).dirname; } /* @@ -63,18 +74,10 @@ path separators in the path then the returned path is identical to the provided path. If an empty path is provided or the path ends with a path separator then an empty path is returned. 
*/ -fn basename(p: path) -> path unsafe { - let i: int = str::rindex(p, os_fs::path_sep as u8); - if i == -1 { - i = str::rindex(p, os_fs::alt_path_sep as u8); - if i == -1 { ret p; } - } - let len = str::byte_len(p); - if (i + 1) as uint >= len { ret p; } - ret str::unsafe::slice_bytes(p, (i + 1) as uint, len); +fn basename(pp: path) -> path { + ret splitDirnameBasename(pp).basename; } - // FIXME: Need some typestate to avoid bounds check when len(pre) == 0 /* Function: connect diff --git a/src/libstd/getopts.rs b/src/libstd/getopts.rs index 48d83f0e5c2c8..8288501defc31 100644 --- a/src/libstd/getopts.rs +++ b/src/libstd/getopts.rs @@ -230,16 +230,14 @@ fn getopts(args: [str], opts: [opt]) -> result unsafe { let i_arg = option::none::; if cur[1] == '-' as u8 { let tail = str::unsafe::slice_bytes(cur, 2u, curlen); - let eq = str::index(tail, '=' as u8); - if eq == -1 { + let tail_eq = str::splitn_char(tail, '=', 1u); + if vec::len(tail_eq) <= 1u { names = [long(tail)]; } else { names = - [long(str::unsafe::slice_bytes(tail,0u,eq as uint))]; + [long(tail_eq[0])]; i_arg = - option::some::(str::unsafe::slice_bytes(tail, - (eq as uint) + 1u, - curlen - 2u)); + option::some::(tail_eq[1]); } } else { let j = 1u;