Skip to content

Commit a2e7c4d

Browse files
committed Oct 4, 2014
auto merge of #17738 : hoeppnertill/rust/master, r=alexcrichton
There is an issue with `lev_distance`, where `fn main() { println!("{}", "\x80".lev_distance("\x80")) }` prints `2` instead of the expected `0` for two identical strings. This is due to using the byte length instead of the char length.
2 parents e434aa1 + 3aea7f1 commit a2e7c4d

File tree

1 file changed

+28
-9
lines changed

1 file changed

+28
-9
lines changed
 

‎src/libcollections/str.rs

+28-9
Original file line number | Diff line number | Diff line change
@@ -778,13 +778,11 @@ pub trait StrAllocating: Str {
778778
/// Returns the Levenshtein Distance between two strings.
779779
fn lev_distance(&self, t: &str) -> uint {
780780
let me = self.as_slice();
781-
let slen = me.len();
782-
let tlen = t.len();
781+
if me.is_empty() { return t.char_len(); }
782+
if t.is_empty() { return me.char_len(); }
783783

784-
if slen == 0 { return tlen; }
785-
if tlen == 0 { return slen; }
786-
787-
let mut dcol = Vec::from_fn(tlen + 1, |x| x);
784+
let mut dcol = Vec::from_fn(t.len() + 1, |x| x);
785+
let mut t_last = 0;
788786

789787
for (i, sc) in me.chars().enumerate() {
790788

@@ -799,15 +797,15 @@ pub trait StrAllocating: Str {
799797
*dcol.get_mut(j + 1) = current;
800798
} else {
801799
*dcol.get_mut(j + 1) = cmp::min(current, next);
802-
*dcol.get_mut(j + 1) = cmp::min(dcol[j + 1],
803-
dcol[j]) + 1;
800+
*dcol.get_mut(j + 1) = cmp::min(dcol[j + 1], dcol[j]) + 1;
804801
}
805802

806803
current = next;
804+
t_last = j;
807805
}
808806
}
809807

810-
return dcol[tlen];
808+
dcol[t_last + 1]
811809
}
812810

813811
/// Returns an iterator over the string in Unicode Normalization Form D
@@ -1878,6 +1876,27 @@ mod tests {
18781876
assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
18791877
}
18801878

1879+
#[test]
1880+
fn test_lev_distance() {
1881+
use std::char::{ from_u32, MAX };
1882+
// Test bytelength agnosticity
1883+
for c in range(0u32, MAX as u32)
1884+
.filter_map(|i| from_u32(i))
1885+
.map(|i| String::from_char(1, i)) {
1886+
assert_eq!(c[].lev_distance(c[]), 0);
1887+
}
1888+
1889+
let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1890+
let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
1891+
let c = "Mary häd ä little lämb\n\nLittle lämb\n";
1892+
assert_eq!(a.lev_distance(b), 1);
1893+
assert_eq!(b.lev_distance(a), 1);
1894+
assert_eq!(a.lev_distance(c), 2);
1895+
assert_eq!(c.lev_distance(a), 2);
1896+
assert_eq!(b.lev_distance(c), 1);
1897+
assert_eq!(c.lev_distance(b), 1);
1898+
}
1899+
18811900
#[test]
18821901
fn test_nfd_chars() {
18831902
macro_rules! t {

0 commit comments

Comments
 (0)
Please sign in to comment.