Skip to content

Commit

Permalink
use lib method
Browse files Browse the repository at this point in the history
update doc and test

Signed-off-by: remzi <13716567376yh@gmail.com>
  • Loading branch information
HaoYang670 committed Mar 30, 2022
1 parent cd6029e commit bc9ef92
Showing 1 changed file with 10 additions and 32 deletions.
42 changes: 10 additions & 32 deletions arrow/src/array/array_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,37 +78,13 @@ impl<OffsetSize: StringOffsetSizeTrait> GenericStringArray<OffsetSize> {
self.data.buffers()[1].clone()
}

/// Returns the number of chars in the string at index `i`.
/// # Panic
/// If an invalid utf-8 byte is found, the function will panic.
/// However, this function does not check every byte. So you might
/// get an unexpected result if the string is in invalid utf-8 format.
/// Returns the number of `Unicode Scalar Value`s in the string at index `i`.
/// # Performance
/// This function has `O(n)` time complexity where `n` is the string length.
/// If you can make sure that all chars in the string are in the range `U+0000` ~ `U+007F`
/// (each such char occupies exactly one byte in UTF-8, so the byte length equals the char count),
/// please use the function [`value_length`](#method.value_length) which has O(1) time complexity.
pub fn num_chars(&self, i: usize) -> usize {
let offsets = self.value_offsets();
let start = offsets[i].to_usize().unwrap();
let end = offsets[i + 1].to_usize().unwrap();
let chars = &self.data.buffers()[1].as_slice()[start..end];

let mut char_iter = chars.iter();
let mut length: usize = 0;
while let Some(prefix) = char_iter.next() {
let ones = prefix.leading_ones() as usize;
match ones {
0 => {}
2..=4 => {
char_iter.nth(ones - 2);
}
_ => {
panic!("invalid utf-8 format");
}
};
length += 1;
}
length
self.value(i).chars().count()
}

/// Returns the element at index
Expand Down Expand Up @@ -410,9 +386,9 @@ mod tests {

#[test]
fn test_string_array_from_u8_slice() {
let values: Vec<&str> = vec!["hello", "", "parquet"];
let values: Vec<&str> = vec!["hello", "", "A£ऀ𖼚𝌆৩ƐZ"];

// Array data: ["hello", "", "parquet"]
// Array data: ["hello", "", "A£ऀ𖼚𝌆৩ƐZ"]
let string_array = StringArray::from(values);

assert_eq!(3, string_array.len());
Expand All @@ -421,10 +397,12 @@ mod tests {
assert_eq!("hello", unsafe { string_array.value_unchecked(0) });
assert_eq!("", string_array.value(1));
assert_eq!("", unsafe { string_array.value_unchecked(1) });
assert_eq!("parquet", string_array.value(2));
assert_eq!("parquet", unsafe { string_array.value_unchecked(2) });
assert_eq!(5, string_array.value_offsets()[2]);
assert_eq!(7, string_array.value_length(2));
assert_eq!("A£ऀ𖼚𝌆৩ƐZ", string_array.value(2));
assert_eq!("A£ऀ𖼚𝌆৩ƐZ", unsafe {
string_array.value_unchecked(2)
});
assert_eq!(20, string_array.value_length(2)); // 1 + 2 + 3 + 4 + 4 + 3 + 2 + 1
assert_eq!(8, string_array.num_chars(2));
for i in 0..3 {
assert!(string_array.is_valid(i));
assert!(!string_array.is_null(i));
Expand Down

0 comments on commit bc9ef92

Please sign in to comment.