Skip to content

Commit

Permalink
refactor: join get_row_key()
Browse files Browse the repository at this point in the history
for performance:
- do case-insensitive check only once
- inline util::transform and do traditional to_lowercase instead of lowercase_into
  • Loading branch information
jqnatividad committed Jan 10, 2025
1 parent 1dcfb4d commit 93aacb7
Showing 1 changed file with 21 additions and 1 deletion.
22 changes: 21 additions & 1 deletion src/cmd/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,27 @@ impl<R> fmt::Debug for ValueIndex<R> {

/// Builds the join key for `row` from the fields picked out by `sel`.
///
/// Every selected field is normalized before it becomes part of the key:
/// - a field that is valid UTF-8 has leading/trailing whitespace trimmed and,
///   when `casei` is `true`, is additionally lowercased;
/// - a field that is not valid UTF-8 is copied through unchanged.
///
/// The `casei` test is deliberately done once, outside the per-field closure,
/// so it is not re-evaluated for every selected field of the row.
#[inline]
fn get_row_key(sel: &Selection, row: &csv::ByteRecord, casei: bool) -> Vec<ByteString> {
    if casei {
        // Case-insensitive keys: trim, then lowercase into a fresh byte buffer.
        sel.select(row)
            .map(|v| match simdutf8::basic::from_utf8(v) {
                Ok(s) => s.trim().to_lowercase().into_bytes(),
                // Not valid UTF-8: keep the raw bytes as the key component.
                Err(_) => v.to_vec(),
            })
            .collect()
    } else {
        // Case-sensitive keys: only trim surrounding whitespace.
        sel.select(row)
            .map(|v| match simdutf8::basic::from_utf8(v) {
                Ok(s) => s.trim().as_bytes().to_vec(),
                // Not valid UTF-8: keep the raw bytes as the key component.
                Err(_) => v.to_vec(),
            })
            .collect()
    }
}

struct KeysWriter {
Expand Down

0 comments on commit 93aacb7

Please sign in to comment.