Skip to content

Commit

Permalink
perf: to_writer use utf8 offset and WithIndices use OnceCell (#62)
Browse files Browse the repository at this point in the history
* perf: to_writer use utf8 offset

* perf: WithIndices use OnceCell
  • Loading branch information
jerrykingxyz authored Sep 8, 2023
1 parent d9ff439 commit 74c8edd
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 40 deletions.
32 changes: 5 additions & 27 deletions src/replace_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ pub struct ReplaceSource<T> {
struct Replacement {
start: u32,
end: u32,
char_start: OnceCell<u32>,
char_end: OnceCell<u32>,
content: String,
name: Option<String>,
}
Expand Down Expand Up @@ -79,26 +77,10 @@ impl Replacement {
Self {
start,
end,
char_start: OnceCell::new(),
char_end: OnceCell::new(),
content,
name,
}
}

pub fn char_start(&self, inner_source_code: &str) -> u32 {
*self.char_start.get_or_init(|| {
str_indices::chars::from_byte_idx(inner_source_code, self.start as usize)
as u32
})
}

pub fn char_end(&self, inner_source_code: &str) -> u32 {
*self.char_end.get_or_init(|| {
str_indices::chars::from_byte_idx(inner_source_code, self.end as usize)
as u32
})
}
}

impl<T> ReplaceSource<T> {
Expand Down Expand Up @@ -158,30 +140,26 @@ impl<T: Source + Hash + PartialEq + Eq + 'static> Source for ReplaceSource<T> {
self.sort_replacement();

let inner_source_code = self.get_inner_source_code();
let inner_source_code_with_indices = WithIndices::new(inner_source_code);

// mut_string_push_str is faster than vec join
// concatenate strings benchmark, see https://github.com/hoodie/concatenation_benchmarks-rs
let mut source_code = String::new();
let mut inner_pos = 0;
for replacement in self.replacements.lock().iter() {
if inner_pos < replacement.char_start(inner_source_code) {
let end_pos = (replacement.char_start(inner_source_code) as usize)
.min(inner_source_code.len());
source_code.push_str(
inner_source_code_with_indices.substring(inner_pos as usize, end_pos),
);
if inner_pos < replacement.start {
let end_pos = (replacement.start as usize).min(inner_source_code.len());
source_code.push_str(&inner_source_code[inner_pos as usize..end_pos]);
}
source_code.push_str(&replacement.content);
#[allow(clippy::manual_clamp)]
{
inner_pos = inner_pos
.max(replacement.char_end(inner_source_code))
.max(replacement.end)
.min(inner_source_code.len() as u32);
}
}
source_code.push_str(
inner_source_code_with_indices.substring(inner_pos as usize, usize::MAX),
&inner_source_code[inner_pos as usize..inner_source_code.len()],
);

source_code.into()
Expand Down
32 changes: 19 additions & 13 deletions src/with_indices.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,17 @@
use once_cell::sync::OnceCell;

#[derive(Debug, Clone)]
pub struct WithIndices<T: AsRef<str>> {
/// line is a string reference
pub line: T,
/// the byte position of each `char` in the `line` string slice.
pub indices_indexes: Box<[u32]>,
pub indices_indexes: OnceCell<Box<[u32]>>,
}

impl<T: AsRef<str>> WithIndices<T> {
pub fn new(line: T) -> Self {
Self {
indices_indexes: line
.as_ref()
.char_indices()
.map(|(i, _)| i as u32)
.collect::<Vec<_>>()
.into_boxed_slice(),
indices_indexes: OnceCell::new(),
line,
}
}
Expand All @@ -26,17 +23,26 @@ impl<T: AsRef<str>> WithIndices<T> {
return "";
}

let indices_indexes = self.indices_indexes.get_or_init(|| {
self
.line
.as_ref()
.char_indices()
.map(|(i, _)| i as u32)
.collect::<Vec<_>>()
.into_boxed_slice()
});

let str_len = self.line.as_ref().len() as u32;
let start = *self.indices_indexes.get(start_index).unwrap_or(&str_len);
let end = *self.indices_indexes.get(end_index).unwrap_or(&str_len);
let start =
indices_indexes.get(start_index).unwrap_or(&str_len).clone() as usize;
let end =
indices_indexes.get(end_index).unwrap_or(&str_len).clone() as usize;
unsafe {
// SAFETY: Since `indices_indexes` is built from the `CharIndices` of `self.line`, the offsets
// obtained from it (or the `str_len` fallback) are always within the bounds of `self.line`
// and always lie on UTF-8 sequence boundaries.
self
.line
.as_ref()
.get_unchecked(start as usize..end as usize)
self.line.as_ref().get_unchecked(start..end)
}
}
}
Expand Down

1 comment on commit 74c8edd

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 74c8edd Previous: 51b1532 Ratio
benchmark_concat_generate_base64 29300 ns/iter (± 561) 24224 ns/iter (± 170) 1.21
benchmark_concat_generate_base64_with_cache 18292 ns/iter (± 124) 15135 ns/iter (± 95) 1.21
benchmark_concat_generate_string 14878 ns/iter (± 86) 12394 ns/iter (± 67) 1.20
benchmark_concat_generate_string_with_cache 4133 ns/iter (± 19) 3466 ns/iter (± 15) 1.19

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.