From 74c8edd18b3b7486b33de709d0d509c27d16fb80 Mon Sep 17 00:00:00 2001 From: jinrui Date: Fri, 8 Sep 2023 18:52:57 +0800 Subject: [PATCH] perf: to_writer use utf8 offset and WithIndices use OnceCell (#62) * perf: to_writer use utf8 offset * perf: WithIndices use OnceCell --- src/replace_source.rs | 32 +++++--------------------------- src/with_indices.rs | 32 +++++++++++++++++++------------- 2 files changed, 24 insertions(+), 40 deletions(-) diff --git a/src/replace_source.rs b/src/replace_source.rs index d4ae954..20d1bd1 100644 --- a/src/replace_source.rs +++ b/src/replace_source.rs @@ -45,8 +45,6 @@ pub struct ReplaceSource { struct Replacement { start: u32, end: u32, - char_start: OnceCell<u32>, - char_end: OnceCell<u32>, content: String, name: Option<String>, } @@ -79,26 +77,10 @@ impl Replacement { Self { start, end, - char_start: OnceCell::new(), - char_end: OnceCell::new(), content, name, } } - - pub fn char_start(&self, inner_source_code: &str) -> u32 { - *self.char_start.get_or_init(|| { - str_indices::chars::from_byte_idx(inner_source_code, self.start as usize) - as u32 - }) - } - - pub fn char_end(&self, inner_source_code: &str) -> u32 { - *self.char_end.get_or_init(|| { - str_indices::chars::from_byte_idx(inner_source_code, self.end as usize) - as u32 - }) - } } impl ReplaceSource { @@ -158,30 +140,26 @@ impl Source for ReplaceSource { self.sort_replacement(); let inner_source_code = self.get_inner_source_code(); - let inner_source_code_with_indices = WithIndices::new(inner_source_code); // mut_string_push_str is faster that vec join // concatenate strings benchmark, see https://github.com/hoodie/concatenation_benchmarks-rs let mut source_code = String::new(); let mut inner_pos = 0; for replacement in self.replacements.lock().iter() { - if inner_pos < replacement.char_start(inner_source_code) { - let end_pos = (replacement.char_start(inner_source_code) as usize) - .min(inner_source_code.len()); - source_code.push_str( - inner_source_code_with_indices.substring(inner_pos 
as usize, end_pos), - ); + if inner_pos < replacement.start { + let end_pos = (replacement.start as usize).min(inner_source_code.len()); + source_code.push_str(&inner_source_code[inner_pos as usize..end_pos]); } source_code.push_str(&replacement.content); #[allow(clippy::manual_clamp)] { inner_pos = inner_pos - .max(replacement.char_end(inner_source_code)) + .max(replacement.end) .min(inner_source_code.len() as u32); } } source_code.push_str( - inner_source_code_with_indices.substring(inner_pos as usize, usize::MAX), + &inner_source_code[inner_pos as usize..inner_source_code.len()], ); source_code.into() diff --git a/src/with_indices.rs b/src/with_indices.rs index 69e7db1..974e2e7 100644 --- a/src/with_indices.rs +++ b/src/with_indices.rs @@ -1,20 +1,17 @@ +use once_cell::sync::OnceCell; + #[derive(Debug, Clone)] pub struct WithIndices<T: AsRef<str>> { /// line is a string reference pub line: T, /// the byte position of each `char` in `line` string slice . - pub indices_indexes: Box<[u32]>, + pub indices_indexes: OnceCell<Box<[u32]>>, } impl<T: AsRef<str>> WithIndices<T> { pub fn new(line: T) -> Self { Self { - indices_indexes: line - .as_ref() - .char_indices() - .map(|(i, _)| i as u32) - .collect::<Vec<_>>() - .into_boxed_slice(), + indices_indexes: OnceCell::new(), line, } } @@ -26,17 +23,26 @@ impl<T: AsRef<str>> WithIndices<T> { return ""; } + let indices_indexes = self.indices_indexes.get_or_init(|| { + self + .line + .as_ref() + .char_indices() + .map(|(i, _)| i as u32) + .collect::<Vec<_>>() + .into_boxed_slice() + }); + let str_len = self.line.as_ref().len() as u32; - let start = *self.indices_indexes.get(start_index).unwrap_or(&str_len); - let end = *self.indices_indexes.get(end_index).unwrap_or(&str_len); + let start = + indices_indexes.get(start_index).unwrap_or(&str_len).clone() as usize; + let end = + indices_indexes.get(end_index).unwrap_or(&str_len).clone() as usize; unsafe { // SAFETY: Since `indices` iterates over the `CharIndices` of `self`, we can guarantee // that the indices obtained from it will always be within 
the bounds of `self` and they // will always lie on UTF-8 sequence boundaries. - self - .line - .as_ref() - .get_unchecked(start as usize..end as usize) + self.line.as_ref().get_unchecked(start..end) } } }