From 1385c710742e61f46bff1947c9e0482998f21eec Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:38:51 +0000 Subject: [PATCH] perf(codegen): further reduce memory allocations in `generate_line_offset_tables` (#13056) #13054 added a nice optimization to `SourcemapBuilder`. During generation of line/offset tables, it reuses a single `Vec` for column indexes for each line, rather than creating a new `Vec` on each turn of the inner loop. This reduces the number of times that `Vec` may have to grow as column indexes get added to it. Take this optimization a step further by re-using the same `Vec` across *all* lines. The `columns` `Vec` is no longer consumed on each line; instead, its contents are copied into a boxed slice each time - except when reaching EOF, where we can consume `columns`, as its work is done. This memory-copying was likely happening anyway, as `Vec` -> `Box<[u32]>` conversion has to drop the spare capacity of the `Vec`, which will likely cause a reallocation. Also, avoid using iterators to create the boxed slices. `Vec::clone` followed by `Vec::into_boxed_slice` is a bit more explicit and so may help the compiler see that it only needs to allocate exactly `columns.len()` slots for the `Box<[u32]>`. Note: I also tried `columns.drain(..).collect()` instead of `columns.clone().into_boxed_slice()` + `columns.clear()`. But it looks like the `Drain` abstraction doesn't get completely removed by the compiler. https://godbolt.org/z/Trv47j4hP So I *think* `into_boxed_slice` is probably preferable. 
--- crates/oxc_codegen/src/sourcemap_builder.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/oxc_codegen/src/sourcemap_builder.rs b/crates/oxc_codegen/src/sourcemap_builder.rs index 098f90aa0e222..2fe16c1a26abd 100644 --- a/crates/oxc_codegen/src/sourcemap_builder.rs +++ b/crates/oxc_codegen/src/sourcemap_builder.rs @@ -312,6 +312,9 @@ impl<'a> SourcemapBuilder<'a> { let mut lines = vec![]; let mut column_offsets = IndexVec::new(); + // Used as a buffer to reduce memory reallocations + let mut columns = vec![]; + // Process content line-by-line. // For each line, start by assuming line will be entirely ASCII, and read byte-by-byte. // If line is all ASCII, UTF-8 columns and UTF-16 columns are the same, @@ -325,7 +328,6 @@ impl<'a> SourcemapBuilder<'a> { column_offsets_id: None, }); - let mut columns = vec![]; // Used as a buffer to reduce memory reallocations. let remaining = &content.as_bytes()[line_byte_offset as usize..]; for (byte_offset_from_line_start, b) in remaining.iter().enumerate() { #[expect(clippy::cast_possible_truncation)] @@ -351,8 +353,6 @@ impl<'a> SourcemapBuilder<'a> { line.column_offsets_id = Some(ColumnOffsetsId::from_usize(column_offsets.len())); - columns.clear(); - // Loop through rest of line char-by-char. // `chunk_byte_offset` in this loop is byte offset from start of this 1st // Unicode char. @@ -396,8 +396,9 @@ impl<'a> SourcemapBuilder<'a> { // Record column offsets column_offsets.push(ColumnOffsets { byte_offset_to_first: byte_offset_from_line_start, - columns: columns.into_iter().collect(), + columns: columns.clone().into_boxed_slice(), }); + columns.clear(); // Revert back to outer loop for next line continue 'lines; @@ -410,7 +411,7 @@ impl<'a> SourcemapBuilder<'a> { // Record column offsets column_offsets.push(ColumnOffsets { byte_offset_to_first: byte_offset_from_line_start, - columns: columns.into_iter().collect(), + columns: columns.into_boxed_slice(), }); break 'lines;