From 18b68ff90e48676f6d41f00eb3b8d2c13be6e683 Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Mon, 14 Oct 2024 01:09:01 +0000 Subject: [PATCH] perf(codegen): optimize `CodeBuffer::print_ascii_byte` (#6516) Optimize `CodeBuffer`'s `print_byte_unchecked` and `print_ascii_byte` methods by making a fast path for when the buffer has sufficient capacity to be pushed to without growing. As discussed in https://github.com/oxc-project/oxc/pull/6148#issuecomment-2381635390 --- Cargo.lock | 1 + crates/oxc_codegen/Cargo.toml | 1 + crates/oxc_codegen/src/code_buffer.rs | 40 +++++++++++++++++++++++---- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dc2d6088b9eca..09b4fed60edb5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1536,6 +1536,7 @@ dependencies = [ name = "oxc_codegen" version = "0.31.0" dependencies = [ + "assert-unchecked", "base64", "bitflags 2.6.0", "cow-utils", diff --git a/crates/oxc_codegen/Cargo.toml b/crates/oxc_codegen/Cargo.toml index 82d8d756a4566..1ba96c14f8414 100644 --- a/crates/oxc_codegen/Cargo.toml +++ b/crates/oxc_codegen/Cargo.toml @@ -28,6 +28,7 @@ oxc_sourcemap = { workspace = true } oxc_span = { workspace = true } oxc_syntax = { workspace = true, features = ["to_js_string"] } +assert-unchecked = { workspace = true } bitflags = { workspace = true } cow-utils = { workspace = true } daachorse = { workspace = true } diff --git a/crates/oxc_codegen/src/code_buffer.rs b/crates/oxc_codegen/src/code_buffer.rs index 20c43ee3d305a..d8bb144bafb56 100644 --- a/crates/oxc_codegen/src/code_buffer.rs +++ b/crates/oxc_codegen/src/code_buffer.rs @@ -1,5 +1,7 @@ use std::mem; +use assert_unchecked::assert_unchecked; + /// A string builder for constructing source code. /// /// `CodeBuffer` provides safe abstractions over a byte array. 
@@ -152,11 +154,12 @@ impl CodeBuffer { /// ``` #[inline] pub fn print_ascii_byte(&mut self, byte: u8) { - // NOTE: since this method is inlined, this assertion should get - // optimized away by the compiler when the value of `byte` is known, - // e.g. when printing a constant. + // When this method is inlined, and the value of `byte` is known, this assertion should + // get optimized away by the compiler, e.g. `code_buffer.print_ascii_byte(b' ')`. assert!(byte.is_ascii(), "byte {byte} is not ASCII"); - self.buf.push(byte); + + // SAFETY: `byte` is an ASCII character + unsafe { self.print_byte_unchecked(byte) } } /// Push a byte to the buffer, without checking that the buffer still represents a valid @@ -200,7 +203,34 @@ impl CodeBuffer { /// [`print_bytes_unchecked`]: CodeBuffer::print_bytes_unchecked #[inline] pub unsafe fn print_byte_unchecked(&mut self, byte: u8) { - self.buf.push(byte); + // By default, `self.buf.push(byte)` results in quite verbose assembly, because the default + // branch is for the "buf is full to capacity" case. + // + // That's not ideal because the growth strategy is doubling, so e.g. when the `Vec` has just grown + // from 1024 bytes to 2048 bytes, it won't need to grow again until another 1024 bytes have + // been pushed. "Needs to grow" is a very rare occurrence. + // + // So we use `push_slow` to move the complicated logic for the "needs to grow" path out of + // `print_byte_unchecked`, leaving a fast path for the common "there is sufficient capacity" case. + // https://godbolt.org/z/Kv8sEoEed + // https://github.com/oxc-project/oxc/pull/6148#issuecomment-2381635390 + #[cold] + #[inline(never)] + fn push_slow(code_buffer: &mut CodeBuffer, byte: u8) { + let buf = &mut code_buffer.buf; + // SAFETY: We only call this function below if `buf.len() == buf.capacity()`. + // This function is not inlined, so we need this assertion to help the compiler + // understand this fact. 
+ unsafe { assert_unchecked!(buf.len() == buf.capacity()) } + buf.push(byte); + } + + #[expect(clippy::if_not_else)] + if self.buf.len() != self.buf.capacity() { + self.buf.push(byte); + } else { + push_slow(self, byte); + } } /// Push a single Unicode character into the buffer.