diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f828a27026b..f20ab59407be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - _Experimental_: Add `user-valid` and `user-invalid` variants ([#12370](https://github.com/tailwindlabs/tailwindcss/pull/12370)) - _Experimental_: Add `wrap-anywhere`, `wrap-break-word`, and `wrap-normal` utilities ([#12128](https://github.com/tailwindlabs/tailwindcss/pull/12128)) - Add `col-<number>` and `row-<number>` utilities for `grid-column` and `grid-row` ([#15183](https://github.com/tailwindlabs/tailwindcss/pull/15183)) +- Add new candidate extractor ([#16306](https://github.com/tailwindlabs/tailwindcss/pull/16306)) ### Fixed diff --git a/Cargo.lock b/Cargo.lock index f742de3eefb0..f87a6e889b0e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 +version = 4 [[package]] name = "aho-corasick" @@ -35,9 +35,9 @@ checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "bstr" -version = "1.10.0" +version = "1.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" +checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0" dependencies = [ "memchr", "regex-automata 0.4.8", @@ -268,9 +268,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "napi" -version = "2.16.11" +version = "2.16.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53575dfa17f208dd1ce3a2da2da4659aae393b256a472f2738a8586a6c4107fd" +checksum = "839ae2ee5e62c6348669c50098b187c08115bd3cced658c9c0bf945fca0fec83" dependencies = [ "bitflags", "ctor", @@ -281,15 +281,15 @@ dependencies = [ [[package]] name = "napi-build" -version = "2.0.1" +version = 
"2.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "882a73d9ef23e8dc2ebbffb6a6ae2ef467c0f18ac10711e4cc59c5485d41df0e" +checksum = "db836caddef23662b94e16bf1f26c40eceb09d6aee5d5b06a7ac199320b69b19" [[package]] name = "napi-derive" -version = "2.16.12" +version = "2.16.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17435f7a00bfdab20b0c27d9c56f58f6499e418252253081bfff448099da31d1" +checksum = "7cbe2585d8ac223f7d34f13701434b9d5f4eb9c332cccce8dee57ea18ab8ab0c" dependencies = [ "cfg-if", "convert_case", @@ -301,9 +301,9 @@ dependencies = [ [[package]] name = "napi-derive-backend" -version = "1.0.74" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "967c485e00f0bf3b1bdbe510a38a4606919cf1d34d9a37ad41f25a81aa077abe" +checksum = "1639aaa9eeb76e91c6ae66da8ce3e89e921cd3885e99ec85f4abacae72fc91bf" dependencies = [ "convert_case", "once_cell", @@ -450,9 +450,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustc-hash" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustix" diff --git a/crates/node/Cargo.toml b/crates/node/Cargo.toml index 0009076936ab..2222c643fd10 100644 --- a/crates/node/Cargo.toml +++ b/crates/node/Cargo.toml @@ -8,10 +8,10 @@ crate-type = ["cdylib"] [dependencies] # Default enable napi4 feature, see https://nodejs.org/api/n-api.html#node-api-version-matrix -napi = { version = "2.16.11", default-features = false, features = ["napi4"] } -napi-derive = "2.16.12" +napi = { version = "2.16.16", default-features = false, features = ["napi4"] } +napi-derive = "2.16.13" tailwindcss-oxide = { path = "../oxide" } -rayon = "1.5.3" +rayon = "1.10.0" [build-dependencies] 
-napi-build = "2.0.1" +napi-build = "2.1.4" diff --git a/crates/node/src/lib.rs b/crates/node/src/lib.rs index 2eff57be308d..5811698c3bdd 100644 --- a/crates/node/src/lib.rs +++ b/crates/node/src/lib.rs @@ -28,12 +28,23 @@ pub struct GlobEntry { pub pattern: String, } -impl From<ChangedContent> for tailwindcss_oxide::ChangedContent { +impl From<ChangedContent> for tailwindcss_oxide::ChangedContent<'_> { fn from(changed_content: ChangedContent) -> Self { - Self { - file: changed_content.file.map(Into::into), - content: changed_content.content, + if let Some(file) = changed_content.file { + return tailwindcss_oxide::ChangedContent::File( + file.into(), + changed_content.extension.into(), + ); + } + + if let Some(contents) = changed_content.content { + return tailwindcss_oxide::ChangedContent::Content( + contents, + changed_content.extension.into(), + ); } + + unreachable!() } } diff --git a/crates/oxide/Cargo.toml b/crates/oxide/Cargo.toml index 96d847b96e02..3964b93887bf 100644 --- a/crates/oxide/Cargo.toml +++ b/crates/oxide/Cargo.toml @@ -4,11 +4,11 @@ version = "0.1.0" edition = "2021" [dependencies] -bstr = "1.10.0" +bstr = "1.11.3" globwalk = "0.9.1" log = "0.4.22" rayon = "1.10.0" -fxhash = { package = "rustc-hash", version = "2.0.0" } +fxhash = { package = "rustc-hash", version = "2.1.1" } crossbeam = "0.8.4" tracing = { version = "0.1.40", features = [] } tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } @@ -20,3 +20,4 @@ fast-glob = "0.4.3" [dev-dependencies] tempfile = "3.13.0" + diff --git a/crates/oxide/src/cursor.rs b/crates/oxide/src/cursor.rs index 0e5ad6479e61..ebda110a26ab 100644 --- a/crates/oxide/src/cursor.rs +++ b/crates/oxide/src/cursor.rs @@ -41,14 +41,34 @@ impl<'a> Cursor<'a> { cursor } - pub fn rewind_by(&mut self, amount: usize) { - self.move_to(self.pos.saturating_sub(amount)); - } - pub fn advance_by(&mut self, amount: usize) { self.move_to(self.pos.saturating_add(amount)); } + #[inline(always)] + pub fn advance(&mut 
self) { + self.pos += 1; /* NOTE(review): only pos/prev/curr/next are updated here; at_start/at_end are not refreshed (move_to does refresh them) */ + + self.prev = self.curr; + self.curr = self.next; + self.next = *self + .input + .get(self.pos.saturating_add(1)) + .unwrap_or(&0x00u8); /* reading past the end of the input yields 0x00 */ + } + + #[inline(always)] + pub fn advance_twice(&mut self) { + self.pos += 2; /* skips the current byte and the one after it; at_start/at_end are not refreshed here either */ + + self.prev = self.next; /* the skipped byte (the old next) is now the byte just before the new position */ + self.curr = *self.input.get(self.pos).unwrap_or(&0x00u8); + self.next = *self + .input + .get(self.pos.saturating_add(1)) + .unwrap_or(&0x00u8); + } + pub fn move_to(&mut self, pos: usize) { let len = self.input.len(); let pos = pos.clamp(0, len); @@ -57,13 +77,9 @@ impl<'a> Cursor<'a> { self.at_start = pos == 0; self.at_end = pos + 1 >= len; - self.prev = if pos > 0 { self.input[pos - 1] } else { 0x00 }; - self.curr = if pos < len { self.input[pos] } else { 0x00 }; - self.next = if pos + 1 < len { - self.input[pos + 1] - } else { - 0x00 - }; + self.prev = *self.input.get(pos.wrapping_sub(1)).unwrap_or(&0x00u8); + self.curr = *self.input.get(pos).unwrap_or(&0x00u8); + self.next = *self.input.get(pos.saturating_add(1)).unwrap_or(&0x00u8); } } @@ -139,21 +155,5 @@ mod test { assert_eq!(cursor.prev, b'd'); assert_eq!(cursor.curr, 0x00); assert_eq!(cursor.next, 0x00); - - cursor.rewind_by(1); - assert_eq!(cursor.pos, 10); - assert!(!cursor.at_start); - assert!(cursor.at_end); - assert_eq!(cursor.prev, b'l'); - assert_eq!(cursor.curr, b'd'); - assert_eq!(cursor.next, 0x00); - - cursor.rewind_by(10); - assert_eq!(cursor.pos, 0); - assert!(cursor.at_start); - assert!(!cursor.at_end); - assert_eq!(cursor.prev, 0x00); - assert_eq!(cursor.curr, b'h'); - assert_eq!(cursor.next, b'e'); } } diff --git a/crates/oxide/src/extractor/arbitrary_property_machine.rs b/crates/oxide/src/extractor/arbitrary_property_machine.rs new file mode 100644 index 000000000000..4c8e7ef1eaca --- /dev/null +++ b/crates/oxide/src/extractor/arbitrary_property_machine.rs @@ -0,0 +1,427 @@ +use crate::cursor; +use crate::extractor::bracket_stack::BracketStack; +use crate::extractor::machine::{Machine, MachineState}; +use
crate::extractor::string_machine::StringMachine; +use crate::extractor::CssVariableMachine; + +/// Extracts arbitrary properties from the input, including the brackets. +/// +/// E.g.: +/// +/// ```text +/// [color:red] +/// ^^^^^^^^^^^ +/// +/// [--my-color:red] +/// ^^^^^^^^^^^^^^^^ +/// ``` +#[derive(Debug, Default)] +pub struct ArbitraryPropertyMachine { + /// Start position of the arbitrary property (the position of the opening `[`) + start_pos: usize, + + /// Track brackets to ensure they are balanced + bracket_stack: BracketStack, + + /// Current state of the machine + state: State, + + css_variable_machine: CssVariableMachine, + string_machine: StringMachine, +} + +#[derive(Debug, Default)] +enum State { + #[default] + Idle, + + /// Parsing the property, e.g.: + /// + /// ```text + /// [color:red] + /// ^^^^^ + /// + /// [--my-color:red] + /// ^^^^^^^^^^ + /// ``` + ParsingProperty, + + /// Parsing the value, e.g.: + /// + /// ```text + /// [color:red] + /// ^^^ + /// ``` + ParsingValue, +} + +impl Machine for ArbitraryPropertyMachine { + #[inline(always)] + fn reset(&mut self) { + self.start_pos = 0; + self.state = State::Idle; + self.bracket_stack.reset(); + } + + #[inline] + fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + let len = cursor.input.len(); + + match self.state { + State::Idle => match CLASS_TABLE[cursor.curr as usize] { + // Start of an arbitrary property + Class::OpenBracket => { + self.start_pos = cursor.pos; + self.state = State::ParsingProperty; + cursor.advance(); + self.next(cursor) + } + + // Anything else is not a valid start of an arbitrary property + _ => MachineState::Idle, + }, + + State::ParsingProperty => { + while cursor.pos < len { + match CLASS_TABLE[cursor.curr as usize] { + Class::Dash => match CLASS_TABLE[cursor.next as usize] { + // Start of a CSS variable + // + // E.g.: `[--my-color:red]` + // ^^ + Class::Dash => return self.parse_property_variable(cursor), + + // Dashes are allowed in the property name + // + // E.g.:
`[background-color:red]` + // ^ + _ => cursor.advance(), + }, + + // Alpha characters are allowed in the property name + // + // E.g.: `[color:red]` + // ^^^^^ + Class::Alpha => cursor.advance(), + + // End of the property name, but there must be at least a single character + Class::Colon if cursor.pos > self.start_pos + 1 => { + self.state = State::ParsingValue; + cursor.advance(); + return self.next(cursor); + } + + // Anything else is not a valid property character + _ => return self.restart(), + } + } + + self.restart() + } + + State::ParsingValue => { + while cursor.pos < len { + match CLASS_TABLE[cursor.curr as usize] { + Class::Escape => match CLASS_TABLE[cursor.next as usize] { + // An escaped whitespace character is not allowed + // + // E.g.: `[color:var(--my-\ color)]` + // ^ + Class::Whitespace => return self.restart(), + + // An escaped character, skip the next character, resume after + // + // E.g.: `[color:var(--my-\#color)]` + // ^ + _ => cursor.advance_twice(), + }, + + Class::OpenParen | Class::OpenBracket | Class::OpenCurly => { + if !self.bracket_stack.push(cursor.curr) { + return self.restart(); + } + cursor.advance(); + } + + Class::CloseParen | Class::CloseBracket | Class::CloseCurly + if !self.bracket_stack.is_empty() => + { + if !self.bracket_stack.pop(cursor.curr) { + return self.restart(); + } + cursor.advance(); + } + + // End of an arbitrary value + // + // 1. All brackets must be balanced + // 2. There must be at least a single character inside the brackets + Class::CloseBracket + if self.start_pos + 1 != cursor.pos + && self.bracket_stack.is_empty() => + { + return self.done(self.start_pos, cursor) + } + + // Start of a string + Class::Quote => return self.parse_string(cursor), + + // Another `:` inside of an arbitrary property is only valid inside of a string or + // inside of brackets. Everywhere else, it's invalid. 
+ // + // E.g.: `[color:red:blue]` + // ^ Not valid + // E.g.: `[background:url(https://example.com)]` + // ^ Valid + // E.g.: `[content:'a:b:c:']` + // ^ ^ ^ Valid + Class::Colon if self.bracket_stack.is_empty() => return self.restart(), + + // Any kind of whitespace is not allowed + Class::Whitespace => return self.restart(), + + // Everything else is valid + _ => cursor.advance(), + }; + } + + self.restart() + } + } + } +} + +impl ArbitraryPropertyMachine { + fn parse_property_variable(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + match self.css_variable_machine.next(cursor) { + MachineState::Idle => self.restart(), + MachineState::Done(_) => match CLASS_TABLE[cursor.next as usize] { + // End of the CSS variable, must be followed by a `:` + // + // E.g.: `[--my-color:red]` + // ^ + Class::Colon => { + self.state = State::ParsingValue; + cursor.advance_twice(); + self.next(cursor) + } + + // Invalid arbitrary property + _ => self.restart(), + }, + } + } + + fn parse_string(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + match self.string_machine.next(cursor) { + MachineState::Idle => self.restart(), + MachineState::Done(_) => { + cursor.advance(); + self.next(cursor) + } + } + } +} + +#[derive(Clone, Copy)] +enum Class { + /// `(` + OpenParen, + + /// `[` + OpenBracket, + + /// `{` + OpenCurly, + + /// `)` + CloseParen, + + /// `]` + CloseBracket, + + /// `}` + CloseCurly, + + /// `\` + Escape, + + /// ', ", or ` + Quote, + + /// `-` + Dash, + + /// `a`..`z` or `A`..`Z` + Alpha, + + /// `:` + Colon, + + /// Whitespace characters + Whitespace, + + /// End of the input + End, + + Other, +} + +const CLASS_TABLE: [Class; 256] = { + let mut table = [Class::Other; 256]; + + macro_rules! set { + ($class:expr, $($byte:expr),+ $(,)?) => { + $(table[$byte as usize] = $class;)+ + }; + } + + macro_rules! 
set_range { + ($class:expr, $start:literal ..= $end:literal) => { + let mut i = $start; + while i <= $end { + table[i as usize] = $class; + i += 1; + } + }; + } + + set!(Class::OpenParen, b'('); + set!(Class::OpenBracket, b'['); + set!(Class::OpenCurly, b'{'); + + set!(Class::CloseParen, b')'); + set!(Class::CloseBracket, b']'); + set!(Class::CloseCurly, b'}'); + + set!(Class::Escape, b'\\'); + + set!(Class::Quote, b'"', b'\'', b'`'); + + set!(Class::Dash, b'-'); + + set_range!(Class::Alpha, b'a'..=b'z'); + set_range!(Class::Alpha, b'A'..=b'Z'); + + set!(Class::Colon, b':'); + set!(Class::End, b'\0'); + + set!(Class::Whitespace, b' ', b'\t', b'\n', b'\r', b'\x0C'); + + table +}; + +#[cfg(test)] +mod tests { + use super::ArbitraryPropertyMachine; + use crate::extractor::machine::Machine; + + #[test] + #[ignore] + fn test_arbitrary_property_machine_performance() { + let input = r#"<button class="[color:red] [background-color:red] [--my-color:red] [background:url('https://example.com')]">"#.repeat(10); + + ArbitraryPropertyMachine::test_throughput(1_000_000, &input); + ArbitraryPropertyMachine::test_duration_once(&input); + + todo!() + } + + #[test] + fn test_arbitrary_property_machine_extraction() { + for (input, expected) in [ + // Simple arbitrary property + ("[color:red]", vec!["[color:red]"]), + // Name with dashes + ("[background-color:red]", vec!["[background-color:red]"]), + // Name with leading `-` is valid + ("[-webkit-value:red]", vec!["[-webkit-value:red]"]), + // Setting a CSS Variable + ("[--my-color:red]", vec!["[--my-color:red]"]), + // Value with nested brackets + ( + "[background:url(https://example.com)]", + vec!["[background:url(https://example.com)]"], + ), + // Value containing strings + ( + "[background:url('https://example.com')]", + vec!["[background:url('https://example.com')]"], + ), + // -------------------------------------------------------- + + // Invalid CSS Variable + ("[--my#color:red]", vec![]), + // Spaces are not allowed + 
("[color: red]", vec![]), + // Multiple colons are not allowed + ("[color:red:blue]", vec![]), + // Only alpha characters and dashes are allowed in the property name + ("[background_color:red]", vec![]), + // A colon is required + ("[red]", vec![]), + // The property cannot be empty + ("[:red]", vec![]), + // Empty brackets are not allowed + ("[]", vec![]), + // Missing colon in more complex example + (r#"[CssClass("gap-y-4")]"#, vec![]), + // Brackets must be balanced + ("[background:url(https://example.com]", vec![]), + // Many brackets (>= 8) must be balanced + ( + "[background:url(https://example.com?q={[{[([{[[2]]}])]}]})]", + vec!["[background:url(https://example.com?q={[{[([{[[2]]}])]}]})]"], + ), + ] { + for wrapper in [ + // No wrapper + "{}", + // With leading spaces + " {}", + // With trailing spaces + "{} ", + // Surrounded by spaces + " {} ", + // Inside a string + "'{}'", + // Inside a function call + "fn({})", + // Inside nested function calls + "fn1(fn2({}))", + // -------------------------- + // + // HTML + // Inside a class (on its own) + r#"<div class="{}"></div>"#, + // Inside a class (first) + r#"<div class="{} foo"></div>"#, + // Inside a class (second) + r#"<div class="foo {}"></div>"#, + // Inside a class (surrounded) + r#"<div class="foo {} bar"></div>"#, + // -------------------------- + // + // JavaScript + // Inside a variable + r#"let classes = '{}';"#, + // Inside an object (key) + r#"let classes = { '{}': true };"#, + // Inside an object (no spaces, key) + r#"let classes = {'{}':true};"#, + // Inside an object (value) + r#"let classes = { primary: '{}' };"#, + // Inside an object (no spaces, value) + r#"let classes = {primary:'{}'};"#, + ] { + let input = wrapper.replace("{}", input); + let actual = ArbitraryPropertyMachine::test_extract_all(&input); + + if actual != expected { + dbg!(&input, &actual, &expected); + } + assert_eq!(actual, expected); + } + } + } +} diff --git a/crates/oxide/src/extractor/arbitrary_value_machine.rs
b/crates/oxide/src/extractor/arbitrary_value_machine.rs new file mode 100644 index 000000000000..eab286a3d9ac --- /dev/null +++ b/crates/oxide/src/extractor/arbitrary_value_machine.rs @@ -0,0 +1,214 @@ +use crate::cursor; +use crate::extractor::bracket_stack::BracketStack; +use crate::extractor::machine::{Machine, MachineState}; +use crate::extractor::string_machine::StringMachine; + +/// Extracts arbitrary values including the brackets. +/// +/// E.g.: +/// +/// ```text +/// bg-[#0088cc] +/// ^^^^^^^^^ +/// +/// bg-red-500/[20%] +/// ^^^^^ +/// ``` +#[derive(Debug, Default)] +pub struct ArbitraryValueMachine { + /// Track brackets to ensure they are balanced + bracket_stack: BracketStack, + + string_machine: StringMachine, +} + +impl Machine for ArbitraryValueMachine { + #[inline(always)] + fn reset(&mut self) { + self.bracket_stack.reset(); + } + + #[inline] + fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + // An arbitrary value must start with an open bracket + if CLASS_TABLE[cursor.curr as usize] != Class::OpenBracket { + return MachineState::Idle; + } + + let start_pos = cursor.pos; + cursor.advance(); + + let len = cursor.input.len(); + + while cursor.pos < len { + match CLASS_TABLE[cursor.curr as usize] { + Class::Escape => match CLASS_TABLE[cursor.next as usize] { + // An escaped whitespace character is not allowed + // + // E.g.: `[color:var(--my-\ color)]` + // ^ + Class::Whitespace => { + cursor.advance_twice(); + return self.restart(); + } + + // An escaped character, skip the next character, resume after + // + // E.g.: `[color:var(--my-\#color)]` + // ^ + _ => cursor.advance_twice(), + }, + + Class::OpenParen | Class::OpenBracket | Class::OpenCurly => { + if !self.bracket_stack.push(cursor.curr) { + return self.restart(); + } + cursor.advance(); + } + + Class::CloseParen | Class::CloseBracket | Class::CloseCurly + if !self.bracket_stack.is_empty() => + { + if !self.bracket_stack.pop(cursor.curr) { + return self.restart(); + } + 
cursor.advance(); + } + + // End of an arbitrary value + // + // 1. All brackets must be balanced + // 2. There must be at least a single character inside the brackets + Class::CloseBracket + if start_pos + 1 != cursor.pos && self.bracket_stack.is_empty() => + { + return self.done(start_pos, cursor); + } + + // Start of a string + Class::Quote => match self.string_machine.next(cursor) { + MachineState::Idle => return self.restart(), + MachineState::Done(_) => cursor.advance(), + }, + + // Any kind of whitespace is not allowed + Class::Whitespace => return self.restart(), + + // Everything else is valid + _ => cursor.advance(), + }; + } + + self.restart() + } +} + +#[derive(Clone, Copy, PartialEq)] +enum Class { + /// `\` + Escape, + + /// `(` + OpenParen, + + /// `)` + CloseParen, + + /// `[` + OpenBracket, + + /// `]` + CloseBracket, + + /// `{` + OpenCurly, + + /// `}` + CloseCurly, + + /// ', ", or ` + Quote, + + /// Whitespace + Whitespace, + + Other, +} + +const CLASS_TABLE: [Class; 256] = { + let mut table = [Class::Other; 256]; + + macro_rules! set { + ($class:expr, $($byte:expr),+ $(,)?) 
=> { + $(table[$byte as usize] = $class;)+ + }; + } + + set!(Class::Escape, b'\\'); + + set!(Class::OpenParen, b'('); + set!(Class::CloseParen, b')'); + + set!(Class::OpenBracket, b'['); + set!(Class::CloseBracket, b']'); + + set!(Class::OpenCurly, b'{'); + set!(Class::CloseCurly, b'}'); + + set!(Class::Quote, b'"', b'\'', b'`'); + + set!(Class::Whitespace, b' ', b'\t', b'\n', b'\r', b'\x0C'); + + table +}; + +#[cfg(test)] +mod tests { + use super::ArbitraryValueMachine; + use crate::extractor::machine::Machine; + + #[test] + #[ignore] + fn test_arbitrary_value_machine_performance() { + let input = r#"<div class="[color:red] [[data-foo]] [url('https://tailwindcss.com')] [url(https://tailwindcss.com)]"></div>"#.repeat(100); + + ArbitraryValueMachine::test_throughput(100_000, &input); + ArbitraryValueMachine::test_duration_once(&input); + + todo!() + } + + #[test] + fn test_arbitrary_value_machine_extraction() { + for (input, expected) in [ + // Simple variable + ("[#0088cc]", vec!["[#0088cc]"]), + // With parentheses + ( + "[url(https://tailwindcss.com)]", + vec!["[url(https://tailwindcss.com)]"], + ), + // With strings, where bracket balancing doesn't matter + ("['[({])}']", vec!["['[({])}']"]), + // With strings later in the input + ( + "[url('https://tailwindcss.com?[{]}')]", + vec!["[url('https://tailwindcss.com?[{]}')]"], + ), + // With nested brackets + ("[[data-foo]]", vec!["[[data-foo]]"]), + ( + "[&>[data-slot=icon]:last-child]", + vec!["[&>[data-slot=icon]:last-child]"], + ), + // Spaces are not allowed + ("[ #0088cc ]", vec![]), + // Unbalanced brackets are not allowed + ("[foo[bar]", vec![]), + // Empty brackets are not allowed + ("[]", vec![]), + ] { + assert_eq!(ArbitraryValueMachine::test_extract_all(input), expected); + } + } +} diff --git a/crates/oxide/src/extractor/arbitrary_variable_machine.rs b/crates/oxide/src/extractor/arbitrary_variable_machine.rs new file mode 100644 index 000000000000..7339b2377c8f --- /dev/null +++ 
b/crates/oxide/src/extractor/arbitrary_variable_machine.rs @@ -0,0 +1,358 @@ +use crate::cursor; +use crate::extractor::bracket_stack::BracketStack; +use crate::extractor::css_variable_machine::CssVariableMachine; +use crate::extractor::machine::{Machine, MachineState}; +use crate::extractor::string_machine::StringMachine; + +/// Extracts arbitrary variables including the parens. +/// +/// E.g.: +/// +/// ```text +/// (--my-value) +/// ^^^^^^^^^^^^ +/// +/// bg-red-500/(--my-opacity) +/// ^^^^^^^^^^^^^^ +/// ``` +#[derive(Debug, Default)] +pub struct ArbitraryVariableMachine { + /// Start position of the arbitrary variable + start_pos: usize, + + /// Track brackets to ensure they are balanced + bracket_stack: BracketStack, + + /// Current state of the machine + state: State, + + string_machine: StringMachine, + css_variable_machine: CssVariableMachine, +} + +#[derive(Debug, Default)] +enum State { + #[default] + Idle, + + /// Currently parsing the inside of the arbitrary variable + /// + /// ```text + /// (--my-opacity) + /// ^^^^^^^^^^^^ + /// ``` + Parsing, + + /// Currently parsing the fallback of the arbitrary variable + /// + /// ```text + /// (--my-opacity,50%) + /// ^^^^ + /// ``` + ParsingFallback, +} + +impl Machine for ArbitraryVariableMachine { + #[inline(always)] + fn reset(&mut self) { + self.start_pos = 0; + self.state = State::Idle; + self.bracket_stack.reset(); + } + + #[inline] + fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + let class_curr = CLASS_TABLE[cursor.curr as usize]; + let len = cursor.input.len(); + + match self.state { + State::Idle => match class_curr { + // Arbitrary variables start with `(` followed by a CSS variable + // + // E.g.: `(--my-variable)` + // ^^ + // + Class::OpenParen => match CLASS_TABLE[cursor.next as usize] { + Class::Dash => { + self.start_pos = cursor.pos; + self.state = State::Parsing; + cursor.advance(); + self.next(cursor) + } + + _ => MachineState::Idle, + }, + + // Everything else, is 
not a valid start of the arbitrary variable. But the next + // character might be a valid start for a new utility. + _ => MachineState::Idle, + }, + + State::Parsing => match self.css_variable_machine.next(cursor) { + MachineState::Idle => self.restart(), + MachineState::Done(_) => match CLASS_TABLE[cursor.next as usize] { + // A CSS variable followed by a `,` means that there is a fallback + // + // E.g.: `(--my-color,red)` + // ^ + Class::Comma => { + self.state = State::ParsingFallback; + cursor.advance_twice(); // Skip the `,` + self.next(cursor) + } + + // End of the CSS variable + // + // E.g.: `(--my-color)` + // ^ + _ => { + cursor.advance(); + + match CLASS_TABLE[cursor.curr as usize] { + // End of an arbitrary variable, must be followed by `)` + Class::CloseParen => self.done(self.start_pos, cursor), + + // Invalid arbitrary variable, not ending at `)` + _ => self.restart(), + } + } + }, + }, + + State::ParsingFallback => { + while cursor.pos < len { + match CLASS_TABLE[cursor.curr as usize] { + Class::Escape => match CLASS_TABLE[cursor.next as usize] { + // An escaped whitespace character is not allowed + // + // E.g.: `(--my-\ color)` + // ^^ + Class::Whitespace => return self.restart(), + + // An escaped character, skip the next character, resume after + // + // E.g.: `(--my-\#color)` + // ^^ + _ => cursor.advance_twice(), + }, + + Class::OpenParen | Class::OpenBracket | Class::OpenCurly => { + if !self.bracket_stack.push(cursor.curr) { + return self.restart(); + } + cursor.advance(); + } + + Class::CloseParen | Class::CloseBracket | Class::CloseCurly + if !self.bracket_stack.is_empty() => + { + if !self.bracket_stack.pop(cursor.curr) { + return self.restart(); + } + cursor.advance(); + } + + // End of an arbitrary variable + Class::CloseParen => return self.done(self.start_pos, cursor), + + // Start of a string + Class::Quote => match self.string_machine.next(cursor) { + MachineState::Idle => return self.restart(), + MachineState::Done(_) => { + 
self.state = State::ParsingFallback; + cursor.advance(); + return self.next(cursor); + } + }, + + // A `:` inside of a fallback value is only valid inside of brackets or inside of a + // string. Everywhere else, it's invalid. + // + // E.g.: `(--foo,bar:baz)` + // ^ Not valid + // + // E.g.: `(--url,url(https://example.com))` + // ^ Valid + // + // E.g.: `(--my-content,'a:b:c:')` + // ^ ^ ^ Valid + Class::Colon if self.bracket_stack.is_empty() => return self.restart(), + + // Any kind of whitespace is not allowed + Class::Whitespace => return self.restart(), + + // Everything else is valid + _ => cursor.advance(), + }; + } + + self.restart() + } + } + } +} + +#[derive(Clone, Copy, PartialEq)] +enum Class { + /// `'a'..='z'` + AlphaLower, + + /// `'A'..='Z'` + AlphaUpper, + + /// `@` + At, + + /// `:` + Colon, + + /// `,` + Comma, + + /// `-` + Dash, + + /// `.` + Dot, + + /// `\\` + Escape, + + /// `0x00` + End, + + /// `'0'..='9'` + Number, + + /// `[` + OpenBracket, + + /// `]` + CloseBracket, + + /// `(` + OpenParen, + + /// `)` + CloseParen, + + /// `{` + OpenCurly, + + /// `}` + CloseCurly, + + /// ', ", or ` + Quote, + + /// _ + Underscore, + + /// Whitespace characters: ' ', '\t', '\n', '\r', '\x0C' + Whitespace, + + /// Anything else + Other, +} + +const CLASS_TABLE: [Class; 256] = { + let mut table = [Class::Other; 256]; + + macro_rules! set { + ($class:expr, $($byte:expr),+ $(,)?) => { + $(table[$byte as usize] = $class;)+ + }; + } + + macro_rules!
set_range { + ($class:expr, $start:literal ..= $end:literal) => { + let mut i = $start; + while i <= $end { + table[i as usize] = $class; + i += 1; + } + }; + } + + set!(Class::At, b'@'); + set!(Class::Underscore, b'_'); + set!(Class::Dash, b'-'); + set!(Class::Whitespace, b' ', b'\t', b'\n', b'\r', b'\x0C'); + set!(Class::Comma, b','); + set!(Class::Escape, b'\\'); + + set!(Class::OpenBracket, b'['); + set!(Class::CloseBracket, b']'); + + set!(Class::OpenParen, b'('); + set!(Class::CloseParen, b')'); + + set!(Class::OpenCurly, b'{'); + set!(Class::CloseCurly, b'}'); + + set!(Class::Dot, b'.'); + set!(Class::Colon, b':'); + + set!(Class::Quote, b'"', b'\'', b'`'); + + set_range!(Class::AlphaLower, b'a'..=b'z'); + set_range!(Class::AlphaUpper, b'A'..=b'Z'); + set_range!(Class::Number, b'0'..=b'9'); + + set!(Class::End, 0x00); + + table +}; + +#[cfg(test)] +mod tests { + use super::ArbitraryVariableMachine; + use crate::extractor::machine::Machine; + + #[test] + #[ignore] + fn test_arbitrary_variable_machine_performance() { + let input = r#"<div class="(--foo) (--my-color,red,blue) (--my-img,url('https://example.com?q=(][)'))"></div>"#.repeat(100); + + ArbitraryVariableMachine::test_throughput(100_000, &input); + ArbitraryVariableMachine::test_duration_once(&input); + + todo!() + } + + #[test] + fn test_arbitrary_variable_extraction() { + for (input, expected) in [ + // Simple utility + ("(--foo)", vec!["(--foo)"]), + // With dashes + ("(--my-color)", vec!["(--my-color)"]), + // With a fallback + ("(--my-color,red,blue)", vec!["(--my-color,red,blue)"]), + // With a fallback containing a string with unbalanced brackets + ( + "(--my-img,url('https://example.com?q=(][)'))", + vec!["(--my-img,url('https://example.com?q=(][)'))"], + ), + // -------------------------------------------------------- + + // Exceptions: + // Arbitrary variable must start with a CSS variable + (r"(bar)", vec![]), + // Arbitrary variables must be valid CSS variables + (r"(--my-\ color)", 
vec![]), + (r"(--my#color)", vec![]), + // Fallbacks cannot have spaces + (r"(--my-color, red)", vec![]), + // Fallbacks cannot have escaped spaces + (r"(--my-color,\ red)", vec![]), + // Variables must have at least one character after the `--` + (r"(--)", vec![]), + (r"(--,red)", vec![]), + (r"(-)", vec![]), + (r"(-my-color)", vec![]), + ] { + assert_eq!(ArbitraryVariableMachine::test_extract_all(input), expected); + } + } +} diff --git a/crates/oxide/src/extractor/bracket_stack.rs b/crates/oxide/src/extractor/bracket_stack.rs new file mode 100644 index 000000000000..f34e56ed3674 --- /dev/null +++ b/crates/oxide/src/extractor/bracket_stack.rs @@ -0,0 +1,59 @@ +const SIZE: usize = 32; + +/// Fixed-capacity stack of expected closing brackets, used to check that brackets are balanced. +#[repr(C)] +#[derive(Debug, Default)] +pub struct BracketStack { + /// Bracket stack to ensure properly balanced brackets. + bracket_stack: [u8; SIZE], + bracket_stack_len: usize, +} + +impl BracketStack { + /// Returns `true` when no brackets are currently open. + #[inline(always)] + pub fn is_empty(&self) -> bool { + self.bracket_stack_len == 0 + } + + /// Pushes the closing counterpart of the opening `bracket`. + /// + /// Returns `false` when the stack is full or `bracket` is not `(`, `[` or `{`. + #[inline(always)] + pub fn push(&mut self, bracket: u8) -> bool { + if self.bracket_stack_len >= SIZE { + return false; + } + + // The length check above keeps the index in bounds, so no `unsafe` is needed. + self.bracket_stack[self.bracket_stack_len] = match bracket { + b'(' => b')', + b'[' => b']', + b'{' => b'}', + _ => return false, + }; + + self.bracket_stack_len += 1; + true + } + + /// Pops the most recent entry and returns `true` when it equals the closing `bracket`. + /// + /// Returns `false` when the stack is empty or the brackets do not match; on a mismatch the + /// entry is still removed. + #[inline(always)] + pub fn pop(&mut self, bracket: u8) -> bool { + if self.bracket_stack_len == 0 { + return false; + } + + self.bracket_stack_len -= 1; + self.bracket_stack.get(self.bracket_stack_len) == Some(&bracket) + } + + /// Empties the stack; the stored bytes are left in place. + #[inline(always)] + pub fn reset(&mut self) { + self.bracket_stack_len = 0; + } +} diff --git a/crates/oxide/src/extractor/candidate_machine.rs b/crates/oxide/src/extractor/candidate_machine.rs new file mode 100644 index 000000000000..7ba8441a03da --- /dev/null +++ b/crates/oxide/src/extractor/candidate_machine.rs @@ -0,0 +1,376 @@ +use crate::cursor;
+use crate::extractor::machine::{Machine, MachineState}; +use crate::extractor::utility_machine::UtilityMachine; +use crate::extractor::variant_machine::VariantMachine; +use crate::extractor::Span; + +/// Extract full candidates including variants and utilities. +#[derive(Debug, Default)] +pub struct CandidateMachine { + /// Start position of the candidate + start_pos: usize, + + /// End position of the last variant (if any) + last_variant_end_pos: Option<usize>, + + utility_machine: UtilityMachine, + variant_machine: VariantMachine, +} + +impl Machine for CandidateMachine { + #[inline(always)] + fn reset(&mut self) { + self.start_pos = 0; + self.last_variant_end_pos = None; + } + + #[inline] + fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + let len = cursor.input.len(); + + while cursor.pos < len { + // Skip ahead for known characters that will never be part of a candidate. No need to + // run any sub-machines. + if cursor.curr.is_ascii_whitespace() { + self.reset(); + cursor.advance(); + continue; + } + + // Candidates don't start with these characters, so we can skip ahead. + if matches!(cursor.curr, b':' | b'"' | b'\'' | b'`') { + self.reset(); + cursor.advance(); + continue; + } + + // Jump ahead if the character is known to be an invalid boundary and we should start + // at the next boundary even though "valid" candidates can exist. + // + // E.g.: `<div class="">` + // ^^^ Valid candidate + // ^ But this character makes it invalid + // ^ Therefore we jump here + // + // E.g.: `Some Class` + // ^ ^ Invalid, we can jump ahead to the next boundary + // + if matches!(cursor.curr, b'<' | b'A'..=b'Z') { + if let Some(offset) = cursor.input[cursor.pos..] 
+ .iter() + .position(|&c| is_valid_before_boundary(&c)) + { + self.reset(); + cursor.advance_by(offset + 1); + } else { + return self.restart(); + } + + continue; + } + + let mut variant_cursor = cursor.clone(); + let variant_machine_state = self.variant_machine.next(&mut variant_cursor); + + let mut utility_cursor = cursor.clone(); + let utility_machine_state = self.utility_machine.next(&mut utility_cursor); + + match (variant_machine_state, utility_machine_state) { + // No variant, but the utility machine completed + (MachineState::Idle, MachineState::Done(utility_span)) => { + cursor.move_to(utility_cursor.pos + 1); + + let span = match self.last_variant_end_pos { + Some(end_pos) => { + // Verify that the utility is touching the last variant + if end_pos + 1 != utility_span.start { + return self.restart(); + } + + Span::new(self.start_pos, utility_span.end) + } + None => utility_span, + }; + + // Ensure the span has valid boundary characters before and after + if !has_valid_boundaries(&span, cursor.input) { + return self.restart(); + } + + return self.done_span(span); + } + + // Both variant and utility machines are done + // E.g.: `hover:flex` + // ^^^^^^ Variant + // ^^^^^ Utility + // + (MachineState::Done(variant_span), MachineState::Done(utility_span)) => { + cursor.move_to(variant_cursor.pos + 1); + + if let Some(end_pos) = self.last_variant_end_pos { + // Verify variant is touching the last variant + if end_pos + 1 != variant_span.start { + return self.restart(); + } + } else { + // We know that there is no variant before this one. + // + // Edge case: JavaScript keys should be considered utilities if they are + // not preceded by another variant, and followed by any kind of whitespace + // or the end of the line. 
+ // + // E.g.: `{ underline: true }` + // ^^^^^^^^^^ Variant + // ^^^^^^^^^ Utility (followed by `: `) + let after = cursor.input.get(utility_span.end + 2).unwrap_or(&b'\0'); + if after.is_ascii_whitespace() || *after == b'\0' { + cursor.move_to(utility_cursor.pos + 2); + return self.done_span(utility_span); + } + + self.start_pos = variant_span.start; + } + + self.last_variant_end_pos = Some(variant_cursor.pos); + } + + // Variant is done, utility is invalid + (MachineState::Done(variant_span), MachineState::Idle) => { + cursor.move_to(variant_cursor.pos + 1); + + if let Some(end_pos) = self.last_variant_end_pos { + if end_pos + 1 > variant_span.start { + self.reset(); + return MachineState::Idle; + } + } else { + self.start_pos = variant_span.start; + } + + self.last_variant_end_pos = Some(variant_cursor.pos); + } + + (MachineState::Idle, MachineState::Idle) => { + // Skip main cursor to the next character after both machines. We already know + // there is no candidate here. + if variant_cursor.pos > cursor.pos || utility_cursor.pos > cursor.pos { + cursor.move_to(variant_cursor.pos.max(utility_cursor.pos)); + } + + self.reset(); + cursor.advance(); + } + } + } + + MachineState::Idle + } +} + +impl CandidateMachine { + #[inline(always)] + fn done_span(&mut self, span: Span) -> MachineState { + self.reset(); + MachineState::Done(span) + } +} + +/// A candidate must be preceded or followed by any of these characters +/// E.g.: `<div class="flex">` +/// ^ Valid for `flex` +/// ^ Invalid for `div` +#[inline(always)] +fn is_valid_common_boundary(c: &u8) -> bool { + matches!( + c, + b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' | b'"' | b'\'' | b'`' | b'\0' + ) +} + +/// A candidate must be preceded by any of these characters. +#[inline(always)] +fn is_valid_before_boundary(c: &u8) -> bool { + is_valid_common_boundary(c) || matches!(c, b'.') +} + +/// A candidate must be followed by any of these characters. 
+/// +/// E.g.: `[class.foo]` Angular +/// E.g.: `<div class:flex="bool">` Svelte +/// ^ +#[inline(always)] +fn is_valid_after_boundary(c: &u8) -> bool { + is_valid_common_boundary(c) || matches!(c, b'}' | b']' | b'=') +} + +#[inline(always)] +fn has_valid_boundaries(span: &Span, input: &[u8]) -> bool { + let before = { + if span.start == 0 { + b'\0' + } else { + input[span.start - 1] + } + }; + + let after = { + if span.end >= input.len() - 1 { + b'\0' + } else { + input[span.end + 1] + } + }; + + // Ensure the span has valid boundary characters before and after + is_valid_before_boundary(&before) && is_valid_after_boundary(&after) +} + +#[cfg(test)] +mod tests { + use super::CandidateMachine; + use crate::extractor::machine::Machine; + + #[test] + #[ignore] + fn test_candidate_machine_performance() { + let n = 10_000; + let input = include_str!("../fixtures/example.html"); + // let input = &r#"<button type="button" class="absolute -top-1 -left-1.5 flex items-center justify-center p-1.5 text-gray-400 hover:text-gray-500">"#.repeat(100); + + CandidateMachine::test_throughput(n, input); + CandidateMachine::test_duration_once(input); + CandidateMachine::test_duration_n(n, input); + + todo!() + } + + #[test] + fn test_candidate_extraction() { + for (input, expected) in [ + // Simple utility + ("flex", vec!["flex"]), + // Simple utility with special character(s) + ("@container", vec!["@container"]), + // Single character utility + ("a", vec!["a"]), + // Simple utility with dashes + ("items-center", vec!["items-center"]), + // Simple utility with numbers + ("px-2.5", vec!["px-2.5"]), + // Simple variant with simple utility + ("hover:flex", vec!["hover:flex"]), + // Arbitrary properties + ("[color:red]", vec!["[color:red]"]), + ("![color:red]", vec!["![color:red]"]), + ("[color:red]!", vec!["[color:red]!"]), + ("[color:red]/20", vec!["[color:red]/20"]), + ("![color:red]/20", vec!["![color:red]/20"]), + ("[color:red]/20!", vec!["[color:red]/20!"]), + // With multiple 
variants + ("hover:focus:flex", vec!["hover:focus:flex"]), + // Exceptions: + // + // Keys inside of a JS object could be a variant-less candidate. Vue example. + ("{ underline: true }", vec!["underline", "true"]), + // With complex variants + ( + "[&>[data-slot=icon]:last-child]:right-2.5", + vec!["[&>[data-slot=icon]:last-child]:right-2.5"], + ), + // With multiple (complex) variants + ( + "[&>[data-slot=icon]:last-child]:sm:right-2.5", + vec!["[&>[data-slot=icon]:last-child]:sm:right-2.5"], + ), + ( + "sm:[&>[data-slot=icon]:last-child]:right-2.5", + vec!["sm:[&>[data-slot=icon]:last-child]:right-2.5"], + ), + // Exceptions regarding boundaries + // + // `flex!` is valid, but since it's followed by a non-boundary character it's invalid. + // `block` is therefore also invalid because it didn't start after a boundary. + ("flex!block", vec![]), + ] { + for (wrapper, additional) in [ + // No wrapper + ("{}", vec![]), + // With leading spaces + (" {}", vec![]), + (" {}", vec![]), + (" {}", vec![]), + // With trailing spaces + ("{} ", vec![]), + ("{} ", vec![]), + ("{} ", vec![]), + // Surrounded by spaces + (" {} ", vec![]), + // Inside a string + ("'{}'", vec![]), + // Inside a function call + ("fn('{}')", vec![]), + // Inside nested function calls + ("fn1(fn2('{}'))", vec![]), + // -------------------------- + // + // HTML + // Inside a class (on its own) + (r#"<div class="{}"></div>"#, vec![]), + // Inside a class (first) + (r#"<div class="{} foo"></div>"#, vec!["foo"]), + // Inside a class (second) + (r#"<div class="foo {}"></div>"#, vec!["foo"]), + // Inside a class (surrounded) + (r#"<div class="foo {} bar"></div>"#, vec!["foo", "bar"]), + // -------------------------- + // + // JavaScript + // Inside a variable + (r#"let classes = '{}';"#, vec!["let", "classes"]), + // Inside an object (key) + ( + r#"let classes = { '{}': true };"#, + vec!["let", "classes", "true"], + ), + // Inside an object (no spaces, key) + (r#"let classes = {'{}':true};"#, vec!["let", 
"classes"]), + // Inside an object (value) + ( + r#"let classes = { primary: '{}' };"#, + vec!["let", "classes", "primary"], + ), + // Inside an object (no spaces, value) + (r#"let classes = {primary:'{}'};"#, vec!["let", "classes"]), + ] { + let input = wrapper.replace("{}", input); + + let mut expected = expected.clone(); + expected.extend(additional); + expected.sort(); + + let mut actual = CandidateMachine::test_extract_all(&input); + actual.sort(); + + if actual != expected { + dbg!(&input, &expected, &actual); + } + + assert_eq!(actual, expected); + } + } + } + + #[test] + fn do_not_consider_svg_path_commands() { + for input in [ + r#"<path d="M19 21V5a2 2 0 00-2-2H7a2 2 0 00-2 2v16m14 0h2m-2 0h-5m-9 0H3m2 0h5M9 7h1m-1 4h1m4-4h1m-1 4h1m-5 10v-5a1 1 0 011-1h2a1 1 0 011 1v5m-4 0h4"/>"#, + r#"<path d="0h2m-2"/>"#, + ] { + assert_eq!( + CandidateMachine::test_extract_all(input), + Vec::<&str>::new() + ); + } + } +} diff --git a/crates/oxide/src/extractor/css_variable_machine.rs b/crates/oxide/src/extractor/css_variable_machine.rs new file mode 100644 index 000000000000..f443e5275859 --- /dev/null +++ b/crates/oxide/src/extractor/css_variable_machine.rs @@ -0,0 +1,244 @@ +use crate::cursor; +use crate::extractor::machine::{Machine, MachineState}; + +/// Extract CSS variables from an input. 
+/// +/// E.g.: +/// +/// ```text +/// var(--my-variable) +/// ^^^^^^^^^^^^^ +/// ``` +#[derive(Debug, Default)] +pub struct CssVariableMachine; + +impl Machine for CssVariableMachine { + #[inline(always)] + fn reset(&mut self) {} + + #[inline] + fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + // CSS Variables must start with `--` + if CLASS_TABLE[cursor.curr as usize] != Class::Dash + || CLASS_TABLE[cursor.next as usize] != Class::Dash + { + return MachineState::Idle; + } + + let start_pos = cursor.pos; + let len = cursor.input.len(); + + cursor.advance_twice(); + + while cursor.pos < len { + match CLASS_TABLE[cursor.curr as usize] { + // https://drafts.csswg.org/css-syntax-3/#ident-token-diagram + // + Class::AllowedCharacter | Class::Dash => match CLASS_TABLE[cursor.next as usize] { + // Valid character followed by a valid character or an escape character + // + // E.g.: `--my-variable` + // ^^ + // E.g.: `--my-\#variable` + // ^^ + Class::AllowedCharacter | Class::Dash | Class::Escape => cursor.advance(), + + // Valid character followed by anything else means the variable is done + // + // E.g.: `'--my-variable'` + // ^ + _ => { + // There must be at least 1 character after the `--` + if cursor.pos - start_pos < 2 { + return self.restart(); + } else { + return self.done(start_pos, cursor); + } + } + }, + + Class::Escape => match CLASS_TABLE[cursor.next as usize] { + // An escaped whitespace character is not allowed + // + // In CSS it is allowed, but in the context of a class it's not because then we + // would have spaces in the class. 
+ // + // E.g.: `bg-(--my-\ color)` + // ^ + Class::Whitespace => return self.restart(), + + // An escape at the end of the class is not allowed + Class::End => return self.restart(), + + // An escaped character, skip the next character, resume after + // + // E.g.: `--my-\#variable` + // ^ We are here + // ^ Resume here + _ => cursor.advance_twice(), + }, + + // Character is not valid anymore + _ => return self.restart(), + } + } + + MachineState::Idle + } +} + +#[derive(Clone, Copy, PartialEq)] +enum Class { + /// - + Dash, + + /// _, a-z, A-Z, 0-9 + AllowedCharacter, + + /// \ + Escape, + + /// Whitespace + Whitespace, + + /// End of the input + End, + + Other, +} + +const CLASS_TABLE: [Class; 256] = { + let mut table = [Class::Other; 256]; + + macro_rules! set { + ($class:expr, $($byte:expr),+ $(,)?) => { + $(table[$byte as usize] = $class;)+ + }; + } + + macro_rules! set_range { + ($class:expr, $start:literal ..= $end:literal) => { + let mut i = $start; + while i <= $end { + table[i as usize] = $class; + i += 1; + } + }; + } + + set!(Class::Dash, b'-'); + set!(Class::Escape, b'\\'); + set!(Class::Whitespace, b' ', b'\t', b'\n', b'\r', b'\x0C'); + + set!(Class::AllowedCharacter, b'_'); + set_range!(Class::AllowedCharacter, b'a'..=b'z'); + set_range!(Class::AllowedCharacter, b'A'..=b'Z'); + set_range!(Class::AllowedCharacter, b'0'..=b'9'); + + // non-ASCII (such as Emoji): https://drafts.csswg.org/css-syntax-3/#non-ascii-ident-code-point + set_range!(Class::AllowedCharacter, 0x80..=0xff); + + set!(Class::End, b'\0'); + + table +}; + +#[cfg(test)] +mod tests { + use super::CssVariableMachine; + use crate::extractor::machine::Machine; + + #[test] + #[ignore] + fn test_css_variable_machine_performance() { + let input = r#"This sentence will contain a few variables here and there var(--my-variable) --other-variable-1\/2 var(--more-variables-here)"#.repeat(100); + + CssVariableMachine::test_throughput(100_000, &input); + 
CssVariableMachine::test_duration_once(&input); + + todo!(); + } + + #[test] + fn test_css_variable_machine_extraction() { + for (input, expected) in [ + // Simple variable + ("--foo", vec!["--foo"]), + ("--my-variable", vec!["--my-variable"]), + // Multiple variables + ( + "calc(var(--first) + var(--second))", + vec!["--first", "--second"], + ), + // Variables with... emojis + ("--๐", vec!["--๐"]), + ("--๐-๐", vec!["--๐-๐"]), + // Escaped character in the middle, skips the next character + (r#"--spacing-1\/2"#, vec![r#"--spacing-1\/2"#]), + // Escaped whitespace is not allowed + (r#"--my-\ variable"#, vec![]), + // -------------------------- + // + // Exceptions + // Not a valid variable + ("", vec![]), + ("-", vec![]), + ("--", vec![]), + ] { + for wrapper in [ + // No wrapper + "{}", + // With leading spaces + " {}", + // With trailing spaces + "{} ", + // Surrounded by spaces + " {} ", + // Inside a string + "'{}'", + // Inside a function call + "fn({})", + // Inside nested function calls + "fn1(fn2({}))", + // -------------------------- + // + // HTML + // Inside a class (on its own) + r#"<div class="{}"></div>"#, + // Inside a class (first) + r#"<div class="{} foo"></div>"#, + // Inside a class (second) + r#"<div class="foo {}"></div>"#, + // Inside a class (surrounded) + r#"<div class="foo {} bar"></div>"#, + // Inside an arbitrary property + r#"<div class="[{}:red]"></div>"#, + // -------------------------- + // + // JavaScript + // Inside a variable + r#"let classes = '{}';"#, + // Inside an object (key) + r#"let classes = { '{}': true };"#, + // Inside an object (no spaces, key) + r#"let classes = {'{}':true};"#, + // Inside an object (value) + r#"let classes = { primary: '{}' };"#, + // Inside an object (no spaces, value) + r#"let classes = {primary:'{}'};"#, + // Inside an array + r#"let classes = ['{}'];"#, + ] { + let input = wrapper.replace("{}", input); + + let actual = CssVariableMachine::test_extract_all(&input); + + if actual != expected { + 
dbg!(&input, &actual, &expected); + } + + assert_eq!(actual, expected); + } + } + } +} diff --git a/crates/oxide/src/extractor/machine.rs b/crates/oxide/src/extractor/machine.rs new file mode 100644 index 000000000000..5874e4b63e38 --- /dev/null +++ b/crates/oxide/src/extractor/machine.rs @@ -0,0 +1,161 @@ +use crate::cursor; + +#[derive(Debug, Clone, Copy)] +pub struct Span { + /// Inclusive start position of the span + pub start: usize, + + /// Inclusive end position of the span + pub end: usize, +} + +impl Span { + pub fn new(start: usize, end: usize) -> Self { + Self { start, end } + } + + #[inline(always)] + pub fn slice<'a>(&self, input: &'a [u8]) -> &'a [u8] { + &input[self.start..=self.end] + } +} + +#[derive(Debug, Default)] +pub enum MachineState { + /// Machine is not doing anything at the moment + #[default] + Idle, + + /// Machine is done parsing and has extracted a span + Done(Span), +} + +pub trait Machine: Sized + Default { + fn reset(&mut self); + fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState; + + /// Reset the state machine, and mark the machine as [MachineState::Idle]. + #[inline(always)] + fn restart(&mut self) -> MachineState { + self.reset(); + MachineState::Idle + } + + /// Reset the state machine, and mark the machine as [MachineState::Done(โฆ)]. 
+ #[inline(always)] + fn done(&mut self, start: usize, cursor: &cursor::Cursor<'_>) -> MachineState { + self.reset(); + MachineState::Done(Span::new(start, cursor.pos)) + } + + #[cfg(test)] + fn test_throughput(iterations: usize, input: &str) { + use crate::throughput::Throughput; + use std::hint::black_box; + + let input = input.as_bytes(); + let len = input.len(); + + let throughput = Throughput::compute(iterations, len, || { + let mut machine = Self::default(); + let mut cursor = cursor::Cursor::new(input); + + while cursor.pos < len { + _ = black_box(machine.next(&mut cursor)); + + cursor.advance(); + } + }); + eprintln!( + "{}: Throughput: {}", + std::any::type_name::<Self>(), + throughput + ); + } + + #[cfg(test)] + fn test_duration_once(input: &str) { + use std::hint::black_box; + + let input = input.as_bytes(); + let len = input.len(); + + let duration = { + let start = std::time::Instant::now(); + let mut machine = Self::default(); + let mut cursor = cursor::Cursor::new(input); + + while cursor.pos < len { + _ = black_box(machine.next(&mut cursor)); + + cursor.advance(); + } + + start.elapsed() + }; + eprintln!( + "{}: Duration: {:?}", + std::any::type_name::<Self>(), + duration + ); + } + + #[cfg(test)] + fn test_duration_n(n: usize, input: &str) { + use std::hint::black_box; + + let input = input.as_bytes(); + let len = input.len(); + + let duration = { + let start = std::time::Instant::now(); + + for _ in 0..n { + let mut machine = Self::default(); + let mut cursor = cursor::Cursor::new(input); + + while cursor.pos < len { + _ = black_box(machine.next(&mut cursor)); + + cursor.advance(); + } + } + + start.elapsed() + }; + eprintln!( + "{}: Duration: {:?} ({} iterations, ~{:?} per iteration)", + std::any::type_name::<Self>(), + duration, + n, + duration / n as u32 + ); + } + + #[cfg(test)] + fn test_extract_all(input: &str) -> Vec<&str> { + input + // Mimicking the behavior of how we parse lines individually + .split_terminator("\n") + .flat_map(|input| 
{
+                let mut machine = Self::default();
+                let mut cursor = cursor::Cursor::new(input.as_bytes());
+
+                let mut actual: Vec<&str> = vec![];
+                let len = cursor.input.len();
+
+                while cursor.pos < len {
+                    if let MachineState::Done(span) = machine.next(&mut cursor) {
+                        actual.push(unsafe {
+                            std::str::from_utf8_unchecked(span.slice(cursor.input))
+                        });
+                    }
+
+                    cursor.advance();
+                }
+
+                actual
+            })
+            .collect()
+    }
+}
diff --git a/crates/oxide/src/extractor/mod.rs b/crates/oxide/src/extractor/mod.rs
new file mode 100644
index 000000000000..b60720e25dda
--- /dev/null
+++ b/crates/oxide/src/extractor/mod.rs
@@ -0,0 +1,901 @@
+use crate::cursor;
+use crate::extractor::machine::Span;
+use candidate_machine::CandidateMachine;
+use css_variable_machine::CssVariableMachine;
+use machine::{Machine, MachineState};
+use std::fmt;
+
+pub mod arbitrary_property_machine;
+pub mod arbitrary_value_machine;
+pub mod arbitrary_variable_machine;
+pub mod bracket_stack;
+pub mod candidate_machine;
+pub mod css_variable_machine;
+pub mod machine;
+pub mod modifier_machine;
+pub mod named_utility_machine;
+pub mod named_variant_machine;
+pub mod pre_processors;
+pub mod string_machine;
+pub mod utility_machine;
+pub mod variant_machine;
+
+/// A value found by the extractor, borrowed as raw bytes from the scanned input.
+#[derive(Debug)]
+pub enum Extracted<'a> {
+    /// Extracted a valid looking candidate
+    ///
+    /// E.g.: `flex`
+    ///
+    Candidate(&'a [u8]),
+
+    /// Extracted a valid looking CSS variable
+    ///
+    /// E.g.: `--my-variable`
+    ///
+    CssVariable(&'a [u8]),
+}
+
+/// Formats the value as `Candidate(…)` or `CssVariable(…)`.
+///
+/// The payload is raw bytes taken from the input, so it is not guaranteed to be valid UTF-8.
+/// It is rendered lossily (invalid sequences become U+FFFD) so that formatting never panics.
+impl fmt::Display for Extracted<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Extracted::Candidate(candidate) => {
+                write!(f, "Candidate({})", String::from_utf8_lossy(candidate))
+            }
+            Extracted::CssVariable(variable) => {
+                write!(f, "CssVariable({})", String::from_utf8_lossy(variable))
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct Extractor<'a> {
+    cursor: cursor::Cursor<'a>,
+
+    css_variable_machine: CssVariableMachine,
+    candidate_machine:
CandidateMachine, +} + +impl<'a> Extractor<'a> { + pub fn new(input: &'a [u8]) -> Self { + Self { + cursor: cursor::Cursor::new(input), + + css_variable_machine: Default::default(), + candidate_machine: Default::default(), + } + } + + pub fn extract(&mut self) -> Vec<Extracted<'a>> { + // Candidates found by inner candidate machines. If the outer machine finds a solution, we + // can discard the inner machines. Otherwise, we can keep the candidates from the inner + // machines. + let mut in_flight_spans: Vec<Span> = Vec::with_capacity(15); + + // All the extracted values + let mut extracted = Vec::with_capacity(100); + + let len = self.cursor.input.len(); + + // CSS Variable extractor + { + let cursor = &mut self.cursor.clone(); + while cursor.pos < len { + if cursor.curr.is_ascii_whitespace() { + cursor.advance(); + continue; + } + + if let MachineState::Done(span) = self.css_variable_machine.next(cursor) { + extracted.push(Extracted::CssVariable(span.slice(self.cursor.input))); + } + + cursor.advance(); + } + } + + // Candidate extractor + { + let cursor = &mut self.cursor.clone(); + + while cursor.pos < len { + if cursor.curr.is_ascii_whitespace() { + cursor.advance(); + continue; + } + + let before = cursor.pos; + match self.candidate_machine.next(cursor) { + MachineState::Done(span) => { + in_flight_spans.push(span); + extract_sub_candidates(before..span.start, cursor, &mut in_flight_spans); + } + MachineState::Idle => { + extract_sub_candidates( + before..cursor.pos.min(cursor.input.len()), + cursor, + &mut in_flight_spans, + ); + } + } + + cursor.advance(); + } + + // Commit the remaining in-flight spans as extracted candidates + if !in_flight_spans.is_empty() { + extracted.extend( + drop_covered_spans(in_flight_spans) + .iter() + .map(|span| Extracted::Candidate(span.slice(self.cursor.input))), + ); + } + } + + extracted + } +} + +// Extract sub-candidates from a given range. +// +// E.g.: `[ClassPrefix('gap-y-4')]` will not be a valid candidate or variant. 
In that case we want +// to extract candidates from inside the `[โฆ]`. +// +// ``` +// [ClassPrefix('gap-y-4')] +// ^ Try again here +// ``` +#[inline(always)] +fn extract_sub_candidates( + range: std::ops::Range<usize>, + cursor: &cursor::Cursor<'_>, + in_flight_spans: &mut Vec<Span>, +) { + let end = range.end; + for i in range { + if cursor.input[i] == b'[' { + let mut cursor = cursor.clone(); + cursor.move_to(i + 1); + + let mut machine = CandidateMachine::default(); + + while cursor.pos < end { + if let MachineState::Done(span) = machine.next(&mut cursor) { + in_flight_spans.push(span); + } + + cursor.advance(); + } + } + } +} + +fn drop_covered_spans(mut spans: Vec<Span>) -> Vec<Span> { + if spans.len() <= 1 { + return spans; + } + + spans.sort_by(|a, b| a.start.cmp(&b.start).then(b.end.cmp(&a.end))); + + let mut result = Vec::with_capacity(spans.len()); + let mut max_end = None; + + for span in spans { + if max_end.is_none_or(|end| span.end > end) { + result.push(span); + max_end = Some(span.end); + } + } + + result +} + +#[cfg(test)] +mod tests { + use super::{Extracted, Extractor}; + use crate::throughput::Throughput; + use std::hint::black_box; + + fn pre_process_input(input: &str, extension: &str) -> String { + let input = crate::pre_process_input(input.as_bytes(), extension); + String::from_utf8(input).unwrap() + } + + fn extract_sorted_candidates(input: &str) -> Vec<&str> { + let mut machine = Extractor::new(input.as_bytes()); + let mut actual = machine + .extract() + .iter() + .filter_map(|x| match x { + Extracted::Candidate(candidate) => std::str::from_utf8(candidate).ok(), + Extracted::CssVariable(_) => None, + }) + .collect::<Vec<_>>(); + actual.sort(); + actual + } + + fn extract_sorted_css_variables(input: &str) -> Vec<&str> { + let mut machine = Extractor::new(input.as_bytes()); + let mut actual = machine + .extract() + .iter() + .filter_map(|x| match x { + Extracted::Candidate(_) => None, + Extracted::CssVariable(bytes) => 
std::str::from_utf8(bytes).ok(), + }) + .collect::<Vec<_>>(); + actual.sort(); + actual + } + + fn assert_extract_sorted_candidates(input: &str, expected: Vec<&str>) { + let mut actual = extract_sorted_candidates(input); + actual.sort(); + actual.dedup(); + + let mut expected = expected; + expected.sort(); + expected.dedup(); + + if actual != expected { + dbg!(&input, &actual, &expected); + } + assert_eq!(actual, expected); + } + + fn assert_extract_sorted_css_variables(input: &str, expected: Vec<&str>) { + let actual = extract_sorted_css_variables(input); + + let mut expected = expected; + expected.sort(); + + if actual != expected { + dbg!(&input, &actual, &expected); + } + assert_eq!(actual, expected); + } + + #[test] + #[ignore] + fn test_extract_performance() { + if true { + let iterations = 50_000; + + let input = include_bytes!("../fixtures/example.html"); + + let throughput = Throughput::compute(iterations, input.len(), || { + let mut extractor = Extractor::new(input); + _ = black_box(extractor.extract()); + }); + eprintln!("Extractor throughput: {:}", throughput); + + let mut extractor = Extractor::new(input); + let start = std::time::Instant::now(); + _ = black_box(extractor.extract().len()); + let end = start.elapsed(); + eprintln!("Extractor took: {:?}", end); + + todo!(); + } + } + + #[test] + fn test_candidates_extraction() { + for (input, expected) in [ + // Simple utility + ("flex", vec!["flex"]), + // Single character utility + ("a", vec!["a"]), + // Simple variant with simple utility + ("hover:flex", vec!["hover:flex"]), + // Multiple utilities + ("flex block", vec!["flex", "block"]), + // Simple utility with dashes + ("items-center", vec!["items-center"]), + // Simple utility with numbers + ("px-2.5", vec!["px-2.5"]), + // Arbitrary properties + ("[color:red]", vec!["[color:red]"]), + ("![color:red]", vec!["![color:red]"]), + ("[color:red]!", vec!["[color:red]!"]), + ("[color:red]/20", vec!["[color:red]/20"]), + ("![color:red]/20", 
vec!["![color:red]/20"]), + ("[color:red]/20!", vec!["[color:red]/20!"]), + // In HTML + ( + r#"<div class="flex items-center px-2.5 bg-[#0088cc] text-(--my-color)"></div>"#, + vec![ + "flex", + "items-center", + "px-2.5", + "bg-[#0088cc]", + "text-(--my-color)", + ], + ), + // In an array, looks like an arbitrary property (because it starts with `[`). + (r#"["flex"]"#, vec!["flex"]), + (r#"["p-2.5"]"#, vec!["p-2.5"]), + (r#"["flex","p-2.5"]"#, vec!["flex", "p-2.5"]), + (r#"["flex", "p-2.5"]"#, vec!["flex", "p-2.5"]), + // Overlapping candidates, outer candidate should win + ( + r#"[CssClass("[&:hover]:flex",'italic')]"#, + vec!["[&:hover]:flex", "italic"], + ), + ( + r#"["flex",["italic",["underline"]]]"#, + vec!["flex", "italic", "underline"], + ), + (r#"[:is(italic):is(underline)]"#, vec![]), + ( + r#"[:is(italic):is(underline)]:flex"#, + vec!["[:is(italic):is(underline)]:flex"], + ), + // Clojure syntax. See: https://github.com/tailwindlabs/tailwindcss/issues/16189#issuecomment-2642438176 + (r#"[:div {:class ["p-2"]}"#, vec!["p-2"]), + ( + r#"[:div {:class ["p-2" "text-green"]}"#, + vec!["p-2", "text-green"], + ), + (r#"[:div {:class ["p-2""#, vec!["p-2"]), + (r#" "text-green"]}"#, vec!["text-green"]), + (r#"[:div.p-2]"#, vec!["p-2"]), + // Longer example with mixed types of variants and utilities + ( + "[&>[data-slot=icon]:last-child]:right-2.5", + vec!["[&>[data-slot=icon]:last-child]:right-2.5"], + ), + ( + "sm:[&>[data-slot=icon]:last-child]:right-2.5", + vec!["sm:[&>[data-slot=icon]:last-child]:right-2.5"], + ), + // -------------------------------------------------------- + + // Exceptions: + // + // Keys inside of a JS object could be a variant-less candidate. Vue example. + ("{ underline: true }", vec!["underline", "true"]), + ( + r#" <CheckIcon className={clsx('h-4 w-4', { invisible: index !== 0 })} />"#, + vec!["h-4", "w-4", "invisible", "index"], + ), + // You can have variants but in a string. Vue example. 
// Table-driven check of whole-candidate extraction.
//
// Every case is spliced into every wrapper (at the `{}` marker) and the extractor must then
// report the candidates of the case itself plus the candidates the wrapper text contributes.
#[test]
fn test_extractor_extract_candidates() {
    // `(wrapper, candidates contributed by the wrapper itself)`
    //
    // NOTE(review): the three "leading spaces" entries and the three "trailing spaces" entries
    // are identical as they appear here; presumably they were meant to differ in the number of
    // spaces — confirm against the original file.
    let wrappers = [
        // No wrapper
        ("{}", vec![]),
        // With leading spaces
        (" {}", vec![]),
        (" {}", vec![]),
        (" {}", vec![]),
        // With trailing spaces
        ("{} ", vec![]),
        ("{} ", vec![]),
        ("{} ", vec![]),
        // Surrounded by spaces
        (" {} ", vec![]),
        // Inside a string
        ("'{}'", vec![]),
        // Inside a function call
        ("fn('{}')", vec![]),
        // Inside nested function calls
        ("fn1(fn2('{}'))", vec![]),
        // --------------------------
        //
        // HTML
        // Inside a class (on its own)
        (r#"<div class="{}"></div>"#, vec![]),
        // Inside a class (first)
        (r#"<div class="{} foo"></div>"#, vec!["foo"]),
        // Inside a class (second)
        (r#"<div class="foo {}"></div>"#, vec!["foo"]),
        // Inside a class (surrounded)
        (r#"<div class="foo {} bar"></div>"#, vec!["foo", "bar"]),
        // --------------------------
        //
        // JavaScript
        // Inside a variable
        (r#"let classes = '{}';"#, vec!["let", "classes"]),
        // Inside an object (key)
        (
            r#"let classes = { '{}': true };"#,
            vec!["let", "classes", "true"],
        ),
        // Inside an object (no spaces, key)
        (r#"let classes = {'{}':true};"#, vec!["let", "classes"]),
        // Inside an object (value)
        (
            r#"let classes = { primary: '{}' };"#,
            vec!["let", "classes", "primary"],
        ),
        // Inside an object (no spaces, value)
        (r#"let classes = {primary:'{}'};"#, vec!["let", "classes"]),
        // Inside an array
        (r#"let classes = ['{}'];"#, vec!["let", "classes"]),
    ];

    // `(case input, candidates the case itself must yield)`
    let cases = [
        // Simple utility
        ("flex", vec!["flex"]),
        // Simple utility with special character(s)
        ("@container", vec!["@container"]),
        // Single character utility
        ("a", vec!["a"]),
        // Simple utility with dashes
        ("items-center", vec!["items-center"]),
        // Simple utility with numbers
        ("px-2.5", vec!["px-2.5"]),
        // Simple variant with simple utility
        ("hover:flex", vec!["hover:flex"]),
        // Arbitrary properties
        ("[color:red]", vec!["[color:red]"]),
        ("![color:red]", vec!["![color:red]"]),
        ("[color:red]!", vec!["[color:red]!"]),
        ("[color:red]/20", vec!["[color:red]/20"]),
        ("![color:red]/20", vec!["![color:red]/20"]),
        ("[color:red]/20!", vec!["[color:red]/20!"]),
        // With multiple variants
        ("hover:focus:flex", vec!["hover:focus:flex"]),
        // Exceptions:
        //
        // Keys inside of a JS object could be a variant-less candidate. Vue example.
        ("{ underline: true }", vec!["underline", "true"]),
        // With complex variants
        (
            "[&>[data-slot=icon]:last-child]:right-2.5",
            vec!["[&>[data-slot=icon]:last-child]:right-2.5"],
        ),
        // With multiple (complex) variants
        (
            "[&>[data-slot=icon]:last-child]:sm:right-2.5",
            vec!["[&>[data-slot=icon]:last-child]:sm:right-2.5"],
        ),
        (
            "sm:[&>[data-slot=icon]:last-child]:right-2.5",
            vec!["sm:[&>[data-slot=icon]:last-child]:right-2.5"],
        ),
        // Exceptions regarding boundaries
        //
        // `flex!` is valid, but since it's followed by a non-boundary character it's invalid.
        // `block` is therefore also invalid because it didn't start after a boundary.
        ("flex!block", vec![]),
    ];

    for (source, candidates) in cases {
        for (wrapper, extra) in &wrappers {
            let haystack = wrapper.replace("{}", source);

            let mut wanted = candidates.clone();
            wanted.extend(extra.iter().copied());

            assert_extract_sorted_candidates(&haystack, wanted);
        }
    }
}
// Pug syntax, see: https://github.com/tailwindlabs/tailwindcss/issues/14005
//
// Every snippet goes through `pre_process_input(.., "pug")` before candidates are extracted.
#[test]
fn test_pug_syntax() {
    let cases = [
        // Class literal
        (
            ".bg-green-300.2xl:bg-red-500",
            vec!["bg-green-300", "2xl:bg-red-500"],
        ),
        (
            ".2xl:bg-red-500.bg-green-300",
            vec!["bg-green-300", "2xl:bg-red-500"],
        ),
        (".xl:col-span-2.xl:pr-8", vec!["xl:col-span-2", "xl:pr-8"]),
        (
            "div.2xl:bg-red-500.bg-green-300",
            vec!["div", "bg-green-300", "2xl:bg-red-500"],
        ),
        // Quoted attribute
        (
            r#"input(type="checkbox" class="px-2.5")"#,
            vec!["checkbox", "px-2.5"],
        ),
    ];

    for (source, candidates) in cases {
        let processed = pre_process_input(source, "pug");
        assert_extract_sorted_candidates(&processed, candidates);
    }
}

// Slim syntax, see: https://github.com/tailwindlabs/tailwindcss/issues/16790
//
// Every snippet goes through `pre_process_input(.., "slim")` before candidates are extracted.
#[test]
fn test_slim_syntax() {
    let cases = [
        // Class literal
        (
            ".bg-blue-100.2xl:bg-red-100",
            vec!["bg-blue-100", "2xl:bg-red-100"],
        ),
        (
            ".2xl:bg-red-100.bg-blue-100",
            vec!["bg-blue-100", "2xl:bg-red-100"],
        ),
        // Quoted attribute
        (r#"div class="px-2.5""#, vec!["div", "px-2.5"]),
    ];

    for (source, candidates) in cases {
        let processed = pre_process_input(source, "slim");
        assert_extract_sorted_candidates(&processed, candidates);
    }
}
// Clojure syntax, see: https://github.com/tailwindlabs/tailwindcss/issues/16189#issuecomment-2642438176
//
// Hiccup-style vectors, including snippets that are cut off in the middle of a vector.
#[test]
fn test_clojure_syntax() {
    let cases = [
        (r#"[:div {:class ["p-2"]}"#, vec!["p-2"]),
        (
            r#"[:div {:class ["p-2" "text-green"]}"#,
            vec!["p-2", "text-green"],
        ),
        (r#"[:div {:class ["p-2""#, vec!["p-2"]),
        (r#" "text-green"]}"#, vec!["text-green"]),
        (r#"[:div.p-2]"#, vec!["p-2"]),
    ];

    for (source, candidates) in cases {
        assert_extract_sorted_candidates(source, candidates);
    }
}

// Gleam syntax, see: https://github.com/tailwindlabs/tailwindcss/issues/15632#issuecomment-2617431021
//
// Class strings passed as function arguments inside a list literal.
#[test]
fn test_gleam_syntax() {
    let cases = [
        (r#"html.div([attribute.class("py-10")], [])"#, vec!["py-10"]),
        (
            r#"html.div([attribute.class("hover:py-10")], [])"#,
            vec!["hover:py-10"],
        ),
    ];

    for (source, candidates) in cases {
        assert_extract_sorted_candidates(source, candidates);
    }
}
// Plain JavaScript: class names inside strings, arrays and function calls. The surrounding
// identifiers (`let`, `classes`) are expected to be reported as candidates as well.
#[test]
fn test_js_syntax() {
    let cases = [
        // String
        (
            r#"let classes = 'flex items-center';"#,
            vec!["let", "classes", "flex", "items-center"],
        ),
        // Array
        (
            r#"let classes = ['flex', 'items-center'];"#,
            vec!["let", "classes", "flex", "items-center"],
        ),
        // Minified array
        (
            r#"let classes = ['flex','items-center'];"#,
            vec!["let", "classes", "flex", "items-center"],
        ),
        // Function call
        (
            r#"let classes = something('flex');"#,
            vec!["let", "classes", "flex"],
        ),
        // Function call in array
        (
            r#"let classes = [wrapper('flex')]"#,
            vec!["let", "classes", "flex"],
        ),
    ];

    for (source, candidates) in cases {
        assert_extract_sorted_candidates(source, candidates);
    }
}

// See: https://github.com/tailwindlabs/tailwindcss/issues/16750
//
// A tuple whose first element is a utility with a deeply nested arbitrary value.
//
// NOTE(review): the "split" inputs are raw strings, so the `\n` in them is a literal backslash
// followed by `n`, not a line break — confirm that this is the input the "split" cases are
// meant to exercise.
#[test]
fn test_js_tuple_syntax() {
    let cases = [
        // Split
        (
            r#"["h-[calc(100vh-(var(--spacing)*8)-(var(--spacing)*14))]",\n true],"#,
            vec![
                "h-[calc(100vh-(var(--spacing)*8)-(var(--spacing)*14))]",
                "true",
            ],
        ),
        // Same line
        (
            r#"["h-[calc(100vh-(var(--spacing)*8)-(var(--spacing)*14))]", true],"#,
            vec![
                "h-[calc(100vh-(var(--spacing)*8)-(var(--spacing)*14))]",
                "true",
            ],
        ),
        // Split with space in front
        (
            r#"[ "h-[calc(100vh-(var(--spacing)*8)-(var(--spacing)*14))]",\n true],"#,
            vec![
                "h-[calc(100vh-(var(--spacing)*8)-(var(--spacing)*14))]",
                "true",
            ],
        ),
    ];

    for (source, candidates) in cases {
        assert_extract_sorted_candidates(source, candidates);
    }
}
// Angular `[class.<name>]="expr"` attribute bindings: the class name after `class.` must be
// extracted, even when it contains dots or square brackets of its own.
#[test]
fn test_angular_binding_attribute_syntax() {
    let cases = [
        // Simple class
        (
            r#"<div [class.underline]="bool"></div>"#,
            vec!["underline", "bool"],
        ),
        // With additional dots
        (
            r#"<div [class.px-2.5]="bool"></div>"#,
            vec!["px-2.5", "bool"],
        ),
        // With additional square brackets
        (
            r#"<div [class.bg-[#0088cc]]="bool"></div>"#,
            vec!["bg-[#0088cc]", "bool"],
        ),
    ];

    for (source, candidates) in cases {
        assert_extract_sorted_candidates(source, candidates);
    }
}

// Svelte `class:<name>` shorthand, run through `pre_process_input(.., "svelte")` first.
#[test]
fn test_svelte_shorthand_syntax() {
    let processed = pre_process_input(r#"<div class:px-4='condition'></div>"#, "svelte");
    let candidates = vec!["class", "px-4", "condition"];

    assert_extract_sorted_candidates(&processed, candidates);
}
// With leading spaces + " {}", + // With trailing spaces + "{} ", + // Surrounded by spaces + " {} ", + // Inside a string + "'{}'", + // Inside a function call + "fn({})", + // Inside nested function calls + "fn1(fn2({}))", + // -------------------------- + // + // HTML + // Inside a class (on its own) + r#"<div class="{}"></div>"#, + // Inside a class (first) + r#"<div class="{} foo"></div>"#, + // Inside a class (second) + r#"<div class="foo {}"></div>"#, + // Inside a class (surrounded) + r#"<div class="foo {} bar"></div>"#, + // Inside an arbitrary property + r#"<div class="[{}:red]"></div>"#, + // -------------------------- + // + // JavaScript + // Inside a variable + r#"let classes = '{}';"#, + // Inside an object (key) + r#"let classes = { '{}': true };"#, + // Inside an object (no spaces, key) + r#"let classes = {'{}':true};"#, + // Inside an object (value) + r#"let classes = { primary: '{}' };"#, + // Inside an object (no spaces, value) + r#"let classes = {primary:'{}'};"#, + // Inside an array + r#"let classes = ['{}'];"#, + ] { + let input = wrapper.replace("{}", input); + + assert_extract_sorted_css_variables(&input, expected.clone()); + } + } + } +} diff --git a/crates/oxide/src/extractor/modifier_machine.rs b/crates/oxide/src/extractor/modifier_machine.rs new file mode 100644 index 000000000000..6a64f1424c3f --- /dev/null +++ b/crates/oxide/src/extractor/modifier_machine.rs @@ -0,0 +1,197 @@ +use crate::cursor; +use crate::extractor::arbitrary_value_machine::ArbitraryValueMachine; +use crate::extractor::arbitrary_variable_machine::ArbitraryVariableMachine; +use crate::extractor::machine::{Machine, MachineState}; + +/// Extract modifiers from an input including the `/`. 
///
/// E.g.:
///
/// ```text
/// bg-red-500/20
///           ^^^
///
/// bg-red-500/[20%]
///           ^^^^^^
///
/// bg-red-500/(--my-opacity)
///           ^^^^^^^^^^^^^^^
/// ```
///
/// Three shapes are recognised after the `/`: a named modifier (letters, digits and `-`, `_`,
/// `.` in between), an arbitrary value in `[...]`, and an arbitrary variable in `(...)`. The
/// latter two are delegated to the nested machines stored in this struct.
#[derive(Debug, Default)]
pub struct ModifierMachine {
    /// Reads the `[...]` part of a modifier such as `/[20%]`
    arbitrary_value_machine: ArbitraryValueMachine,

    /// Reads the `(...)` part of a modifier such as `/(--my-opacity)`
    arbitrary_variable_machine: ArbitraryVariableMachine,
}

impl Machine for ModifierMachine {
    /// Intentionally empty: the machine keeps no position or state of its own between calls, its
    /// only fields are the two nested machines.
    #[inline(always)]
    fn reset(&mut self) {}

    /// Tries to read one modifier that starts at the cursor's current byte.
    ///
    /// Returns [`MachineState::Idle`] without moving the cursor when the current byte is not `/`.
    /// Otherwise the `/` is consumed and the byte that follows decides how the remainder is read
    /// (`[`, `(`, or an ASCII letter / digit). Successful reads are reported through
    /// `self.done(start_pos, cursor)` where `start_pos` is the position of the `/`; failed reads go
    /// through `self.restart()`. Both helpers come from the `Machine` trait, which is defined
    /// outside of this file.
    #[inline]
    fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState {
        // A modifier must start with a `/`, everything else is not a valid start of a modifier
        if CLASS_TABLE[cursor.curr as usize] != Class::Slash {
            return MachineState::Idle;
        }

        // Remember where the `/` is, then step onto the first byte after it
        let start_pos = cursor.pos;
        cursor.advance();

        match CLASS_TABLE[cursor.curr as usize] {
            // Start of an arbitrary value:
            //
            // ```
            // bg-red-500/[20%]
            //            ^^^^^
            // ```
            Class::OpenBracket => match self.arbitrary_value_machine.next(cursor) {
                MachineState::Idle => self.restart(),
                MachineState::Done(_) => self.done(start_pos, cursor),
            },

            // Start of an arbitrary variable:
            //
            // ```
            // bg-red-500/(--my-opacity)
            //            ^^^^^^^^^^^^^^
            // ```
            Class::OpenParen => match self.arbitrary_variable_machine.next(cursor) {
                MachineState::Idle => self.restart(),
                MachineState::Done(_) => self.done(start_pos, cursor),
            },

            // Start of a named modifier:
            //
            // ```
            // bg-red-500/20
            //            ^^
            // ```
            Class::ValidStart => {
                let len = cursor.input.len();
                while cursor.pos < len {
                    match CLASS_TABLE[cursor.curr as usize] {
                        Class::ValidStart | Class::ValidInside => {
                            // The decision is made by looking one byte ahead, so the cursor never
                            // moves past the last byte of the modifier.
                            match CLASS_TABLE[cursor.next as usize] {
                                // Only valid characters are allowed, if followed by another valid character
                                Class::ValidStart | Class::ValidInside => cursor.advance(),

                                // Valid character, but at the end of the modifier, this ends the
                                // modifier
                                _ => return self.done(start_pos, cursor),
                            }
                        }

                        // Anything else is invalid, end of the modifier
                        _ => return self.restart(),
                    }
                }

                // Only reached when the loop condition fails, i.e. the cursor position is no
                // longer inside the input
                MachineState::Idle
            }

            // Anything else is not a valid start of a modifier
            _ => MachineState::Idle,
        }
    }
}

/// Byte classes the modifier machine distinguishes between.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Class {
    /// `'a'..='z' | 'A'..='Z' | '0'..='9'`
    ValidStart,

    /// `-`, `_`, `.`
    ValidInside,

    /// `[`
    OpenBracket,

    /// `(`
    OpenParen,

    /// `/`
    Slash,

    /// Every byte that is not listed above
    Other,
}

/// Lookup table from byte value to [`Class`], filled in at compile time.
///
/// Every entry starts out as [`Class::Other`] and is then overwritten for the bytes that have a
/// dedicated class.
const CLASS_TABLE: [Class; 256] = {
    let mut table = [Class::Other; 256];

    // Assigns `$class` to each of the listed bytes
    macro_rules! set {
        ($class:expr, $($byte:expr),+ $(,)?) => {
            $(table[$byte as usize] = $class;)+
        };
    }

    // Assigns `$class` to every byte in the inclusive range. Written as a `while` loop, which is
    // usable inside a `const` initializer.
    macro_rules! set_range {
        ($class:expr, $start:literal ..= $end:literal) => {
            let mut i = $start;
            while i <= $end {
                table[i as usize] = $class;
                i += 1;
            }
        };
    }

    set_range!(Class::ValidStart, b'a'..=b'z');
    set_range!(Class::ValidStart, b'A'..=b'Z');
    set_range!(Class::ValidStart, b'0'..=b'9');

    set!(Class::OpenBracket, b'[');
    set!(Class::OpenParen, b'(');

    set!(Class::Slash, b'/');

    set!(Class::ValidInside, b'-', b'_', b'.');

    table
};
use crate::cursor;
use crate::extractor::arbitrary_value_machine::ArbitraryValueMachine;
use crate::extractor::arbitrary_variable_machine::ArbitraryVariableMachine;
use crate::extractor::machine::{Machine, MachineState};

/// Extracts named utilities from an input.
///
/// E.g.:
///
/// ```text
/// flex
/// ^^^^
///
/// bg-red-500
/// ^^^^^^^^^^
/// ```
///
/// A utility may end in an arbitrary value (`bg-[#0088cc]`) or an arbitrary variable
/// (`bg-(--my-color)`); those parts are delegated to the nested machines stored in this struct.
#[derive(Debug, Default)]
pub struct NamedUtilityMachine {
    /// Start position of the utility
    start_pos: usize,

    /// Current state of the machine
    state: State,

    /// Reads the `(...)` part of a utility such as `bg-(--my-color)`
    arbitrary_variable_machine: ArbitraryVariableMachine,

    /// Reads the `[...]` part of a utility such as `bg-[#0088cc]`
    arbitrary_value_machine: ArbitraryValueMachine,
}

/// The two phases of [`NamedUtilityMachine`].
#[derive(Debug, Default)]
enum State {
    /// Waiting for a byte that can start a utility
    #[default]
    Idle,

    /// Parsing a utility
    Parsing,
}

impl Machine for NamedUtilityMachine {
    /// Puts the machine back into its initial state: start position `0`, state `Idle`.
    #[inline(always)]
    fn reset(&mut self) {
        self.start_pos = 0;
        self.state = State::Idle;
    }

    /// Tries to read one named utility that starts at the cursor's current byte.
    ///
    /// In `Idle` the current byte must be a lowercase letter, `@`, or a `-` that is followed by a
    /// lowercase letter; anything else yields [`MachineState::Idle`]. In `Parsing` the bytes are
    /// validated one by one, each time also looking at the previous and / or next byte. A
    /// successful read is reported through `self.done(..)`, a failed one through
    /// `self.restart()`. Both helpers come from the `Machine` trait, which is defined outside of
    /// this file.
    #[inline]
    fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState {
        let len = cursor.input.len();

        match self.state {
            State::Idle => match CLASS_TABLE[cursor.curr as usize] {
                Class::AlphaLower => match CLASS_TABLE[cursor.next as usize] {
                    // Valid single character utility in between quotes
                    //
                    // E.g.: `<div class="a"></div>`
                    //                    ^
                    // E.g.: `<div class="a "></div>`
                    //                    ^
                    // E.g.: `<div class=" a"></div>`
                    //                     ^
                    Class::Whitespace | Class::Quote | Class::End => self.done(cursor.pos, cursor),

                    // Valid start characters
                    //
                    // E.g.: `flex`
                    //        ^
                    _ => {
                        self.start_pos = cursor.pos;
                        self.state = State::Parsing;
                        cursor.advance();
                        self.next(cursor)
                    }
                },

                // Valid start characters
                //
                // E.g.: `@container`
                //        ^
                Class::At => {
                    self.start_pos = cursor.pos;
                    self.state = State::Parsing;
                    cursor.advance();
                    self.next(cursor)
                }

                // Valid start of a negative utility, if followed by another set of valid
                // characters. `@` as a second character is invalid.
                //
                // E.g.: `-mx-2.5`
                //        ^^
                Class::Dash => match CLASS_TABLE[cursor.next as usize] {
                    Class::AlphaLower => {
                        self.start_pos = cursor.pos;
                        self.state = State::Parsing;
                        cursor.advance();
                        self.next(cursor)
                    }

                    // A dash should not be followed by anything else
                    _ => MachineState::Idle,
                },

                // Everything else, is not a valid start of the utility.
                _ => MachineState::Idle,
            },

            State::Parsing => {
                while cursor.pos < len {
                    match CLASS_TABLE[cursor.curr as usize] {
                        // Followed by a boundary character, we are at the end of the utility.
                        //
                        // E.g.: `'flex'`
                        //            ^
                        // E.g.: `<div class="flex items-center">`
                        //                       ^
                        // E.g.: `[flex]` (Angular syntax)
                        //            ^
                        // E.g.: `[class.flex.items-center]` (Angular syntax)
                        //                  ^
                        // E.g.: `:div="{ flex: true }"` (JavaScript object syntax)
                        //                   ^
                        Class::AlphaLower | Class::AlphaUpper => {
                            match CLASS_TABLE[cursor.next as usize] {
                                Class::Quote
                                | Class::Whitespace
                                | Class::CloseBracket
                                | Class::Dot
                                | Class::Colon
                                | Class::End
                                | Class::Slash
                                | Class::Exclamation => return self.done(self.start_pos, cursor),

                                // Still valid characters
                                _ => cursor.advance(),
                            }
                        }

                        Class::Dash => match CLASS_TABLE[cursor.next as usize] {
                            // Start of an arbitrary value
                            //
                            // E.g.: `bg-[#0088cc]`
                            //          ^^
                            Class::OpenBracket => {
                                cursor.advance();
                                return match self.arbitrary_value_machine.next(cursor) {
                                    MachineState::Idle => self.restart(),
                                    MachineState::Done(_) => self.done(self.start_pos, cursor),
                                };
                            }

                            // Start of an arbitrary variable
                            //
                            // E.g.: `bg-(--my-color)`
                            //          ^^
                            Class::OpenParen => {
                                cursor.advance();
                                return match self.arbitrary_variable_machine.next(cursor) {
                                    MachineState::Idle => self.restart(),
                                    MachineState::Done(_) => self.done(self.start_pos, cursor),
                                };
                            }

                            // A dash is a valid character if it is followed by another valid
                            // character.
                            //
                            // E.g.: `flex-`
                            //            ^ Invalid
                            // E.g.: `flex-!`
                            //            ^ Invalid
                            // E.g.: `flex-/`
                            //            ^ Invalid
                            // E.g.: `flex-2`
                            //            ^ Valid
                            Class::AlphaLower | Class::AlphaUpper | Class::Number => {
                                cursor.advance();
                            }

                            // Everything else is invalid
                            _ => return self.restart(),
                        },

                        Class::Underscore => match CLASS_TABLE[cursor.next as usize] {
                            // Valid characters _if_ followed by another valid character. These characters are
                            // only valid inside of the utility but not at the end of the utility.
                            //
                            // E.g.: `custom_`
                            //              ^ Invalid
                            // E.g.: `custom_!`
                            //              ^ Invalid
                            // E.g.: `custom_/`
                            //              ^ Invalid
                            // E.g.: `custom_2`
                            //              ^ Valid
                            //
                            // NOTE(review): the boundary arm further down finishes the utility
                            // when `_` is followed by the end of input, `!` or `/`, which does not
                            // match the "Invalid" examples above — confirm which of the two is
                            // the intended behavior.
                            Class::AlphaLower
                            | Class::AlphaUpper
                            | Class::Number
                            | Class::Underscore => {
                                cursor.advance();
                            }

                            // Followed by a boundary character, we are at the end of the utility.
                            //
                            // E.g.: `'flex'`
                            //            ^
                            // E.g.: `<div class="flex items-center">`
                            //                       ^
                            // E.g.: `[flex]` (Angular syntax)
                            //            ^
                            // E.g.: `[class.flex.items-center]` (Angular syntax)
                            //                  ^
                            // E.g.: `:div="{ flex: true }"` (JavaScript object syntax)
                            //                   ^
                            Class::Quote
                            | Class::Whitespace
                            | Class::CloseBracket
                            | Class::Dot
                            | Class::Colon
                            | Class::End
                            | Class::Slash
                            | Class::Exclamation => return self.done(self.start_pos, cursor),

                            // Everything else is invalid
                            _ => return self.restart(),
                        },

                        // A dot must be surrounded by numbers
                        //
                        // E.g.: `px-2.5`
                        //           ^^^
                        Class::Dot => {
                            if !matches!(CLASS_TABLE[cursor.prev as usize], Class::Number) {
                                return self.restart();
                            }

                            if !matches!(CLASS_TABLE[cursor.next as usize], Class::Number) {
                                return self.restart();
                            }

                            cursor.advance();
                        }

                        // A number must be preceded by a `-`, `.` or another alphanumeric
                        // character, and can be followed by a `.` or an alphanumeric character.
                        //
                        // E.g.: `text-2xs`
                        //            ^^
                        //       `p-2.5`
                        //         ^^
                        //       `bg-red-500`
                        //              ^^
                        // It can also be followed by a %, but that will be considered the end of
                        // the candidate.
                        //
                        // E.g.: `from-15%`
                        //               ^
                        //
                        // NOTE(review): the "preceded by" check below accepts lowercase letters
                        // only, so a digit directly after an uppercase letter restarts the
                        // machine — confirm whether "alphanumeric" above is meant to include
                        // uppercase letters.
                        Class::Number => {
                            if !matches!(
                                CLASS_TABLE[cursor.prev as usize],
                                Class::Dash | Class::Dot | Class::Number | Class::AlphaLower
                            ) {
                                return self.restart();
                            }

                            // Any follower outside of this list ends the utility right here, on
                            // the digit
                            if !matches!(
                                CLASS_TABLE[cursor.next as usize],
                                Class::Dot
                                    | Class::Number
                                    | Class::AlphaLower
                                    | Class::AlphaUpper
                                    | Class::Percent
                            ) {
                                return self.done(self.start_pos, cursor);
                            }

                            cursor.advance();
                        }

                        // A percent sign must be preceded by a number.
                        //
                        // E.g.:
                        //
                        // ```
                        // from-15%
                        //       ^^
                        // ```
                        Class::Percent => {
                            if !matches!(CLASS_TABLE[cursor.prev as usize], Class::Number) {
                                return self.restart();
                            }

                            // A `%` always ends the utility
                            return self.done(self.start_pos, cursor);
                        }

                        // Everything else is invalid
                        _ => return self.restart(),
                    };
                }

                // The loop condition failed before an arm returned, i.e. the cursor position is
                // no longer inside the input
                self.restart()
            }
        }
    }
}

/// Byte classes the named utility machine distinguishes between.
#[derive(Clone, Copy)]
enum Class {
    /// `'a'..='z'`
    AlphaLower,

    /// `'A'..='Z'`
    AlphaUpper,

    /// `@`
    At,

    /// `:`
    Colon,

    /// `-`
    Dash,

    /// `.`
    Dot,

    /// `0x00`
    End,

    /// `!`
    Exclamation,

    /// `'0'..='9'`
    Number,

    /// `[`
    OpenBracket,

    /// `]`
    CloseBracket,

    /// `(`
    OpenParen,

    /// `%`
    Percent,

    /// ', ", or `
    Quote,

    /// `/`
    Slash,

    /// `_`
    Underscore,

    /// Whitespace characters: ' ', '\t', '\n', '\r', '\x0C'
    Whitespace,

    /// Anything else
    Other,
}

/// Lookup table from byte value to [`Class`], filled in at compile time.
///
/// Every entry starts out as [`Class::Other`] and is then overwritten for the bytes that have a
/// dedicated class.
const CLASS_TABLE: [Class; 256] = {
    let mut table = [Class::Other; 256];

    // Assigns `$class` to each of the listed bytes
    macro_rules! set {
        ($class:expr, $($byte:expr),+ $(,)?) => {
            $(table[$byte as usize] = $class;)+
        };
    }

    // Assigns `$class` to every byte in the inclusive range. Written as a `while` loop, which is
    // usable inside a `const` initializer.
    macro_rules! set_range {
        ($class:expr, $start:literal ..= $end:literal) => {
            let mut i = $start;
            while i <= $end {
                table[i as usize] = $class;
                i += 1;
            }
        };
    }

    set!(Class::At, b'@');
    set!(Class::Underscore, b'_');
    set!(Class::Dash, b'-');
    set!(Class::Whitespace, b' ', b'\t', b'\n', b'\r', b'\x0C');

    set!(Class::OpenBracket, b'[');
    set!(Class::CloseBracket, b']');

    set!(Class::OpenParen, b'(');

    set!(Class::Dot, b'.');
    set!(Class::Colon, b':');

    set!(Class::Percent, b'%');

    set!(Class::Quote, b'"', b'\'', b'`');

    set!(Class::Exclamation, b'!');
    set!(Class::Slash, b'/');

    set_range!(Class::AlphaLower, b'a'..=b'z');
    set_range!(Class::AlphaUpper, b'A'..=b'Z');
    set_range!(Class::Number, b'0'..=b'9');

    set!(Class::End, 0x00);

    table
};
// Arbitrary variable must be valid + (r"bg-(--my-color\)", vec![]), + (r"bg-(--my#color)", vec![]), + // Single letter utility with uppercase letter is invalid + ("A", vec![]), + // A dot must be in-between numbers + ("opacity-0.5", vec!["opacity-0.5"]), + ("opacity-.5", vec![]), + ("opacity-5.", vec![]), + // A number must be preceded by a `-`, `.` or another number + ("text-2xs", vec!["text-2xs"]), + // Random invalid utilities + ("-$", vec![]), + ("-_", vec![]), + ("-foo-", vec![]), + ("foo-=", vec![]), + ("foo-#", vec![]), + ("foo-!", vec![]), + ("foo-/20", vec![]), + ("-", vec![]), + ("--", vec![]), + ("---", vec![]), + ] { + for (wrapper, additional) in [ + // No wrapper + ("{}", vec![]), + // With leading spaces + (" {}", vec![]), + // With trailing spaces + ("{} ", vec![]), + // Surrounded by spaces + (" {} ", vec![]), + // Inside a string + ("'{}'", vec![]), + // Inside a function call + ("fn('{}')", vec![]), + // Inside nested function calls + ("fn1(fn2('{}'))", vec!["fn1", "fn2"]), + // -------------------------- + // + // HTML + // Inside a class (on its own) + (r#"<div class="{}"></div>"#, vec!["div"]), + // Inside a class (first) + (r#"<div class="{} foo"></div>"#, vec!["div", "foo"]), + // Inside a class (second) + (r#"<div class="foo {}"></div>"#, vec!["div", "foo"]), + // Inside a class (surrounded) + ( + r#"<div class="foo {} bar"></div>"#, + vec!["div", "foo", "bar"], + ), + // -------------------------- + // + // JavaScript + // Inside a variable + (r#"let classes = '{}';"#, vec!["let", "classes"]), + // Inside an object (key) + ( + r#"let classes = { '{}': true };"#, + vec!["let", "classes", "true"], + ), + // Inside an object (no spaces, key) + (r#"let classes = {'{}':true};"#, vec!["let", "classes"]), + // Inside an object (value) + ( + r#"let classes = { primary: '{}' };"#, + vec!["let", "classes", "primary"], + ), + // Inside an object (no spaces, value) + ( + r#"let classes = {primary:'{}'};"#, + vec!["let", "classes", "primary"], + ), + 
// Inside an array + (r#"let classes = ['{}'];"#, vec!["let", "classes"]), + ] { + let input = wrapper.replace("{}", input); + + let mut expected = expected.clone(); + expected.extend(additional); + expected.sort(); + + let mut actual = NamedUtilityMachine::test_extract_all(&input); + actual.sort(); + + if actual != expected { + dbg!(&input, &expected, &actual); + } + assert_eq!(actual, expected); + } + } + } +} diff --git a/crates/oxide/src/extractor/named_variant_machine.rs b/crates/oxide/src/extractor/named_variant_machine.rs new file mode 100644 index 000000000000..7baa5e151d0a --- /dev/null +++ b/crates/oxide/src/extractor/named_variant_machine.rs @@ -0,0 +1,424 @@ +use crate::cursor; +use crate::extractor::arbitrary_value_machine::ArbitraryValueMachine; +use crate::extractor::arbitrary_variable_machine::ArbitraryVariableMachine; +use crate::extractor::machine::{Machine, MachineState}; +use crate::extractor::modifier_machine::ModifierMachine; + +/// Extract named variants from an input including the `:`. 
+/// +/// E.g.: +/// +/// ```text +/// hover:flex +/// ^^^^^^ +/// +/// data-[state=pending]:flex +/// ^^^^^^^^^^^^^^^^^^^^^ +/// +/// supports-(--my-variable):flex +/// ^^^^^^^^^^^^^^^^^^^^^^^^^ +/// ``` +#[derive(Debug, Default)] +pub struct NamedVariantMachine { + /// Start position of the variant + start_pos: usize, + + /// Current state of the machine + state: State, + + arbitrary_variable_machine: ArbitraryVariableMachine, + arbitrary_value_machine: ArbitraryValueMachine, + modifier_machine: ModifierMachine, +} + +#[derive(Debug, Default)] +enum State { + #[default] + Idle, + + /// Parsing a variant + Parsing, + + /// Parsing a modifier + /// + /// E.g.: + /// + /// ```text + /// group-hover/name: + /// ^^^^^ + /// ``` + /// + ParsingModifier, + + /// Parsing the end of a variant + /// + /// E.g.: + /// + /// ```text + /// hover: + /// ^ + /// ``` + ParseEnd, +} + +impl Machine for NamedVariantMachine { + #[inline(always)] + fn reset(&mut self) { + self.start_pos = 0; + self.state = State::Idle; + } + + #[inline] + fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + let len = cursor.input.len(); + + match self.state { + State::Idle => match CLASS_TABLE[cursor.curr as usize] { + Class::AlphaLower | Class::Star => match CLASS_TABLE[cursor.next as usize] { + // Valid single character variant, must be followed by a `:` + // + // E.g.: `<div class="x:flex"></div>` + // ^^ + // E.g.: `*:` + // ^^ + Class::Colon => { + self.state = State::ParseEnd; + cursor.advance(); + self.next(cursor) + } + + // Valid start characters + // + // E.g.: `hover:` + // ^ + // E.g.: `**:` + // ^ + _ => { + self.start_pos = cursor.pos; + self.state = State::Parsing; + cursor.advance(); + self.next(cursor) + } + }, + + // Valid start characters + // + // E.g.: `2xl:` + // ^ + // E.g.: `@md:` + // ^ + Class::Number | Class::At => { + self.start_pos = cursor.pos; + self.state = State::Parsing; + cursor.advance(); + self.next(cursor) + } + + // Everything else, is not a 
valid start of the variant. + _ => MachineState::Idle, + }, + + State::Parsing => { + while cursor.pos < len { + match CLASS_TABLE[cursor.curr as usize] { + Class::Dash => match CLASS_TABLE[cursor.next as usize] { + // Start of an arbitrary value + // + // E.g.: `data-[state=pending]:`. + // ^^ + Class::OpenBracket => { + cursor.advance(); + + return match self.arbitrary_value_machine.next(cursor) { + MachineState::Idle => self.restart(), + MachineState::Done(_) => self.parse_arbitrary_end(cursor), + }; + } + + // Start of an arbitrary variable + // + // E.g.: `supports-(--my-color):`. + // ^^ + Class::OpenParen => { + cursor.advance(); + return match self.arbitrary_variable_machine.next(cursor) { + MachineState::Idle => self.restart(), + MachineState::Done(_) => self.parse_arbitrary_end(cursor), + }; + } + + // Valid characters _if_ followed by another valid character. These characters are + // only valid inside of the variant but not at the end of the variant. + // + // E.g.: `hover-` + // ^ Invalid + // E.g.: `hover-!` + // ^ Invalid + // E.g.: `hover-/` + // ^ Invalid + // E.g.: `flex-1` + // ^ Valid + Class::Dash + | Class::Underscore + | Class::AlphaLower + | Class::AlphaUpper + | Class::Number => cursor.advance(), + + // Everything else is invalid + _ => return self.restart(), + }, + + Class::Underscore => match CLASS_TABLE[cursor.next as usize] { + // Valid characters _if_ followed by another valid character. These characters are + // only valid inside of the variant but not at the end of the variant. 
+ // + // E.g.: `hover_` + // ^ Invalid + // E.g.: `hover_!` + // ^ Invalid + // E.g.: `hover_/` + // ^ Invalid + // E.g.: `custom_1` + // ^ Valid + Class::Dash + | Class::Underscore + | Class::AlphaLower + | Class::AlphaUpper + | Class::Number => cursor.advance(), + + // Everything else is invalid + _ => return self.restart(), + }, + + // Still valid characters + Class::AlphaLower | Class::AlphaUpper | Class::Number | Class::Star => { + cursor.advance(); + } + + // A `/` means we are at the end of the variant, but there might be a modifier + // + // E.g.: + // + // ``` + // group-hover/name: + // ^ + // ``` + Class::Slash => { + self.state = State::ParsingModifier; + return self.next(cursor); + } + + // A `:` means we are at the end of the variant + // + // E.g.: `hover:` + // ^ + Class::Colon => return self.done(self.start_pos, cursor), + + // Everything else is invalid + _ => return self.restart(), + }; + } + + self.restart() + } + + State::ParsingModifier => match self.modifier_machine.next(cursor) { + MachineState::Idle => self.restart(), + MachineState::Done(_) => match CLASS_TABLE[cursor.next as usize] { + // Modifier must be followed by a `:` + // + // E.g.: `group-hover/name:` + // ^ + Class::Colon => { + self.state = State::ParseEnd; + cursor.advance(); + self.next(cursor) + } + + // Everything else is invalid + _ => self.restart(), + }, + }, + + State::ParseEnd => match CLASS_TABLE[cursor.curr as usize] { + // The end of a variant must be the `:` + // + // E.g.: `hover:` + // ^ + Class::Colon => self.done(self.start_pos, cursor), + + // Everything else is invalid + _ => self.restart(), + }, + } + } +} + +impl NamedVariantMachine { + #[inline(always)] + fn parse_arbitrary_end(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + match CLASS_TABLE[cursor.next as usize] { + Class::Slash => { + self.state = State::ParsingModifier; + cursor.advance(); + self.next(cursor) + } + Class::Colon => { + self.state = State::ParseEnd; + cursor.advance(); + 
/// Byte categories stored in `CLASS_TABLE`, which maps every possible input
/// byte (0..=255) to one of these classes.
#[derive(Clone, Copy)]
enum Class {
    /// `'a'..='z'`
    AlphaLower,

    /// `'A'..='Z'`
    AlphaUpper,

    /// `@`
    At,

    /// `:`
    Colon,

    /// `-`
    Dash,

    /// `.`
    Dot,

    /// `0x00`
    End,

    /// `'0'..='9'`
    Number,

    /// `[`
    OpenBracket,

    /// `]`
    CloseBracket,

    /// `(`
    OpenParen,

    /// ', ", or `
    Quote,

    /// `*`
    Star,

    /// `/`
    Slash,

    /// `_`
    Underscore,

    /// Whitespace characters: ' ', '\t', '\n', '\r', '\x0C'
    Whitespace,

    /// Anything else
    Other,
}
/// A transformation that rewrites raw file content before it is scanned.
///
/// Implementors receive the content as bytes and return the rewritten bytes.
pub trait PreProcessor: Sized + Default {
    /// Rewrite `content` and return the result as a new buffer.
    fn process(&self, content: &[u8]) -> Vec<u8>;

    /// Test helper: run a default-constructed processor over `input` and
    /// assert that the output equals `expected` and that input, output and
    /// expectation all have the same length.
    #[cfg(test)]
    fn test(input: &str, expected: &str) {
        let output = Self::default().process(input.as_bytes());

        // Compare as text so that a failure prints something readable.
        let actual = String::from_utf8_lossy(&output);

        if actual != expected {
            dbg!((&input, &actual, &expected));
        }

        // The input and output should have the exact same length.
        assert_eq!(input.len(), actual.len());
        assert_eq!(actual.len(), expected.len());

        assert_eq!(actual, expected);
    }
}
=> { + result[cursor.pos] = b' '; + } + + // Consume everything else + _ => {} + }; + + cursor.advance(); + } + + result + } +} + +#[cfg(test)] +mod tests { + use super::Pug; + use crate::extractor::pre_processors::pre_processor::PreProcessor; + + #[test] + fn test_pug_pre_processor() { + for (input, expected) in [ + // Convert dots to spaces + ("div.flex.bg-red-500", "div flex bg-red-500"), + (".flex.bg-red-500", " flex bg-red-500"), + // Keep dots in strings + (r#"div(class="px-2.5")"#, r#"div(class="px-2.5")"#), + ] { + Pug::test(input, expected); + } + } +} diff --git a/crates/oxide/src/extractor/pre_processors/ruby.rs b/crates/oxide/src/extractor/pre_processors/ruby.rs new file mode 100644 index 000000000000..09e775b2b9cc --- /dev/null +++ b/crates/oxide/src/extractor/pre_processors/ruby.rs @@ -0,0 +1,117 @@ +// See: - https://docs.ruby-lang.org/en/3.4/syntax/literals_rdoc.html#label-Percent+Literals +// - https://docs.ruby-lang.org/en/3.4/syntax/literals_rdoc.html#label-25w+and+-25W-3A+String-Array+Literals +use crate::cursor; +use crate::extractor::bracket_stack; +use crate::extractor::pre_processors::pre_processor::PreProcessor; + +#[derive(Debug, Default)] +pub struct Ruby; + +impl PreProcessor for Ruby { + fn process(&self, content: &[u8]) -> Vec<u8> { + let len = content.len(); + let mut result = content.to_vec(); + let mut cursor = cursor::Cursor::new(content); + let mut bracket_stack = bracket_stack::BracketStack::default(); + + while cursor.pos < len { + // Looking for `%w` or `%W` + if cursor.curr != b'%' && !matches!(cursor.next, b'w' | b'W') { + cursor.advance(); + continue; + } + + cursor.advance_twice(); + + // Boundary character + let boundary = match cursor.curr { + b'[' => b']', + b'(' => b')', + _ => { + cursor.advance(); + continue; + } + }; + + bracket_stack.reset(); + + // Replace the current character with a space + result[cursor.pos] = b' '; + + // Skip the boundary character + cursor.advance(); + + while cursor.pos < len { + match 
cursor.curr { + // Skip escaped characters + b'\\' => { + // Use backslash to embed spaces in the strings. + if cursor.next == b' ' { + result[cursor.pos] = b' '; + } + + cursor.advance(); + } + + // Start of a nested bracket + b'[' | b'(' => { + bracket_stack.push(cursor.curr); + } + + // End of a nested bracket + b']' | b')' if !bracket_stack.is_empty() => { + if !bracket_stack.pop(cursor.curr) { + // Unbalanced + cursor.advance(); + } + } + + // End of the pattern, replace the boundary character with a space + _ if cursor.curr == boundary => { + result[cursor.pos] = b' '; + break; + } + + // Everything else is valid + _ => {} + } + + cursor.advance(); + } + } + + result + } +} + +#[cfg(test)] +mod tests { + use super::Ruby; + use crate::extractor::pre_processors::pre_processor::PreProcessor; + + #[test] + fn test_ruby_pre_processor() { + for (input, expected) in [ + // %w[โฆ] + ("%w[flex px-2.5]", "%w flex px-2.5 "), + ( + "%w[flex data-[state=pending]:bg-[#0088cc] flex-col]", + "%w flex data-[state=pending]:bg-[#0088cc] flex-col ", + ), + // %w(โฆ) + ("%w(flex px-2.5)", "%w flex px-2.5 "), + ( + "%w(flex data-[state=pending]:bg-(--my-color) flex-col)", + "%w flex data-[state=pending]:bg-(--my-color) flex-col ", + ), + // Use backslash to embed spaces in the strings. + (r#"%w[foo\ bar baz\ bat]"#, r#"%w foo bar baz bat "#), + (r#"%W[foo\ bar baz\ bat]"#, r#"%W foo bar baz bat "#), + // The nested delimiters evaluated to a flat array of strings + // (not nested array). 
+ (r#"%w[foo[bar baz]qux]"#, r#"%w foo[bar baz]qux "#), + ] { + Ruby::test(input, expected); + } + } +} diff --git a/crates/oxide/src/extractor/pre_processors/svelte.rs b/crates/oxide/src/extractor/pre_processors/svelte.rs new file mode 100644 index 000000000000..5834f7feb290 --- /dev/null +++ b/crates/oxide/src/extractor/pre_processors/svelte.rs @@ -0,0 +1,43 @@ +use crate::extractor::pre_processors::pre_processor::PreProcessor; +use bstr::ByteSlice; + +#[derive(Debug, Default)] +pub struct Svelte; + +impl PreProcessor for Svelte { + fn process(&self, content: &[u8]) -> Vec<u8> { + content + .replace(" class:", " class ") + .replace("\tclass:", " class ") + .replace("\nclass:", " class ") + } +} + +#[cfg(test)] +mod tests { + use super::Svelte; + use crate::extractor::pre_processors::pre_processor::PreProcessor; + + #[test] + fn test_svelte_pre_processor() { + for (input, expected) in [ + // Spaces + ( + "<div class:flex class:px-2.5={condition()}>", + "<div class flex class px-2.5={condition()}>", + ), + // Tabs + ( + "<div\tclass:flex class:px-2.5={condition()}>", + "<div class flex class px-2.5={condition()}>", + ), + // Newlines + ( + "<div\nclass:flex class:px-2.5={condition()}>", + "<div class flex class px-2.5={condition()}>", + ), + ] { + Svelte::test(input, expected); + } + } +} diff --git a/crates/oxide/src/extractor/string_machine.rs b/crates/oxide/src/extractor/string_machine.rs new file mode 100644 index 000000000000..2832ccfe2ac3 --- /dev/null +++ b/crates/oxide/src/extractor/string_machine.rs @@ -0,0 +1,151 @@ +use crate::cursor; +use crate::extractor::machine::{Machine, MachineState}; + +/// Extracts a string (including the quotes) from the input. +/// +/// Rules: +/// +/// - The string must start and end with the same quote character. +/// - The string cannot contain any whitespace characters. +/// - The string can contain any other character except for the quote character (unless it's escaped). +/// - Balancing of brackets is not required. 
+/// +/// +/// E.g.: +/// +/// ```text +/// 'hello_world' +/// ^^^^^^^^^^^^^ +/// +/// content-['hello_world'] +/// ^^^^^^^^^^^^^ +/// ``` +#[derive(Debug, Default)] +pub struct StringMachine; + +impl Machine for StringMachine { + #[inline(always)] + fn reset(&mut self) {} + + #[inline] + fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + if CLASS_TABLE[cursor.curr as usize] != Class::Quote { + return MachineState::Idle; + } + + // Start of a string + let len = cursor.input.len(); + let start_pos = cursor.pos; + let end_char = cursor.curr; + + cursor.advance(); + + while cursor.pos < len { + match CLASS_TABLE[cursor.curr as usize] { + Class::Escape => match CLASS_TABLE[cursor.next as usize] { + // An escaped whitespace character is not allowed + Class::Whitespace => return MachineState::Idle, + + // An escaped character, skip ahead to the next character + _ => cursor.advance(), + }, + + // End of the string + Class::Quote if cursor.curr == end_char => return self.done(start_pos, cursor), + + // Any kind of whitespace is not allowed + Class::Whitespace => return MachineState::Idle, + + // Everything else is valid + _ => {} + }; + + cursor.advance() + } + + MachineState::Idle + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +enum Class { + /// ', ", or ` + Quote, + + /// \ + Escape, + + /// Whitespace characters + Whitespace, + + Other, +} + +const CLASS_TABLE: [Class; 256] = { + let mut table = [Class::Other; 256]; + + macro_rules! set { + ($class:expr, $($byte:expr),+ $(,)?) 
=> { + $(table[$byte as usize] = $class;)+ + }; + } + + set!(Class::Quote, b'"', b'\'', b'`'); + set!(Class::Escape, b'\\'); + set!(Class::Whitespace, b' ', b'\t', b'\n', b'\r', b'\x0C'); + + table +}; + +#[cfg(test)] +mod tests { + use super::StringMachine; + use crate::extractor::machine::Machine; + + #[test] + #[ignore] + fn test_string_machine_performance() { + let input = r#"There will be a 'string' in this input, even "strings_with_other_quotes_and_\#escaped_characters" "#.repeat(100); + + StringMachine::test_throughput(100_000, &input); + StringMachine::test_duration_once(&input); + + todo!() + } + + #[test] + fn test_string_machine_extraction() { + for (input, expected) in [ + // Simple string + ("'foo'", vec!["'foo'"]), + // String as part of a candidate + ("content-['hello_world']", vec!["'hello_world'"]), + // With nested quotes + (r#"'"`hello`"'"#, vec![r#"'"`hello`"'"#]), + // With escaped opening quote + (r#"'Tailwind\'s_parser'"#, vec![r#"'Tailwind\'s_parser'"#]), + ( + r#"'Tailwind\'\'s_parser'"#, + vec![r#"'Tailwind\'\'s_parser'"#], + ), + ( + r#"'Tailwind\'\'\'s_parser'"#, + vec![r#"'Tailwind\'\'\'s_parser'"#], + ), + ( + r#"'Tailwind\'\'\'\'s_parser'"#, + vec![r#"'Tailwind\'\'\'\'s_parser'"#], + ), + // Spaces are not allowed + ("' hello world '", vec![]), + // With unfinished quote + ("'unfinished_quote", vec![]), + // An escape at the end will never be valid, because it _must_ be followed by the + // ending quote. 
+ (r#"'escaped_ending_quote\'"#, vec![]), + (r#"'escaped_end\"#, vec![]), + ] { + assert_eq!(StringMachine::test_extract_all(input), expected); + } + } +} diff --git a/crates/oxide/src/extractor/utility_machine.rs b/crates/oxide/src/extractor/utility_machine.rs new file mode 100644 index 000000000000..c6ca253e5164 --- /dev/null +++ b/crates/oxide/src/extractor/utility_machine.rs @@ -0,0 +1,360 @@ +use crate::cursor; +use crate::extractor::arbitrary_property_machine::ArbitraryPropertyMachine; +use crate::extractor::machine::{Machine, MachineState}; +use crate::extractor::modifier_machine::ModifierMachine; +use crate::extractor::named_utility_machine::NamedUtilityMachine; + +#[derive(Debug, Default)] +pub struct UtilityMachine { + /// Start position of the utility + start_pos: usize, + + /// Whether the legacy important marker `!` was used + legacy_important: bool, + + arbitrary_property_machine: ArbitraryPropertyMachine, + named_utility_machine: NamedUtilityMachine, + modifier_machine: ModifierMachine, +} + +impl Machine for UtilityMachine { + #[inline(always)] + fn reset(&mut self) { + self.start_pos = 0; + self.legacy_important = false; + } + + #[inline] + fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + match CLASS_TABLE[cursor.curr as usize] { + // LEGACY: Important marker + Class::Exclamation => { + self.legacy_important = true; + + match CLASS_TABLE[cursor.next as usize] { + // Start of an arbitrary property + // + // E.g.: `![color:red]` + // ^ + Class::OpenBracket => { + self.start_pos = cursor.pos; + cursor.advance(); + self.parse_arbitrary_property(cursor) + } + + // Start of a named utility + // + // E.g.: `!flex` + // ^ + _ => { + self.start_pos = cursor.pos; + cursor.advance(); + self.parse_named_utility(cursor) + } + } + } + + // Start of an arbitrary property + // + // E.g.: `[color:red]` + // ^ + Class::OpenBracket => { + self.start_pos = cursor.pos; + self.parse_arbitrary_property(cursor) + } + + // Everything else might be a 
named utility. Delegate to the named utility machine + // to determine if it's a named utility or not. + _ => { + self.start_pos = cursor.pos; + self.parse_named_utility(cursor) + } + } + } +} + +impl UtilityMachine { + fn parse_arbitrary_property(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + match self.arbitrary_property_machine.next(cursor) { + MachineState::Idle => self.restart(), + MachineState::Done(_) => match CLASS_TABLE[cursor.next as usize] { + // End of arbitrary property, but there is a potential modifier. + // + // E.g.: `[color:#0088cc]/` + // ^ + Class::Slash => { + cursor.advance(); + self.parse_modifier(cursor) + } + + // End of arbitrary property, but there is an `!`. + // + // E.g.: `[color:#0088cc]!` + // ^ + Class::Exclamation => { + cursor.advance(); + self.parse_important(cursor) + } + + // End of arbitrary property + // + // E.g.: `[color:#0088cc]` + // ^ + _ => self.done(self.start_pos, cursor), + }, + } + } + + fn parse_named_utility(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + match self.named_utility_machine.next(cursor) { + MachineState::Idle => self.restart(), + MachineState::Done(_) => match CLASS_TABLE[cursor.next as usize] { + // End of a named utility, but there is a potential modifier. + // + // E.g.: `bg-red-500/` + // ^ + Class::Slash => { + cursor.advance(); + self.parse_modifier(cursor) + } + + // End of named utility, but there is an `!`. 
+ // + // E.g.: `bg-red-500!` + // ^ + Class::Exclamation => { + cursor.advance(); + self.parse_important(cursor) + } + + // End of a named utility + // + // E.g.: `bg-red-500` + // ^ + _ => self.done(self.start_pos, cursor), + }, + } + } + + fn parse_modifier(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + match self.modifier_machine.next(cursor) { + MachineState::Idle => self.restart(), + MachineState::Done(_) => match CLASS_TABLE[cursor.next as usize] { + // A modifier followed by a modifier is invalid + Class::Slash => self.restart(), + + // A modifier followed by the important marker `!` + Class::Exclamation => { + cursor.advance(); + self.parse_important(cursor) + } + + // Everything else is valid + _ => self.done(self.start_pos, cursor), + }, + } + } + + fn parse_important(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + // Only the `!` is valid if we didn't start with `!` + // + // E.g.: + // + // ``` + // !bg-red-500! + // ^ invalid because of the first `!` + // ``` + if self.legacy_important { + return self.restart(); + } + + self.done(self.start_pos, cursor) + } +} + +#[derive(Debug, Clone, Copy)] +enum Class { + /// `!` + Exclamation, + + /// `[` + OpenBracket, + + /// `/` + Slash, + + Other, +} + +const CLASS_TABLE: [Class; 256] = { + let mut table = [Class::Other; 256]; + + macro_rules! set { + ($class:expr, $($byte:expr),+ $(,)?) 
=> { + $(table[$byte as usize] = $class;)+ + }; + } + + set!(Class::Exclamation, b'!'); + set!(Class::OpenBracket, b'['); + set!(Class::Slash, b'/'); + + table +}; + +#[cfg(test)] +mod tests { + use super::UtilityMachine; + use crate::extractor::machine::Machine; + + #[test] + #[ignore] + fn test_utility_machine_performance() { + let input = r#"<button type="button" class="absolute -top-1 -left-1.5 flex items-center justify-center p-1.5 text-gray-400">"#.repeat(100); + + UtilityMachine::test_throughput(100_000, &input); + UtilityMachine::test_duration_once(&input); + + todo!() + } + + #[test] + fn test_utility_extraction() { + for (input, expected) in [ + // Simple utility + ("flex", vec!["flex"]), + // Simple utility with special character(s) + ("@container", vec!["@container"]), + // Single character utility + ("a", vec!["a"]), + // Important utilities + ("!flex", vec!["!flex"]), + ("flex!", vec!["flex!"]), + ("flex! block", vec!["flex!", "block"]), + // With dashes + ("items-center", vec!["items-center"]), + // Inside a string + ("'flex'", vec!["flex"]), + // Multiple utilities + ("flex items-center", vec!["flex", "items-center"]), + // Arbitrary property + ("[color:red]", vec!["[color:red]"]), + ("![color:red]", vec!["![color:red]"]), + ("[color:red]!", vec!["[color:red]!"]), + ("[color:red]/20", vec!["[color:red]/20"]), + ("![color:red]/20", vec!["![color:red]/20"]), + ("[color:red]/20!", vec!["[color:red]/20!"]), + // Modifiers + ("bg-red-500/20", vec!["bg-red-500/20"]), + ("bg-red-500/[20%]", vec!["bg-red-500/[20%]"]), + ( + "bg-red-500/(--my-opacity)", + vec!["bg-red-500/(--my-opacity)"], + ), + // Modifiers with important (legacy) + ("!bg-red-500/20", vec!["!bg-red-500/20"]), + ("!bg-red-500/[20%]", vec!["!bg-red-500/[20%]"]), + ( + "!bg-red-500/(--my-opacity)", + vec!["!bg-red-500/(--my-opacity)"], + ), + // Modifiers with important + ("bg-red-500/20!", vec!["bg-red-500/20!"]), + ("bg-red-500/[20%]!", vec!["bg-red-500/[20%]!"]), + ( + 
"bg-red-500/(--my-opacity)!", + vec!["bg-red-500/(--my-opacity)!"], + ), + // Arbitrary value with bracket notation + ("bg-[#0088cc]", vec!["bg-[#0088cc]"]), + // Arbitrary value with arbitrary property shorthand modifier + ( + "bg-[#0088cc]/(--my-opacity)", + vec!["bg-[#0088cc]/(--my-opacity)"], + ), + // Arbitrary value with CSS property shorthand + ("bg-(--my-color)", vec!["bg-(--my-color)"]), + // Multiple utilities including arbitrary property shorthand + ( + "bg-(--my-color) flex px-(--my-padding)", + vec!["bg-(--my-color)", "flex", "px-(--my-padding)"], + ), + // Pug syntax + (".flex.bg-red-500", vec!["flex", "bg-red-500"]), + // -------------------------------------------------------- + + // Exceptions: + ("bg-red-500/20/20", vec![]), + ("bg-[#0088cc]/20/20", vec![]), + ] { + for (wrapper, additional) in [ + // No wrapper + ("{}", vec![]), + // With leading spaces + (" {}", vec![]), + // With trailing spaces + ("{} ", vec![]), + // Surrounded by spaces + (" {} ", vec![]), + // Inside a string + ("'{}'", vec![]), + // Inside a function call + ("fn('{}')", vec![]), + // Inside nested function calls + ("fn1(fn2('{}'))", vec!["fn1", "fn2"]), + // -------------------------- + // + // HTML + // Inside a class (on its own) + (r#"<div class="{}"></div>"#, vec!["div"]), + // Inside a class (first) + (r#"<div class="{} foo"></div>"#, vec!["div", "foo"]), + // Inside a class (second) + (r#"<div class="foo {}"></div>"#, vec!["div", "foo"]), + // Inside a class (surrounded) + ( + r#"<div class="foo {} bar"></div>"#, + vec!["div", "foo", "bar"], + ), + // -------------------------- + // + // JavaScript + // Inside a variable + (r#"let classes = '{}';"#, vec!["let", "classes"]), + // Inside an object (key) + ( + r#"let classes = { '{}': true };"#, + vec!["let", "classes", "true"], + ), + // Inside an object (no spaces, key) + (r#"let classes = {'{}':true};"#, vec!["let", "classes"]), + // Inside an object (value) + ( + r#"let classes = { primary: '{}' };"#, + vec!["let", 
"classes", "primary"], + ), + // Inside an object (no spaces, value) + ( + r#"let classes = {primary:'{}'};"#, + vec!["let", "classes", "primary"], + ), + // Inside an array + (r#"let classes = ['{}'];"#, vec!["let", "classes"]), + ] { + let input = wrapper.replace("{}", input); + + let mut expected = expected.clone(); + expected.extend(additional); + expected.sort(); + + let mut actual = UtilityMachine::test_extract_all(&input); + actual.sort(); + + if actual != expected { + dbg!(&input, &expected, &actual); + } + assert_eq!(actual, expected); + } + } + } +} diff --git a/crates/oxide/src/extractor/variant_machine.rs b/crates/oxide/src/extractor/variant_machine.rs new file mode 100644 index 000000000000..4a4647b61013 --- /dev/null +++ b/crates/oxide/src/extractor/variant_machine.rs @@ -0,0 +1,158 @@ +use crate::cursor; +use crate::extractor::arbitrary_value_machine::ArbitraryValueMachine; +use crate::extractor::machine::{Machine, MachineState}; +use crate::extractor::named_variant_machine::NamedVariantMachine; + +#[derive(Debug, Default)] +pub struct VariantMachine { + arbitrary_value_machine: ArbitraryValueMachine, + named_variant_machine: NamedVariantMachine, +} + +impl Machine for VariantMachine { + #[inline(always)] + fn reset(&mut self) {} + + #[inline] + fn next(&mut self, cursor: &mut cursor::Cursor<'_>) -> MachineState { + match CLASS_TABLE[cursor.curr as usize] { + // Start of an arbitrary variant + // + // E.g.: `[&:hover]:` + // ^ + Class::OpenBracket => { + let start_pos = cursor.pos; + match self.arbitrary_value_machine.next(cursor) { + MachineState::Idle => self.restart(), + MachineState::Done(_) => self.parse_arbitrary_end(start_pos, cursor), + } + } + + // Start of a named variant + _ => { + let start_pos = cursor.pos; + match self.named_variant_machine.next(cursor) { + MachineState::Idle => self.restart(), + MachineState::Done(_) => self.done(start_pos, cursor), + } + } + } + } +} + +impl VariantMachine { + #[inline(always)] + fn 
parse_arbitrary_end( + &mut self, + start_pos: usize, + cursor: &mut cursor::Cursor<'_>, + ) -> MachineState { + match CLASS_TABLE[cursor.next as usize] { + // End of an arbitrary value, must be followed by a `:` + // + // E.g.: `[&:hover]:` + // ^ + Class::Colon => { + cursor.advance(); + self.done(start_pos, cursor) + } + + // Everything else is invalid + _ => self.restart(), + } + } +} + +#[derive(Debug, Clone, Copy)] +enum Class { + /// `[` + OpenBracket, + + /// `:` + Colon, + + Other, +} + +const CLASS_TABLE: [Class; 256] = { + let mut table = [Class::Other; 256]; + + macro_rules! set { + ($class:expr, $($byte:expr),+ $(,)?) => { + $(table[$byte as usize] = $class;)+ + }; + } + + set!(Class::OpenBracket, b'['); + set!(Class::Colon, b':'); + + table +}; + +#[cfg(test)] +mod tests { + use super::VariantMachine; + use crate::extractor::machine::Machine; + + #[test] + #[ignore] + fn test_variant_machine_performance() { + let input = r#"<button class="hover:focus:flex data-[state=pending]:[&.in-progress]:flex supports-(--my-variable):flex group-hover/named:not-has-peer-data-disabled:flex">"#; + + VariantMachine::test_throughput(100_000, input); + VariantMachine::test_duration_once(input); + + todo!() + } + + #[test] + fn test_variant_extraction() { + for (input, expected) in [ + // Simple variant + ("hover:flex", vec!["hover:"]), + // Single character variant + ("a:flex", vec!["a:"]), + ("*:flex", vec!["*:"]), + // With special characters + ("**:flex", vec!["**:"]), + // With dashes + ("data-disabled:flex", vec!["data-disabled:"]), + // Multiple variants + ("hover:focus:flex", vec!["hover:", "focus:"]), + // Arbitrary variant + ("[&:hover:focus]:flex", vec!["[&:hover:focus]:"]), + // Arbitrary variant with nested brackets + ( + "[&>[data-slot=icon]:last-child]:", + vec!["[&>[data-slot=icon]:last-child]:"], + ), + ( + "sm:[&>[data-slot=icon]:last-child]:", + vec!["sm:", "[&>[data-slot=icon]:last-child]:"], + ), + ( + "[:is(italic):is(underline)]:", + 
vec!["[:is(italic):is(underline)]:"], + ), + // Modifiers + ("group-hover/foo:flex", vec!["group-hover/foo:"]), + ("group-hover/[.parent]:flex", vec!["group-hover/[.parent]:"]), + // Arbitrary variant with bracket notation + ("data-[state=pending]:flex", vec!["data-[state=pending]:"]), + // Arbitrary variant with CSS property shorthand + ("supports-(--my-color):flex", vec!["supports-(--my-color):"]), + // ------------------------------------------------------------- + + // Exceptions + // Empty arbitrary variant is not allowed + ("[]:flex", vec![]), + // Named variant must be followed by `:` + ("hover", vec![]), + // Modifier cannot be followed by another modifier. However, we don't check boundary + // characters in this state machine so we will get `bar:`. + ("group-hover/foo/bar:flex", vec!["bar:"]), + ] { + assert_eq!(VariantMachine::test_extract_all(input), expected); + } + } +} diff --git a/crates/oxide/src/fixtures/example.html b/crates/oxide/src/fixtures/example.html new file mode 100644 index 000000000000..41d8beed611b --- /dev/null +++ b/crates/oxide/src/fixtures/example.html @@ -0,0 +1,552 @@ +<div class="font-semibold px-3 text-left text-gray-900 py-3.5 text-sm"> + <nav class="font-medium text-gray-900"> + <ul class="h-7 justify-center rounded-full items-center w-7 flex mx-auto"> + <li class="h-0.5 inset-x-0 absolute bottom-0"> + <a href="#" target="_blank" class="space-y-1 px-2 mt-3"> + This is link 4132f37a-a03f-4776-9a8e-1b70ff626f71 + </a> + <img + class="text-gray-900 font-medium text-sm ml-3.5" + alt="Profile picture of user 40faf8f0-6221-4ec4-a65e-fe22e1d9abd2" + src="https://example.org/pictures/4f7d7f80-e9cd-447a-9b35-f9d93befe025" + /> + </li> + <li class="text-indigo-700 order-1 font-semibold"> + <ol class="h-24 sm:w-32 w-24 object-center rounded-md sm:h-32 object-cover"> + <li class="lg:justify-center lg:gap-x-12 hidden lg:flex lg:min-w-0 lg:flex-1"> + <img + class="hover:bg-gray-100 bg-gray-50 py-1.5 focus:z-10 text-gray-400" + alt="Profile 
picture of user d27b5a21-1622-4f3a-ba7d-6230fae487c2" + src="https://example.org/pictures/2a026d06-0e67-467d-babf-8a18614667f2" + /> + <ul class="w-6 h-6 mr-3 flex-shrink-0"> + <li class="flow-root"> + <img + class="bg-white focus:ring-indigo-500 group font-medium items-center inline-flex focus:ring-offset-2 focus:ring-2 focus:outline-none text-base rounded-md hover:text-gray-900" + alt="Profile picture of user 0052230e-90d8-4d84-ab64-904b87fe4622" + src="https://example.org/pictures/f7ec91ba-17a7-470f-a472-705a8b5a79ce" + /> + </li> + <li class="items-center right-0 flex pointer-events-none absolute inset-y-0"> + <img + class="ml-3" + alt="Profile picture of user a7c51adf-3917-4e41-b5f1-d263768c9adf" + src="https://example.org/pictures/a56323a8-8aa3-4d39-8c79-9ee875ecd6f0" + /> + </li> + </ul> + <ul class="hover:bg-opacity-75 hover:bg-indigo-500 text-white"> + <li class="font-medium hover:text-indigo-500 text-indigo-600 text-sm"> + <ul class="sr-only"></ul> + <ol class="flex-col px-8 flex pt-8"></ol> + <a href="#" class="items-center sm:items-start flex"> + This is link 8c59d1ad-9ef0-4a41-ab15-e9fe04d77ae2 + </a> + </li> + <li class="rounded-full w-8 h-8"> + <ol class="lg:grid lg:grid-cols-12"></ol> + <a + href="#" + target="_blank" + rel="noreferrer" + class="px-4 border-t py-6 space-y-6 border-gray-200" + > + This is link 4182f198-ff54-45a4-ace9-e6f46a60ec92 + </a> + </li> + <li class="border-gray-700 border-t pt-4 pb-3"> + <ol class="font-medium text-gray-900 p-2 block -m-2"></ol> + <ol + class="sm:py-24 to-green-400 lg:px-0 bg-gradient-to-r lg:items-center lg:justify-end sm:px-6 lg:bg-none from-cyan-600 px-4 lg:pl-8 py-16 lg:flex" + ></ol> + <img + class="lg:gap-24 lg:grid-cols-2 lg:grid lg:mx-auto lg:items-start lg:max-w-7xl lg:px-8" + alt="Profile picture of user c44d18a8-a1f1-4bf4-87b0-89f37e34aba1" + src="https://example.org/pictures/527dca2c-5afe-4a5c-96cd-706396701c36" + /> + </li> + <li class="hover:bg-gray-100 bg-white focus:z-10 text-gray-900 relative 
py-1.5"> + <a + href="#" + target="_blank" + rel="noreferrer" + class="text-sm font-medium text-gray-500" + > + This is link f6db0c8d-6409-4abd-9af1-d3e68ebbd25c + </a> + <a href="#" rel="noreferrer" class="text-sm font-medium mt-12"> + This is link 51c9b242-e449-44d8-9289-1a5d474d5fbb + </a> + <ul class="h-12"></ul> + </li> + <li class="bg-gray-100"> + <a href="#" rel="noreferrer" class="bg-gray-100"> + This is link eb479051-0dff-4d8f-9456-bb8b56ab90af + </a> + <img + class="ml-3 text-gray-900 font-medium text-base" + alt="Profile picture of user 17d797ac-aea7-4154-b522-f0ac89762b0b" + src="https://example.org/pictures/55e31a5b-2fcb-4b35-832c-fad711717f1a" + /> + <ul class="font-medium hover:text-gray-700 text-gray-500 ml-4 text-sm"></ul> + </li> + </ul> + <ul class="space-y-6 border-t py-6 border-gray-200 px-4"> + <li class="md:hidden z-40 relative"> + <ol class="h-7 w-7 mx-auto items-center justify-center rounded-full flex"></ol> + <ol class="items-center flex rounded-full w-7 mx-auto h-7 justify-center"></ol> + <a + href="#" + target="_blank" + class="sm:py-32 lg:px-8 max-w-7xl mx-auto px-4 py-24 sm:px-6 relative" + > + This is link 5014471c-4f44-4696-a1f6-7817d57827eb + </a> + <img + class="h-5 group-hover:text-gray-500 w-5 ml-2" + alt="Profile picture of user 98731873-0af8-4823-9115-f1333a2cdc2e" + src="https://example.org/pictures/12ac8cbf-4540-49ff-a71f-4b0ac684b374" + /> + </li> + <li + class="focus:outline-none font-medium px-4 justify-center text-sm focus:ring-2 border hover:bg-gray-50 focus:ring-offset-2 py-2 border-gray-300 bg-white inline-flex text-gray-700 rounded-md shadow-sm focus:ring-gray-900" + > + <img + class="bg-white focus:ring-indigo-500 text-base focus:outline-none focus:ring-2 items-center font-medium focus:ring-offset-2 hover:text-gray-900 rounded-md group inline-flex" + alt="Profile picture of user 3ae3670d-c729-41d2-adee-691340673aef" + src="https://example.org/pictures/5f289b87-efc2-4cc9-9061-21883b0778db" + /> + </li> + <li 
class="text-base text-gray-500 mt-6 font-medium text-center"> + <ol class="text-sm font-medium text-indigo-600 hover:text-indigo-500"></ol> + <ol class="rounded-md h-6 w-6 inline-block"></ol> + </li> + <li class="hidden lg:flex lg:items-center"> + <ol class="border-indigo-600"></ol> + </li> + </ul> + </li> + <li class="sr-only"> + <img + class="justify-center py-2 bg-white flex" + alt="Profile picture of user 89195190-9a42-4826-89ba-e7bc1d677520" + src="https://example.org/pictures/742d9c44-1c75-461f-b2d0-e52636041966" + /> + <img + class="-ml-14 sticky left-0 z-20 text-gray-400 -mt-2.5 leading-5 pr-2 w-14 text-right text-xs" + alt="Profile picture of user 77a37b14-1c0f-4ef0-85d4-a18755065ea3" + src="https://example.org/pictures/8ad2e05f-6ea6-4d94-8f82-f8aa59274b9e" + /> + </li> + </ol> + <img + class="border-t shadow-sm sm:border bg-white sm:rounded-lg border-gray-200 border-b" + alt="Profile picture of user 07935cf2-78e5-49ba-afdb-09d13c8320d6" + src="https://example.org/pictures/8f895991-6e8c-4bcb-82c3-11c2a3338eb2" + /> + <ol class="grid-cols-2 grid gap-x-8 gap-y-10"> + <li + class="py-1 w-48 shadow-lg bg-white rounded-md ring-1 absolute ring-black z-10 focus:outline-none right-0 mt-2 ring-opacity-5 origin-top-right" + > + <ol class="h-6 w-6"> + <li class="bg-gray-500 inset-0 transition-opacity fixed bg-opacity-75"> + <ol class="flex"></ol> + <a href="#" class="mt-2 flex items-center justify-between"> + This is link 1a1a3c60-a2ea-4153-acd7-12aa82f03c8d + </a> + <a href="#" target="_blank" class="text-gray-300 flex-shrink-0 w-5 h-5"> + This is link bdaa695c-3fe4-4cab-8d68-dbb338601044 + </a> + </li> + <li class="text-gray-500"> + <ol class="py-20"></ol> + <a href="#" target="_blank" class="text-sm ml-3"> + This is link 2183c71d-39ec-42d4-8b1e-eeb7e5490050 + </a> + </li> + <li class="mt-10"> + <ol class="group"></ol> + <a href="#" rel="noreferrer" class="bg-gray-50"> + This is link c49882fd-ab55-41a4-8dac-b96fe2901bce + </a> + <ol class="bg-white h-[940px] 
overflow-y-auto"></ol> + </li> + <li class="flex-1 space-y-1"> + <ol class="h-6 w-6"></ol> + </li> + </ol> + <ul class="z-10 flex relative items-center lg:hidden"> + <li class="aspect-w-1 bg-gray-100 rounded-lg overflow-hidden aspect-h-1"> + <img + class="overflow-hidden sm:rounded-md bg-white shadow" + alt="Profile picture of user 263bb89c-5b54-4247-8c82-fec829b1a895" + src="https://example.org/pictures/4c12a5c4-d117-4f81-93e1-47a45626a36e" + /> + <a href="#" class="sr-only"> This is link fc3885d8-d63f-4455-b056-f113aa3a2f23 </a> + <ol + class="border-t grid-cols-1 border-gray-200 gap-6 border-b mt-6 sm:grid-cols-2 grid py-6" + ></ol> + </li> + <li class="space-x-3 items-center flex"> + <a href="#" class="py-2 bg-white"> + This is link 1d718cb1-05e3-4d14-b959-92f5fd475ce0 + </a> + <img + class="rounded-md w-full focus:border-indigo-500 border-gray-300 focus:ring-indigo-500 block mt-1 shadow-sm sm:text-sm" + alt="Profile picture of user 1c4d6dc3-700a-4167-8ec3-3dc2f73d4ad5" + src="https://example.org/pictures/359599b3-5610-482e-bc09-025ac5283170" + /> + <ul class="max-w-3xl mx-auto divide-y-2 divide-gray-200"></ul> + <ul class="flex space-x-4"></ul> + </li> + <li class="text-gray-500 hover:text-gray-600"> + <ul class="h-96 w-full relative lg:hidden"></ul> + <ol class="text-gray-500 mt-6 text-sm"></ol> + <a href="#" class="sm:col-span-6"> + This is link 51cc68af-1184-4f8c-9efd-d2f855902b0b + </a> + <ol + class="left-0 inset-y-0 absolute pointer-events-none pl-3 items-center flex" + ></ol> + </li> + <li class="flex-shrink-0"> + <ol + class="shadow-lg rounded-lg ring-1 bg-white ring-black ring-opacity-5 divide-gray-50 divide-y-2" + ></ol> + </li> + </ul> + <img + class="pl-3 sm:pr-6 py-3.5 relative pr-4" + alt="Profile picture of user 095f88c2-1892-41d1-8165-981ab47b1942" + src="https://example.org/pictures/c70d575b-353a-4360-99df-c2100a36e41b" + /> + </li> + <li class="whitespace-nowrap"> + <a href="#" target="_blank" rel="noreferrer" class="h-full"> + This is link 
c804eb7a-39ea-46e6-a79a-2d48e61d5b4e + </a> + </li> + <li class="text-gray-900 text-2xl font-bold tracking-tight"> + <img + class="text-gray-900 font-medium" + alt="Profile picture of user 12190673-25cb-4175-90d7-73282e51bd02" + src="https://example.org/pictures/e55a076b-0325-4629-8e02-c9491f2f49cc" + /> + </li> + <li + class="ring-black sm:-mx-6 overflow-hidden ring-1 ring-opacity-5 mt-8 md:rounded-lg md:mx-0 -mx-4 shadow" + > + <ol class="flex items-center font-medium hover:text-gray-800 text-sm text-gray-700"> + <li class="flex mt-8 flex-col"> + <ol class="sm:inline hidden"></ol> + <ul class="flex items-center absolute inset-0"></ul> + <ol class="h-6 w-6"></ol> + </li> + <li class="-ml-2 rounded-md text-gray-400 p-2 bg-white"> + <img + class="block ml-3 font-medium text-sm text-gray-700" + alt="Profile picture of user 2cee83e8-6405-4046-9454-7f6083db307d" + src="https://example.org/pictures/93097140-2e56-4a3f-bc41-e00c658b7767" + /> + <ul class="sm:grid font-medium hidden grid-cols-4 text-gray-600 mt-6 text-sm"></ul> + <ul class="h-64 w-64 rounded-full xl:h-80 xl:w-80"></ul> + <ul class="sr-only"></ul> + </li> + </ol> + <img + class="aspect-w-2 group sm:aspect-w-1 aspect-h-1 overflow-hidden sm:aspect-h-1 sm:row-span-2 rounded-lg" + alt="Profile picture of user bc370a72-a44e-44e1-bbec-962c234060da" + src="https://example.org/pictures/d284630d-a088-49ad-a018-b245a8d7acb7" + /> + </li> + <li class="space-y-6 mt-6"> + <ul class="w-5 text-gray-400 h-5"> + <li + class="hover:bg-gray-100 py-1.5 bg-gray-50 text-gray-400 focus:z-10 rounded-tl-lg" + > + <img + class="bg-gray-50 py-1.5 hover:bg-gray-100 focus:z-10 text-gray-400" + alt="Profile picture of user 4b298841-5911-4b70-ae5a-a5aa8646e575" + src="https://example.org/pictures/fd05d4f7-2b6c-4de4-b7a7-3035e7b5fe78" + /> + <img + class="px-3 py-2 bg-white relative" + alt="Profile picture of user 5406aa7d-5563-4ce1-980f-754a912cd9ff" + src="https://example.org/pictures/e979117e-580c-40a7-be74-bf1b916b9ac0" + /> + <a + 
href="#" + target="_blank" + rel="noreferrer" + class="mt-2 font-medium text-gray-900 text-lg" + > + This is link 95d285c9-6e4e-43c2-a97f-bb958dcce92f + </a> + <ul class="sr-only"></ul> + </li> + <li class="mt-2 text-sm text-gray-500"> + <a href="#" target="_blank" class="text-sm text-blue-gray-900 font-medium block"> + This is link e0ca5c13-efa5-44d7-b91e-512df7c88410 + </a> + <img + class="w-5 h-5" + alt="Profile picture of user 4f5aeddc-6718-40fd-87d2-dcca7e4ef8b1" + src="https://example.org/pictures/6a2ed606-6c59-413c-911f-612c7390091f" + /> + <ol class="font-medium text-gray-900"></ol> + <ol class="justify-center rounded-full h-7 items-center mx-auto w-7 flex"></ol> + </li> + <li class="font-medium px-1 whitespace-nowrap py-4 text-sm border-b-2"> + <a + href="#" + target="_blank" + class="min-h-80 rounded-md aspect-h-1 aspect-w-1 lg:aspect-none w-full group-hover:opacity-75 lg:h-80 overflow-hidden bg-gray-200" + > + This is link 4f52e535-2e46-44e2-8a5f-fce48c1a673a + </a> + <ul class="text-gray-300 h-full w-full"></ul> + <img + class="block" + alt="Profile picture of user 76dd6af3-d2b7-4a7a-8bc3-628ce95ea9b3" + src="https://example.org/pictures/c6b476c8-283c-44f7-a5c6-4cb48c5ae10b" + /> + <ol class="h-32 relative lg:hidden w-full"></ol> + </li> + <li class="bg-gray-100 z-10 sticky sm:pt-3 pl-1 pt-1 md:hidden sm:pl-3 top-0"> + <ol class="z-40 relative lg:hidden"></ol> + <a href="#" class="sr-only"> This is link ec11a608-55b8-41fd-8085-325252c469af </a> + <ol class="divide-gray-200 lg:col-span-9 divide-y"></ol> + </li> + </ul> + <ul class="text-xl font-semibold ml-1"> + <li class="lg:flex-1 lg:w-0"> + <ol class="sm:hidden"></ol> + <img + class="block py-2 text-blue-gray-900 text-base hover:bg-blue-gray-50 font-medium px-3 rounded-md" + alt="Profile picture of user 885eb4a3-98ca-4614-b255-ec03e124c026" + src="https://example.org/pictures/6d3a9136-583e-4742-826f-e6d94f60ce99" + /> + </li> + <li class="truncate w-0 ml-2 flex-1"> + <img + class="hover:text-gray-600 
text-gray-500" + alt="Profile picture of user de38ffb5-607b-4ed6-87bd-dd05fde1731c" + src="https://example.org/pictures/da67f442-40c9-425d-9344-2f9772582519" + /> + <ol class="text-center mt-8 text-gray-400 text-base"></ol> + </li> + </ul> + </li> + </ol> + </li> + <li class="lg:block hidden lg:flex-1"> + <a href="#" target="_blank" class="text-base ml-3 text-gray-500"> + This is link 9a44892f-7ead-48b6-af94-913ec04821a1 + </a> + <ol + class="shadow-sm border-gray-300 focus:ring-indigo-500 sm:text-sm focus:border-indigo-500 w-full rounded-md block" + > + <li class="bg-white hover:bg-gray-100 py-1.5 focus:z-10"> + <a + href="#" + target="_blank" + class="bg-gray-200 text-gray-700 gap-px lg:flex-none border-b text-center grid-cols-7 grid text-xs leading-6 border-gray-300 font-semibold" + > + This is link 42a16c66-508d-4d65-bb1a-b252f7e78df2 + </a> + <a href="#" class="h-8 w-auto"> This is link 021bda37-522f-413d-af3c-4a8c4f4d666a </a> + <img + class="max-h-12" + alt="Profile picture of user a1a859bc-ef5e-4349-99ab-a8f4ae262d94" + src="https://example.org/pictures/da93a022-cbbf-4ba0-b34c-21c090d87d74" + /> + <a + href="#" + target="_blank" + rel="noreferrer" + class="flex relative justify-center text-sm" + > + This is link fa5aded2-2906-4809-b742-26832b226f50 + </a> + </li> + <li class="py-12 sm:px-6 lg:py-16 px-4 lg:px-8 mx-auto max-w-7xl"> + <a href="#" rel="noreferrer" class="min-w-0 ml-3 flex-1"> + This is link 715e397d-5676-42fc-9d8f-456172543c31 + </a> + <img + class="bg-gray-800" + alt="Profile picture of user c02575ab-6ff1-45f9-a8e3-242c79b133fc" + src="https://example.org/pictures/3d8ecc7a-2504-4797-a644-f93d74acf853" + /> + <img + class="text-base text-gray-900 font-medium" + alt="Profile picture of user 9f457701-be79-4ff2-9dfc-9382dafb20ab" + src="https://example.org/pictures/438b713b-c9fd-4da2-a265-4b8d8f531e30" + /> + <a href="#" rel="noreferrer" class="text-sm"> + This is link 4c84098a-c0ac-4044-bbba-6f10bcc315fb + </a> + </li> + <li class="w-6 
flex-shrink-0 h-6 text-green-500"> + <img + class="mx-auto sm:px-6 px-4 max-w-7xl" + alt="Profile picture of user 00ae83b8-845d-447e-ae1b-cd3c685e1ca0" + src="https://example.org/pictures/b9deba2b-c5b3-4b80-bf76-e0a717d310fd" + /> + <ul class="w-72"> + <li class="sm:flex sm:justify-between sm:items-center"> + <img + class="block font-medium text-sm text-gray-700" + alt="Profile picture of user c0de8cb0-e9d7-4639-8c3d-851ca17e665b" + src="https://example.org/pictures/897193ff-aa53-4e9e-b761-bc4f75aef572" + /> + </li> + <li class="text-sm hidden font-medium ml-3 text-gray-700 lg:block"> + <img + class="h-8 w-auto" + alt="Profile picture of user 9ff9c8ac-2374-4960-9996-ec258745c91a" + src="https://example.org/pictures/e898638f-08fa-4743-aea7-66adba84bded" + /> + </li> + <li + class="mx-auto sm:px-6 px-4 lg:items-center lg:flex lg:py-16 lg:px-8 max-w-7xl py-12" + > + <img + class="lg:max-w-none px-4 max-w-2xl mx-auto lg:px-0" + alt="Profile picture of user 7c4d617d-afa2-4410-81c5-2def540d2d20" + src="https://example.org/pictures/05a7dbc1-c1cc-4f99-99e5-7b507a4108b3" + /> + <ul class="hover:text-gray-600 text-gray-500"></ul> + <ul + class="relative rounded-md border-transparent focus-within:ring-2 focus-within:ring-white -ml-2 group" + ></ul> + </li> + </ul> + <img + class="h-5 text-gray-300 w-5" + alt="Profile picture of user bf2c6905-715a-4e38-9b5d-17fd8e7aec2a" + src="https://example.org/pictures/e759a4d7-5e63-4075-ba32-c6574107f401" + /> + </li> + <li class="object-cover object-center h-full w-full"> + <ol class="w-full"> + <li class="md:mt-0 absolute sm:-mt-32 -mt-72 inset-0"> + <ul class="w-12 h-12 rounded-full"></ul> + </li> + <li class="h-1.5 rounded-full w-1.5 mb-1 mx-0.5 bg-gray-400"> + <ol class="border-gray-200 border-4 rounded-lg border-dashed h-96"></ol> + <img + class="order-1 font-semibold text-gray-700" + alt="Profile picture of user 1c2cabee-08a3-4b48-ba54-5a578b2c3d30" + src="https://example.org/pictures/f5c185be-8b9d-490d-81f4-73a6e1517d98" + /> + 
<img + class="font-bold sm:text-4xl text-gray-900 tracking-tight text-3xl leading-8 text-center" + alt="Profile picture of user 1e8a2508-a37d-4f6b-9a8c-c6fcf0773604" + src="https://example.org/pictures/90af1bd2-30ed-45d3-917f-c685190ce56e" + /> + </li> + <li class="space-x-2 mt-4 text-sm text-gray-700 flex"> + <ul + class="rounded-full translate-x-1/2 block transform border-2 absolute bottom-0 right-0 border-white translate-y-1/2" + ></ul> + <ol + class="sm:hidden text-base py-2 text-gray-900 w-full placeholder-gray-500 h-full focus:outline-none border-transparent pr-3 pl-8 focus:placeholder-gray-400 focus:ring-0 focus:border-transparent" + ></ol> + </li> + </ol> + <ul class="lg:mt-0 self-center flow-root mt-8"> + <li + class="justify-center rounded-full bg-transparent bg-white hover:text-gray-500 focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500 text-gray-400 inline-flex focus:outline-none h-8 items-center w-8" + > + <ol class="block xl:inline"></ol> + <ol class="flex-shrink-0 ml-4"></ol> + <ol class="text-gray-200"></ol> + <img + class="border-gray-300 focus:relative md:w-9 rounded-r-md flex bg-white md:hover:bg-gray-50 pl-4 text-gray-400 border hover:text-gray-500 items-center pr-3 border-l-0 justify-center md:px-2 py-2" + alt="Profile picture of user b0a1e2d3-84c4-494b-91d6-194fc294b0db" + src="https://example.org/pictures/2f414511-756c-40ef-aded-22e3f4d985d7" + /> + </li> + <li class="inset-0 absolute"> + <img + class="text-gray-500 text-base font-medium text-center" + alt="Profile picture of user 431f88eb-5002-43ab-b23a-37ae0ef7d424" + src="https://example.org/pictures/bc7c19bb-4ef2-46ff-b00e-da70febab926" + /> + <img + class="inset-0 absolute z-10" + alt="Profile picture of user 4a3698d0-7cea-4ba2-854d-28b2bb2d374b" + src="https://example.org/pictures/6078c4cd-db51-43af-90b3-8aeb0a8f1030" + /> + </li> + </ul> + <a href="#" rel="noreferrer" class="bg-white"> + This is link ab01c689-e03a-4992-8322-37c20551cb07 + </a> + </li> + <li class="flex px-4 pb-2 
pt-5"> + <ol + class="sm:px-6 px-4 bg-white relative pb-8 md:p-6 shadow-2xl items-center flex w-full sm:pt-8 overflow-hidden lg:p-8 pt-14" + > + <li class="py-1.5 hover:bg-gray-100 focus:z-10 text-gray-400 bg-gray-50"> + <a + href="#" + rel="noreferrer" + class="sm:text-sm bg-gray-50 items-center border-gray-300 border-r-0 rounded-l-md text-gray-500 inline-flex border px-3" + > + This is link 3ef75acd-3dfc-4e82-801b-eae9ff7c4351 + </a> + <ol class="absolute border-dashed border-gray-200 border-2 rounded-lg inset-0"></ol> + </li> + <li class="hover:bg-gray-50 block"> + <ol class="items-center flex justify-center p-8"></ol> + <ul class="mx-auto sm:px-6 lg:px-8 pb-12 max-w-7xl px-4"></ul> + <img + class="h-6 w-6 text-green-400" + alt="Profile picture of user f5900afb-7bee-4492-b6e3-148f0afc4f5f" + src="https://example.org/pictures/1b12c3fb-9f84-4cc7-84a7-d38f7ea232ee" + /> + </li> + <li class="flex mt-4 lg:flex-grow-0 flex-grow lg:ml-4 flex-shrink-0 ml-8"> + <img + class="focus:ring-indigo-500 block w-full sm:text-sm border-gray-300 focus:border-indigo-500 rounded-md shadow-sm" + alt="Profile picture of user c9dd7fa0-c1f4-477c-b24e-87d200ebb161" + src="https://example.org/pictures/1a3b2e5c-a192-47f3-a550-e18f715213a2" + /> + <a href="#" target="_blank" rel="noreferrer" class="text-gray-300 hover:text-white"> + This is link 81b819f3-b2e8-41db-ab8d-abe8c6926047 + </a> + </li> + </ol> + <img + class="lg:flex-1 lg:block hidden" + alt="Profile picture of user 352e9ea2-0216-4a0c-917c-1906f5ef4ed1" + src="https://example.org/pictures/ec02985c-b92d-4978-906e-7bdc70bfa54e" + /> + </li> + </ol> + </li> + <li class="sr-only"> + <a href="#" target="_blank" class="text-gray-500 mt-4 text-sm"> + This is link a712361b-f51e-4f0a-9d55-b64a5d53e10e + </a> + <a + href="#" + rel="noreferrer" + class="rounded-lg ring-opacity-5 shadow-lg overflow-hidden ring-1 ring-black" + > + This is link 8eb412d6-1c39-4fed-b507-cf2a65904247 + </a> + </li> + </ul> + </nav> + <img + class="bg-gray-100" + 
alt="Profile picture of user 8825a6f0-3a41-44b8-92ec-abcd0af79bfb" + src="https://example.org/pictures/679e2a54-073e-416a-85c4-3eba0626aab3" + /> + <span class="bg-white focus:z-10 py-1.5 hover:bg-gray-100"> + This is text 3d190171-53b3-4393-99ab-9bedfc964141 + </span> +</div> diff --git a/crates/oxide/src/glob.rs b/crates/oxide/src/glob.rs index 6c7676ece953..ca73b5ee116b 100644 --- a/crates/oxide/src/glob.rs +++ b/crates/oxide/src/glob.rs @@ -173,7 +173,7 @@ pub fn path_matches_globs(path: &Path, globs: &[GlobEntry]) -> bool { globs .iter() - .any(|g| glob_match(&format!("{}/{}", g.base, g.pattern), path.as_bytes())) + .any(|g| glob_match(format!("{}/{}", g.base, g.pattern), path.as_bytes())) } #[cfg(test)] diff --git a/crates/oxide/src/lib.rs b/crates/oxide/src/lib.rs index 03fe9e8f190f..6fc9372ed525 100644 --- a/crates/oxide/src/lib.rs +++ b/crates/oxide/src/lib.rs @@ -1,15 +1,17 @@ use crate::glob::hoist_static_glob_parts; -use crate::parser::Extractor; use crate::scanner::allowed_paths::resolve_paths; use crate::scanner::detect_sources::DetectSources; use bexpand::Expression; use bstr::ByteSlice; +use extractor::string_machine::StringMachine; +use extractor::{Extracted, Extractor}; use fast_glob::glob_match; use fxhash::{FxHashMap, FxHashSet}; use glob::optimize_patterns; use paths::Path; use rayon::prelude::*; use scanner::allowed_paths::read_dir; +use std::borrow::Cow; use std::fs; use std::path::PathBuf; use std::sync; @@ -17,11 +19,12 @@ use std::time::SystemTime; use tracing::event; pub mod cursor; +pub mod extractor; pub mod fast_skip; pub mod glob; -pub mod parser; pub mod paths; pub mod scanner; +pub mod throughput; static SHOULD_TRACE: sync::LazyLock<bool> = sync::LazyLock::new( || matches!(std::env::var("DEBUG"), Ok(value) if value.eq("*") || (value.contains("tailwindcss:oxide") && !value.contains("-tailwindcss:oxide"))), @@ -40,9 +43,9 @@ fn init_tracing() { } #[derive(Debug, Clone)] -pub struct ChangedContent { - pub file: Option<PathBuf>, - pub 
content: Option<String>, +pub enum ChangedContent<'a> { + File(PathBuf, Cow<'a, str>), + Content(String, Cow<'a, str>), } #[derive(Debug, Clone)] @@ -101,12 +104,13 @@ impl Scanner { pub fn scan(&mut self) -> Vec<String> { init_tracing(); + self.prepare(); self.compute_candidates(); let mut candidates: Vec<String> = self.candidates.clone().into_par_iter().collect(); + candidates.par_sort_unstable(); - candidates.par_sort(); candidates } @@ -135,18 +139,39 @@ impl Scanner { self.prepare(); let content = read_changed_content(changed_content).unwrap_or_default(); - let extractor = Extractor::with_positions(&content[..], Default::default()); + let original_content = &content; + + // Workaround for legacy upgrades: + // + // `-[]` won't parse in the new parser (`[…]` must contain _something_), but we do need it + // for people using `group-[]` (which we will later replace with `in-[.group]` instead). + let content = content.replace("-[]", "XYZ"); + let offset = content.as_ptr() as usize; + + let mut extractor = Extractor::new(&content[..]); - let candidates: Vec<(String, usize)> = extractor + extractor + .extract() .into_par_iter() - .map(|(s, i)| { - // SAFETY: When we parsed the candidates, we already guaranteed that the byte slices - // are valid, therefore we don't have to re-check here when we want to convert it back - // to a string. - unsafe { (String::from_utf8_unchecked(s.to_vec()), i) } + .flat_map(|extracted| match extracted { + Extracted::Candidate(s) => { + let i = s.as_ptr() as usize - offset; + let original = &original_content[i..i + s.len()]; + if original.contains_str("-[]") { + return Some(unsafe { + (String::from_utf8_unchecked(original.to_vec()), i) + }); + } + + // SAFETY: When we parsed the candidates, we already guaranteed that the byte + // slices are valid, therefore we don't have to re-check here when we want to + // convert it back to a string.
+ Some(unsafe { (String::from_utf8_unchecked(s.to_vec()), i) }) + } + + _ => None, }) - .collect(); - candidates + .collect() } #[tracing::instrument(skip_all)] @@ -197,10 +222,8 @@ impl Scanner { }; if should_scan_file { - changed_content.push(ChangedContent { - file: Some(path.clone()), - content: None, - }); + let extension = path.extension().unwrap_or_default().to_string_lossy(); + changed_content.push(ChangedContent::File(path.to_path_buf(), extension)) } } @@ -427,35 +450,29 @@ impl Scanner { } fn read_changed_content(c: ChangedContent) -> Option<Vec<u8>> { - if let Some(content) = c.content { - return Some(content.into_bytes()); - } + let (content, extension) = match c { + ChangedContent::File(file, extension) => match std::fs::read(&file) { + Ok(content) => (content, extension), + Err(e) => { + event!(tracing::Level::ERROR, "Failed to read file: {:?}", e); + return None; + } + }, - let Some(file) = c.file else { - return Default::default(); + ChangedContent::Content(contents, extension) => (contents.into_bytes(), extension), }; - let Ok(content) = std::fs::read(&file).map_err(|e| { - event!(tracing::Level::ERROR, "Failed to read file: {:?}", e); - e - }) else { - return Default::default(); - }; + Some(pre_process_input(&content, &extension)) +} - let Some(extension) = file.extension().map(|x| x.to_str()) else { - return Some(content); - }; +pub fn pre_process_input(content: &[u8], extension: &str) -> Vec<u8> { + use crate::extractor::pre_processors::*; match extension { - // Angular class shorthand - Some("html") => Some(content.replace("[class.", "[")), - Some("svelte") => Some( - content - .replace(" class:", " ") - .replace("\tclass:", " ") - .replace("\nclass:", " "), - ), - _ => Some(content), + "rb" | "erb" => Ruby.process(content), + "slim" | "pug" => Pug.process(content), + "svelte" => Svelte.process(content), + _ => content.to_vec(), } } @@ -477,21 +494,78 @@ fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> { fn 
parse_all_blobs(blobs: Vec<Vec<u8>>) -> Vec<String> { let mut result: Vec<_> = blobs .par_iter() - .flat_map(|blob| blob.par_split(|x| matches!(x, b'\n'))) - .map(|blob| Extractor::unique(blob, Default::default())) + .flat_map(|blob| blob.par_split(|x| *x == b'\n')) + .filter_map(|blob| { + if blob.is_empty() { + return None; + } + + let extracted = crate::extractor::Extractor::new(blob).extract(); + if extracted.is_empty() { + return None; + } + + Some(FxHashSet::from_iter(extracted.into_iter().map( + |x| match x { + Extracted::Candidate(bytes) => bytes, + Extracted::CssVariable(bytes) => bytes, + }, + ))) + }) .reduce(Default::default, |mut a, b| { a.extend(b); a }) .into_iter() - .map(|s| { - // SAFETY: When we parsed the candidates, we already guaranteed that the byte slices - // are valid, therefore we don't have to re-check here when we want to convert it back - // to a string. - unsafe { String::from_utf8_unchecked(s.to_vec()) } - }) + .map(|s| unsafe { String::from_utf8_unchecked(s.to_vec()) }) .collect(); - result.par_sort(); + // SAFETY: Unstable sort is faster and in this scenario it's also safe because we are + // guaranteed to have unique candidates. + result.par_sort_unstable(); + result } + +#[cfg(test)] +mod tests { + use crate::Scanner; + + #[test] + fn test_positions() { + let mut scanner = Scanner::new(None); + + for (input, expected) in [ + // Before migrations + ( + r#"<div class="!tw__flex sm:!tw__block tw__bg-gradient-to-t flex tw:[color:red] group-[]:tw__flex"#, + vec![ + ("!tw__flex".to_string(), 12), + ("sm:!tw__block".to_string(), 22), + ("tw__bg-gradient-to-t".to_string(), 36), + ("flex".to_string(), 57), + ("tw:[color:red]".to_string(), 62), + ("group-[]:tw__flex".to_string(), 77), + ], + ), + // After migrations + ( + r#"<div class="tw:flex! tw:sm:block! 
tw:bg-linear-to-t flex tw:[color:red] tw:in-[.tw\:group]:flex"></div>"#, + vec![ + ("tw:flex!".to_string(), 12), + ("tw:sm:block!".to_string(), 21), + ("tw:bg-linear-to-t".to_string(), 34), + ("flex".to_string(), 52), + ("tw:[color:red]".to_string(), 57), + ("tw:in-[.tw\\:group]:flex".to_string(), 72), + ], + ), + ] { + let candidates = scanner.get_candidates_with_positions(crate::ChangedContent::Content( + input.to_string(), + "html".into(), + )); + assert_eq!(candidates, expected); + } + } +} diff --git a/crates/oxide/src/main.rs b/crates/oxide/src/main.rs new file mode 100644 index 000000000000..1fa2918afb72 --- /dev/null +++ b/crates/oxide/src/main.rs @@ -0,0 +1,65 @@ +use std::hint::black_box; +use tailwindcss_oxide::cursor::Cursor; +use tailwindcss_oxide::extractor::machine::{Machine, MachineState}; +use tailwindcss_oxide::extractor::{Extracted, Extractor}; +use tailwindcss_oxide::throughput::Throughput; + +fn run_full_extractor(input: &[u8]) -> Vec<&[u8]> { + Extractor::new(input) + .extract() + .into_iter() + .map(|x| match x { + Extracted::Candidate(bytes) => bytes, + Extracted::CssVariable(bytes) => bytes, + }) + .collect::<Vec<_>>() +} + +fn _run_machine<T: Machine>(input: &[u8]) -> Vec<&[u8]> { + let len = input.len(); + let mut machine = T::default(); + let mut cursor = Cursor::new(input); + let mut result = Vec::with_capacity(25); + + while cursor.pos < len { + if let MachineState::Done(span) = machine.next(&mut cursor) { + result.push(span.slice(input)); + } + + cursor.advance(); + } + + result +} + +fn run(input: &[u8]) -> Vec<&[u8]> { + // _run_machine::<tailwindcss_oxide::extractor::arbitrary_property_machine::ArbitraryPropertyMachine>(input) + // _run_machine::<tailwindcss_oxide::extractor::arbitrary_value_machine::ArbitraryValueMachine>(input) + // _run_machine::<tailwindcss_oxide::extractor::arbitrary_variable_machine::ArbitraryVariableMachine>(input) + // _run_machine::<tailwindcss_oxide::extractor::candidate_machine::CandidateMachine>(input) 
+ // _run_machine::<tailwindcss_oxide::extractor::css_variable_machine::CssVariableMachine>(input) + // _run_machine::<tailwindcss_oxide::extractor::modifier_machine::ModifierMachine>(input) + // _run_machine::<tailwindcss_oxide::extractor::named_utility_machine::NamedUtilityMachine>(input) + // _run_machine::<tailwindcss_oxide::extractor::named_variant_machine::NamedVariantMachine>(input) + // _run_machine::<tailwindcss_oxide::extractor::string_machine::StringMachine>(input) + // _run_machine::<tailwindcss_oxide::extractor::utility_machine::UtilityMachine>(input) + // _run_machine::<tailwindcss_oxide::extractor::variant_machine::VariantMachine>(input) + + run_full_extractor(input) +} + +fn main() { + let iterations = 10_000; + let input = include_bytes!("./fixtures/example.html"); + + let throughput = Throughput::compute(iterations, input.len(), || { + _ = black_box( + input + .split(|x| *x == b'\n') + .flat_map(run) + .collect::<Vec<_>>(), + ); + }); + + eprintln!("Extractor: {:}", throughput); +} diff --git a/crates/oxide/src/parser.rs b/crates/oxide/src/parser.rs deleted file mode 100644 index 9cd702d477f3..000000000000 --- a/crates/oxide/src/parser.rs +++ /dev/null @@ -1,1757 +0,0 @@ -use crate::{cursor::Cursor, fast_skip::fast_skip}; -use bstr::ByteSlice; -use fxhash::FxHashSet; -use tracing::trace; - -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum ParseAction<'a> { - Consume, - Skip, - RestartAt(usize), - - SingleCandidate(&'a [u8]), - MultipleCandidates(Vec<&'a [u8]>), - Done, -} - -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum Bracketing<'a> { - Included(&'a [u8]), - Wrapped(&'a [u8]), - None, -} - -#[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub struct SplitCandidate<'a> { - variant: &'a [u8], - utility: &'a [u8], -} - -#[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub enum ValidationResult { - Invalid, - Valid, - Restart, -} - -#[derive(Default)] -pub struct ExtractorOptions { - pub preserve_spaces_in_arbitrary: bool, -} - -#[derive(Debug, 
PartialEq, Eq, Clone)] -enum Arbitrary { - /// Not inside any arbitrary value - None, - - /// In arbitrary value mode with square brackets - /// - /// E.g.: `bg-[โฆ]` - /// ^ - Brackets { start_idx: usize }, - - /// In arbitrary value mode with parens - /// - /// E.g.: `bg-(โฆ)` - /// ^ - Parens { start_idx: usize }, -} - -pub struct Extractor<'a> { - opts: ExtractorOptions, - - input: &'a [u8], - cursor: Cursor<'a>, - - idx_start: usize, - idx_end: usize, - idx_last: usize, - - arbitrary: Arbitrary, - - in_candidate: bool, - in_escape: bool, - - discard_next: bool, - - quote_stack: Vec<u8>, - bracket_stack: Vec<u8>, -} - -impl<'a> Extractor<'a> { - pub fn all(input: &'a [u8], opts: ExtractorOptions) -> Vec<&'a [u8]> { - Self::new(input, opts).flatten().collect() - } - - pub fn unique(input: &'a [u8], opts: ExtractorOptions) -> FxHashSet<&'a [u8]> { - let mut candidates: FxHashSet<&[u8]> = Default::default(); - candidates.reserve(100); - candidates.extend(Self::new(input, opts).flatten()); - candidates - } - - pub fn unique_ord(input: &'a [u8], opts: ExtractorOptions) -> Vec<&'a [u8]> { - // This is an inefficient way to get an ordered, unique - // list as a Vec but it is only meant for testing. - let mut candidates = Self::all(input, opts); - let mut unique_list = FxHashSet::default(); - unique_list.reserve(candidates.len()); - candidates.retain(|c| unique_list.insert(*c)); - - candidates - } - - pub fn with_positions(input: &'a [u8], opts: ExtractorOptions) -> Vec<(&'a [u8], usize)> { - let mut result = Vec::new(); - let extractor = Self::new(input, opts).flatten(); - for item in extractor { - // Since the items are slices of the input buffer, we can calculate the start index - // by doing some pointer arithmetics. 
- let start_index = item.as_ptr() as usize - input.as_ptr() as usize; - result.push((item, start_index)); - } - result - } -} - -impl<'a> Extractor<'a> { - pub fn new(input: &'a [u8], opts: ExtractorOptions) -> Self { - Self { - opts, - input, - cursor: Cursor::new(input), - - idx_start: 0, - idx_end: 0, - - arbitrary: Arbitrary::None, - in_candidate: false, - in_escape: false, - - discard_next: false, - - idx_last: input.len(), - quote_stack: Vec::with_capacity(8), - bracket_stack: Vec::with_capacity(8), - } - } -} - -/// Helpers -impl<'a> Extractor<'a> { - #[inline(always)] - fn in_quotes(&self) -> bool { - !self.quote_stack.is_empty() - } - - #[inline(always)] - fn get_current_candidate(&mut self) -> ParseAction<'a> { - if self.discard_next { - return ParseAction::Skip; - } - - let mut candidate = &self.input[self.idx_start..=self.idx_end]; - - // The bracket stack is not empty, which means that we are dealing with unbalanced - // brackets. - if !self.bracket_stack.is_empty() { - return ParseAction::Skip; - } - - while !candidate.is_empty() { - match Extractor::is_valid_candidate_string(candidate) { - ValidationResult::Valid => return ParseAction::SingleCandidate(candidate), - ValidationResult::Restart => return ParseAction::RestartAt(self.idx_start + 1), - _ => {} - } - - match candidate.split_last() { - // At this point the candidate is technically invalid, however it can be that it - // has a few dangling characters attached to it. For example, think about a - // JavaScript object: - // - // ```js - // { underline: true } - // ``` - // - // The candidate at this point will be `underline:`, which is invalid. However, we - // can assume in this case that the `:` should not be there, and therefore we can - // try to slice it off and retry the validation. - Some((b':' | b'/' | b'.', head)) => { - candidate = head; - } - - // It could also be that we have the candidate is nested inside of bracket or quote - // pairs. 
In this case we want to retrieve the inner part and try to validate that - // inner part instead. For example, in a JavaScript array: - // - // ```js - // let myClasses = ["underline"] - // ``` - // - // The `underline` is nested inside of quotes and in square brackets. Let's try to - // get the inner part and validate that instead. - _ => match Self::slice_surrounding(candidate) { - Some(shorter) if shorter != candidate => { - candidate = shorter; - } - _ => break, - }, - } - } - - ParseAction::Consume - } - - #[inline(always)] - fn split_candidate(candidate: &'a [u8]) -> SplitCandidate<'a> { - let mut brackets = 0; - let mut idx_end = 0; - - for (n, c) in candidate.iter().enumerate() { - match c { - b'[' => brackets += 1, - b']' if brackets > 0 => brackets -= 1, - b':' if brackets == 0 => idx_end = n + 1, - _ => {} - } - } - - SplitCandidate { - variant: &candidate[0..idx_end], - utility: &candidate[idx_end..], - } - } - - #[inline(always)] - fn contains_in_constrained(candidate: &'a [u8], bytes: Vec<u8>) -> bool { - let mut brackets = 0; - - for c in candidate { - match c { - b'[' => brackets += 1, - b']' if brackets > 0 => brackets -= 1, - _ if brackets == 0 && bytes.contains(c) => return true, - _ => {} - } - } - - false - } - - #[inline(always)] - fn is_valid_candidate_string(candidate: &'a [u8]) -> ValidationResult { - // Reject candidates that start with a capital letter - if candidate[0].is_ascii_uppercase() { - return ValidationResult::Invalid; - } - - // Rejects candidates that end with "-" or "_" - if candidate.ends_with(b"-") || candidate.ends_with(b"_") { - return ValidationResult::Invalid; - } - - // Reject candidates that are single camelCase words, e.g.: `useEffect` - if candidate.iter().all(|c| c.is_ascii_alphanumeric()) - && candidate.iter().any(|c| c.is_ascii_uppercase()) - { - return ValidationResult::Invalid; - } - - // Reject candidates that look like SVG path data, e.g.: `m32.368 m7.5` - if !candidate.contains(&b'-') - && 
!candidate.contains(&b':') - && candidate.iter().any(|c| c == &b'.') - { - return ValidationResult::Invalid; - } - - // Reject candidates that look like version constraints or email addresses, e.g.: `next@latest`, `bob@example.com` - if candidate - .iter() - .all(|c| c.is_ascii_alphanumeric() || c == &b'.' || c == &b'-' || c == &b'@') - && candidate[1..].contains(&b'@') - { - return ValidationResult::Invalid; - } - - // Reject candidates that look like URLs - if candidate.starts_with(b"http://") || candidate.starts_with(b"https://") { - return ValidationResult::Invalid; - } - - // Reject candidates that look short markdown links, e.g.: `[https://example.com]` - if candidate.starts_with(b"[http://") || candidate.starts_with(b"[https://") { - return ValidationResult::Invalid; - } - - // Reject candidates that look like imports with path aliases, e.g.: `@/components/button` - if candidate.len() > 1 && candidate[1] == b'/' { - return ValidationResult::Invalid; - } - - // Reject candidates that look like paths, e.g.: `app/assets/stylesheets` - if !candidate.contains(&b':') && !candidate.contains(&b'[') { - let mut count = 0; - for c in candidate { - if c == &b'/' { - count += 1; - } - if count > 1 { - return ValidationResult::Invalid; - } - } - } - - let split_candidate = Extractor::split_candidate(candidate); - - let mut offset = 0; - let mut offset_end = 0; - let utility = &split_candidate.utility; - let original_utility = &utility; - - // Some special cases that we can ignore while validating - if utility.starts_with(b"!-") { - offset += 2; - } else if utility.starts_with(b"!") || utility.starts_with(b"-") { - offset += 1; - } else if utility.ends_with(b"!") { - offset_end += 1; - } - - // These are allowed in arbitrary values and in variants but nowhere else - if Extractor::contains_in_constrained(utility, vec![b'<', b'>']) { - return ValidationResult::Restart; - } - - // It's an arbitrary property - if utility.starts_with(b"[") - && utility.ends_with(b"]") - && 
(utility.starts_with(b"['") - || utility.starts_with(b"[\"") - || utility.starts_with(b"[`")) - { - return ValidationResult::Restart; - } - - // Only allow parentheses for the shorthand arbitrary custom properties syntax - if let Some(index) = utility.find(b"(") { - let mut skip_parens_check = false; - let start_brace_index = utility.find(b"["); - let end_brace_index = utility.find(b"]"); - - if let (Some(start_brace_index), Some(end_brace_index)) = - (start_brace_index, end_brace_index) - { - if start_brace_index < index && end_brace_index > index { - skip_parens_check = true; - } - } - - if !skip_parens_check && !utility[index + 1..].starts_with(b"--") { - return ValidationResult::Restart; - } - } - - // Pluck out the part that we are interested in. - let utility = &utility[offset..(utility.len() - offset_end)]; - - // Validations - // We should have _something_ - if utility.is_empty() { - return ValidationResult::Invalid; - } - - // <sm is fine, but only as a variant - // TODO: We probably have to ensure that this `:` is not inside the arbitrary values... - if utility.starts_with(b"<") && !utility.contains(&b':') { - return ValidationResult::Invalid; - } - - // Only variants can start with a number. E.g.: `2xl` is fine, but only as a variant. - // TODO: Adjust this if we run into issues with actual utilities starting with a number? - // TODO: We probably have to ensure that this `:` is not inside the arbitrary values... - if utility[0] >= b'0' && utility[0] <= b'9' && !utility.contains(&b':') { - return ValidationResult::Invalid; - } - - // In case of an arbitrary property, we should have at least this structure: [a:b] - if utility.starts_with(b"[") && utility.ends_with(b"]") { - // [a:b] is at least 5 characters long - if utility.len() < 5 { - return ValidationResult::Invalid; - } - - // Now that we validated that the candidate is technically fine, let's ensure that it - // doesn't start with a `-` because that would make it invalid for arbitrary properties. 
- if original_utility.starts_with(b"-") || original_utility.starts_with(b"!-") { - return ValidationResult::Invalid; - } - - // Make sure an arbitrary property/value pair is valid, otherwise - // we may generate invalid CSS that will cause tools like PostCSS - // to crash when trying to parse the generated CSS. - if !Self::validate_arbitrary_property(utility) { - return ValidationResult::Invalid; - } - - // The ':` must be preceded by a-Z0-9 because it represents a property name. - // SAFETY: the Self::validate_arbitrary_property function from above validates that the - // `:` exists. - let colon = utility.find(":").unwrap(); - - if !utility - .chars() - .nth(colon - 1) - .map_or_else(|| false, |c| c.is_ascii_alphanumeric()) - { - return ValidationResult::Invalid; - } - - let property = &utility[1..colon]; - - // The property must match /^[a-zA-Z-][a-zA-Z0-9-_]+$/ - if !property[0].is_ascii_alphabetic() && property[0] != b'-' { - return ValidationResult::Invalid; - } - - if !property - .iter() - .all(|c| c.is_ascii_alphanumeric() || c == &b'-' || c == &b'_') - { - return ValidationResult::Invalid; - } - } - - ValidationResult::Valid - } - - /** - * Make sure an arbitrary property/value pair is valid, otherwise - * PostCSS may crash when trying to parse the generated CSS. - * - * `input` - the full candidate string, including the brackets - */ - fn validate_arbitrary_property(candidate: &[u8]) -> bool { - if !candidate.starts_with(b"[") || !candidate.ends_with(b"]") { - return false; - } - let property = &candidate[1..candidate.len() - 1]; - let is_custom_property = property.starts_with(b"--"); - let Some(colon_pos) = property.find(b":") else { - return false; - }; - if is_custom_property { - return true; - } - - let mut stack = vec![]; - let mut iter = property[colon_pos + 1..].iter(); - while let Some(c) = iter.next() { - match c { - // The value portion cannot contain unquoted colons. - // E.g. `[foo::bar]` leads to "foo::bar; which errors because of the `:`. 
- b':' | b'{' | b'}' if stack.is_empty() => { - return false; - } - - b'\'' => { - if let Some(b'\'') = stack.last() { - _ = stack.pop() - } else { - stack.push(b'\'') - } - } - b'"' => { - if let Some(b'"') = stack.last() { - _ = stack.pop() - } else { - stack.push(b'"') - } - } - - // Skip escaped characters. - b'\\' => { - iter.next(); - } - - _ => {} - } - } - - true - } - - #[inline(always)] - fn parse_escaped(&mut self) -> ParseAction<'a> { - // If this character is escaped, we don't care about it. - // It gets consumed. - trace!("Escape::Consume"); - - self.in_escape = false; - - ParseAction::Consume - } - - #[inline(always)] - fn parse_arbitrary(&mut self) -> ParseAction<'a> { - // In this we could technically use memchr 6 times (then looped) to find the indexes / bounds of arbitrary values - if self.in_escape { - return self.parse_escaped(); - } - - match self.cursor.curr { - b'\\' => { - // The `\` character is used to escape characters in arbitrary content _and_ to prevent the starting of arbitrary content - trace!("Arbitrary::Escape"); - self.in_escape = true; - } - - b'(' => self.bracket_stack.push(self.cursor.curr), - b')' => match self.bracket_stack.last() { - Some(&b'(') => { - self.bracket_stack.pop(); - } - - // This is the last bracket meaning the end of arbitrary content - _ if !self.in_quotes() => { - if matches!(self.cursor.next, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9') { - return ParseAction::Consume; - } - - if let Arbitrary::Parens { start_idx } = self.arbitrary { - trace!("Arbitrary::End\t"); - self.arbitrary = Arbitrary::None; - - if self.cursor.pos - start_idx == 1 { - // We have an empty arbitrary value, which is not allowed - return ParseAction::Skip; - } - - // We have a valid arbitrary value - return ParseAction::Consume; - } - - // Last parenthesis is different compared to what we expect, therefore we are - // not in a valid arbitrary value. 
- return ParseAction::Skip; - } - - // We're probably in quotes or nested brackets, so we keep going - _ => {} - }, - - // Make sure the brackets are balanced - b'[' => self.bracket_stack.push(self.cursor.curr), - b']' => match self.bracket_stack.last() { - // We've ended a nested bracket - Some(&b'[') => { - self.bracket_stack.pop(); - } - - // This is the last bracket meaning the end of arbitrary content - _ if !self.in_quotes() => { - if matches!(self.cursor.next, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9') { - return ParseAction::Consume; - } - - if let Arbitrary::Brackets { start_idx: _ } = self.arbitrary { - trace!("Arbitrary::End\t"); - self.arbitrary = Arbitrary::None; - - // TODO: This is temporarily disabled such that the upgrade tool can work - // with legacy arbitrary values. This will be re-enabled in the future (or - // with a flag) - // if self.cursor.pos - start_idx == 1 { - // // We have an empty arbitrary value, which is not allowed - // return ParseAction::Skip; - // } - } - } - - // We're probably in quotes or nested brackets, so we keep going - _ => {} - }, - - // Arbitrary values sometimes contain quotes - // These can "escape" the arbitrary value mode - // switching of `[` and `]` characters - b'"' | b'\'' | b'`' => match self.quote_stack.last() { - Some(&last_quote) if last_quote == self.cursor.curr => { - trace!("Quote::End\t"); - self.quote_stack.pop(); - } - _ => { - trace!("Quote::Start\t"); - self.quote_stack.push(self.cursor.curr); - } - }, - - c if c.is_ascii_whitespace() && !self.opts.preserve_spaces_in_arbitrary => { - trace!("Arbitrary::SkipAndEndEarly\t"); - - if let Arbitrary::Brackets { start_idx } | Arbitrary::Parens { start_idx } = - self.arbitrary - { - // Restart the parser ahead of the arbitrary value It may pick up more - // candidates - return ParseAction::RestartAt(start_idx + 1); - } - } - - // Arbitrary values allow any character inside them - // Except spaces unless you are in loose mode - _ => { - 
trace!("Arbitrary::Consume\t"); - // No need to move the end index because either the arbitrary value will end properly OR we'll hit invalid characters - } - } - - ParseAction::Consume - } - - #[inline(always)] - fn parse_start(&mut self) -> ParseAction<'a> { - match self.cursor.curr { - // Enter arbitrary property mode - b'[' if self.cursor.prev != b'\\' => { - trace!("Arbitrary::Start\t"); - self.arbitrary = Arbitrary::Brackets { - start_idx: self.cursor.pos, - }; - - ParseAction::Consume - } - - // Allowed first characters. - b'@' | b'!' | b'-' | b'<' | b'>' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'*' => { - // TODO: A bunch of characters that we currently support but maybe we only want it behind - // a flag. E.g.: `<sm` - // | '$' | '^' | '_' - - // When the new candidate is preceded by a `:`, then we want to keep parsing, but - // throw away the full candidate because it can not be a valid candidate at the end - // of the day. - if self.cursor.prev == b':' { - self.discard_next = true; - } - - trace!("Candidate::Start\t"); - - ParseAction::Consume - } - - _ => ParseAction::Skip, - } - } - - #[inline(always)] - fn parse_continue(&mut self) -> ParseAction<'a> { - match self.cursor.curr { - // Enter arbitrary value mode. E.g.: `bg-[rgba(0, 0, 0)]` - // ^ - b'[' if matches!(self.cursor.prev, b'@' | b'-' | b':' | b'/' | b'!' | b'\0') - || self.cursor.prev.is_ascii_whitespace() => - { - trace!("Arbitrary::Start\t"); - self.arbitrary = Arbitrary::Brackets { - start_idx: self.cursor.pos, - }; - } - - // Enter arbitrary value mode. E.g.: `bg-(--my-color)` - // ^ - b'(' if matches!(self.cursor.prev, b'-' | b'/') => { - trace!("Arbitrary::Start\t"); - self.arbitrary = Arbitrary::Parens { - start_idx: self.cursor.pos, - }; - } - - // Can't enter arbitrary value mode. This can't be a candidate. - b'[' | b'(' => { - trace!("Arbitrary::Skip_Start\t"); - return ParseAction::Skip; - } - - // A % can only appear at the end of the candidate itself. 
It can also only be after a - // digit 0-9. This covers the following cases: - // - from-15% - b'%' if self.cursor.prev.is_ascii_digit() => { - return match (self.cursor.at_end, self.cursor.next) { - // End of string == end of candidate == okay - (true, _) => ParseAction::Consume, - - // Looks like the end of a candidate == okay - (_, b'\'' | b'"' | b'`') => ParseAction::Consume, - (_, c) if c.is_ascii_whitespace() => ParseAction::Consume, - - // Otherwise, not a valid character in a candidate - _ => ParseAction::Skip, - }; - } - b'%' => return ParseAction::Skip, - - // < and > can only be part of a variant and only be the first or last character - b'<' | b'>' | b'*' => { - // Can only be the first or last character - // - // E.g.: - // - // - <sm:underline - // ^ - // - md>:underline - // ^ - if self.cursor.pos == self.idx_start || self.cursor.pos == self.idx_last { - trace!("Candidate::Consume\t"); - } - // If it is in the middle, it can only be part of a stacked variant - // - dark:<sm:underline - // ^ - // - dark:md>:underline - // ^ - else if self.cursor.prev == b':' || self.cursor.next == b':' { - trace!("Candidate::Consume\t"); - } else { - return ParseAction::Skip; - } - } - - // Allowed characters in the candidate itself - // None of these can come after a closing bracket `]` - b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_' | b'@' - if self.cursor.prev != b']' => - { - /* TODO: The `b'@'` is necessary for custom separators like _@, maybe we can handle this in a better way... */ - trace!("Candidate::Consume\t"); - } - - // A dot (.) can only appear in the candidate itself (not the arbitrary part), if the previous - // and next characters are both digits. This covers the following cases: - // - p-1.5 - b'.' 
if self.cursor.prev.is_ascii_digit() => match self.cursor.next { - next if next.is_ascii_digit() => { - trace!("Candidate::Consume\t"); - } - _ => return ParseAction::Skip, - }, - - // Allowed characters in the candidate itself - // These MUST NOT appear at the end of the candidate - b'/' | b':' if !self.cursor.at_end => { - trace!("Candidate::Consume\t"); - } - - // The important character `!`, is allowed at the end of the candidate - b'!' => { - trace!("Candidate::Consume\t"); - } - - _ => return ParseAction::Skip, - } - - ParseAction::Consume - } - - #[inline(always)] - fn can_be_candidate(&mut self) -> bool { - self.in_candidate - && matches!(self.arbitrary, Arbitrary::None) - && (0..=127).contains(&self.cursor.curr) - && (self.idx_start == 0 || self.input[self.idx_start - 1] <= 127) - } - - #[inline(always)] - fn handle_skip(&mut self) { - // In all other cases, we skip characters and reset everything so we can make new candidates - trace!("Characters::Skip\t"); - self.idx_start = self.cursor.pos; - self.idx_end = self.cursor.pos; - self.in_candidate = false; - self.arbitrary = Arbitrary::None; - self.in_escape = false; - } - - #[inline(always)] - fn parse_char(&mut self) -> ParseAction<'a> { - if !matches!(self.arbitrary, Arbitrary::None) { - self.parse_arbitrary() - } else if self.in_candidate { - self.parse_continue() - } else if self.parse_start() == ParseAction::Consume { - self.in_candidate = true; - self.idx_start = self.cursor.pos; - self.idx_end = self.cursor.pos; - - ParseAction::Consume - } else { - ParseAction::Skip - } - } - - #[inline(always)] - fn yield_candidate(&mut self) -> ParseAction<'a> { - if self.can_be_candidate() { - self.get_current_candidate() - } else { - ParseAction::Consume - } - } - - #[inline(always)] - fn restart(&mut self, pos: usize) { - trace!("Parser::Restart\t{}", pos); - - self.idx_start = pos; - self.idx_end = pos; - - self.arbitrary = Arbitrary::None; - self.in_candidate = false; - self.in_escape = false; - - 
self.discard_next = false; - - self.quote_stack.clear(); - self.bracket_stack.clear(); - self.cursor.move_to(pos); - } - - #[inline(always)] - fn without_surrounding(&self) -> Bracketing<'a> { - let range = self.idx_start..=self.idx_end; - let clipped = &self.input[range]; - - Self::slice_surrounding(clipped) - .map(Bracketing::Included) - .or_else(|| { - if self.idx_start == 0 || self.idx_end + 1 == self.idx_last { - return None; - } - - let range = self.idx_start - 1..=self.idx_end + 1; - let clipped = &self.input[range]; - Self::slice_surrounding(clipped).map(Bracketing::Wrapped) - }) - .unwrap_or(Bracketing::None) - } - - #[inline(always)] - fn is_balanced(input: &[u8]) -> bool { - let mut depth = 0isize; - - for n in input { - match n { - b'[' | b'{' | b'(' => depth += 1, - b']' | b'}' | b')' => depth -= 1, - _ => continue, - } - - if depth < 0 { - return false; - } - } - - depth == 0 - } - - #[inline(always)] - fn slice_surrounding(input: &[u8]) -> Option<&[u8]> { - let mut prev = None; - let mut input = input; - - loop { - let leading = input.first().unwrap_or(&0x00); - let trailing = input.last().unwrap_or(&0x00); - - let needed = matches!( - (leading, trailing), - (b'(', b')') - | (b'{', b'}') - | (b'[', b']') - | (b'"', b'"') - | (b'`', b'`') - | (b'\'', b'\'') - ); - - if needed { - prev = Some(input); - input = &input[1..input.len() - 1]; - continue; - } - if Self::is_balanced(input) && prev.is_some() { - return Some(input); - } - return prev; - } - } - - #[inline(always)] - fn parse_and_yield(&mut self) -> ParseAction<'a> { - trace!("Cursor {}", self.cursor); - - // Fast skipping of invalid characters - let can_skip_whitespace = false; // if self.opts.preserve_spaces_in_arbitrary { !self.in_arbitrary } else { true }; - if can_skip_whitespace { - if let Some(pos) = fast_skip(&self.cursor) { - trace!("FastSkip::Restart\t{}", pos); - return ParseAction::RestartAt(pos); - } - } - - let action = self.parse_char(); - - match action { - 
ParseAction::RestartAt(_) => return action, - ParseAction::Consume => { - self.idx_end = self.cursor.pos; - - // If we're still consuming characters, we keep going - // Only exception is if we've hit the end of the input - if !self.cursor.at_end { - return action; - } - } - _ => {} - } - - let action = self.yield_candidate(); - - match (&action, self.cursor.curr) { - (ParseAction::RestartAt(_), _) => action, - (_, 0x00) => ParseAction::Done, - (ParseAction::SingleCandidate(candidate), _) => self.generate_slices(candidate), - _ => ParseAction::RestartAt(self.cursor.pos + 1), - } - } - - /// Peek inside `[]`, `{}`, and `()` pairs - /// to look for an additional candidate - #[inline(always)] - fn generate_slices(&mut self, candidate: &'a [u8]) -> ParseAction<'a> { - match self.without_surrounding() { - Bracketing::None => ParseAction::SingleCandidate(candidate), - Bracketing::Included(sliceable) | Bracketing::Wrapped(sliceable) => { - if candidate == sliceable { - ParseAction::SingleCandidate(candidate) - } else { - let parts = vec![candidate, sliceable]; - let parts = parts - .into_iter() - .filter(|v| !v.is_empty()) - .collect::<Vec<_>>(); - - ParseAction::MultipleCandidates(parts) - } - } - } - } -} - -impl<'a> Iterator for Extractor<'a> { - type Item = Vec<&'a [u8]>; - - fn next(&mut self) -> Option<Self::Item> { - if self.cursor.at_end { - return None; - } - - loop { - let result = self.parse_and_yield(); - - // Cursor control - match result { - ParseAction::RestartAt(pos) => self.restart(pos), - _ => self.cursor.advance_by(1), - } - - // Candidate state control - match result { - ParseAction::SingleCandidate(_) => self.handle_skip(), - ParseAction::MultipleCandidates(_) => self.handle_skip(), - _ => {} - } - - // Iterator results - return match result { - ParseAction::SingleCandidate(candidate) => Some(vec![candidate]), - ParseAction::MultipleCandidates(candidates) => Some(candidates), - ParseAction::Done => None, - _ => continue, - }; - } - } -} - -#[cfg(test)] 
-mod test { - use super::*; - - fn _please_trace() { - tracing_subscriber::fmt() - .with_max_level(tracing::Level::TRACE) - .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) - .compact() - .init(); - } - - fn run(input: &str, loose: bool) -> Vec<&str> { - Extractor::unique_ord( - input.as_bytes(), - ExtractorOptions { - preserve_spaces_in_arbitrary: loose, - }, - ) - .into_iter() - .map(|s| unsafe { std::str::from_utf8_unchecked(s) }) - .collect() - } - - #[test] - fn it_can_parse_simple_candidates() { - let candidates = run("underline", false); - assert_eq!(candidates, vec!["underline"]); - } - - #[test] - fn it_can_parse_multiple_simple_utilities() { - let candidates = run("font-bold underline", false); - assert_eq!(candidates, vec!["font-bold", "underline"]); - } - - #[test] - fn it_can_parse_simple_candidates_with_variants() { - let candidates = run("hover:underline", false); - assert_eq!(candidates, vec!["hover:underline"]); - } - - #[test] - fn it_can_parse_start_variants() { - let candidates = run("*:underline", false); - assert_eq!(candidates, vec!["*:underline"]); - - let candidates = run("hover:*:underline", false); - assert_eq!(candidates, vec!["hover:*:underline"]); - } - - #[test] - fn it_can_parse_simple_candidates_with_stacked_variants() { - let candidates = run("focus:hover:underline", false); - assert_eq!(candidates, vec!["focus:hover:underline"]); - } - - #[test] - fn it_can_parse_utilities_with_arbitrary_values() { - let candidates = run("m-[2px]", false); - assert_eq!(candidates, vec!["m-[2px]"]); - } - - #[test] - fn it_can_parse_utilities_with_arbitrary_var_shorthand() { - let candidates = run("m-(--my-var)", false); - assert_eq!(candidates, vec!["m-(--my-var)"]); - } - - #[test] - fn it_can_parse_utilities_with_arbitrary_var_shorthand_as_modifier() { - let candidates = run("bg-(--my-color)/(--my-opacity)", false); - assert_eq!(candidates, vec!["bg-(--my-color)/(--my-opacity)"]); - } - - #[test] - fn 
it_throws_away_arbitrary_values_that_are_unbalanced() { - let candidates = run("m-[calc(100px*2]", false); - assert!(candidates.is_empty()); - } - - #[test] - fn it_can_parse_utilities_with_arbitrary_values_and_variants() { - let candidates = run("hover:m-[2px]", false); - assert_eq!(candidates, vec!["hover:m-[2px]"]); - } - - #[test] - fn it_can_parse_arbitrary_variants() { - let candidates = run("[@media(min-width:200px)]:underline", false); - assert_eq!(candidates, vec!["[@media(min-width:200px)]:underline"]); - } - - #[test] - fn it_can_parse_matched_variants() { - let candidates = run("group-[&:hover]:underline", false); - assert_eq!(candidates, vec!["group-[&:hover]:underline"]); - } - - #[test] - fn it_should_not_keep_spaces() { - let candidates = run("bg-[rgba(0, 0, 0)]", false); - - assert_eq!(candidates, vec!["rgba"]); - } - - #[test] - fn it_should_keep_spaces_in_loose_mode() { - let candidates = run("bg-[rgba(0, 0, 0)]", true); - assert_eq!(candidates, vec!["bg-[rgba(0, 0, 0)]"]); - } - - #[test] - fn it_should_keep_important_arbitrary_properties_legacy() { - let candidates = run("![foo:bar]", false); - assert_eq!(candidates, vec!["![foo:bar]"]); - } - - #[test] - fn it_should_keep_important_arbitrary_properties() { - let candidates = run("[foo:bar]!", false); - assert_eq!(candidates, vec!["[foo:bar]!"]); - } - - #[test] - fn it_should_keep_important_arbitrary_values() { - let candidates = run("w-[calc(var(--size)/2)]!", false); - assert_eq!(candidates, vec!["w-[calc(var(--size)/2)]!"]); - } - - #[test] - fn it_should_keep_important_candidates_legacy() { - let candidates = run("!w-4", false); - assert_eq!(candidates, vec!["!w-4"]); - } - - #[test] - fn it_should_keep_important_candidates() { - let candidates = run("w-4!", false); - assert_eq!(candidates, vec!["w-4!"]); - } - - #[test] - fn it_should_not_allow_for_bogus_candidates() { - let candidates = run("[0]", false); - assert!(candidates.is_empty()); - - let candidates = run("[something]", false); - 
assert_eq!(candidates, vec!["something"]); - - let candidates = run(" [feature(slice_as_chunks)]", false); - assert_eq!(candidates, vec!["feature", "slice_as_chunks"]); - - let candidates = run("![feature(slice_as_chunks)]", false); - assert!(candidates.is_empty()); - - let candidates = run("-[feature(slice_as_chunks)]", false); - assert!(candidates.is_empty()); - - let candidates = run("!-[feature(slice_as_chunks)]", false); - assert!(candidates.is_empty()); - - let candidates = run("-[foo:bar]", false); - assert!(candidates.is_empty()); - - let candidates = run("!-[foo:bar]", false); - assert!(candidates.is_empty()); - } - - #[test] - fn it_should_keep_candidates_with_brackets_in_arbitrary_values_inside_quotes() { - let candidates = run("content-['hello_[_]_world']", false); - assert_eq!(candidates, vec!["content-['hello_[_]_world']"]); - } - - #[test] - fn it_should_ignore_leading_spaces() { - let candidates = run(" backdrop-filter-none", false); - assert_eq!(candidates, vec!["backdrop-filter-none"]); - } - - #[test] - fn it_should_ignore_trailing_spaces() { - let candidates = run("backdrop-filter-none ", false); - assert_eq!(candidates, vec!["backdrop-filter-none"]); - } - - #[test] - fn it_should_keep_classes_before_an_ending_newline() { - let candidates = run("backdrop-filter-none\n", false); - assert_eq!(candidates, vec!["backdrop-filter-none"]); - } - - #[test] - fn it_should_parse_out_the_correct_classes_from_tailwind_tests() { - // From: tests/arbitrary-variants.test.js - let candidates = run( - r#" - <div class="dark:lg:hover:[&>*]:underline"></div> - - <div class="[&_.foo\_\_bar]:hover:underline"></div> - <div class="hover:[&_.foo\_\_bar]:underline"></div> - "#, - false, - ); - // TODO: it should not include additional (separate) classes: class, hover:, foo: bar, underline - // TODO: Double check the expectations based on above information - assert_eq!( - candidates, - vec![ - "div", - "class", - r#"dark:lg:hover:[&>*]:underline"#, - 
r#"[&_.foo\_\_bar]:hover:underline"#, - r#"hover:[&_.foo\_\_bar]:underline"# - ] - ); - } - - #[test] - fn potential_candidates_are_skipped_when_hitting_impossible_characters() { - let candidates = run(" <p class=\"text-sm text-blue-700\">A new software update is available. See whatโs new in version 2.0.4.</p>", false); - assert_eq!( - candidates, - vec![ - "p", - "class", - "text-sm", - "text-blue-700", - // "A", // Uppercase first letter is not allowed - "new", - "software", - "update", - "is", - "available", - // "See", // Uppercase first letter is not allowed - // "what", // what is dropped because it is followed by the fancy: โ - // "s", // s is dropped because it is preceded by the fancy: โ - // "new", // Already seen - "in", - "version", - ] - ); - } - - #[test] - fn ignores_arbitrary_property_ish_things() { - let candidates = run(" [feature(slice_as_chunks)]", false); - assert_eq!(candidates, vec!["feature", "slice_as_chunks",]); - } - - #[test] - fn foo_bar() { - // w[โฆ] is not a valid pattern for part of candidate - // but @[] is (specifically in the context of a variant) - - let candidates = run("%w[text-[#bada55]]", false); - assert_eq!(candidates, vec!["w", "text-[#bada55]"]); - } - - #[test] - fn crash_001() { - let candidates = run("Aาฟษฟ[~5", false); - assert!(candidates.is_empty()); - } - - #[test] - fn crash_002() { - let candidates = run("", false); - assert!(candidates.is_empty()); - } - - #[test] - fn bad_001() { - let candidates = run("[ๆๆ]/", false); - assert_eq!(candidates, vec!["ๆๆ"]); - } - - #[test] - fn bad_002() { - let candidates = run(r"[\]\\\:[]", false); - assert!(candidates.is_empty()); - } - - #[test] - fn bad_003() { - // TODO: This seemsโฆ wrong - let candidates = run(r"[๐คต:]", false); - assert_eq!(candidates, vec!["๐คต", "๐คต:",]); - } - - #[test] - fn classes_in_js_arrays() { - let candidates = run( - r#"let classes = ['bg-black', 'hover:px-0.5', 'text-[13px]', '[--my-var:1_/_2]', '[.foo_&]:px-[0]', 
'[.foo_&]:[color:red]']">"#, - false, - ); - assert_eq!( - candidates, - vec![ - "let", - "classes", - "bg-black", - "hover:px-0.5", - "text-[13px]", - "[--my-var:1_/_2]", - "--my-var:1_/_2", - "[.foo_&]:px-[0]", - "[.foo_&]:[color:red]", - ] - ); - } - - #[test] - fn classes_in_js_arrays_without_spaces() { - let candidates = run( - r#"let classes = ['bg-black','hover:px-0.5','text-[13px]','[--my-var:1_/_2]','[.foo_&]:px-[0]','[.foo_&]:[color:red]']">"#, - false, - ); - assert_eq!( - candidates, - vec![ - "let", - "classes", - "bg-black", - "hover:px-0.5", - "text-[13px]", - "[--my-var:1_/_2]", - "--my-var:1_/_2", - "[.foo_&]:px-[0]", - "[.foo_&]:[color:red]", - ] - ); - } - - #[test] - fn classes_as_object_keys() { - let candidates = run( - r#"<div :class="{ underline: active, 'px-1.5': online }"></div>"#, - false, - ); - assert_eq!( - candidates, - vec!["div", "underline", "active", "px-1.5", "online"] - ); - } - - #[test] - fn multiple_nested_candidates() { - let candidates = run(r#"{color:red}"#, false); - assert_eq!(candidates, vec!["color:red"]); - } - - #[test] - fn percent_ended_candidates() { - let candidates = run( - r#"<!-- This should work `underline from-50% flex` -->"#, - false, - ); - assert_eq!( - candidates, - vec!["should", "work", "underline", "from-50%", "flex",] - ); - } - - #[test] - fn candidate_cannot_start_with_uppercase_character() { - let candidates = run(r#"<div class="foo Bar baz"></div>"#, false); - assert_eq!(candidates, vec!["div", "class", "foo", "baz"]); - } - - #[test] - fn candidate_cannot_end_with_a_dash() { - let candidates = run(r#"<div class="foo bar- baz"></div>"#, false); - assert_eq!(candidates, vec!["div", "class", "foo", "baz"]); - } - - #[test] - fn candidate_cannot_end_with_an_underscore() { - let candidates = run(r#"<div class="foo bar_ baz"></div>"#, false); - assert_eq!(candidates, vec!["div", "class", "foo", "baz"]); - } - - #[test] - fn candidate_cannot_be_a_single_camelcase_word() { - let candidates = run(r#"<div 
class="foo useEffect baz"></div>"#, false); - assert_eq!(candidates, vec!["div", "class", "foo", "baz"]); - } - - #[test] - fn candidate_cannot_be_svg_path_data() { - let candidates = run(r#"<path d="M25.517 0C18.712">"#, false); - assert_eq!(candidates, vec!["path", "d"]); - } - - #[test] - fn candidate_cannot_be_email_or_version_constraint() { - let candidates = run(r#"<div class="@container/dialog"> next@latest"#, false); - assert_eq!(candidates, vec!["div", "class", "@container/dialog"]); - } - - #[test] - fn candidate_cannot_be_a_url() { - let candidates = run( - r#"Our website is https://example.com or http://example.com if you want a virus"#, - false, - ); - assert_eq!( - candidates, - vec!["website", "is", "com", "or", "if", "you", "want", "a", "virus"] - ); - } - - #[test] - fn candidate_cannot_be_a_paths_with_aliases() { - let candidates = run(r#"import potato from '@/potato';"#, false); - assert_eq!(candidates, vec!["import", "potato", "from"]); - } - - #[test] - fn candidate_cannot_be_a_path() { - let candidates = run( - r#"import potato from 'some/path/to/something'; - import banana from '@/banana';"#, - false, - ); - assert_eq!(candidates, vec!["import", "potato", "from", "banana"]); - } - - #[test] - fn ruby_percent_formatted_strings() { - let candidates = run(r#"%w[hover:flex]"#, false); - assert_eq!(candidates, vec!["w", "hover:flex"]); - } - - #[test] - fn urls_in_arbitrary_values_are_ok() { - let candidates = run(r#"<div class="bg-[url('/img/hero-pattern.svg')]">"#, false); - assert_eq!( - candidates, - vec!["div", "class", "bg-[url('/img/hero-pattern.svg')]"] - ); - } - - #[test] - fn colon_in_arbitrary_property_value() { - let candidates = run("[color::] #[test::foo]", false); - assert!(candidates - .iter() - .all(|candidate| !candidate.starts_with('['))); - } - - #[test] - fn braces_in_arbitrary_property_value() { - let candidates = run("[color:${foo}] #[test:{foo}]", false); - assert!(candidates - .iter() - .all(|candidate| 
!candidate.starts_with('['))); - } - - #[test] - fn quoted_colon_in_arbitrary_property_value() { - let candidates = run("[content:'bar:bar'] [content:\"bar:bar\"]", false); - assert!(candidates - .iter() - .any(|candidate| candidate == &"[content:'bar:bar']")); - assert!(candidates - .iter() - .any(|candidate| candidate == &"[content:\"bar:bar\"]")); - } - - #[test] - fn quoted_braces_in_arbitrary_property_value() { - let candidates = run("[content:'{bar}'] [content:\"{bar}\"]", false); - assert!(candidates - .iter() - .any(|candidate| candidate == &"[content:'{bar}']")); - assert!(candidates - .iter() - .any(|candidate| candidate == &"[content:\"{bar}\"]")); - } - - #[test] - fn colon_in_custom_property_value() { - let candidates = run("[--foo:bar:bar]", false); - assert!(candidates - .iter() - .any(|candidate| candidate == &"[--foo:bar:bar]")); - } - - #[test] - fn braces_in_custom_property_value() { - let candidates = run("[--foo:{bar}]", false); - assert!(candidates - .iter() - .any(|candidate| candidate == &"[--foo:{bar}]")); - } - - #[test] - fn candidate_slicing() { - let result = Extractor::slice_surrounding(&b".foo_&]:px-[0"[..]) - .map(std::str::from_utf8) - .transpose() - .unwrap(); - assert_eq!(result, None); - - let result = Extractor::slice_surrounding(&b"[.foo_&]:px-[0]"[..]) - .map(std::str::from_utf8) - .transpose() - .unwrap(); - assert_eq!(result, Some("[.foo_&]:px-[0]")); - - let result = Extractor::slice_surrounding(&b"{[.foo_&]:px-[0]}"[..]) - .map(std::str::from_utf8) - .transpose() - .unwrap(); - assert_eq!(result, Some("[.foo_&]:px-[0]")); - - let result = Extractor::slice_surrounding(&b"![foo:bar]"[..]) - .map(std::str::from_utf8) - .transpose() - .unwrap(); - assert_eq!(result, None); - - let result = Extractor::slice_surrounding(&b"[\"pt-1.5\"]"[..]) - .map(std::str::from_utf8) - .transpose() - .unwrap(); - assert_eq!(result, Some("pt-1.5")); - - let count = 1_000; - let crazy = format!("{}[.foo_&]:px-[0]{}", "[".repeat(count), 
"]".repeat(count)); - - let result = Extractor::slice_surrounding(crazy.as_bytes()) - .map(std::str::from_utf8) - .transpose() - .unwrap(); - assert_eq!(result, Some("[.foo_&]:px-[0]")); - } - - #[test] - fn does_not_emit_the_same_slice_multiple_times() { - let candidates: Vec<_> = - Extractor::with_positions("<div class=\"flex\"></div>".as_bytes(), Default::default()) - .into_iter() - .map(|(s, p)| unsafe { (std::str::from_utf8_unchecked(s), p) }) - .collect(); - - assert_eq!(candidates, vec![("div", 1), ("class", 5), ("flex", 12),]); - } - - #[test] - fn empty_arbitrary_values_are_allowed_for_codemods() { - let candidates = run( - r#"<div class="group-[]:flex group-[]/name:flex peer-[]:flex peer-[]/name:flex"></div>"#, - false, - ); - assert_eq!( - candidates, - vec![ - "div", - "class", - "group-[]:flex", - "group-[]/name:flex", - "peer-[]:flex", - "peer-[]/name:flex" - ] - ); - } - - #[test] - fn simple_utility_names_with_numbers_work() { - let candidates = run(r#"<div class="h2 hz"></div>"#, false); - assert_eq!(candidates, vec!["div", "class", "h2", "hz",]); - } - - #[test] - fn classes_in_an_array_without_whitespace() { - let candidates = run( - "let classes = ['bg-black','hover:px-0.5','text-[13px]','[--my-var:1_/_2]','[.foo_&]:px-[0]','[.foo_&]:[color:red]']", - false, - ); - - assert_eq!( - candidates, - vec![ - "let", - "classes", - "bg-black", - "hover:px-0.5", - "text-[13px]", - "[--my-var:1_/_2]", - "--my-var:1_/_2", - "[.foo_&]:px-[0]", - "[.foo_&]:[color:red]", - ] - ); - } - - #[test] - fn classes_in_an_array_with_spaces() { - let candidates = run( - "let classes = ['bg-black', 'hover:px-0.5', 'text-[13px]', '[--my-var:1_/_2]', '[.foo_&]:px-[0]', '[.foo_&]:[color:red]']", - false, - ); - - assert_eq!( - candidates, - vec![ - "let", - "classes", - "bg-black", - "hover:px-0.5", - "text-[13px]", - "[--my-var:1_/_2]", - "--my-var:1_/_2", - "[.foo_&]:px-[0]", - "[.foo_&]:[color:red]", - ] - ); - } - - #[test] - fn classes_in_an_array_with_tabs() { - let 
candidates = run( - "let classes = ['bg-black',\t'hover:px-0.5',\t'text-[13px]',\t'[--my-var:1_/_2]',\t'[.foo_&]:px-[0]',\t'[.foo_&]:[color:red]']", - false, - ); - - assert_eq!( - candidates, - vec![ - "let", - "classes", - "bg-black", - "hover:px-0.5", - "text-[13px]", - "[--my-var:1_/_2]", - "--my-var:1_/_2", - "[.foo_&]:px-[0]", - "[.foo_&]:[color:red]", - ] - ); - } - - #[test] - fn classes_in_an_array_with_newlines() { - let candidates = run( - "let classes = [\n'bg-black',\n'hover:px-0.5',\n'text-[13px]',\n'[--my-var:1_/_2]',\n'[.foo_&]:px-[0]',\n'[.foo_&]:[color:red]'\n]", - false, - ); - - assert_eq!( - candidates, - vec![ - "let", - "classes", - "bg-black", - "hover:px-0.5", - "text-[13px]", - "[--my-var:1_/_2]", - "--my-var:1_/_2", - "[.foo_&]:px-[0]", - "[.foo_&]:[color:red]", - ] - ); - } - - #[test] - fn arbitrary_properties_are_not_picked_up_after_an_escape() { - let candidates = run( - r#" - <!-- [!code word:group-has-\\[a\\]\\:block] --> - \\[a\\]\\:block] - "#, - false, - ); - - assert_eq!(candidates, vec!["!code", "a"]); - } - - #[test] - fn test_find_candidates_in_braces_inside_brackets() { - let candidates = run( - r#" - const classes = [wrapper("bg-red-500")] - "#, - false, - ); - - assert_eq!( - candidates, - vec!["const", "classes", "wrapper", "bg-red-500"] - ); - } - - #[test] - fn test_find_css_variables() { - let candidates = run("var(--color-red-500)", false); - assert_eq!(candidates, vec!["var", "--color-red-500"]); - - let candidates = run("<div style={{ 'color': 'var(--color-red-500)' }}/>", false); - assert_eq!( - candidates, - vec!["div", "style", "color", "var", "--color-red-500"] - ); - } - - #[test] - fn test_find_css_variables_with_fallback_values() { - let candidates = run("var(--color-red-500, red)", false); - assert_eq!(candidates, vec!["var", "--color-red-500", "red"]); - - let candidates = run("var(--color-red-500,red)", false); - assert_eq!(candidates, vec!["var", "--color-red-500", "red"]); - - let candidates = run( - 
"<div style={{ 'color': 'var(--color-red-500, red)' }}/>", - false, - ); - assert_eq!( - candidates, - vec!["div", "style", "color", "var", "--color-red-500", "red"] - ); - - let candidates = run( - "<div style={{ 'color': 'var(--color-red-500,red)' }}/>", - false, - ); - assert_eq!( - candidates, - vec!["div", "style", "color", "var", "--color-red-500", "red"] - ); - } - - #[test] - fn test_find_css_variables_with_fallback_css_variable_values() { - let candidates = run("var(--color-red-500, var(--color-blue-500))", false); - assert_eq!( - candidates, - vec!["var", "--color-red-500", "--color-blue-500"] - ); - } - - #[test] - fn test_is_valid_candidate_string() { - assert_eq!( - Extractor::is_valid_candidate_string(b"foo"), - ValidationResult::Valid - ); - assert_eq!( - Extractor::is_valid_candidate_string(b"foo-(--color-red-500)"), - ValidationResult::Valid - ); - assert_eq!( - Extractor::is_valid_candidate_string(b"bg-[url(foo)]"), - ValidationResult::Valid - ); - assert_eq!( - Extractor::is_valid_candidate_string(b"group-foo/(--bar)"), - ValidationResult::Valid - ); - - assert_eq!( - Extractor::is_valid_candidate_string(b"foo(\"bg-red-500\")"), - ValidationResult::Restart - ); - assert_eq!( - Extractor::is_valid_candidate_string(b"foo-("), - ValidationResult::Restart - ); - } -} diff --git a/crates/oxide/src/throughput.rs b/crates/oxide/src/throughput.rs new file mode 100644 index 000000000000..19597925f14f --- /dev/null +++ b/crates/oxide/src/throughput.rs @@ -0,0 +1,52 @@ +use std::fmt::Display; + +pub struct Throughput { + rate: f64, + elapsed: std::time::Duration, +} + +impl Display for Throughput { + #[inline(always)] + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!( + f, + "{}/s over {:.2}s", + format_byte_size(self.rate), + self.elapsed.as_secs_f64() + ) + } +} + +impl Throughput { + #[inline(always)] + pub fn compute<F>(iterations: usize, memory_baseline: usize, cb: F) -> Self + where + F: Fn(), + { + let now = 
std::time::Instant::now(); + for _ in 0..iterations { + cb(); + } + let elapsed = now.elapsed(); + let memory_size = iterations * memory_baseline; + + Self { + rate: memory_size as f64 / elapsed.as_secs_f64(), + elapsed, + } + } +} + +#[inline(always)] +fn format_byte_size(size: f64) -> String { + let units = ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]; + let unit = 1000; + let mut size = size; + let mut i = 0; + while size > unit as f64 { + size /= unit as f64; + i += 1; + } + + format!("{:.2} {}", size, units[i]) +} diff --git a/crates/oxide/tests/scanner.rs b/crates/oxide/tests/scanner.rs index 6f30ee5222ec..c5556aaa262e 100644 --- a/crates/oxide/tests/scanner.rs +++ b/crates/oxide/tests/scanner.rs @@ -340,8 +340,8 @@ mod scanner { candidates, vec![ "bool", + "class", "condition", - "div", "font-bold", "md:flex", "px-4", diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 7f466bd2dfc5..aaba1588e6cd 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] -channel = "1.80.1" +channel = "1.85.0" profile = "default"