diff --git a/Cargo.lock b/Cargo.lock
index c905ade6102..f35bfa0d43f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1746,9 +1746,26 @@ dependencies = [
"rome_rowan",
]
+[[package]]
+name = "rome_css_parser"
+version = "0.0.1"
+dependencies = [
+ "insta",
+ "quickcheck",
+ "quickcheck_macros",
+ "rome_console",
+ "rome_css_syntax",
+ "rome_diagnostics",
+ "rome_js_unicode_table",
+ "rome_parser",
+ "rome_rowan",
+ "tests_macros",
+ "tracing",
+]
+
[[package]]
name = "rome_css_syntax"
-version = "0.0.0"
+version = "0.0.1"
dependencies = [
"rome_rowan",
]
diff --git a/Cargo.toml b/Cargo.toml
index 8c24f0b470b..c01d1f4f37f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,7 +31,6 @@ rome_aria_metadata = { path = "./crates/rome_aria_metadata" }
rome_cli = { path = "./crates/rome_cli" }
rome_console = { version = "0.0.1", path = "./crates/rome_console" }
rome_control_flow = { path = "./crates/rome_control_flow" }
-rome_css_syntax = { path = "./crates/rome_css_syntax" }
rome_deserialize = { version = "0.0.0", path = "./crates/rome_deserialize" }
rome_diagnostics = { version = "0.0.1", path = "./crates/rome_diagnostics" }
rome_diagnostics_categories = { version = "0.0.1", path = "./crates/rome_diagnostics_categories" }
@@ -52,6 +51,9 @@ rome_json_factory = { version = "0.0.1", path = "./crates/rome_json_fa
rome_json_formatter = { path = "./crates/rome_json_formatter" }
rome_json_parser = { path = "./crates/rome_json_parser" }
rome_json_syntax = { version = "0.0.1", path = "./crates/rome_json_syntax" }
+rome_css_factory = { path = "./crates/rome_css_factory" }
+rome_css_parser = { path = "./crates/rome_css_parser" }
+rome_css_syntax = { path = "./crates/rome_css_syntax" }
rome_lsp = { path = "./crates/rome_lsp" }
rome_markup = { version = "0.0.1", path = "./crates/rome_markup" }
rome_migrate = { path = "./crates/rome_migrate" }
@@ -63,22 +65,24 @@ rome_text_size = { version = "0.0.1", path = "./crates/rome_text_si
tests_macros = { path = "./crates/tests_macros" }
# Crates needed in the workspace
-bitflags = "2.3.1"
-bpaf = { version = "0.9.1", features = ["derive"] }
-countme = "3.0.1"
-dashmap = "5.4.0"
-indexmap = "1.9.3"
-insta = "1.29.0"
-lazy_static = "1.4.0"
-quote = { version = "1.0.28" }
-rustc-hash = "1.1.0"
-schemars = { version = "0.8.12" }
-serde = { version = "1.0.163", features = ["derive"], default-features = false }
-serde_json = "1.0.96"
-smallvec = { version = "1.10.0", features = ["union", "const_new"] }
-tracing = { version = "0.1.37", default-features = false, features = ["std"] }
+quickcheck_macros = "1.0.0"
+quickcheck = "1.0.3"
+bitflags = "2.3.1"
+bpaf = { version = "0.9.1", features = ["derive"] }
+countme = "3.0.1"
+dashmap = "5.4.0"
+indexmap = "1.9.3"
+insta = "1.29.0"
+lazy_static = "1.4.0"
+quote = { version = "1.0.28" }
+rustc-hash = "1.1.0"
+schemars = { version = "0.8.12" }
+serde = { version = "1.0.163", features = ["derive"], default-features = false }
+serde_json = "1.0.96"
+smallvec = { version = "1.10.0", features = ["union", "const_new"] }
+tracing = { version = "0.1.37", default-features = false, features = ["std"] }
# pinning to version 1.18 to avoid multiple versions of windows-sys as dependency
-tokio = { version = "~1.18.5" }
+tokio = { version = "~1.18.5" }
[profile.dev.package.rome_wasm]
diff --git a/crates/rome_css_parser/Cargo.toml b/crates/rome_css_parser/Cargo.toml
new file mode 100644
index 00000000000..f9c2b27c172
--- /dev/null
+++ b/crates/rome_css_parser/Cargo.toml
@@ -0,0 +1,30 @@
+[package]
+authors.workspace = true
+categories = ["parser-implementations", "development-tools"]
+description = "An extremely fast CSS parser"
+documentation = "https://rustdocs.rome.tools/rome_css_parser/index.html"
+edition.workspace = true
+homepage.workspace = true
+license.workspace = true
+name = "rome_css_parser"
+repository.workspace = true
+version = "0.0.1"
+
+[dependencies]
+rome_console = { workspace = true }
+rome_diagnostics = { workspace = true }
+rome_js_unicode_table = { workspace = true }
+rome_css_syntax = { workspace = true }
+rome_parser = { workspace = true }
+rome_rowan = { workspace = true }
+tracing = { workspace = true }
+
+[dev-dependencies]
+insta = { workspace = true }
+quickcheck = { workspace = true }
+quickcheck_macros = { workspace = true }
+tests_macros = { workspace = true }
+
+# cargo-workspaces metadata
+[package.metadata.workspaces]
+independent = true
diff --git a/crates/rome_css_parser/LICENSE b/crates/rome_css_parser/LICENSE
new file mode 100644
index 00000000000..0c74aa6a873
--- /dev/null
+++ b/crates/rome_css_parser/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) Rome Tools, Inc. and its affiliates.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/crates/rome_css_parser/README.md b/crates/rome_css_parser/README.md
new file mode 100644
index 00000000000..26047a3b77e
--- /dev/null
+++ b/crates/rome_css_parser/README.md
@@ -0,0 +1,36 @@
+
+
+
+
+
+
+[![MIT licensed][mit-badge]][mit-url]
+[![Discord chat][discord-badge]][discord-url]
+[![CI on main][ci-badge]][ci-url]
+[![npm version][npm-badge]][npm-url]
+[![VSCode version][vscode-badge]][vscode-url]
+[![cargo version][cargo-badge]][cargo-url]
+
+
+[mit-badge]: https://img.shields.io/badge/license-MIT-blue.svg?color=brightgreen
+[mit-url]: LICENSE
+[discord-badge]: https://img.shields.io/discord/678763474494423051?logo=discord&label=discord&color=brightgreen
+[discord-url]: https://discord.gg/rome
+[ci-badge]: https://github.com/rome/tools/actions/workflows/main.yml/badge.svg
+[ci-url]: https://github.com/rome/tools/actions/workflows/main.yml
+[npm-badge]: https://img.shields.io/npm/v/rome/latest?color=brightgreen
+[npm-url]: https://www.npmjs.com/package/rome/v/latest
+[vscode-badge]: https://img.shields.io/visual-studio-marketplace/v/rome.rome?color=brightgreen&label=vscode
+[vscode-url]: https://marketplace.visualstudio.com/items?itemName=rome.rome
+[cargo-badge]: https://img.shields.io/crates/v/rome_css_parser?&color=brightgreen
+[cargo-url]: https://crates.io/crates/rome_css_parser
+
+
+
+# `rome_css_parser`
+
+Rome's CSS parser implementation. Follow the [documentation](https://rustdocs.rome.tools/rome_css_parser/index.html).
+
diff --git a/crates/rome_css_parser/src/lexer/mod.rs b/crates/rome_css_parser/src/lexer/mod.rs
new file mode 100644
index 00000000000..9a67f65acc8
--- /dev/null
+++ b/crates/rome_css_parser/src/lexer/mod.rs
@@ -0,0 +1,499 @@
+//! An extremely fast, lookup table based, CSS lexer which yields SyntaxKind tokens used by the rome-css parser.
+#![allow(dead_code)]
+
+#[rustfmt::skip]
+mod tests;
+
+use rome_css_syntax::{CssSyntaxKind, CssSyntaxKind::*, TextLen, TextRange, TextSize, T};
+use rome_js_unicode_table::{lookup_byte, Dispatch::*};
+use rome_parser::diagnostic::ParseDiagnostic;
+use std::char::REPLACEMENT_CHARACTER;
+use std::iter::FusedIterator;
+
+pub struct Token { // A lexed token: its syntax kind plus the source range it covers.
+ kind: CssSyntaxKind, // Kind assigned by the lexer.
+ range: TextRange, // Range of the token's text in the source.
+}
+
+impl Token {
+ pub fn kind(&self) -> CssSyntaxKind { // Returns the token's syntax kind.
+ self.kind
+ }
+
+ pub fn range(&self) -> TextRange { // Returns the range of the token in the source text.
+ self.range
+ }
+}
+
+/// An extremely fast, lookup table based, lossless CSS lexer that also collects diagnostics while lexing.
+#[derive(Debug)]
+pub(crate) struct Lexer<'src> {
+ /// Source text being lexed, borrowed for the lifetime of the lexer.
+ source: &'src str,
+
+ /// The start byte position in the source text of the next token.
+ position: usize,
+
+ diagnostics: Vec, // Diagnostics recorded so far; handed out by `finish`.
+}
+
+impl<'src> Lexer<'src> {
+ /// Creates a lexer positioned at the start of `source`; taking a `str` guarantees the input is valid UTF8.
+ pub fn from_str(source: &'src str) -> Self {
+ Self {
+ source,
+ position: 0,
+ diagnostics: vec![],
+ }
+ }
+
+ /// Returns the full source text this lexer was created with.
+ pub fn source(&self) -> &'src str {
+ self.source
+ }
+
+ pub fn finish(self) -> Vec { // Consumes the lexer and returns the diagnostics it collected.
+ self.diagnostics
+ }
+
+ /// Lexes the next token.
+ ///
+ /// ## Return
+ /// Returns the token (kind and range). A zero-length `EOF` token is yielded once at the end of the input, then `None`.
+ pub(crate) fn next_token(&mut self) -> Option {
+ let start = self.text_position();
+
+ match self.current_byte() {
+ Some(current) => {
+ let kind = self.lex_token(current);
+
+ debug_assert!(start < self.text_position(), "Lexer did not progress");
+ Some(Token {
+ kind,
+ range: TextRange::new(start, self.text_position()),
+ })
+ }
+ None if self.position == self.source.len() => {
+ self.advance(1); // Step past the end so the next call takes the `None` arm instead of yielding EOF again.
+ Some(Token {
+ kind: EOF,
+ range: TextRange::new(start, start),
+ })
+ }
+ None => None,
+ }
+ }
+
+ fn text_position(&self) -> TextSize { // Current byte position as a `TextSize`; panics if the conversion fails.
+ TextSize::try_from(self.position).expect("Input to be smaller than 4 GB")
+ }
+
+ /// Advances past the current byte and returns `tok` as the kind of the lexed token.
+ fn eat_byte(&mut self, tok: CssSyntaxKind) -> CssSyntaxKind {
+ self.advance(1);
+ tok
+ }
+
+ /// Consumes a single line break (`\n`, `\r` or `\r\n`) and returns `true` if one was consumed.
+ ///
+ /// ## Safety
+ /// Must be called at a valid UTF8 char boundary
+ fn consume_newline(&mut self) -> bool {
+ self.assert_at_char_boundary();
+
+ match self.current_byte() {
+ Some(b'\n') => {
+ self.advance(1);
+ true
+ }
+ Some(b'\r') => {
+ if self.peek_byte() == Some(b'\n') {
+ self.advance(2)
+ } else {
+ self.advance(1)
+ }
+ true
+ }
+
+ _ => false,
+ }
+ }
+
+ /// Consumes tabs and spaces until a newline or non-whitespace byte is found; other whitespace bytes are consumed with a diagnostic.
+ ///
+ /// ## Safety
+ /// Must be called at a valid UTF8 char boundary
+ fn consume_whitespaces(&mut self) {
+ self.assert_at_char_boundary();
+
+ while let Some(byte) = self.current_byte() {
+ let dispatch = lookup_byte(byte);
+
+ match dispatch {
+ WHS => match byte {
+ b'\t' | b' ' => self.advance(1),
+ b'\r' | b'\n' => {
+ break;
+ }
+ _ => {
+ let start = self.text_position();
+ self.advance(1);
+
+ self.diagnostics.push(
+ ParseDiagnostic::new(
+ "The CSS standard only allows tabs, whitespace, carriage return and line feed whitespace.",
+ start..self.text_position(),
+ )
+ .hint("Use a regular whitespace character instead."),
+ )
+ }
+ },
+
+ _ => break,
+ }
+ }
+ }
+
+ /// Consumes one newline (returning `NEWLINE`), or otherwise a run of whitespace up to the next newline or non-whitespace (returning `WHITESPACE`).
+ ///
+ /// ## Safety
+ /// Must be called at a valid UTF8 char boundary
+ fn consume_newline_or_whitespaces(&mut self) -> CssSyntaxKind {
+ if self.consume_newline() {
+ NEWLINE
+ } else {
+ self.consume_whitespaces();
+ WHITESPACE
+ }
+ }
+
+ /// Get the UTF8 char which starts at the current byte
+ ///
+ /// ## Safety
+ /// Must be called at a valid UTF8 char boundary and before the end of the file
+ fn current_char_unchecked(&self) -> char {
+ // Precautionary measure (debug builds only) for making sure the unsafe code below does not read over memory boundary
+ debug_assert!(!self.is_eof());
+ self.assert_at_char_boundary();
+
+ // Safety: We know this is safe because we require the input to the lexer to be valid utf8 and we always call this when we are at a char boundary
+ let string = unsafe {
+ std::str::from_utf8_unchecked(self.source.as_bytes().get_unchecked(self.position..))
+ };
+ let chr = if let Some(chr) = string.chars().next() {
+ chr
+ } else {
+ // Safety: we always call this when we are at a valid char, so the remaining text is never empty and this branch is unreachable
+ unsafe {
+ core::hint::unreachable_unchecked();
+ }
+ };
+
+ chr
+ }
+
+ /// Gets the byte at the current position without consuming it.
+ ///
+ /// ## Returns
+ /// The current byte if the lexer isn't at the end of the file, `None` otherwise.
+ #[inline]
+ fn current_byte(&self) -> Option {
+ if self.is_eof() {
+ None
+ } else {
+ Some(self.source.as_bytes()[self.position])
+ }
+ }
+
+ /// Asserts (in debug builds only) that the lexer is at a UTF8 char boundary
+ #[inline]
+ fn assert_at_char_boundary(&self) {
+ debug_assert!(self.source.is_char_boundary(self.position));
+ }
+
+ /// Peeks at the byte right after the current one, or `None` if there is none.
+ #[inline]
+ fn peek_byte(&self) -> Option {
+ self.byte_at(1)
+ }
+
+ /// Returns the byte at position `self.position + offset` without advancing, or `None` if it is out of bounds.
+ #[inline]
+ fn byte_at(&self, offset: usize) -> Option {
+ self.source.as_bytes().get(self.position + offset).copied()
+ }
+
+ /// Advances the current position by `n` bytes; performs no bounds or char boundary check.
+ #[inline]
+ fn advance(&mut self, n: usize) {
+ self.position += n;
+ }
+
+ #[inline]
+ fn advance_byte_or_char(&mut self, chr: u8) { // Advances one byte if `chr` is ASCII, otherwise by the UTF8 length of the current char.
+ if chr.is_ascii() {
+ self.advance(1);
+ } else {
+ self.advance_char_unchecked();
+ }
+ }
+
+ /// Advances the current position by the current char UTF8 length
+ ///
+ /// ## Safety
+ /// Must be called at a valid UTF8 char boundary
+ #[inline]
+ fn advance_char_unchecked(&mut self) {
+ let c = self.current_char_unchecked();
+ self.position += c.len_utf8();
+ }
+
+ /// Returns `true` if the lexer is at or past the end of the file.
+ #[inline]
+ fn is_eof(&self) -> bool {
+ self.position >= self.source.len()
+ }
+
+    /// Lexes the token that starts with the byte `current`, dispatching on its lookup table class.
+    ///
+    /// Guaranteed to not be at the end of the file; bytes without a dedicated arm yield an `ERROR_TOKEN`.
+    // A lookup table of `byte -> fn(l: &mut Lexer) -> Token` is exponentially slower than this approach
+    fn lex_token(&mut self, current: u8) -> CssSyntaxKind {
+        // The speed difference comes from the difference in table size, a 2kb table is easily fit into cpu cache
+        // While a 16kb table will be ejected from cache very often leading to slowdowns, this also allows LLVM
+        // to do more aggressive optimizations on the match regarding how to map it to instructions
+        let dispatched = lookup_byte(current);
+
+        match dispatched {
+            WHS => self.consume_newline_or_whitespaces(),
+            QOT => self.lex_string_literal(current),
+            SLH => self.lex_slash(),
+
+            PRD => self.eat_byte(T![.]),
+            MUL => self.eat_byte(T![*]),
+            COL => self.eat_byte(T![:]),
+            AT_ => self.eat_byte(T![@]),
+            HAS => self.eat_byte(T![#]),
+            PNO => self.eat_byte(T!['(']),
+            PNC => self.eat_byte(T![')']),
+            BEO => self.eat_byte(T!['{']),
+            BEC => self.eat_byte(T!['}']),
+            BTO => self.eat_byte(T!['[']),
+            BTC => self.eat_byte(T![']']),
+
+            _ => self.eat_unexpected_character(),
+        }
+    }
+
+    /// Lexes a quoted string that starts at the opening `quote` byte and validates its escape sequences.
+    /// Returns `CSS_STRING_LITERAL`, or `ERROR_TOKEN` plus a diagnostic for a bad escape or a missing closing quote.
+    fn lex_string_literal(&mut self, quote: u8) -> CssSyntaxKind {
+        self.assert_at_char_boundary();
+        let start = self.text_position();
+
+        self.advance(1); // Skip over the quote
+        let mut state = LexStringState::InString;
+
+        while let Some(chr) = self.current_byte() {
+            let dispatch = lookup_byte(chr);
+
+            match dispatch {
+                QOT if quote == chr => {
+                    self.advance(1);
+                    state = match state {
+                        LexStringState::InString => LexStringState::Terminated,
+                        state => state,
+                    };
+                    break;
+                }
+                // '\t' etc
+                BSL => {
+                    let escape_start = self.text_position();
+                    self.advance(1);
+
+                    match self.current_byte() {
+                        // An escaped line break continues the string; `\r\n` must be consumed as one newline.
+                        Some(b'\n' | b'\r') => {
+                            self.consume_newline();
+                        }
+
+                        // Handle an escaped `'`, but only inside a single-quoted string.
+                        Some(b'\'') if quote == b'\'' => self.advance(1),
+
+                        // Handle an escaped `"`, but only inside a double-quoted string.
+                        Some(b'"') if quote == b'"' => self.advance(1),
+
+                        Some(c) if c.is_ascii_hexdigit() => {
+                            // Cannot panic: the guard above ensured that the current byte is a hex digit.
+                            let mut hex = (c as char).to_digit(16).unwrap();
+                            self.advance(1);
+
+                            // Consume up to 5 more hex digits, so that 1-6 hex digits are consumed in total.
+                            for _ in 0..5 {
+                                let Some(digit) = self.current_byte()
+                                    .and_then(|c| (c as char).to_digit(16)) else { break; };
+                                self.advance(1);
+
+                                hex = hex * 16 + digit;
+                            }
+
+                            // Interpret the hex digits as a hexadecimal number. If this number is zero, or
+                            // is for a surrogate, or is greater than the maximum allowed code point, return
+                            // U+FFFD REPLACEMENT CHARACTER.
+                            let hex = match hex {
+                                // If this number is zero
+                                0 => REPLACEMENT_CHARACTER,
+                                // or is for a surrogate
+                                55296..=57343 => REPLACEMENT_CHARACTER,
+                                // or is greater than the maximum allowed code point
+                                1114112.. => REPLACEMENT_CHARACTER,
+                                _ => char::from_u32(hex).unwrap_or(REPLACEMENT_CHARACTER),
+                            };
+
+                            if hex == REPLACEMENT_CHARACTER {
+                                state = LexStringState::InvalidEscapeSequence;
+
+                                let diagnostic = ParseDiagnostic::new(
+                                    "Invalid escape sequence",
+                                    escape_start..self.text_position(),
+                                );
+                                self.diagnostics.push(diagnostic);
+                            }
+                        }
+
+                        Some(chr) => {
+                            self.advance_byte_or_char(chr);
+                        }
+
+                        None => {}
+                    }
+                }
+                WHS if matches!(chr, b'\n' | b'\r') => {
+                    let unterminated =
+                        ParseDiagnostic::new("Missing closing quote", start..self.text_position())
+                            .detail(self.position..self.position + 1, "line breaks here");
+
+                    self.diagnostics.push(unterminated);
+
+                    return ERROR_TOKEN;
+                }
+                UNI => self.advance_char_unchecked(),
+
+                _ => self.advance(1),
+            }
+        }
+
+        match state {
+            LexStringState::Terminated => CSS_STRING_LITERAL,
+            LexStringState::InString => {
+                let unterminated =
+                    ParseDiagnostic::new("Missing closing quote", start..self.text_position())
+                        .detail(
+                            self.source.text_len()..self.source.text_len(),
+                            "file ends here",
+                        );
+                self.diagnostics.push(unterminated);
+
+                ERROR_TOKEN
+            }
+            LexStringState::InvalidEscapeSequence => ERROR_TOKEN,
+        }
+    }
+
+ /// Lexes a `/* ... */` block comment or a `//` line comment; any other `/` is reported as an unexpected character.
+ fn lex_slash(&mut self) -> CssSyntaxKind {
+ let start = self.text_position();
+ match self.peek_byte() {
+ Some(b'*') => {
+ // eat the opening `/*`
+ self.advance(2);
+
+ let mut has_newline = false;
+
+ while let Some(chr) = self.current_byte() {
+ match chr {
+ b'*' if self.peek_byte() == Some(b'/') => {
+ self.advance(2);
+
+ if has_newline {
+ return MULTILINE_COMMENT;
+ } else {
+ return COMMENT;
+ }
+ }
+ b'\n' | b'\r' => {
+ has_newline = true;
+ self.advance(1)
+ }
+ chr => self.advance_byte_or_char(chr),
+ }
+ }
+
+ let err =
+ ParseDiagnostic::new("Unterminated block comment", start..self.text_position())
+ .detail(
+ self.position..self.position + 1, // NOTE(review): at this point the span ends one byte past the source; confirm the renderer accepts it
+ "... but the file ends here",
+ );
+
+ self.diagnostics.push(err);
+
+ if has_newline {
+ MULTILINE_COMMENT
+ } else {
+ COMMENT
+ }
+ }
+ Some(b'/') => {
+ self.advance(2); // eat the opening `//`
+
+ while let Some(chr) = self.current_byte() {
+ match chr {
+ b'\n' | b'\r' => return COMMENT,
+ chr => self.advance_byte_or_char(chr),
+ }
+ }
+
+ COMMENT
+ }
+ _ => self.eat_unexpected_character(),
+ }
+ }
+
+ #[inline]
+ fn eat_unexpected_character(&mut self) -> CssSyntaxKind { // Records a diagnostic for the current char, skips it and returns `ERROR_TOKEN`.
+ self.assert_at_char_boundary();
+
+ let char = self.current_char_unchecked();
+ let err = ParseDiagnostic::new(
+ format!("unexpected character `{}`", char),
+ self.text_position()..self.text_position() + char.text_len(),
+ );
+ self.diagnostics.push(err);
+ self.advance(char.len_utf8());
+
+ ERROR_TOKEN
+ }
+}
+
+impl Iterator for Lexer<'_> { // Lets the lexer be driven as an iterator of tokens.
+ type Item = Token;
+
+ fn next(&mut self) -> Option { // Delegates to `next_token`.
+ self.next_token()
+ }
+}
+
+impl FusedIterator for Lexer<'_> {} // Holds because `next_token` keeps returning `None` once the EOF token has been emitted.
+
+#[derive(Copy, Clone, Debug)]
+enum LexStringState {
+ /// String that contains an invalid escape sequence; the string is lexed as an `ERROR_TOKEN`
+ InvalidEscapeSequence,
+
+ /// Still between the opening and closing quotes: no closing quote has been seen yet.
+ InString,
+
+ /// Properly terminated string
+ Terminated,
+}
diff --git a/crates/rome_css_parser/src/lexer/tests.rs b/crates/rome_css_parser/src/lexer/tests.rs
new file mode 100644
index 00000000000..4bc08842f78
--- /dev/null
+++ b/crates/rome_css_parser/src/lexer/tests.rs
@@ -0,0 +1,233 @@
+#![cfg(test)]
+#![allow(unused_mut, unused_variables, unused_assignments)]
+
+use super::{Lexer, TextSize};
+use quickcheck_macros::quickcheck;
+use std::sync::mpsc::channel;
+use std::thread;
+use std::time::Duration;
+
+// Asserts the kind and byte length of every token lexed from `$src` (given in order as `KIND:len` pairs),
+// and makes sure the tokens yielded are fully lossless and the source can be reconstructed from only the tokens
+macro_rules! assert_lex {
+ ($src:expr, $($kind:ident:$len:expr $(,)?)*) => {{
+ let mut lexer = Lexer::from_str($src);
+ let mut idx = 0;
+ let mut tok_idx = TextSize::default();
+
+ let mut new_str = String::with_capacity($src.len());
+ let tokens: Vec<_> = lexer.collect();
+
+ $(
+ assert_eq!(
+ tokens[idx].kind,
+ rome_css_syntax::CssSyntaxKind::$kind,
+ "expected token kind {}, but found {:?}",
+ stringify!($kind),
+ tokens[idx].kind,
+ );
+
+ assert_eq!(
+ tokens[idx].range.len(),
+ TextSize::from($len),
+ "expected token length of {}, but found {:?} for token {:?}",
+ $len,
+ tokens[idx].range.len(),
+ tokens[idx].kind,
+ );
+
+ new_str.push_str(&$src[tokens[idx].range]);
+ tok_idx += tokens[idx].range.len();
+
+ idx += 1;
+ )*
+
+ if idx < tokens.len() {
+ panic!(
+ "expected {} tokens but lexer returned {}, first unexpected token is '{:?}'",
+ idx,
+ tokens.len(),
+ tokens[idx].kind
+ );
+ } else {
+ assert_eq!(idx, tokens.len());
+ }
+
+ assert_eq!($src, new_str, "Failed to reconstruct input");
+ }};
+}
+
+// This is for testing if the lexer is truly lossless
+// It lexes random strings, puts them back together from the produced token ranges and compares with the input
+#[quickcheck]
+fn losslessness(string: String) -> bool {
+ // using an mpsc channel allows us to spawn a thread and run the lexer there, then if
+ // it takes more than 2 seconds we panic because the lexer is almost certainly stuck in an endless loop
+ let cloned = string.clone();
+ let (sender, receiver) = channel();
+ thread::spawn(move || {
+ let mut lexer = Lexer::from_str(&cloned);
+ let tokens: Vec<_> = lexer.map(|token| token.range).collect();
+
+ sender
+ .send(tokens)
+ .expect("Could not send tokens to receiver");
+ });
+ let token_ranges = receiver
+ .recv_timeout(Duration::from_secs(2))
+ .unwrap_or_else(|_| {
+ panic!(
+ "Lexer is infinitely recursing with this code: ->{}<-",
+ string
+ )
+ });
+
+ let mut new_str = String::with_capacity(string.len());
+ let mut idx = TextSize::from(0);
+
+ for range in token_ranges {
+ new_str.push_str(&string[range]);
+ idx += range.len();
+ }
+
+ string == new_str
+}
+
+#[test]
+fn empty() { // An empty source yields only the zero-length EOF token.
+ assert_lex! {
+ "",
+ EOF:0
+ }
+}
+
+#[test]
+fn string() {
+ assert_lex! {
+ "'5098382'",
+ CSS_STRING_LITERAL:9,
+ EOF:0
+ }
+
+ // unescaped double quotes inside a single-quoted string
+ assert_lex! {
+ r#"'hel"lo"'"#,
+ CSS_STRING_LITERAL:9,
+ EOF:0
+ }
+
+ // escaped single quotes inside a single-quoted string
+ assert_lex! {
+ r#"'hel\'lo\''"#,
+ CSS_STRING_LITERAL:11,
+ EOF:0
+ }
+
+ // escaped double quotes inside a double-quoted string
+ assert_lex! {
+ r#""hel\"lo\"""#,
+ CSS_STRING_LITERAL:11,
+ EOF:0
+ }
+
+ // unicode: lengths are counted in bytes, not chars
+ assert_lex! {
+ "'юникод'",
+ CSS_STRING_LITERAL:14,
+ EOF:0
+ }
+
+ // missing single closing quote
+ assert_lex! {
+ "'he",
+ ERROR_TOKEN:3,
+ EOF:0
+ }
+
+ // missing double closing quote
+ assert_lex! {
+ r#""he"#,
+ ERROR_TOKEN:3,
+ EOF:0
+ }
+
+ // line break inside an unterminated string: the error token ends before the newline
+ assert_lex! {
+ r#"'he
+ "#,
+ ERROR_TOKEN:3,
+ NEWLINE:1,
+ WHITESPACE:4,
+ EOF:0
+ }
+
+ // line break, then a lone quote on the next line that starts another unterminated string
+ assert_lex! {
+ r#"'he
+ '"#,
+ ERROR_TOKEN:3,
+ NEWLINE:1,
+ WHITESPACE:4,
+ ERROR_TOKEN:1,
+ EOF:0
+ }
+
+ assert_lex! {
+ r#""Escaped \n""#,
+ CSS_STRING_LITERAL:12,
+ EOF:0
+ }
+
+ assert_lex! {
+ r#""Escaped \r""#,
+ CSS_STRING_LITERAL:12,
+ EOF:0
+ }
+
+ // invalid escape sequence: the whole string becomes an error token
+ assert_lex! {
+ r#"'\0'"#,
+ ERROR_TOKEN:4,
+ EOF:0
+ }
+}
+
+#[test]
+fn single_line_comments() { // A `//` comment runs up to, but does not include, the line break.
+ assert_lex! {
+ "//abc
+ ",
+ COMMENT:5,
+ NEWLINE:1,
+ WHITESPACE:4,
+ EOF:0
+ }
+
+ assert_lex! {
+ "//a",
+ COMMENT:3,
+ EOF:0
+ }
+}
+
+#[test]
+fn block_comment() { // Block comments containing a line break are lexed as MULTILINE_COMMENT, others as COMMENT.
+ assert_lex! {
+ "/*
+ */",
+ MULTILINE_COMMENT:13,
+ EOF:0
+ }
+
+ assert_lex! {
+ "/* */",
+ COMMENT:5,
+ EOF:0
+ }
+
+ assert_lex! {
+ "/* *",
+ COMMENT:4, // unterminated: still lexed as a comment token (the lexer records a diagnostic)
+ EOF:0
+ }
+}
diff --git a/crates/rome_css_parser/src/lib.rs b/crates/rome_css_parser/src/lib.rs
new file mode 100644
index 00000000000..027ef38d7e4
--- /dev/null
+++ b/crates/rome_css_parser/src/lib.rs
@@ -0,0 +1,4 @@
+//! Extremely fast, lossless, and error tolerant CSS Parser.
+
+mod lexer;
+mod prelude;
diff --git a/crates/rome_css_parser/src/prelude.rs b/crates/rome_css_parser/src/prelude.rs
new file mode 100644
index 00000000000..bd22b87c894
--- /dev/null
+++ b/crates/rome_css_parser/src/prelude.rs
@@ -0,0 +1,2 @@
+pub use rome_css_syntax::T;
+pub use rome_parser::prelude::*;
diff --git a/crates/rome_css_syntax/Cargo.toml b/crates/rome_css_syntax/Cargo.toml
index 8f6d869dab7..770a2a9e4b7 100644
--- a/crates/rome_css_syntax/Cargo.toml
+++ b/crates/rome_css_syntax/Cargo.toml
@@ -1,11 +1,12 @@
[package]
authors.workspace = true
+description = "SyntaxKind and common rowan definitions for rome_css_parser"
+documentation = "https://rustdocs.rome.tools/rome_css_parser/index.html"
edition.workspace = true
license.workspace = true
name = "rome_css_syntax"
repository.workspace = true
-version = "0.0.0"
-
+version = "0.0.1"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
diff --git a/crates/rome_css_syntax/src/generated/kind.rs b/crates/rome_css_syntax/src/generated/kind.rs
index 7acafe73281..ca29377b187 100644
--- a/crates/rome_css_syntax/src/generated/kind.rs
+++ b/crates/rome_css_syntax/src/generated/kind.rs
@@ -208,6 +208,7 @@ pub enum CssSyntaxKind {
NEWLINE,
WHITESPACE,
COMMENT,
+ MULTILINE_COMMENT,
CSS_ROOT,
CSS_ID_SELECTOR_PATTERN,
CSS_RULE,
diff --git a/crates/rome_js_formatter/Cargo.toml b/crates/rome_js_formatter/Cargo.toml
index e864958cd03..daa2f040b2a 100644
--- a/crates/rome_js_formatter/Cargo.toml
+++ b/crates/rome_js_formatter/Cargo.toml
@@ -30,8 +30,8 @@ unicode-width = "0.1.9"
countme = { workspace = true, features = ["enable"] }
iai = "0.1.1"
insta = { workspace = true, features = ["glob"] }
-quickcheck = "1.0.3"
-quickcheck_macros = "1.0.0"
+quickcheck = { workspace = true }
+quickcheck_macros = { workspace = true }
rome_diagnostics = { workspace = true }
rome_formatter_test = { workspace = true }
rome_fs = { workspace = true }
diff --git a/crates/rome_js_parser/Cargo.toml b/crates/rome_js_parser/Cargo.toml
index a39bd34ea85..6fccb7dc515 100644
--- a/crates/rome_js_parser/Cargo.toml
+++ b/crates/rome_js_parser/Cargo.toml
@@ -30,8 +30,8 @@ tracing = { workspace = true }
[dev-dependencies]
expect-test = "1.2.2"
-quickcheck = "1.0.3"
-quickcheck_macros = "1.0.0"
+quickcheck = { workspace = true }
+quickcheck_macros = { workspace = true }
rome_js_parser = { workspace = true, features = ["serde"] }
tests_macros = { workspace = true }
diff --git a/crates/rome_json_parser/Cargo.toml b/crates/rome_json_parser/Cargo.toml
index 515e545f0e7..0472cf24d74 100644
--- a/crates/rome_json_parser/Cargo.toml
+++ b/crates/rome_json_parser/Cargo.toml
@@ -22,8 +22,8 @@ tracing = { workspace = true }
[dev-dependencies]
insta = { workspace = true }
-quickcheck = "1.0.3"
-quickcheck_macros = "1.0.0"
+quickcheck = { workspace = true }
+quickcheck_macros = { workspace = true }
tests_macros = { workspace = true }
# cargo-workspaces metadata
diff --git a/crates/rome_json_parser/src/lexer/mod.rs b/crates/rome_json_parser/src/lexer/mod.rs
index 25fbbaaead3..95cc27c5362 100644
--- a/crates/rome_json_parser/src/lexer/mod.rs
+++ b/crates/rome_json_parser/src/lexer/mod.rs
@@ -24,7 +24,7 @@ impl Token {
}
}
-/// An extremely fast, lookup table based, lossless ECMAScript lexer
+/// An extremely fast, lookup table based, lossless JSON lexer
#[derive(Debug)]
pub(crate) struct Lexer<'src> {
/// Source text
diff --git a/crates/rome_rowan/Cargo.toml b/crates/rome_rowan/Cargo.toml
index 9f7800b562f..819ae0f46a0 100644
--- a/crates/rome_rowan/Cargo.toml
+++ b/crates/rome_rowan/Cargo.toml
@@ -21,8 +21,8 @@ tracing = { workspace = true }
[dev-dependencies]
iai = "0.1.1"
-quickcheck = "1.0.3"
-quickcheck_macros = "1.0.0"
+quickcheck = { workspace = true }
+quickcheck_macros = { workspace = true }
serde_json = { workspace = true }
[features]
diff --git a/xtask/codegen/src/css_kinds_src.rs b/xtask/codegen/src/css_kinds_src.rs
index 7c2d3e2962d..43003f298c6 100644
--- a/xtask/codegen/src/css_kinds_src.rs
+++ b/xtask/codegen/src/css_kinds_src.rs
@@ -200,7 +200,14 @@ pub const CSS_KINDS_SRC: KindsSrc = KindsSrc {
"CSS_CUSTOM_PROPERTY",
"CSS_SPACE_LITERAL",
],
- tokens: &["ERROR_TOKEN", "IDENT", "NEWLINE", "WHITESPACE", "COMMENT"],
+ tokens: &[
+ "ERROR_TOKEN",
+ "IDENT",
+ "NEWLINE",
+ "WHITESPACE",
+ "COMMENT",
+ "MULTILINE_COMMENT",
+ ],
nodes: &[
"CSS_ROOT",
"CSS_ID_SELECTOR_PATTERN",