Skip to content
This repository has been archived by the owner on Aug 31, 2023. It is now read-only.

Commit

Permalink
feat(rome_json_parser): JSON Lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
Micha Reiser authored and MichaReiser committed Nov 21, 2022
1 parent 861eb07 commit d4eda07
Show file tree
Hide file tree
Showing 604 changed files with 311,000 additions and 49 deletions.
20 changes: 20 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/rome_js_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ rome_console = { path = "../rome_console" }
rome_diagnostics = { path = "../rome_diagnostics" }
rome_js_syntax = { path = "../rome_js_syntax" }
rome_js_factory = { path = "../rome_js_factory" }
rome_js_unicode_table = { path = "../rome_js_unicode_table" }
rome_rowan = { path = "../rome_rowan" }
drop_bomb = "0.1.5"
bitflags = "1.3.2"
Expand Down
20 changes: 4 additions & 16 deletions crates/rome_js_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,31 +16,27 @@
#![allow(clippy::or_fun_call)]

#[rustfmt::skip]
mod tables;
mod errors;
mod tests;

pub mod buffered_lexer;
mod bytes;
#[cfg(feature = "highlight")]
mod highlight;

use bitflags::bitflags;
#[cfg(feature = "highlight")]
pub use highlight::*;

use tables::derived_property::*;

pub(crate) use buffered_lexer::BufferedLexer;
pub use rome_js_syntax::*;

use self::bytes::{
lookup_byte,
Dispatch::{self, *},
};
use crate::ParseDiagnostic;
use rome_diagnostics::file::FileId;
use rome_js_syntax::JsSyntaxKind::*;
use rome_js_unicode_table::{
is_id_continue, is_id_start, lookup_byte,
Dispatch::{self, *},
};

use self::errors::invalid_digits_after_unicode_escape_sequence;

Expand All @@ -61,14 +57,6 @@ const UNICODE_SPACES: [char; 19] = [
'\u{205F}', '\u{3000}', '\u{FEFF}',
];

/// Reports whether `c` may begin an identifier: either `_`, `$`, or any
/// character accepted by `ID_Start`.
fn is_id_start(c: char) -> bool {
    matches!(c, '_' | '$') || ID_Start(c)
}

/// Reports whether `c` may appear after the first character of an identifier:
/// `$`, U+200D (zero width joiner), U+200C (zero width non-joiner), or any
/// character accepted by `ID_Continue`.
fn is_id_continue(c: char) -> bool {
    matches!(c, '$' | '\u{200d}' | '\u{200c}') || ID_Continue(c)
}

/// Context in which the lexer should lex the next token
#[derive(Debug, Copy, Clone, Eq, PartialEq, Default)]
pub enum LexContext {
Expand Down
42 changes: 21 additions & 21 deletions crates/rome_js_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -496,13 +496,13 @@ impl ParseDiagnostic {
/// ## Examples
///
/// ```
/// use rome_console::fmt::{Termcolor};
/// use rome_console::markup;
/// use rome_diagnostics::v2::{DiagnosticExt, FileId, PrintDiagnostic, console::fmt::Formatter};
/// use rome_js_parser::ParseDiagnostic;
/// use rome_js_syntax::TextRange;
/// use rome_rowan::TextSize;
/// use std::fmt::Write;
/// # use rome_console::fmt::{Termcolor};
/// # use rome_console::markup;
/// # use rome_diagnostics::v2::{DiagnosticExt, FileId, PrintDiagnostic, console::fmt::Formatter};
/// # use rome_js_parser::ParseDiagnostic;
/// # use rome_js_syntax::TextRange;
/// # use rome_rowan::TextSize;
/// # use std::fmt::Write;
///
/// let source = "const a";
/// let range = TextRange::new(TextSize::from(0), TextSize::from(5));
Expand Down Expand Up @@ -530,15 +530,15 @@ impl ParseDiagnostic {
/// let expected = r#"parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
///
/// × this is wrong!
///
///
/// > 1 │ const a
/// │ ^^^^^
///
///
/// i This is reason why it's broken
///
///
/// > 1 │ const a
/// │ ^
///
///
/// "#;
/// assert_eq!(result, expected);
pub fn detail(mut self, range: impl AsSpan, message: impl Display) -> Self {
Expand All @@ -554,13 +554,13 @@ impl ParseDiagnostic {
/// ## Examples
///
/// ```
/// use rome_console::fmt::{Termcolor};
/// use rome_console::markup;
/// use rome_diagnostics::v2::{DiagnosticExt, FileId, PrintDiagnostic, console::fmt::Formatter};
/// use rome_js_parser::ParseDiagnostic;
/// use rome_js_syntax::TextRange;
/// use rome_rowan::TextSize;
/// use std::fmt::Write;
/// # use rome_console::fmt::{Termcolor};
/// # use rome_console::markup;
/// # use rome_diagnostics::v2::{DiagnosticExt, FileId, PrintDiagnostic, console::fmt::Formatter};
/// # use rome_js_parser::ParseDiagnostic;
/// # use rome_js_syntax::TextRange;
/// # use rome_rowan::TextSize;
/// # use std::fmt::Write;
///
/// let source = "const a";
/// let range = TextRange::new(TextSize::from(0), TextSize::from(5));
Expand Down Expand Up @@ -588,12 +588,12 @@ impl ParseDiagnostic {
/// let expected = r#"parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
///
/// × this is wrong!
///
///
/// > 1 │ const a
/// │ ^^^^^
///
///
/// i You should delete the code
///
///
/// "#;
/// assert_eq!(result, expected);
/// ```
Expand Down
12 changes: 12 additions & 0 deletions crates/rome_js_unicode_table/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
edition = "2021"
name = "rome_js_unicode_table"
version = "0.0.0"
authors = ["Rome Tools"]
license = "MIT"
description = "Unicode table for JavaScript IDs"
repository = "https://github.com/rome/tools"

[dependencies]

[dev-dependencies]
Original file line number Diff line number Diff line change
@@ -1,56 +1,117 @@
use Dispatch::*;

/// Maps `byte` to its [`Dispatch`] class through the `DISPATCHER` table.
///
/// The lookup is total: the table holds one entry for each of the 256
/// possible `u8` values, so this never panics.
pub(crate) fn lookup_byte(byte: u8) -> Dispatch {
    // `DISPATCHER` is a `[Dispatch; 256]` and `usize::from(u8)` is at most 255,
    // so ordinary indexing is always in bounds and no `unsafe` is required.
    DISPATCHER[usize::from(byte)]
}

// Every handler a byte coming in could be mapped to
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
/// Every handler a byte coming in could be mapped to
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[repr(u8)]
pub(crate) enum Dispatch {
pub enum Dispatch {
/// Error token
ERR,

/// Whitespace
WHS,

/// Exclamation
EXL,

/// Single `'` or Double quote `"`
QOT,

/// ASCII identifier, or `$`, `_`
IDT,

/// Hash `#`
HAS,

/// Percentage `%`
PRC,

/// Ampersand `&`
AMP,

/// Left paren `(`
PNO,

/// Right paren `)`
PNC,

/// Multiply `*`
MUL,

/// Plus `+`
PLS,

/// Comma `,`
COM,

/// Minus `-`
MIN,

/// Dot `.`
PRD,

/// Slash `/`
SLH,

/// Zero 0
ZER,

/// Digit (1-9)
DIG,

/// Colon `:`
COL,

/// Semicolon `;`
SEM,

/// Less than `<`
LSS,

/// Equal `=`
EQL,

/// More than `>`
MOR,
/// Question `?`
QST,
/// At `@`
AT_,

/// Left bracket `[`
BTO,

/// Backslash `\`
BSL,

/// Right bracket `]`
BTC,

/// Caret `^`
CRT,

/// Backtick `` ` ``
TPL,

/// Left curly bracket `{`
BEO,

/// Pipe `|`
PIP,

/// Right curly bracket `}`
BEC,

/// Tilde `~`
TLD,

/// Unicode range (non ASCII)
UNI,
}

// A lookup table mapping any incoming byte to a handler function
// This is taken from the ratel project lexer and modified
// FIXME: Should we ignore the first ascii control chars which are nearly never seen instead of returning Err?
static DISPATCHER: [Dispatch; 256] = [
pub(crate) static DISPATCHER: [Dispatch; 256] = [
//0 1 2 3 4 5 6 7 8 9 A B C D E F //
ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, WHS, WHS, WHS, WHS, WHS, ERR, ERR, // 0
ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, ERR, // 1
Expand Down
26 changes: 26 additions & 0 deletions crates/rome_js_unicode_table/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
use crate::bytes::DISPATCHER;
use crate::tables::derived_property::{ID_Continue, ID_Start};

mod bytes;
mod tables;

pub use crate::bytes::Dispatch;

/// Returns `true` if `c` can be the first character of an identifier.
///
/// `_` and `$` are accepted directly; every other character is deferred to
/// `ID_Start`.
#[inline]
pub fn is_id_start(c: char) -> bool {
    match c {
        '_' | '$' => true,
        other => ID_Start(other),
    }
}

/// Returns `true` if `c` can follow the first character of an identifier.
///
/// `$`, U+200D (zero width joiner) and U+200C (zero width non-joiner) are
/// accepted directly; every other character is deferred to `ID_Continue`.
#[inline]
pub fn is_id_continue(c: char) -> bool {
    match c {
        '$' | '\u{200d}' | '\u{200c}' => true,
        other => ID_Continue(other),
    }
}

/// Looks up the [`Dispatch`] class of `byte` in the lookup table.
///
/// The lookup is total: the table holds one entry for each of the 256
/// possible `u8` values, so this never panics.
#[inline]
pub fn lookup_byte(byte: u8) -> Dispatch {
    // `DISPATCHER` is a `[Dispatch; 256]` and `usize::from(u8)` is at most 255,
    // so ordinary indexing is always in bounds and no `unsafe` is required.
    DISPATCHER[usize::from(byte)]
}
4 changes: 2 additions & 2 deletions crates/rome_json_factory/src/generated/syntax_factory.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions crates/rome_json_parser/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[package]
edition = "2021"
name = "rome_json_parser"
version = "0.0.0"
authors = ["Rome Tools"]
license = "MIT"
description = "An extremely fast JSON parser"
repository = "https://github.com/rome/tools"

[dependencies]
rome_rowan = { path = "../rome_rowan" }
rome_console = { path = "../rome_console" }
rome_diagnostics = { path = "../rome_diagnostics" }
rome_json_syntax = { path = "../rome_json_syntax" }
rome_js_unicode_table = { path = "../rome_js_unicode_table" }

[dev-dependencies]
tests_macros = { path = "../tests_macros" }
quickcheck = "1.0.3"
quickcheck_macros = "1.0.0"
insta = { version="1.18.2" }

Loading

0 comments on commit d4eda07

Please sign in to comment.