Skip to content

Commit

Permalink
refactor(regular_expression)!: Simplify public APIs (#6262)
Browse files Browse the repository at this point in the history
This PR makes 2 changes to improve the existing API that are not very useful.

- Remove `(Literal)Parser` and `FlagsParser` and their ASTs
- Add `with_flags(flags_text)` helper to `ParserOptions`

Here are the details.

> Remove `(Literal)Parser` and `FlagsParser` and their ASTs

Previously, the `oxc_regular_expression` crate exposed 3 parsers.

- `(Literal)Parser`: assumes `/pattern/flags` format
- `PatternParser`: assumes `pattern` part only
- `FlagsParser`: assumes `flags` part only

However, it turns out that in actual usecases, only the `PatternParser` is actually sufficient, as the pattern and flags are validated and sliced in advance on the `oxc_parser` side.

The current usecase for `(Literal)Parser` is mostly for internal testing.

There were also some misuses of `(Literal)Parser` that restore `format!("/{pattern}/{flags}")` back and use `(Literal)Parser`.

Therefore, only `PatternParser` is now published, and unnecessary ASTs have been removed.
(This also obsoletes #5592 .)

> Added `with_flags(flags_text)` helper to `ParserOptions`

Strictly speaking, there was a subtle difference between the "flag" strings that users were aware of and the "mode" recognised by the parser.

Therefore, it was a common mistake to forget to enable `unicode_mode` when using the `v` flag.

With this helper, crate users no longer need to distinguish between flags and modes.
  • Loading branch information
leaysgur committed Oct 3, 2024
1 parent 5957214 commit 5a73a66
Show file tree
Hide file tree
Showing 29 changed files with 476 additions and 940 deletions.
36 changes: 0 additions & 36 deletions crates/oxc_ast/src/generated/assert_layouts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1412,24 +1412,6 @@ const _: () = {
assert!(size_of::<LanguageVariant>() == 1usize);
assert!(align_of::<LanguageVariant>() == 1usize);

assert!(size_of::<RegularExpression>() == 72usize);
assert!(align_of::<RegularExpression>() == 8usize);
assert!(offset_of!(RegularExpression, span) == 0usize);
assert!(offset_of!(RegularExpression, pattern) == 8usize);
assert!(offset_of!(RegularExpression, flags) == 56usize);

assert!(size_of::<Flags>() == 16usize);
assert!(align_of::<Flags>() == 4usize);
assert!(offset_of!(Flags, span) == 0usize);
assert!(offset_of!(Flags, global) == 8usize);
assert!(offset_of!(Flags, ignore_case) == 9usize);
assert!(offset_of!(Flags, multiline) == 10usize);
assert!(offset_of!(Flags, unicode) == 11usize);
assert!(offset_of!(Flags, sticky) == 12usize);
assert!(offset_of!(Flags, dot_all) == 13usize);
assert!(offset_of!(Flags, has_indices) == 14usize);
assert!(offset_of!(Flags, unicode_sets) == 15usize);

assert!(size_of::<Pattern>() == 48usize);
assert!(align_of::<Pattern>() == 8usize);
assert!(offset_of!(Pattern, span) == 0usize);
Expand Down Expand Up @@ -2967,24 +2949,6 @@ const _: () = {
assert!(size_of::<LanguageVariant>() == 1usize);
assert!(align_of::<LanguageVariant>() == 1usize);

assert!(size_of::<RegularExpression>() == 56usize);
assert!(align_of::<RegularExpression>() == 4usize);
assert!(offset_of!(RegularExpression, span) == 0usize);
assert!(offset_of!(RegularExpression, pattern) == 8usize);
assert!(offset_of!(RegularExpression, flags) == 40usize);

assert!(size_of::<Flags>() == 16usize);
assert!(align_of::<Flags>() == 4usize);
assert!(offset_of!(Flags, span) == 0usize);
assert!(offset_of!(Flags, global) == 8usize);
assert!(offset_of!(Flags, ignore_case) == 9usize);
assert!(offset_of!(Flags, multiline) == 10usize);
assert!(offset_of!(Flags, unicode) == 11usize);
assert!(offset_of!(Flags, sticky) == 12usize);
assert!(offset_of!(Flags, dot_all) == 13usize);
assert!(offset_of!(Flags, has_indices) == 14usize);
assert!(offset_of!(Flags, unicode_sets) == 15usize);

assert!(size_of::<Pattern>() == 32usize);
assert!(align_of::<Pattern>() == 4usize);
assert!(offset_of!(Pattern, span) == 0usize);
Expand Down
49 changes: 17 additions & 32 deletions crates/oxc_linter/src/rules/eslint/no_control_regex.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
use oxc_allocator::Allocator;
use oxc_ast::{
ast::{Argument, RegExpFlags},
AstKind,
};
use oxc_ast::{ast::Argument, AstKind};
use oxc_diagnostics::OxcDiagnostic;
use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::{
Expand Down Expand Up @@ -92,25 +89,19 @@ impl Rule for NoControlRegex {
// get pattern from arguments. Missing or non-string arguments
// will be runtime errors, but are not covered by this rule.
let alloc = Allocator::default();
let pattern_with_slashes = format!("/{}/", &pattern.value);
let flags = extract_regex_flags(&expr.arguments);
let flags_text = flags.map_or(String::new(), |f| f.to_string());
let parser = Parser::new(
&alloc,
pattern_with_slashes.as_str(),
ParserOptions {
span_offset: expr
.arguments
.first()
.map_or(0, |arg| arg.span().start),
unicode_mode: flags
.is_some_and(|flags| flags.contains(RegExpFlags::U)),
unicode_sets_mode: flags
.is_some_and(|flags| flags.contains(RegExpFlags::V)),
},
pattern.value.as_str(),
ParserOptions::default()
.with_span_offset(
expr.arguments.first().map_or(0, |arg| arg.span().start),
)
.with_flags(&flags_text),
);

let Some(pattern) = parser.parse().ok().map(|pattern| pattern.pattern)
else {
let Ok(pattern) = parser.parse() else {
return;
};

Expand All @@ -133,25 +124,19 @@ impl Rule for NoControlRegex {
// get pattern from arguments. Missing or non-string arguments
// will be runtime errors, but are not covered by this rule.
let alloc = Allocator::default();
let pattern_with_slashes = format!("/{}/", &pattern.value);
let flags = extract_regex_flags(&expr.arguments);
let flags_text = flags.map_or(String::new(), |f| f.to_string());
let parser = Parser::new(
&alloc,
pattern_with_slashes.as_str(),
ParserOptions {
span_offset: expr
.arguments
.first()
.map_or(0, |arg| arg.span().start),
unicode_mode: flags
.is_some_and(|flags| flags.contains(RegExpFlags::U)),
unicode_sets_mode: flags
.is_some_and(|flags| flags.contains(RegExpFlags::V)),
},
pattern.value.as_str(),
ParserOptions::default()
.with_span_offset(
expr.arguments.first().map_or(0, |arg| arg.span().start),
)
.with_flags(&flags_text),
);

let Some(pattern) = parser.parse().ok().map(|pattern| pattern.pattern)
else {
let Ok(pattern) = parser.parse() else {
return;
};

Expand Down
105 changes: 49 additions & 56 deletions crates/oxc_linter/src/rules/eslint/no_invalid_regexp.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,28 @@
use oxc_allocator::Allocator;
use oxc_ast::{ast::Argument, AstKind};
use oxc_diagnostics::{LabeledSpan, OxcDiagnostic};
use oxc_diagnostics::OxcDiagnostic;
use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::{FlagsParser, ParserOptions, PatternParser};
use oxc_regular_expression::{Parser, ParserOptions};
use oxc_span::Span;
use rustc_hash::FxHashSet;
use serde::Deserialize;

use crate::{context::LintContext, rule::Rule, AstNode};

// Use the same prefix with `oxc_regular_expression` crate
fn duplicated_flag_diagnostic(span: Span) -> OxcDiagnostic {
OxcDiagnostic::warn("Invalid regular expression: Duplicated flag").with_label(span)
}

fn unknown_flag_diagnostic(span: Span) -> OxcDiagnostic {
OxcDiagnostic::warn("Invalid regular expression: Unknown flag").with_label(span)
}

fn invalid_unicode_flags_diagnostic(span: Span) -> OxcDiagnostic {
OxcDiagnostic::warn("Invalid regular expression: `u` and `v` flags should be used alone")
.with_label(span)
}

#[derive(Debug, Default, Clone)]
pub struct NoInvalidRegexp(Box<NoInvalidRegexpConfig>);

Expand Down Expand Up @@ -72,75 +86,54 @@ impl Rule for NoInvalidRegexp {
return;
}

let allocator = Allocator::default();

// Validate flags first if exists
let mut parsed_flags = None;
if let Some((flags_span_start, flags_text)) = flags_arg {
// Check for duplicated flags
// For compatibility with ESLint, we need to check "user-defined duplicated" flags here
// "valid duplicated" flags are also checked
let (mut u_flag_found, mut v_flag_found) = (false, false);
let mut unique_flags = FxHashSet::default();
let mut violations = vec![];
for (idx, ch) in flags_text.char_indices() {
if !unique_flags.insert(ch) {
violations.push(idx);
}
}
if !violations.is_empty() {
return ctx.diagnostic(
// Use the same prefix with `oxc_regular_expression`
OxcDiagnostic::warn("Invalid regular expression: Duplicated flag").with_labels(
violations
.iter()
.map(|&start| {
#[allow(clippy::cast_possible_truncation)]
let start = flags_span_start + start as u32;
LabeledSpan::new_with_span(None, Span::new(start, start))
})
.collect::<Vec<_>>(),
),
);
}
#[allow(clippy::cast_possible_truncation)]
let start = flags_span_start + idx as u32;

// Omit user defined invalid flags
for flag in &self.0.allow_constructor_flags {
match flag {
// Keep valid flags, even if they are defined
'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y' => continue,
_ => {
unique_flags.remove(flag);
// Invalid combination: u+v
if ch == 'u' {
if v_flag_found {
return ctx
.diagnostic(invalid_unicode_flags_diagnostic(Span::new(start, start)));
}
u_flag_found = true;
}
if ch == 'v' {
if u_flag_found {
return ctx
.diagnostic(invalid_unicode_flags_diagnostic(Span::new(start, start)));
}
v_flag_found = true;
}

// Duplicated: user defined, invalid or valid
if !unique_flags.insert(ch) {
return ctx.diagnostic(duplicated_flag_diagnostic(Span::new(start, start)));
}
}

// Use parser to check:
// - Unknown invalid flags
// - Invalid flags combination: u+v
// - (Valid duplicated flags are already checked above)
// It can be done without `FlagsParser`, though
let flags_text = unique_flags.iter().collect::<String>();
let options = ParserOptions::default().with_span_offset(flags_span_start);
match FlagsParser::new(&allocator, flags_text.as_str(), options).parse() {
Ok(flags) => parsed_flags = Some(flags),
Err(diagnostic) => return ctx.diagnostic(diagnostic),
// Unknown: not valid, not user defined
if !(matches!(ch, 'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y')
|| self.0.allow_constructor_flags.contains(&ch))
{
return ctx.diagnostic(unknown_flag_diagnostic(Span::new(start, start)));
}
}
}

// Then, validate pattern if exists
// Pattern check is skipped when 1st argument is NOT a `StringLiteral`
// e.g. `new RegExp(var)`, `RegExp("str" + var)`
let allocator = Allocator::default();
if let Some((pattern_span_start, pattern_text)) = pattern_arg {
let mut options = ParserOptions::default().with_span_offset(pattern_span_start);
if let Some(flags) = parsed_flags {
if flags.unicode || flags.unicode_sets {
options = options.with_unicode_mode();
}
if flags.unicode_sets {
options = options.with_unicode_sets_mode();
}
}
match PatternParser::new(&allocator, pattern_text, options).parse() {
let options = ParserOptions::default()
.with_span_offset(pattern_span_start)
.with_flags(flags_arg.map_or("", |(_, flags_text)| flags_text));

match Parser::new(&allocator, pattern_text, options).parse() {
Ok(_) => {}
Err(diagnostic) => ctx.diagnostic(diagnostic),
}
Expand Down
12 changes: 7 additions & 5 deletions crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,14 @@ impl NoRegexSpaces {
}

let alloc = Allocator::default();
let pattern_with_slashes = format!("/{}/", &pattern.value);
let parser = Parser::new(&alloc, pattern_with_slashes.as_str(), ParserOptions::default());
let regex = parser.parse().ok()?;
let parser = Parser::new(
&alloc,
pattern.value.as_str(),
ParserOptions::default().with_span_offset(pattern.span.start + 1),
);
let parsed_pattern = parser.parse().ok()?;

find_consecutive_spaces(&regex.pattern)
.map(|span| Span::new(span.start + pattern.span.start, span.end + pattern.span.start))
find_consecutive_spaces(&parsed_pattern)
}

fn is_regexp_new_expression(expr: &NewExpression<'_>) -> bool {
Expand Down
26 changes: 13 additions & 13 deletions crates/oxc_linter/src/snapshots/no_invalid_regexp.snap
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ source: crates/oxc_linter/src/tester.rs
╰────

eslint(no-invalid-regexp): Invalid regular expression: Unknown flag
╭─[no_invalid_regexp.tsx:1:18]
╭─[no_invalid_regexp.tsx:1:19]
1 │ new RegExp('.', 'aA');
· ▲
·
╰────

eslint(no-invalid-regexp): Invalid regular expression: Duplicated flag
Expand All @@ -91,10 +91,10 @@ source: crates/oxc_linter/src/tester.rs
· ▲
╰────

eslint(no-invalid-regexp): Invalid regular expression: Duplicated flag
╭─[no_invalid_regexp.tsx:1:20]
eslint(no-invalid-regexp): Invalid regular expression: Unknown flag
╭─[no_invalid_regexp.tsx:1:18]
1 │ new RegExp('.', 'ouo');
·
· ▲
╰────

eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern
Expand Down Expand Up @@ -164,9 +164,9 @@ source: crates/oxc_linter/src/tester.rs
╰────

eslint(no-invalid-regexp): Invalid regular expression: Unknown flag
╭─[no_invalid_regexp.tsx:1:22]
╭─[no_invalid_regexp.tsx:1:23]
1new RegExp(pattern, 'az');
· ▲
·
╰────

eslint(no-invalid-regexp): Invalid regular expression: Unterminated character class
Expand All @@ -175,16 +175,16 @@ source: crates/oxc_linter/src/tester.rs
· ───
╰────

eslint(no-invalid-regexp): Invalid regular expression: Invalid flags, `u` and `v` should be used alone
╭─[no_invalid_regexp.tsx:1:18]
eslint(no-invalid-regexp): Invalid regular expression: `u` and `v` flags should be used alone
╭─[no_invalid_regexp.tsx:1:19]
1 │ new RegExp('.', 'uv');
· ──
·
╰────

eslint(no-invalid-regexp): Invalid regular expression: Invalid flags, `u` and `v` should be used alone
╭─[no_invalid_regexp.tsx:1:22]
eslint(no-invalid-regexp): Invalid regular expression: `u` and `v` flags should be used alone
╭─[no_invalid_regexp.tsx:1:23]
1 │ new RegExp(pattern, 'uv');
· ──
·
╰────

eslint(no-invalid-regexp): Invalid regular expression: Character class atom range out of order
Expand Down
Loading

0 comments on commit 5a73a66

Please sign in to comment.