Skip to content

Commit

Permalink
fix(regular_expression)!: Migrate to new regexp parser API (#6741)
Browse files Browse the repository at this point in the history
Follow up #6635

- [x] Remove old APIs
- [x] Update linter usage
- [x] Update parser usage
- [x] Update transformer usage
  • Loading branch information
leaysgur committed Oct 22, 2024
1 parent 54a5032 commit 8032813
Show file tree
Hide file tree
Showing 14 changed files with 187 additions and 238 deletions.
95 changes: 41 additions & 54 deletions crates/oxc_linter/src/rules/eslint/no_control_regex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::{
ast::{CapturingGroup, Character, Pattern},
visit::{walk, Visit},
Parser, ParserOptions,
ConstructorParser, Options,
};
use oxc_span::{GetSpan, Span};
use oxc_span::Span;

use crate::{ast_util::extract_regex_flags, context::LintContext, rule::Rule, AstNode};
use crate::{context::LintContext, rule::Rule, AstNode};

fn no_control_regex_diagnostic(count: usize, regex: &str, span: Span) -> OxcDiagnostic {
debug_assert!(count > 0);
Expand Down Expand Up @@ -82,75 +82,63 @@ impl Rule for NoControlRegex {
}

// new RegExp()
AstKind::NewExpression(expr) => {
// constructor is RegExp,
if expr.callee.is_specific_id("RegExp")
// which is provided at least 1 parameter,
&& expr.arguments.len() > 0
{
// where the first one is a string literal
// note: improvements required for strings used via identifier
// references
if let Argument::StringLiteral(pattern) = &expr.arguments[0] {
// get pattern from arguments. Missing or non-string arguments
// will be runtime errors, but are not covered by this rule.
parse_and_check_regex(
context,
&pattern.value,
&expr.arguments,
pattern.span,
);
AstKind::NewExpression(expr) if expr.callee.is_specific_id("RegExp") => {
// note: improvements required for strings used via identifier references
// Missing or non-string arguments will be runtime errors, but are not covered by this rule.
match (&expr.arguments.first(), &expr.arguments.get(1)) {
(
Some(Argument::StringLiteral(pattern)),
Some(Argument::StringLiteral(flags)),
) => {
parse_and_check_regex(context, pattern.span, Some(flags.span));
}
(Some(Argument::StringLiteral(pattern)), _) => {
parse_and_check_regex(context, pattern.span, None);
}
_ => {}
}
}

// RegExp()
AstKind::CallExpression(expr) => {
// constructor is RegExp,
if expr.callee.is_specific_id("RegExp")
// which is provided at least 1 parameter,
&& expr.arguments.len() > 0
{
// where the first one is a string literal
// note: improvements required for strings used via identifier
// references
if let Argument::StringLiteral(pattern) = &expr.arguments[0] {
// get pattern from arguments. Missing or non-string arguments
// will be runtime errors, but are not covered by this rule.
parse_and_check_regex(
context,
&pattern.value,
&expr.arguments,
pattern.span,
);
AstKind::CallExpression(expr) if expr.callee.is_specific_id("RegExp") => {
// note: improvements required for strings used via identifier references
// Missing or non-string arguments will be runtime errors, but are not covered by this rule.
match (&expr.arguments.first(), &expr.arguments.get(1)) {
(
Some(Argument::StringLiteral(pattern)),
Some(Argument::StringLiteral(flags)),
) => {
parse_and_check_regex(context, pattern.span, Some(flags.span));
}
(Some(Argument::StringLiteral(pattern)), _) => {
parse_and_check_regex(context, pattern.span, None);
}
_ => {}
}
}

_ => {}
};
}
}

fn parse_and_check_regex<'a>(
ctx: &LintContext<'a>,
source_text: &'a str,
arguments: &oxc_allocator::Vec<'a, Argument<'a>>,
expr_span: Span,
) {
fn parse_and_check_regex(ctx: &LintContext, pattern_span: Span, flags_span: Option<Span>) {
let allocator = Allocator::default();
let flags = extract_regex_flags(arguments);
let flags_text = flags.map_or(String::new(), |f| f.to_string());
let parser = Parser::new(

let flags_text = flags_span.map(|span| span.source_text(ctx.source_text()));
let parser = ConstructorParser::new(
&allocator,
source_text,
ParserOptions::default()
.with_span_offset(arguments.first().map_or(0, |arg| arg.span().start))
.with_flags(&flags_text),
pattern_span.source_text(ctx.source_text()),
flags_text,
Options {
pattern_span_offset: pattern_span.start,
flags_span_offset: flags_span.map_or(0, |span| span.start),
},
);
let Ok(pattern) = parser.parse() else {
return;
};
check_pattern(ctx, &pattern, expr_span);
check_pattern(ctx, &pattern, pattern_span);
}

fn check_pattern(context: &LintContext, pattern: &Pattern, span: Span) {
Expand Down Expand Up @@ -279,7 +267,6 @@ mod tests {
vec![
r"let r = /\u{0}/u",
r"let r = new RegExp('\\u{0}', 'u');",
r"let r = new RegExp('\\u{0}', `u`);",
r"let r = /\u{c}/u",
r"let r = /\u{1F}/u",
r"let r = new RegExp('\\u{1F}', 'u');", // flags are known & contain u
Expand Down
58 changes: 34 additions & 24 deletions crates/oxc_linter/src/rules/eslint/no_invalid_regexp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use oxc_allocator::Allocator;
use oxc_ast::{ast::Argument, AstKind};
use oxc_diagnostics::OxcDiagnostic;
use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::{Parser, ParserOptions};
use oxc_regular_expression::{ConstructorParser, Options};
use oxc_span::Span;
use rustc_hash::FxHashSet;
use serde::Deserialize;
Expand Down Expand Up @@ -86,13 +86,20 @@ impl Rule for NoInvalidRegexp {
return;
}

let (mut u_flag_found, mut v_flag_found) = (false, false);
// Validate flags first if exists
if let Some((flags_span_start, flags_text)) = flags_arg {
let (mut u_flag_found, mut v_flag_found) = (false, false);
// `oxc_regular_expression` crate has a ability to validate flags.
// But, it does not accept any `allow_constructor_flags` option.
// And if we omit user defined flags here, `Span` may be incorrect on error reporting.
if let Some(flags_span) = flags_arg {
// Strip quotes
let flags_text =
flags_span.source_text(ctx.source_text()).trim_matches('\'').trim_matches('"');

let mut unique_flags = FxHashSet::default();
for (idx, ch) in flags_text.char_indices() {
#[allow(clippy::cast_possible_truncation)]
let start = flags_span_start + idx as u32;
let start = flags_span.start + 1 + idx as u32;

// Invalid combination: u+v
if ch == 'u' {
Expand Down Expand Up @@ -128,40 +135,43 @@ impl Rule for NoInvalidRegexp {
// Pattern check is skipped when 1st argument is NOT a `StringLiteral`
// e.g. `new RegExp(var)`, `RegExp("str" + var)`
let allocator = Allocator::default();
if let Some((pattern_span_start, pattern_text)) = pattern_arg {
let options = ParserOptions::default()
.with_span_offset(pattern_span_start)
.with_flags(flags_arg.map_or("", |(_, flags_text)| flags_text));
if let Some(pattern_span) = pattern_arg {
let pattern_text = pattern_span.source_text(ctx.source_text());

let flags_text = match (u_flag_found, v_flag_found) {
(true, false) => Some("'u'"),
(_, true) => Some("'v'"),
(false, false) => None,
};

match Parser::new(&allocator, pattern_text, options).parse() {
match ConstructorParser::new(
&allocator,
pattern_text,
flags_text,
Options { pattern_span_offset: pattern_span.start, flags_span_offset: 0 },
)
.parse()
{
Ok(_) => {}
Err(diagnostic) => ctx.diagnostic(diagnostic),
}
}
}
}

/// Returns: (span_start, text)
/// span_start + 1 for opening string bracket.
type ParsedArgument<'a> = (u32, &'a str);
fn parse_arguments_to_check<'a>(
arg1: Option<&Argument<'a>>,
arg2: Option<&Argument<'a>>,
) -> (Option<ParsedArgument<'a>>, Option<ParsedArgument<'a>>) {
) -> (Option<Span>, Option<Span>) {
match (arg1, arg2) {
// ("pattern", "flags")
(Some(Argument::StringLiteral(pattern)), Some(Argument::StringLiteral(flags))) => (
Some((pattern.span.start + 1, pattern.value.as_str())),
Some((flags.span.start + 1, flags.value.as_str())),
),
// (pattern, "flags")
(Some(_arg), Some(Argument::StringLiteral(flags))) => {
(None, Some((flags.span.start + 1, flags.value.as_str())))
(Some(Argument::StringLiteral(pattern)), Some(Argument::StringLiteral(flags))) => {
(Some(pattern.span), Some(flags.span))
}
// (pattern, "flags")
(Some(_arg), Some(Argument::StringLiteral(flags))) => (None, Some(flags.span)),
// ("pattern")
(Some(Argument::StringLiteral(pattern)), None) => {
(Some((pattern.span.start + 1, pattern.value.as_str())), None)
}
(Some(Argument::StringLiteral(pattern)), None) => (Some(pattern.span), None),
// (pattern), ()
_ => (None, None),
}
Expand All @@ -172,7 +182,7 @@ fn test() {
use crate::tester::Tester;

let pass = vec![
("[RegExp(''), /a/uv]", None),
("RegExp('')", None),
("RegExp()", None),
("RegExp('.', 'g')", None),
("new RegExp('.')", None),
Expand Down
15 changes: 8 additions & 7 deletions crates/oxc_linter/src/rules/eslint/no_regex_spaces.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use oxc_macros::declare_oxc_lint;
use oxc_regular_expression::{
ast::{Character, Pattern},
visit::{RegExpAstKind, Visit},
Parser, ParserOptions,
ConstructorParser, Options,
};
use oxc_span::Span;

Expand Down Expand Up @@ -63,13 +63,13 @@ impl Rule for NoRegexSpaces {
}

AstKind::CallExpression(expr) if Self::is_regexp_call_expression(expr) => {
if let Some(span) = Self::find_expr_to_report(&expr.arguments) {
if let Some(span) = Self::find_expr_to_report(&expr.arguments, ctx) {
ctx.diagnostic(no_regex_spaces_diagnostic(span)); // RegExp('a b')
}
}

AstKind::NewExpression(expr) if Self::is_regexp_new_expression(expr) => {
if let Some(span) = Self::find_expr_to_report(&expr.arguments) {
if let Some(span) = Self::find_expr_to_report(&expr.arguments, ctx) {
ctx.diagnostic(no_regex_spaces_diagnostic(span)); // new RegExp('a b')
}
}
Expand All @@ -90,7 +90,7 @@ impl NoRegexSpaces {
find_consecutive_spaces(pattern)
}

fn find_expr_to_report(args: &Vec<'_, Argument<'_>>) -> Option<Span> {
fn find_expr_to_report(args: &Vec<'_, Argument<'_>>, ctx: &LintContext) -> Option<Span> {
if let Some(expr) = args.get(1).and_then(Argument::as_expression) {
if !expr.is_string_literal() {
return None; // skip on indeterminate flag, e.g. RegExp('a b', flags)
Expand All @@ -105,10 +105,11 @@ impl NoRegexSpaces {
}

let alloc = Allocator::default();
let parser = Parser::new(
let parser = ConstructorParser::new(
&alloc,
pattern.value.as_str(),
ParserOptions::default().with_span_offset(pattern.span.start + 1),
pattern.span.source_text(ctx.source_text()),
None,
Options { pattern_span_offset: pattern.span.start, ..Options::default() },
);
let parsed_pattern = parser.parse().ok()?;

Expand Down
32 changes: 16 additions & 16 deletions crates/oxc_linter/src/snapshots/no_invalid_regexp.snap
Original file line number Diff line number Diff line change
Expand Up @@ -104,51 +104,51 @@ source: crates/oxc_linter/src/tester.rs
╰────

eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern
╭─[no_invalid_regexp.tsx:1:14]
╭─[no_invalid_regexp.tsx:1:15]
1 │ new RegExp('\\a', 'u');
· ▲
·
╰────

eslint(no-invalid-regexp): Invalid regular expression: Could not parse the entire pattern
╭─[no_invalid_regexp.tsx:1:14]
╭─[no_invalid_regexp.tsx:1:15]
1 │ new RegExp('\\a', 'u');
· ▲
·
╰────

eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:14]
╭─[no_invalid_regexp.tsx:1:15]
1 │ RegExp('\\u{0}*');
· ─
·
╰────

eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:18]
╭─[no_invalid_regexp.tsx:1:19]
1new RegExp('\\u{0}*');
· ─
·
╰────

eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:18]
╭─[no_invalid_regexp.tsx:1:19]
1new RegExp('\\u{0}*', '');
· ─
·
╰────

eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:18]
╭─[no_invalid_regexp.tsx:1:19]
1new RegExp('\\u{0}*', 'a');
· ─
·
╰────

eslint(no-invalid-regexp): Invalid regular expression: Invalid braced quantifier
╭─[no_invalid_regexp.tsx:1:14]
╭─[no_invalid_regexp.tsx:1:15]
1RegExp('\\u{0}*');
· ─
·
╰────

eslint(no-invalid-regexp): Invalid regular expression: Invalid extended atom escape
╭─[no_invalid_regexp.tsx:1:13]
1new RegExp('\\');
· ─
· ─
╰────

eslint(no-invalid-regexp): Invalid regular expression: Unknown flag
Expand Down Expand Up @@ -196,7 +196,7 @@ source: crates/oxc_linter/src/tester.rs
eslint(no-invalid-regexp): Invalid regular expression: Unterminated character class
╭─[no_invalid_regexp.tsx:1:13]
1 │ new RegExp('[[]\\u{0}*' /* valid only with `u` flag */, 'v')
· ────────
· ────────
╰────

eslint(no-invalid-regexp): Invalid regular expression: Duplicated capturing group names
Expand Down
8 changes: 4 additions & 4 deletions crates/oxc_linter/src/snapshots/no_regex_spaces.snap
Original file line number Diff line number Diff line change
Expand Up @@ -128,16 +128,16 @@ source: crates/oxc_linter/src/tester.rs
help: Use a quantifier: ` {2}`

eslint(no-regex-spaces): Multiple consecutive spaces are hard to count.
╭─[no_regex_spaces.tsx:1:25]
╭─[no_regex_spaces.tsx:1:26]
1var foo = new RegExp('\\d ')
· ──
· ──
╰────
help: Use a quantifier: ` {2}`

eslint(no-regex-spaces): Multiple consecutive spaces are hard to count.
╭─[no_regex_spaces.tsx:1:25]
╭─[no_regex_spaces.tsx:1:26]
1var foo = RegExp('\\u0041 ')
· ───
· ───
╰────
help: Use a quantifier: ` {3}`

Expand Down
Loading

0 comments on commit 8032813

Please sign in to comment.