Skip to content

Commit

Permalink
fix(lint): handle malformed regexes
Browse files Browse the repository at this point in the history
  • Loading branch information
Conaclos committed Sep 29, 2023
1 parent 80fb2fc commit c506cf7
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ impl Rule for NoEmptyCharacterClassInRegex {
fn run(ctx: &RuleContext<Self>) -> Self::Signals {
let mut empty_classes = vec![];
let regex = ctx.query();
let (Ok(regex_token), Ok(regex_flags)) = (regex.value_token(), regex.flags()) else {
let Ok((pattern, flags)) = regex.decompose() else {
return empty_classes;
};
let has_v_flag = regex_flags.contains('v');
let trimmed_text = regex_token.text_trimmed();
let has_v_flag = flags.text().contains('v');
let trimmed_text = pattern.text();
let mut class_start_index = None;
let mut is_negated_class = false;
let mut enumerated_char_iter = trimmed_text.chars().enumerate();
Expand Down Expand Up @@ -114,8 +114,8 @@ impl Rule for NoEmptyCharacterClassInRegex {
RuleDiagnostic::new(
rule_category!(),
TextRange::new(
regex_token_range.start() + TextSize::from(empty_class_range.start as u32),
regex_token_range.start() + TextSize::from((empty_class_range.end + 1) as u32),
regex_token_range.start() + TextSize::from(empty_class_range.start as u32 + 1),
regex_token_range.start() + TextSize::from((empty_class_range.end + 2) as u32),
),
markup! {
"The regular expression includes this "<Emphasis>{maybe_negated}"empty character class"</Emphasis>"."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,9 @@ fn add_control_character_to_vec(
/// - Unicode code point escapes range from `\u{0}` to `\u{1F}`.
/// - The Unicode flag (`u`) must be set for these Unicode code point escapes to work: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode.
/// - Unescaped raw characters from U+0000 to U+001F.
fn collect_control_characters(pattern: String, flags: Option<String>) -> Option<Vec<String>> {
fn collect_control_characters(pattern: &str, flags: &str) -> Option<Vec<String>> {
let mut control_characters: Vec<String> = Vec::new();
let is_unicode_flag_set = flags.unwrap_or_default().contains('u');
let is_unicode_flag_set = flags.contains('u');
let mut iter = pattern.chars().peekable();

while let Some(c) = iter.next() {
Expand Down Expand Up @@ -190,9 +190,10 @@ fn collect_control_characters_from_expression(
.next()
.and_then(|arg| arg.ok())
.and_then(|arg| JsStringLiteralExpression::cast_ref(arg.syntax()))
.map(|js_string_literal| js_string_literal.text());
.map(|js_string_literal| js_string_literal.text())
.unwrap_or_default();

return collect_control_characters(pattern, regexp_flags);
return collect_control_characters(&pattern, &regexp_flags);
}
None
}
Expand All @@ -219,10 +220,8 @@ impl Rule for NoControlCharactersInRegex {
)
}
RegexExpressionLike::JsRegexLiteralExpression(js_regex_literal_expression) => {
collect_control_characters(
js_regex_literal_expression.pattern().ok()?,
js_regex_literal_expression.flags().ok(),
)
let (pattern, flags) = js_regex_literal_expression.decompose().ok()?;
collect_control_characters(pattern.text(), flags.text())
}
}
}
Expand All @@ -249,7 +248,7 @@ mod tests {
#[test]
fn test_collect_control_characters() {
assert_eq!(
collect_control_characters(String::from("\\x00\\x0F\\u0010\\u001F"), None),
collect_control_characters("\\x00\\x0F\\u0010\\u001F", ""),
Some(vec![
String::from("\\x00"),
String::from("\\x0F"),
Expand All @@ -258,11 +257,11 @@ mod tests {
])
);
assert_eq!(
collect_control_characters(String::from("\\u{0}\\u{1F}"), Some(String::from("u"))),
collect_control_characters("\\u{0}\\u{1F}", "u"),
Some(vec![String::from("\\u{0}"), String::from("\\u{1F}")])
);
assert_eq!(
collect_control_characters(String::from("\\x20\\u0020\\u{20}\\t\\n"), None),
collect_control_characters("\\x20\\u0020\\u{20}\\t\\n", ""),
None
);
}
Expand Down
3 changes: 2 additions & 1 deletion crates/biome_js_formatter/src/utils/assignment_like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1176,7 +1176,8 @@ fn is_short_argument(
}
AnyJsExpression::AnyJsLiteralExpression(literal) => match literal {
AnyJsLiteralExpression::JsRegexLiteralExpression(regex) => {
regex.pattern()?.chars().count() <= threshold as usize
let (pattern, _) = regex.decompose()?;
pattern.text().chars().count() <= threshold as usize
}
AnyJsLiteralExpression::JsStringLiteralExpression(string) => {
string.value_token()?.text_trimmed().len() <= threshold as usize
Expand Down
64 changes: 43 additions & 21 deletions crates/biome_js_syntax/src/expr_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::{
use crate::{JsPreUpdateExpression, JsSyntaxKind::*};
use biome_rowan::{
declare_node_union, AstNode, AstNodeList, AstSeparatedList, NodeOrToken, SyntaxResult,
TextRange, TokenText,
TextRange, TextSize, TokenText,
};
use core::iter;

Expand Down Expand Up @@ -621,28 +621,50 @@ impl JsTemplateExpression {
}

impl JsRegexLiteralExpression {
pub fn pattern(&self) -> SyntaxResult<String> {
let token = self.value_token()?;
let text_trimmed = token.text_trimmed();

// SAFETY: a valid regex literal must have an end slash
let end_slash_pos = text_trimmed
.rfind('/')
.expect("regex literal must have an end slash");

Ok(String::from(&text_trimmed[1..end_slash_pos]))
}

pub fn flags(&self) -> SyntaxResult<String> {
/// Decompose a regular expression into its pattern and flags.
///
/// ```
/// use biome_js_factory::make;
/// use biome_js_syntax::{JsSyntaxKind, JsSyntaxToken};
///
/// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+/igu"), [], []);
/// let regex = make::js_regex_literal_expression(token);
/// let (pattern, flags) = regex.decompose().unwrap();
/// assert_eq!(pattern.text(), "a+");
/// assert_eq!(flags.text(), "igu");
///
/// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+/"), [], []);
/// let regex = make::js_regex_literal_expression(token);
/// let (pattern, flags) = regex.decompose().unwrap();
/// assert_eq!(pattern.text(), "a+");
/// assert_eq!(flags.text(), "");
///
/// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+"), [], []);
/// let regex = make::js_regex_literal_expression(token);
/// let (pattern, flags) = regex.decompose().unwrap();
/// assert_eq!(pattern.text(), "a+");
/// assert_eq!(flags.text(), "");
/// ```
pub fn decompose(&self) -> SyntaxResult<(TokenText, TokenText)> {
let token = self.value_token()?;
let text_trimmed = token.text_trimmed();

// SAFETY: a valid regex literal must have an end slash
let end_slash_pos = text_trimmed
.rfind('/')
.expect("regex literal must have an end slash");

Ok(String::from(&text_trimmed[end_slash_pos..]))
let token_text = token.token_text_trimmed();
let len = TextSize::from(text_trimmed.len() as u32);
let Some(end_slash_pos) = text_trimmed[1..].rfind('/').map(|x| x + 1) else {
return Ok((
token_text
.clone()
.slice(TextRange::new(TextSize::from(1), len)),
token_text.slice(TextRange::empty(len)),
));
};
let end_slash_pos = end_slash_pos as u32;
let pattern = token_text.clone().slice(TextRange::new(
TextSize::from(1),
TextSize::from(end_slash_pos),
));
let flags = token_text.slice(TextRange::new(TextSize::from(end_slash_pos + 1), len));
Ok((pattern, flags))
}
}

Expand Down

0 comments on commit c506cf7

Please sign in to comment.