Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(lint): handle malformed regexes #452

Merged
merged 1 commit into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ Read our [guidelines for writing a good changelog entry](https://github.com/biom

- Fix [#397](https://github.com/biomejs/biome/issues/397). [useNumericLiterals](https://biomejs.dev/linter/rules/use-numeric-literals) now provides correct code fixes for signed numbers. Contributed by @Conaclos

- Fix [#452](https://github.com/biomejs/biome/pull/452). The linter no longer panics when it encounters a malformed regex (a regex literal that does not end with a slash).

### Parser

- Enhance diagnostic for infer type handling in the parser. The 'infer' keyword can only be utilized within the 'extends' clause of a conditional type. Using it outside of this context will result in an error. Ensure that any type declarations using 'infer' are correctly placed within the conditional type structure to avoid parsing issues. Contributed by @denbezrukov
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ impl Rule for NoEmptyCharacterClassInRegex {
fn run(ctx: &RuleContext<Self>) -> Self::Signals {
let mut empty_classes = vec![];
let regex = ctx.query();
let (Ok(regex_token), Ok(regex_flags)) = (regex.value_token(), regex.flags()) else {
let Ok((pattern, flags)) = regex.decompose() else {
return empty_classes;
};
let has_v_flag = regex_flags.contains('v');
let trimmed_text = regex_token.text_trimmed();
let has_v_flag = flags.text().contains('v');
let trimmed_text = pattern.text();
let mut class_start_index = None;
let mut is_negated_class = false;
let mut enumerated_char_iter = trimmed_text.chars().enumerate();
Expand Down Expand Up @@ -114,8 +114,8 @@ impl Rule for NoEmptyCharacterClassInRegex {
RuleDiagnostic::new(
rule_category!(),
TextRange::new(
regex_token_range.start() + TextSize::from(empty_class_range.start as u32),
regex_token_range.start() + TextSize::from((empty_class_range.end + 1) as u32),
regex_token_range.start() + TextSize::from(empty_class_range.start as u32 + 1),
regex_token_range.start() + TextSize::from((empty_class_range.end + 2) as u32),
),
markup! {
"The regular expression includes this "<Emphasis>{maybe_negated}"empty character class"</Emphasis>"."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,9 @@ fn add_control_character_to_vec(
/// - Unicode code point escapes range from `\u{0}` to `\u{1F}`.
/// - The Unicode flag must be set as true in order for these Unicode code point escapes to work: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/unicode.
/// - Unescaped raw characters from U+0000 to U+001F.
fn collect_control_characters(pattern: String, flags: Option<String>) -> Option<Vec<String>> {
fn collect_control_characters(pattern: &str, flags: &str) -> Option<Vec<String>> {
let mut control_characters: Vec<String> = Vec::new();
let is_unicode_flag_set = flags.unwrap_or_default().contains('u');
let is_unicode_flag_set = flags.contains('u');
let mut iter = pattern.chars().peekable();

while let Some(c) = iter.next() {
Expand Down Expand Up @@ -190,9 +190,10 @@ fn collect_control_characters_from_expression(
.next()
.and_then(|arg| arg.ok())
.and_then(|arg| JsStringLiteralExpression::cast_ref(arg.syntax()))
.map(|js_string_literal| js_string_literal.text());
.map(|js_string_literal| js_string_literal.text())
.unwrap_or_default();

return collect_control_characters(pattern, regexp_flags);
return collect_control_characters(&pattern, &regexp_flags);
}
None
}
Expand All @@ -219,10 +220,8 @@ impl Rule for NoControlCharactersInRegex {
)
}
RegexExpressionLike::JsRegexLiteralExpression(js_regex_literal_expression) => {
collect_control_characters(
js_regex_literal_expression.pattern().ok()?,
js_regex_literal_expression.flags().ok(),
)
let (pattern, flags) = js_regex_literal_expression.decompose().ok()?;
collect_control_characters(pattern.text(), flags.text())
}
}
}
Expand All @@ -249,7 +248,7 @@ mod tests {
#[test]
fn test_collect_control_characters() {
assert_eq!(
collect_control_characters(String::from("\\x00\\x0F\\u0010\\u001F"), None),
collect_control_characters("\\x00\\x0F\\u0010\\u001F", ""),
Some(vec![
String::from("\\x00"),
String::from("\\x0F"),
Expand All @@ -258,11 +257,11 @@ mod tests {
])
);
assert_eq!(
collect_control_characters(String::from("\\u{0}\\u{1F}"), Some(String::from("u"))),
collect_control_characters("\\u{0}\\u{1F}", "u"),
Some(vec![String::from("\\u{0}"), String::from("\\u{1F}")])
);
assert_eq!(
collect_control_characters(String::from("\\x20\\u0020\\u{20}\\t\\n"), None),
collect_control_characters("\\x20\\u0020\\u{20}\\t\\n", ""),
None
);
}
Expand Down
3 changes: 2 additions & 1 deletion crates/biome_js_formatter/src/utils/assignment_like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1176,7 +1176,8 @@ fn is_short_argument(
}
AnyJsExpression::AnyJsLiteralExpression(literal) => match literal {
AnyJsLiteralExpression::JsRegexLiteralExpression(regex) => {
regex.pattern()?.chars().count() <= threshold as usize
let (pattern, _) = regex.decompose()?;
pattern.text().chars().count() <= threshold as usize
}
AnyJsLiteralExpression::JsStringLiteralExpression(string) => {
string.value_token()?.text_trimmed().len() <= threshold as usize
Expand Down
64 changes: 43 additions & 21 deletions crates/biome_js_syntax/src/expr_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::{
use crate::{JsPreUpdateExpression, JsSyntaxKind::*};
use biome_rowan::{
declare_node_union, AstNode, AstNodeList, AstSeparatedList, NodeOrToken, SyntaxResult,
TextRange, TokenText,
TextRange, TextSize, TokenText,
};
use core::iter;

Expand Down Expand Up @@ -621,28 +621,50 @@ impl JsTemplateExpression {
}

impl JsRegexLiteralExpression {
pub fn pattern(&self) -> SyntaxResult<String> {
let token = self.value_token()?;
let text_trimmed = token.text_trimmed();

// SAFETY: a valid regex literal must have a end slash
let end_slash_pos = text_trimmed
.rfind('/')
.expect("regex literal must have an end slash");

Ok(String::from(&text_trimmed[1..end_slash_pos]))
}

pub fn flags(&self) -> SyntaxResult<String> {
/// Decompose a regular expression into its pattern and flags.
///
/// ```
/// use biome_js_factory::make;
/// use biome_js_syntax::{JsSyntaxKind, JsSyntaxToken};
///
/// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+/igu"), [], []);
/// let regex = make::js_regex_literal_expression(token);
/// let (pattern, flags) = regex.decompose().unwrap();
/// assert_eq!(pattern.text(), "a+");
/// assert_eq!(flags.text(), "igu");
///
/// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+/"), [], []);
/// let regex = make::js_regex_literal_expression(token);
/// let (pattern, flags) = regex.decompose().unwrap();
/// assert_eq!(pattern.text(), "a+");
/// assert_eq!(flags.text(), "");
///
/// let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &format!("/a+"), [], []);
/// let regex = make::js_regex_literal_expression(token);
/// let (pattern, flags) = regex.decompose().unwrap();
/// assert_eq!(pattern.text(), "a+");
/// assert_eq!(flags.text(), "");
/// ```
pub fn decompose(&self) -> SyntaxResult<(TokenText, TokenText)> {
let token = self.value_token()?;
let text_trimmed = token.text_trimmed();

// SAFETY: a valid regex literal must have a end slash
let end_slash_pos = text_trimmed
.rfind('/')
.expect("regex literal must have an end slash");

Ok(String::from(&text_trimmed[end_slash_pos..]))
let token_text = token.token_text_trimmed();
let len = TextSize::from(text_trimmed.len() as u32);
let Some(end_slash_pos) = text_trimmed[1..].rfind('/').map(|x| x + 1) else {
return Ok((
token_text
.clone()
.slice(TextRange::new(TextSize::from(1), len)),
token_text.slice(TextRange::empty(len)),
));
};
let end_slash_pos = end_slash_pos as u32;
let pattern = token_text.clone().slice(TextRange::new(
TextSize::from(1),
TextSize::from(end_slash_pos),
));
let flags = token_text.slice(TextRange::new(TextSize::from(end_slash_pos + 1), len));
Ok((pattern, flags))
}
}

Expand Down
2 changes: 2 additions & 0 deletions website/src/content/docs/internals/changelog.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ Read our [guidelines for writing a good changelog entry](https://github.com/biom

- Fix [#397](https://github.com/biomejs/biome/issues/397). [useNumericLiterals](https://biomejs.dev/linter/rules/use-numeric-literals) now provides correct code fixes for signed numbers. Contributed by @Conaclos

- Fix [#452](https://github.com/biomejs/biome/pull/452). The linter no longer panics when it encounters a malformed regex (a regex literal that does not end with a slash).

### Parser

- Enhance diagnostic for infer type handling in the parser. The 'infer' keyword can only be utilized within the 'extends' clause of a conditional type. Using it outside of this context will result in an error. Ensure that any type declarations using 'infer' are correctly placed within the conditional type structure to avoid parsing issues. Contributed by @denbezrukov
Expand Down
Loading