Skip to content

Commit

Permalink
feat: handle escaped unicode
Browse files Browse the repository at this point in the history
  • Loading branch information
togami2864 committed Nov 27, 2023
1 parent adcb3b4 commit 38493ce
Show file tree
Hide file tree
Showing 5 changed files with 434 additions and 8 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/biome_js_analyze/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ biome_rowan = { workspace = true }
bpaf.workspace = true
lazy_static = { workspace = true }
natord = "1.0.9"
regex = "1.5.5"
roaring = "0.10.1"
rustc-hash = { workspace = true }
schemars = { workspace = true, optional = true }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use biome_console::markup;
use biome_diagnostics::Applicability;
use biome_js_syntax::{JsRegexLiteralExpression, JsSyntaxKind, JsSyntaxToken};
use biome_rowan::{AstNode, BatchMutationExt, TextRange};
use regex::Regex;

declare_rule! {
/// Disallow characters which are made with multiple code points in character class syntax
Expand Down Expand Up @@ -88,37 +89,36 @@ impl Rule for NoMisleadingCharacterClass {
return None;
}

let l = pattern.text();

let l = replace_escaped_unicode(pattern.text());
let has_u_flag = flags.text().contains('u');
if !has_u_flag && has_surrogate_pair(l) {
if !has_u_flag && has_surrogate_pair(&l) {
return Some(RuleState {
range,
message: Message::SurrogatePairWithoutUFlag,
});
}

if has_combining_class_or_vs16(l) {
if has_combining_class_or_vs16(&l) {
return Some(RuleState {
range,
message: Message::CombiningClassOrVs16,
});
}
if has_regional_indicator_symbol(l) {
if has_regional_indicator_symbol(&l) {
return Some(RuleState {
range,
message: Message::RegionalIndicatorSymbol,
});
}

if has_emoji_modifier(l) {
if has_emoji_modifier(&l) {
return Some(RuleState {
range,
message: Message::EmojiModifier,
});
}

if zwj(l) {
if zwj(&l) {
return Some(RuleState {
range,
message: Message::JoinedCharSequence,
Expand Down Expand Up @@ -232,3 +232,14 @@ fn zwj(chars: &str) -> bool {
/// Reports whether `s` contains any character whose code point lies above
/// U+FFFF, i.e. one that UTF-16 can only represent as a surrogate pair.
fn has_surrogate_pair(s: &str) -> bool {
    for ch in s.chars() {
        if u32::from(ch) > 0xFFFF {
            return true;
        }
    }
    false
}

fn replace_escaped_unicode(input: &str) -> String {
let re = Regex::new(r"\\u\{([0-9a-fA-F]+)\}").unwrap();
re.replace_all(input, |caps: &regex::Captures| {
u32::from_str_radix(&caps[1], 16)
.ok()
.and_then(char::from_u32)
.map_or_else(String::new, |c| c.to_string())
})
.into_owned()
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,32 @@
/^[👶🏻]$/u;
/^[🇯🇵]$/u;
/^[👨‍👩‍👦]$/u;
/^[👍]$/;
/^[👍]$/;

/[👍]/;
/[\uD83D\uDC4D]/;
/[👍]\\a/;
/(?<=[👍])/;
/[Á]/;
/[Á]/u;
/[\u0041\u0301]/;
/[\u0041\u0301]/u;
/[\u{41}\u{301}]/u;
/[❇️]/;
/[❇️]/u;
/[\u2747\uFE0F]/;
/[\u2747\uFE0F]/u;
/[\u{2747}\u{FE0F}]/u;
/[👶🏻]/;
/[👶🏻]/u;
/[\uD83D\uDC76\uD83C\uDFFB]/u;
/[\u{1F476}\u{1F3FB}]/u;
/[🇯🇵]/;
/[🇯🇵]/i;
/[🇯🇵]/u;
/[\uD83C\uDDEF\uD83C\uDDF5]/u;
/[\u{1F1EF}\u{1F1F5}]/u;
/[👨‍👩‍👦]/;
/[👨‍👩‍👦]/u;
/[\uD83D\uDC68\u200D\uD83D\uDC69\u200D\uD83D\uDC66]/u;
/[\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F466}]/u;
Loading

0 comments on commit 38493ce

Please sign in to comment.