From f3ec00a5fecabe5587bdd2f7edee86a755b1f52b Mon Sep 17 00:00:00 2001 From: Charles Lew Date: Fri, 1 May 2020 12:34:04 +0800 Subject: [PATCH] Add a fast code path to optimize `confusable_idents` lint for ASCII code base. --- src/librustc_lint/non_ascii_idents.rs | 53 +++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/src/librustc_lint/non_ascii_idents.rs b/src/librustc_lint/non_ascii_idents.rs index 522f292dc949b..94fc1a228df03 100644 --- a/src/librustc_lint/non_ascii_idents.rs +++ b/src/librustc_lint/non_ascii_idents.rs @@ -2,6 +2,8 @@ use crate::{EarlyContext, EarlyLintPass, LintContext}; use rustc_ast::ast; use rustc_data_structures::fx::FxHashMap; use rustc_span::symbol::SymbolStr; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; declare_lint! { pub NON_ASCII_IDENTS, @@ -24,9 +26,6 @@ declare_lint! { declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]); -use std::hash::{Hash, Hasher}; -use std::ops::Deref; - enum CowBoxSymStr { Interned(SymbolStr), Owned(Box), @@ -73,6 +72,35 @@ fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr } } +fn is_in_ascii_confusable_closure(c: char) -> bool { + // FIXME: move this table to `unicode_security` crate. + // data here corresponds to Unicode 13. + const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)]; + let c = c as u64; + for &(range_start, range_end) in ASCII_CONFUSABLE_CLOSURE { + if c >= range_start && c <= range_end { + return true; + } + } + false +} + +fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool { + // FIXME: move this table to `unicode_security` crate. + // data here corresponds to Unicode 13. + const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[ + 0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba, + 0x2080, + ]; + let c = c as u64; + for &item in ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST { + if c == item { + return true; + } + } + false +} + impl EarlyLintPass for NonAsciiIdents { fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) { use rustc_session::lint::Level; @@ -81,9 +109,26 @@ impl EarlyLintPass for NonAsciiIdents { } let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock(); let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len()); + let mut in_fast_path = true; for (symbol, sp) in symbols.iter() { + // fast path let symbol_str = symbol.as_str(); - symbol_strs_and_spans.push((symbol_str, *sp)); + if !symbol_str.chars().all(is_in_ascii_confusable_closure) { + // fallback to slow path. + symbol_strs_and_spans.clear(); + in_fast_path = false; + break; + } + if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) { + symbol_strs_and_spans.push((symbol_str, *sp)); + } + } + if !in_fast_path { + // slow path + for (symbol, sp) in symbols.iter() { + let symbol_str = symbol.as_str(); + symbol_strs_and_spans.push((symbol_str, *sp)); + } } drop(symbols); symbol_strs_and_spans.sort_by_key(|x| x.0.clone());