Skip to content

Commit

Permalink
fix: Ignore numbers as identifiers
Browse files Browse the repository at this point in the history
  • Loading branch information
Ed Page committed Nov 1, 2019
1 parent cc4b53a commit a00831c
Showing 1 changed file with 23 additions and 4 deletions.
27 changes: 23 additions & 4 deletions typos/src/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,23 +78,33 @@ impl Parser {
}

/// Iterates over the identifiers found in `content`.
///
/// Every match of `self.words_str` is offered to [`Parser::accept`]; matches it
/// rejects are skipped. Each surviving match becomes an `Identifier` built from
/// the matched text and the match's start position within `content`.
pub fn parse<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
    self.words_str
        .find_iter(content)
        // `accept` already applies the `ignore_hex` setting, so no separate
        // hex filter is needed here.
        .filter(move |m| self.accept(m.as_str().as_bytes()))
        .map(|m| Identifier::new_unchecked(m.as_str(), m.start()))
}

/// Iterates over the identifiers found in the raw bytes of `content`.
///
/// Every match of `self.words_bytes` is offered to [`Parser::accept`]; matches it
/// rejects are skipped. A surviving match that is not valid UTF-8 is silently
/// dropped; the rest become `Identifier`s built from the matched text and the
/// match's start position within `content`.
pub fn parse_bytes<'c>(&'c self, content: &'c [u8]) -> impl Iterator<Item = Identifier<'c>> {
    self.words_bytes
        .find_iter(content)
        // `accept` already applies the `ignore_hex` setting, so no separate
        // hex filter is needed here.
        .filter(move |m| self.accept(m.as_bytes()))
        .filter_map(|m| {
            // Invalid UTF-8 yields `None`, which `filter_map` discards.
            std::str::from_utf8(m.as_bytes())
                .ok()
                .map(|text| Identifier::new_unchecked(text, m.start()))
        })
}

/// Decides whether a matched byte sequence should be reported as an identifier.
///
/// Returns `false` when `is_number` recognises `contents`, and also when
/// `self.ignore_hex` is set and `is_hex` recognises it. Everything else is
/// accepted.
fn accept(&self, contents: &[u8]) -> bool {
    // Short-circuiting keeps the check order: number test first, then the
    // hex test, which only runs when `ignore_hex` is enabled.
    !is_number(contents) && !(self.ignore_hex && is_hex(contents))
}
}

impl Default for Parser {
Expand All @@ -103,6 +113,15 @@ impl Default for Parser {
}
}

/// Reports whether `ident` is made up entirely of ASCII digits and digit
/// separators.
///
/// Recognised separators:
/// - `_`: number literal separator in Rust and other languages
/// - `'`: number literal separator in C++
///
/// Returns `false` for an empty slice. Note that a slice holding only
/// separators (for example `_`) still counts as a number.
fn is_number(ident: &[u8]) -> bool {
    // A direct byte scan needs no lazily compiled pattern and has no
    // runtime `unwrap`; at least one byte is required, and every byte must be
    // a digit or separator.
    !ident.is_empty()
        && ident
            .iter()
            .all(|&byte| byte.is_ascii_digit() || byte == b'_' || byte == b'\'')
}

fn is_hex(ident: &[u8]) -> bool {
lazy_static::lazy_static! {
// `_`: number literal separator in Rust and other languages
Expand Down

0 comments on commit a00831c

Please sign in to comment.