Skip to content

Commit

Permalink
fix another lexing bug
Browse files — browse the repository at this point in the history
  • Loading branch information
kaikalii committed Sep 18, 2024
1 parent d1440b2 commit 852983b
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 24 deletions.
11 changes: 6 additions & 5 deletions src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ use paste::paste;
use crate::{
ast::*,
grid_fmt::GridFmt,
lex::{is_ident_char, CodeSpan, Loc, Sp},
is_ident_start,
lex::{CodeSpan, Loc, Sp},
parse::{flip_unsplit_lines, parse, split_words, trim_spaces},
Compiler, FunctionId, Ident, InputSrc, Inputs, PreEvalMode, Primitive, RunMode, SafeSys,
Signature, Uiua, UiuaErrorKind, UiuaResult, Value, SUBSCRIPT_NUMS,
Expand Down Expand Up @@ -748,7 +749,7 @@ impl<'a> Formatter<'a> {
self.format_ref_path(&r.path);
if r.path.is_empty()
&& r.name.value.starts_with(|c: char| c.is_lowercase())
&& (self.output.chars().last()).is_some_and(|c| c.is_lowercase() && is_ident_char(c))
&& (self.output.chars().last()).is_some_and(|c| c.is_lowercase() && is_ident_start(c))
{
self.output.push(' ');
}
Expand All @@ -758,7 +759,7 @@ impl<'a> Formatter<'a> {
if let Some(first) = comps.first() {
if first.module.value.starts_with(|c: char| c.is_lowercase())
&& (self.output.chars().last())
.is_some_and(|c| c.is_lowercase() && is_ident_char(c))
.is_some_and(|c| c.is_lowercase() && is_ident_start(c))
{
self.output.push(' ');
}
Expand Down Expand Up @@ -887,7 +888,7 @@ impl<'a> Formatter<'a> {
}
Word::Ref(r) => {
if (self.output.chars().rev())
.take_while(|&c| is_ident_char(c))
.take_while(|&c| is_ident_start(c))
.any(|c| c.is_uppercase())
{
self.output.push(' ');
Expand All @@ -896,7 +897,7 @@ impl<'a> Formatter<'a> {
}
Word::IncompleteRef { path, .. } => {
if (self.output.chars().rev())
.take_while(|&c| is_ident_char(c))
.take_while(|&c| is_ident_start(c))
.any(|c| c.is_uppercase())
{
self.output.push(' ');
Expand Down
47 changes: 28 additions & 19 deletions src/lex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -888,8 +888,17 @@ impl<'a> Lexer<'a> {
"_" => {
if self.next_char_exact("_") {
let mut n = 0;
while let Some(c) = self.next_char_if_all(|c| c.is_ascii_digit()) {
n = n * 10 + c.parse::<usize>().unwrap();
loop {
if let Some(c) = self.next_char_if_all(|c| c.is_ascii_digit()) {
n = n * 10 + c.parse::<usize>().unwrap();
} else if let Some(c) =
self.next_char_if_all(|c| SUBSCRIPT_NUMS.contains(&c))
{
let c = c.chars().next().unwrap();
n = n * 10 + SUBSCRIPT_NUMS.iter().position(|&d| d == c).unwrap();
} else {
break;
}
}
self.end(Subscript(n), start)
} else {
Expand Down Expand Up @@ -1305,25 +1314,24 @@ impl<'a> Lexer<'a> {
return s;
}
if !is_custom_glyph(c) {
let mut started_subscript = false;
// Handle identifiers beginning with __
if c == "_" && self.next_char_exact("_") {
s.push('_');
while let Some(c) = self.next_char_if_all(|c| c.is_ascii_digit()) {
s.push_str(c);
}
} else {
loop {
if let Some(c) = self.next_char_if_all(is_ident_char) {
loop {
if self.next_chars_exact(["_"; 2]) {
s.push_str("__");
while let Some(c) = self.next_char_if_all(|c| c.is_ascii_digit()) {
s.push_str(c);
} else if self.next_chars_exact(["_"; 2]) {
s.push_str("__");
while let Some(c) = self.next_char_if_all(|c| c.is_ascii_digit()) {
s.push_str(c);
}
break;
} else {
break;
}
started_subscript = true;
} else if let Some(c) =
self.next_char_if_all(|c| !started_subscript && is_ident_start(c))
{
s.push_str(c);
} else if let Some(c) = self.next_char_if_all(|c| SUBSCRIPT_NUMS.contains(&c)) {
s.push_str(c);
started_subscript = true;
} else {
break;
}
}
}
Expand Down Expand Up @@ -1561,7 +1569,8 @@ pub fn is_ident_char(c: char) -> bool {
is_ident_start(c) || SUBSCRIPT_NUMS.contains(&c)
}

fn is_ident_start(c: char) -> bool {
/// Whether a character can be among the first characters of a Uiua identifier
pub fn is_ident_start(c: char) -> bool {
c.is_alphabetic() && !"ⁿₙπτηℂλ".contains(c)
}

Expand Down

0 comments on commit 852983b

Please sign in to comment.