Skip to content

Commit

Permalink
Support string escape sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
sharkdp committed Jul 1, 2024
1 parent 8abc7d4 commit b00472a
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 13 deletions.
93 changes: 88 additions & 5 deletions numbat/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1339,7 +1339,7 @@ impl<'a> Parser<'a> {
} else if let Some(token) = self.match_exact(TokenKind::StringFixed) {
Ok(Expression::String(
token.span,
vec![StringPart::Fixed(strip_first_and_last(&token.lexeme))],
vec![StringPart::Fixed(strip_and_escape(&token.lexeme))],
))
} else if let Some(token) = self.match_exact(TokenKind::StringInterpolationStart) {
let mut parts = Vec::new();
Expand All @@ -1358,7 +1358,7 @@ impl<'a> Parser<'a> {
self.interpolation(&mut parts, inner_token)?;
}
TokenKind::StringInterpolationEnd => {
parts.push(StringPart::Fixed(strip_first_and_last(&inner_token.lexeme)));
parts.push(StringPart::Fixed(strip_and_escape(&inner_token.lexeme)));
has_end = true;
break;
}
Expand Down Expand Up @@ -1424,7 +1424,7 @@ impl<'a> Parser<'a> {
}

fn interpolation(&mut self, parts: &mut Vec<StringPart>, token: &Token) -> Result<()> {
parts.push(StringPart::Fixed(strip_first_and_last(&token.lexeme)));
parts.push(StringPart::Fixed(strip_and_escape(&token.lexeme)));

let expr = self.expression()?;

Expand Down Expand Up @@ -1728,8 +1728,40 @@ impl<'a> Parser<'a> {
}
}

fn strip_first_and_last(s: &str) -> String {
s[1..(s.len() - 1)].to_string()
/// Strips the delimiters of a string token and resolves its escape sequences.
///
/// The first and the last byte of `s` (the `"`, `{` or `}` that delimit the
/// token lexeme) are removed. In the remaining text the sequences `\n`, `\r`,
/// `\t`, `\"`, `\0`, `\\`, `\{` and `\}` are replaced by the character they
/// denote. Any other escape sequence is passed through unchanged, backslash
/// included. A lone backslash at the very end of the text is kept as well
/// instead of being silently dropped.
///
/// # Panics
///
/// Panics if `s` is shorter than two bytes, or if byte offsets `1` and
/// `s.len() - 1` do not fall on character boundaries.
fn strip_and_escape(s: &str) -> String {
    let trimmed = &s[1..(s.len() - 1)];

    let mut result = String::with_capacity(trimmed.len());
    let mut chars = trimmed.chars();

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        // Keep this in sync with 'escape_numbat_string',
        // where the reverse replacement is needed
        match chars.next() {
            Some('n') => result.push('\n'),
            Some('r') => result.push('\r'),
            Some('t') => result.push('\t'),
            Some('"') => result.push('"'),
            Some('0') => result.push('\0'),
            Some('\\') => result.push('\\'),
            Some('{') => result.push('{'),
            Some('}') => result.push('}'),
            Some(other) => {
                // We follow Python here, where an unknown escape sequence
                // does not lead to an error, but is just passed through.
                result.push('\\');
                result.push(other);
            }
            // A backslash with nothing after it: there is no sequence to
            // resolve, so keep the backslash rather than losing it.
            None => result.push('\\'),
        }
    }

    result
}

/// Parse a string.
Expand Down Expand Up @@ -2751,6 +2783,57 @@ mod tests {
Expression::String(Span::dummy(), vec![StringPart::Fixed("hello world".into())]),
);

parse_as_expression(
&[r#""hello \"world\"!""#],
Expression::String(
Span::dummy(),
vec![StringPart::Fixed("hello \"world\"!".into())],
),
);

parse_as_expression(
&[r#""newline: \n, return: \r, tab: \t, quote: \", null: \0, backslash: \\, open_brace: \{, close brace: \}.""#],
Expression::String(
Span::dummy(),
vec![StringPart::Fixed("newline: \n, return: \r, tab: \t, quote: \", null: \0, backslash: \\, open_brace: {, close brace: }.".into())],
),
);

parse_as_expression(
&[r#""\"""#],
Expression::String(Span::dummy(), vec![StringPart::Fixed("\"".into())]),
);

parse_as_expression(
&[r#""\\""#],
Expression::String(Span::dummy(), vec![StringPart::Fixed("\\".into())]),
);

parse_as_expression(
&[r#""\\\"""#],
Expression::String(Span::dummy(), vec![StringPart::Fixed("\\\"".into())]),
);

parse_as_expression(
&[r#""\"\\""#],
Expression::String(Span::dummy(), vec![StringPart::Fixed("\"\\".into())]),
);

parse_as_expression(
&[r#""\\\n""#],
Expression::String(Span::dummy(), vec![StringPart::Fixed("\\\n".into())]),
);

parse_as_expression(
&[r#""\n\\""#],
Expression::String(Span::dummy(), vec![StringPart::Fixed("\n\\".into())]),
);

parse_as_expression(
&[r#""\\n""#],
Expression::String(Span::dummy(), vec![StringPart::Fixed("\\n".into())]),
);

parse_as_expression(
&["\"pi = {pi}\""],
Expression::String(
Expand Down
16 changes: 15 additions & 1 deletion numbat/src/pretty_print.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,22 @@ impl PrettyPrint for bool {
}
}

/// Escapes `s` so that it can be written back as a Numbat string literal.
///
/// Backslash, newline, carriage return, tab, double quote, NUL and both curly
/// braces are replaced by their two-character backslash sequence. All other
/// characters are copied unchanged. The string is built in a single pass
/// instead of one full scan and allocation per replaced character.
pub fn escape_numbat_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());

    for c in s.chars() {
        // Keep this in sync with the parser's 'strip_and_escape',
        // which performs the reverse replacement.
        match c {
            '\\' => escaped.push_str("\\\\"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            '"' => escaped.push_str("\\\""),
            '\0' => escaped.push_str("\\0"),
            '{' => escaped.push_str("\\{"),
            '}' => escaped.push_str("\\}"),
            other => escaped.push(other),
        }
    }

    escaped
}

impl PrettyPrint for String {
fn pretty_print(&self) -> Markup {
crate::markup::operator("\"") + crate::markup::string(self) + crate::markup::operator("\"")
crate::markup::operator("\"")
+ crate::markup::string(escape_numbat_string(self))
+ crate::markup::operator("\"")
}
}
54 changes: 48 additions & 6 deletions numbat/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,26 @@ impl Tokenizer {
Ok(())
}

/// Advances over the characters of a string part.
///
/// Stops, without consuming it, in front of the first `"` or `{` that is
/// not preceded by an unescaped backslash, or at the end of the input.
/// A backslash escapes exactly the one character that follows it, so an
/// escaped backslash does not escape the character after it.
fn consume_string(&mut self) -> Result<()> {
    // `true` while the previously consumed character was an unescaped backslash
    let mut after_backslash = false;

    while let Some(next) = self.peek() {
        if after_backslash {
            // Whatever follows the backslash is taken verbatim.
            after_backslash = false;
        } else if next == '\\' {
            after_backslash = true;
        } else if next == '"' || next == '{' {
            break;
        }

        self.advance();
    }

    Ok(())
}

fn scan_single_token(&mut self) -> Result<Option<Token>> {
static KEYWORDS: OnceLock<HashMap<&'static str, TokenKind>> = OnceLock::new();
let keywords = KEYWORDS.get_or_init(|| {
Expand Down Expand Up @@ -545,9 +565,7 @@ impl Tokenizer {
InterpolationState::Outside => {
self.string_start = self.token_start;

while self.peek().map(|c| c != '"' && c != '{').unwrap_or(false) {
self.advance();
}
self.consume_string()?;

if self.match_char('"') {
TokenKind::StringFixed
Expand Down Expand Up @@ -606,9 +624,7 @@ impl Tokenizer {
}
}
'}' if self.interpolation_state.is_inside() => {
while self.peek().map(|c| c != '"' && c != '{').unwrap_or(false) {
self.advance();
}
self.consume_string()?;

if self.match_char('"') {
self.interpolation_state = InterpolationState::Outside;
Expand Down Expand Up @@ -1100,6 +1116,32 @@ fn test_tokenize_string() {
tokenize("\"foo = {foo, bar = {bar}\"", 0).unwrap_err().kind,
TokenizerErrorKind::UnexpectedCurlyInInterpolation
);

insta::assert_snapshot!(
tokenize_reduced_pretty(r#""start \"inner\" end""#).unwrap(),
@r###"
"\"start \\\"inner\\\" end\"", StringFixed, (1, 1)
"", Eof, (1, 22)
"###
);

insta::assert_snapshot!(
tokenize_reduced_pretty(r#""start \{inner\} end""#).unwrap(),
@r###"
"\"start \\{inner\\} end\"", StringFixed, (1, 1)
"", Eof, (1, 22)
"###
);

insta::assert_snapshot!(
tokenize_reduced_pretty(r#""start {1} \"inner\" end""#).unwrap(),
@r###"
"\"start {", StringInterpolationStart, (1, 1)
"1", Number, (1, 9)
"} \\\"inner\\\" end\"", StringInterpolationEnd, (1, 10)
"", Eof, (1, 26)
"###
);
}

#[test]
Expand Down
5 changes: 4 additions & 1 deletion numbat/src/typed_ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::arithmetic::Exponent;
pub use crate::ast::{BinaryOperator, TypeExpression, UnaryOperator};
use crate::ast::{ProcedureKind, TypeAnnotation, TypeParameterBound};
use crate::dimension::DimensionRegistry;
use crate::pretty_print::escape_numbat_string;
use crate::traversal::{ForAllExpressions, ForAllTypeSchemes};
use crate::type_variable::TypeVariable;
use crate::typechecker::type_scheme::TypeScheme;
Expand Down Expand Up @@ -460,7 +461,7 @@ pub enum StringPart {
impl PrettyPrint for StringPart {
fn pretty_print(&self) -> Markup {
match self {
StringPart::Fixed(s) => m::string(s),
StringPart::Fixed(s) => m::string(escape_numbat_string(s)),
StringPart::Interpolation {
span: _,
expr,
Expand Down Expand Up @@ -1339,6 +1340,8 @@ mod tests {
roundtrip_check("(-3)!");
roundtrip_check("megapoints");
roundtrip_check("Foo { foo: 1 meter, bar: 1 second }");
roundtrip_check("\"foo\"");
roundtrip_check("\"newline: \\n\"");
}

#[test]
Expand Down

0 comments on commit b00472a

Please sign in to comment.