Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Implement raw string literals #3556

Merged
merged 10 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions compiler/noirc_frontend/src/ast/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ impl ExpressionKind {
ExpressionKind::Literal(Literal::Str(contents))
}

/// Builds a raw string literal expression from the string's `contents` and the number of
/// `#` characters (`hashes`) used to delimit it.
pub fn raw_string(contents: String, hashes: u8) -> ExpressionKind {
    let literal = Literal::RawStr(contents, hashes);
    ExpressionKind::Literal(literal)
}

/// Builds a format string literal expression wrapping `contents` in `Literal::FmtStr`.
pub fn format_string(contents: String) -> ExpressionKind {
    let literal = Literal::FmtStr(contents);
    ExpressionKind::Literal(literal)
}
Expand Down Expand Up @@ -312,6 +316,7 @@ pub enum Literal {
Bool(bool),
Integer(FieldElement),
Str(String),
RawStr(String, u8),
FmtStr(String),
Unit,
}
Expand Down Expand Up @@ -507,6 +512,11 @@ impl Display for Literal {
Literal::Bool(boolean) => write!(f, "{}", if *boolean { "true" } else { "false" }),
Literal::Integer(integer) => write!(f, "{}", integer.to_u128()),
Literal::Str(string) => write!(f, "\"{string}\""),
Literal::RawStr(string, num_hashes) => {
let hashes: String =
std::iter::once('#').cycle().take(*num_hashes as usize).collect();
write!(f, "r{hashes}\"{string}\"{hashes}")
}
Literal::FmtStr(string) => write!(f, "f\"{string}\""),
Literal::Unit => write!(f, "()"),
}
Expand Down
1 change: 1 addition & 0 deletions compiler/noirc_frontend/src/hir/resolution/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1203,6 +1203,7 @@ impl<'a> Resolver<'a> {
}
Literal::Integer(integer) => HirLiteral::Integer(integer),
Literal::Str(str) => HirLiteral::Str(str),
Literal::RawStr(str, _) => HirLiteral::Str(str),
Literal::FmtStr(str) => self.resolve_fmt_str_literal(str, expr.span),
Literal::Unit => HirLiteral::Unit,
}),
Expand Down
73 changes: 73 additions & 0 deletions compiler/noirc_frontend/src/lexer/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ impl<'a> Lexer<'a> {
Some(']') => self.single_char_token(Token::RightBracket),
Some('"') => self.eat_string_literal(),
Some('f') => self.eat_format_string_or_alpha_numeric(),
Some('r') => self.eat_raw_string_or_alpha_numeric(),
Some('#') => self.eat_attribute(),
Some(ch) if ch.is_ascii_alphanumeric() || ch == '_' => self.eat_alpha_numeric(ch),
Some(ch) => {
Expand Down Expand Up @@ -400,6 +401,78 @@ impl<'a> Lexer<'a> {
}
}

/// Lexes a raw string literal body: zero or more `#`, an opening `"`, the contents, and a
/// closing `"` followed by the same number of `#` as the opening delimiter.
///
/// The contents are copied verbatim; no escape sequences are interpreted. A `"` followed by
/// fewer `#` than the opening delimiter does not end the literal and is kept as content.
///
/// NOTE(review): presumably the leading `r` has already been consumed by the caller —
/// confirm against the token dispatch.
///
/// # Errors
///
/// Returns `LexerErrorKind::UnexpectedCharacter` (expecting `"`) when the opening delimiter
/// has more than 255 hashes, when the hashes are not followed by `"`, or when no further `"`
/// is found while scanning the contents.
fn eat_raw_string(&mut self) -> SpannedTokenResult {
    let start = self.position;

    let opening_hashes = self.eat_while(None, |ch| ch == '#').chars().count();
    // The hash count is stored as a `u8` in the token, so anything above 255 is rejected
    // (Rust itself disallows 256+ hashes as well).
    if opening_hashes > 255 {
        let span = Span::single_char(start + 255);
        return Err(LexerErrorKind::UnexpectedCharacter {
            span,
            found: Some('#'),
            expected: "\"".to_owned(),
        });
    }

    // The hashes must be immediately followed by the opening quote.
    if !self.peek_char_is('"') {
        // Record the position before consuming the offending character.
        let span = Span::single_char(self.position);
        let found = self.next_char();
        return Err(LexerErrorKind::UnexpectedCharacter { span, found, expected: "\"".to_owned() });
    }
    self.next_char();

    let mut contents = String::new();
    loop {
        // Take everything up to the next candidate closing quote.
        let segment = self.eat_while(None, |ch| ch != '"');
        contents.push_str(&segment);

        if !self.peek_char_is('"') {
            let span = Span::single_char(self.position);
            let found = self.next_char();
            return Err(LexerErrorKind::UnexpectedCharacter {
                span,
                found,
                expected: "\"".to_owned(),
            });
        }
        self.next_char();

        // Consume at most `opening_hashes` trailing hashes; extra ones are left in the input.
        let mut closing_hashes = 0;
        while self.peek_char() == Some('#') && closing_hashes != opening_hashes {
            self.next_char();
            closing_hashes += 1;
        }

        if closing_hashes == opening_hashes {
            break;
        }

        // Not a real terminator: the quote and the hashes after it belong to the contents.
        contents.push('"');
        contents.push_str(&"#".repeat(closing_hashes));
    }

    // The cast cannot truncate: counts above 255 were rejected earlier.
    let token = Token::RawStr(contents, opening_hashes as u8);
    let end = self.position;
    Ok(token.into_span(start, end))
}

/// Decides, from up to two characters of lookahead, whether the input continues as a raw
/// string literal or as an ordinary word starting with `r`.
///
/// The raw string path is taken when the next character is `"`, or when the next two
/// characters are `##` or `#"`. Caveat: this commits to lexing a raw string after seeing
/// only one or two characters. That is not fully clean, but it should be fine in practice
/// and it works with the existing `Lexer` methods.
fn eat_raw_string_or_alpha_numeric(&mut self) -> SpannedTokenResult {
    let lookahead = (self.peek_char(), self.peek2_char());
    let starts_raw_string = matches!(
        lookahead,
        (Some('"'), _) | (Some('#'), Some('#')) | (Some('#'), Some('"'))
    );

    if starts_raw_string {
        self.eat_raw_string()
    } else {
        self.eat_alpha_numeric('r')
    }
}

fn parse_comment(&mut self, start: u32) -> SpannedTokenResult {
let doc_style = match self.peek_char() {
Some('!') => {
Expand Down
11 changes: 10 additions & 1 deletion compiler/noirc_frontend/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub enum Token {
Int(FieldElement),
Bool(bool),
Str(String),
RawStr(String, u8),
FmtStr(String),
Keyword(Keyword),
IntType(IntType),
Expand Down Expand Up @@ -157,6 +158,10 @@ impl fmt::Display for Token {
Token::Bool(b) => write!(f, "{b}"),
Token::Str(ref b) => write!(f, "{b}"),
Token::FmtStr(ref b) => write!(f, "f{b}"),
Token::RawStr(ref b, hashes) => {
let h: String = std::iter::once('#').cycle().take(hashes as usize).collect();
write!(f, "r{h}\"{b}\"{h}")
}
Token::Keyword(k) => write!(f, "{k}"),
Token::Attribute(ref a) => write!(f, "{a}"),
Token::LineComment(ref s, _style) => write!(f, "//{s}"),
Expand Down Expand Up @@ -227,7 +232,11 @@ impl Token {
pub fn kind(&self) -> TokenKind {
match *self {
Token::Ident(_) => TokenKind::Ident,
Token::Int(_) | Token::Bool(_) | Token::Str(_) | Token::FmtStr(_) => TokenKind::Literal,
Token::Int(_)
| Token::Bool(_)
| Token::Str(_)
| Token::RawStr(..)
| Token::FmtStr(_) => TokenKind::Literal,
Token::Keyword(_) => TokenKind::Keyword,
Token::Attribute(_) => TokenKind::Attribute,
ref tok => TokenKind::Token(tok.clone()),
Expand Down
76 changes: 76 additions & 0 deletions compiler/noirc_frontend/src/parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1657,6 +1657,7 @@ fn literal() -> impl NoirParser<ExpressionKind> {
Token::Int(x) => ExpressionKind::integer(x),
Token::Bool(b) => ExpressionKind::boolean(b),
Token::Str(s) => ExpressionKind::string(s),
Token::RawStr(s, hashes) => ExpressionKind::raw_string(s, hashes),
Token::FmtStr(s) => ExpressionKind::format_string(s),
unexpected => unreachable!("Non-literal {} parsed as a literal", unexpected),
})
Expand Down Expand Up @@ -2549,4 +2550,79 @@ mod test {

check_cases_with_errors(&cases[..], block(fresh_statement()));
}

// Table-driven test of raw string literals run through the `expression()` parser.
// Each `Case` lists a `source` snippet, the `expect`ed rendering and the expected number of
// `errors`; the comparison itself is done by `check_cases_with_errors`.
// NOTE(review): `"(none)"` and `"plain::…"` look like the renderings used when no expression
// or a plain identifier is produced — confirm against `check_cases_with_errors`.
#[test]
fn parse_raw_string_expr() {
let cases = vec![
Case { source: r##" r"foo" "##, expect: r##"r"foo""##, errors: 0 },
Case { source: r##" r#"foo"# "##, expect: r##"r#"foo"#"##, errors: 0 },
// backslash
Case { source: r##" r"\\" "##, expect: r##"r"\\""##, errors: 0 },
Case { source: r##" r#"\"# "##, expect: r##"r#"\"#"##, errors: 0 },
Case { source: r##" r#"\\"# "##, expect: r##"r#"\\"#"##, errors: 0 },
Case { source: r##" r#"\\\"# "##, expect: r##"r#"\\\"#"##, errors: 0 },
// escape sequence
Case {
source: r##" r#"\t\n\\t\\n\\\t\\\n\\\\"# "##,
expect: r##"r#"\t\n\\t\\n\\\t\\\n\\\\"#"##,
errors: 0,
},
Case { source: r##" r#"\\\\\\\\"# "##, expect: r##"r#"\\\\\\\\"#"##, errors: 0 },
// mismatch - errors:
Case { source: r###" r#"foo"## "###, expect: r###"r#"foo"#"###, errors: 1 },
Case { source: r###" r##"foo"# "###, expect: "(none)", errors: 2 },
// mismatch: short:
Case { source: r###" r"foo"# "###, expect: r###"r"foo""###, errors: 1 },
Case { source: r###" r#"foo" "###, expect: "(none)", errors: 2 },
// empty string
Case { source: r####"r"""####, expect: r####"r"""####, errors: 0 },
Case { source: r####"r###""###"####, expect: r####"r###""###"####, errors: 0 },
// miscellaneous
Case { source: r###" r#\"foo\"# "###, expect: "plain::r", errors: 2 },
Case { source: r###" r\"foo\" "###, expect: "plain::r", errors: 1 },
// NOTE(review): the next case repeats the `r##"foo"#` input from the mismatch group above.
Case { source: r###" r##"foo"# "###, expect: "(none)", errors: 2 },
// missing 'r' letter
Case { source: r###" ##"foo"# "###, expect: r#""foo""#, errors: 2 },
Case { source: r###" #"foo" "###, expect: "plain::foo", errors: 2 },
// whitespace
Case { source: r###" r #"foo"# "###, expect: "plain::r", errors: 2 },
Case { source: r###" r# "foo"# "###, expect: "plain::r", errors: 3 },
Case { source: r###" r#"foo" # "###, expect: "(none)", errors: 2 },
// after identifier
Case { source: r###" bar#"foo"# "###, expect: "plain::bar", errors: 2 },
// nested
Case {
source: r###"r##"foo r#"bar"# r"baz" ### bye"##"###,
expect: r###"r##"foo r#"bar"# r"baz" ### bye"##"###,
errors: 0,
},
];

check_cases_with_errors(&cases[..], expression());
}

// Table-driven test of raw string literals run through the `literal()` parser.
// The cases are the same as the leading cases of `parse_raw_string_expr` (basic, backslash,
// escape sequence and hash-count mismatch), but checked against `literal()` rather than
// `expression()`.
#[test]
fn parse_raw_string_lit() {
let lit_cases = vec![
Case { source: r##" r"foo" "##, expect: r##"r"foo""##, errors: 0 },
Case { source: r##" r#"foo"# "##, expect: r##"r#"foo"#"##, errors: 0 },
// backslash
Case { source: r##" r"\\" "##, expect: r##"r"\\""##, errors: 0 },
Case { source: r##" r#"\"# "##, expect: r##"r#"\"#"##, errors: 0 },
Case { source: r##" r#"\\"# "##, expect: r##"r#"\\"#"##, errors: 0 },
Case { source: r##" r#"\\\"# "##, expect: r##"r#"\\\"#"##, errors: 0 },
// escape sequence
Case {
source: r##" r#"\t\n\\t\\n\\\t\\\n\\\\"# "##,
expect: r##"r#"\t\n\\t\\n\\\t\\\n\\\\"#"##,
errors: 0,
},
Case { source: r##" r#"\\\\\\\\"# "##, expect: r##"r#"\\\\\\\\"#"##, errors: 0 },
// mismatch - errors:
Case { source: r###" r#"foo"## "###, expect: r###"r#"foo"#"###, errors: 1 },
Case { source: r###" r##"foo"# "###, expect: "(none)", errors: 2 },
];

check_cases_with_errors(&lit_cases[..], literal());
}
}
16 changes: 16 additions & 0 deletions docs/docs/language_concepts/data_types/03_strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,19 @@ Example:
let s = "Hello \"world" // prints "Hello "world"
let s = "hey \tyou"; // prints "hey you"
```

## Raw strings

A raw string begins with the letter `r` and is optionally delimited by a number of hashes `#`.

Escape characters are *not* processed within raw strings. All contents are interpreted literally.

Example:

```rust
let s = r"Hello world";
let s = r#"Simon says "hello world""#;
jfecher marked this conversation as resolved.
Show resolved Hide resolved

// Any number of hashes (from 0 up to 255) may be used, as long as the string terminates with the same number of hashes
let s = r#####"One "#, Two "##, Three "###, Four "####, Five will end the string."#####;
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[package]
name = "raw_string_huge"
type = "bin"
authors = [""]
[dependencies]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
fn main() {
// Fails because of too many hashes for raw string (256+ hashes)
let _a = r##############################################################################################################################################################################################################################################################################"hello"##############################################################################################################################################################################################################################################################################;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[package]
name = "raw_string"
type = "bin"
authors = [""]

[dependencies]
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
global D = r#####"Hello "world""#####;

fn main() {
let a = "Hello \"world\"";
let b = r#"Hello "world""#;
let c = r##"Hello "world""##;
assert(a == b);
jfecher marked this conversation as resolved.
Show resolved Hide resolved
assert(b == c);
assert(c == D);
let x = r#"Hello World"#;
let y = r"Hello World";
assert(x == y);
}
8 changes: 5 additions & 3 deletions tooling/nargo_fmt/src/rewrite/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,11 @@ pub(crate) fn rewrite(
format_parens(None, visitor.fork(), shape, exprs.len() == 1, exprs, span, false)
}
ExpressionKind::Literal(literal) => match literal {
Literal::Integer(_) | Literal::Bool(_) | Literal::Str(_) | Literal::FmtStr(_) => {
visitor.slice(span).to_string()
}
Literal::Integer(_)
| Literal::Bool(_)
| Literal::Str(_)
| Literal::RawStr(..)
| Literal::FmtStr(_) => visitor.slice(span).to_string(),
Literal::Array(ArrayLiteral::Repeated { repeated_element, length }) => {
let repeated = rewrite_sub_expr(visitor, shape, *repeated_element);
let length = rewrite_sub_expr(visitor, shape, *length);
Expand Down
Loading