Skip to content

Commit

Permalink
feat: Implement raw string literals (#3556)
Browse files Browse the repository at this point in the history
* feat: Implement raw string literals

* Add documentation for raw strings

* Add tests for raw strings

* Changes for raw strings

* Fixes for raw strings tests

* Add test for raw strings

* Add test for nested raw strings

* Fix logic bug for raw strings

* Update docs/docs/language_concepts/data_types/03_strings.md

---------

Co-authored-by: kevaundray <kevtheappdev@gmail.com>
Co-authored-by: jfecher <jfecher11@gmail.com>
  • Loading branch information
3 people authored Nov 28, 2023
1 parent f3eac52 commit 87a302f
Show file tree
Hide file tree
Showing 11 changed files with 219 additions and 4 deletions.
10 changes: 10 additions & 0 deletions compiler/noirc_frontend/src/ast/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ impl ExpressionKind {
ExpressionKind::Literal(Literal::Str(contents))
}

/// Builds the expression kind for a raw string literal.
///
/// `contents` is the text between the quotes and `hashes` is the number of
/// `#` characters used on each side of the literal.
pub fn raw_string(contents: String, hashes: u8) -> ExpressionKind {
    let literal = Literal::RawStr(contents, hashes);
    ExpressionKind::Literal(literal)
}

/// Builds the expression kind for a format string literal whose text is `contents`.
pub fn format_string(contents: String) -> ExpressionKind {
    let literal = Literal::FmtStr(contents);
    ExpressionKind::Literal(literal)
}
Expand Down Expand Up @@ -312,6 +316,7 @@ pub enum Literal {
Bool(bool),
Integer(FieldElement),
Str(String),
RawStr(String, u8),
FmtStr(String),
Unit,
}
Expand Down Expand Up @@ -507,6 +512,11 @@ impl Display for Literal {
Literal::Bool(boolean) => write!(f, "{}", if *boolean { "true" } else { "false" }),
Literal::Integer(integer) => write!(f, "{}", integer.to_u128()),
Literal::Str(string) => write!(f, "\"{string}\""),
Literal::RawStr(string, num_hashes) => {
    // Rebuild the run of `#` that delimits the literal on both sides.
    // `str::repeat` replaces the hand-rolled once/cycle/take/collect chain and
    // `usize::from` is a lossless widening of the `u8` count.
    let hashes = "#".repeat(usize::from(*num_hashes));
    write!(f, "r{hashes}\"{string}\"{hashes}")
}
Literal::FmtStr(string) => write!(f, "f\"{string}\""),
Literal::Unit => write!(f, "()"),
}
Expand Down
1 change: 1 addition & 0 deletions compiler/noirc_frontend/src/hir/resolution/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1203,6 +1203,7 @@ impl<'a> Resolver<'a> {
}
Literal::Integer(integer) => HirLiteral::Integer(integer),
Literal::Str(str) => HirLiteral::Str(str),
Literal::RawStr(str, _) => HirLiteral::Str(str),
Literal::FmtStr(str) => self.resolve_fmt_str_literal(str, expr.span),
Literal::Unit => HirLiteral::Unit,
}),
Expand Down
73 changes: 73 additions & 0 deletions compiler/noirc_frontend/src/lexer/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ impl<'a> Lexer<'a> {
Some(']') => self.single_char_token(Token::RightBracket),
Some('"') => self.eat_string_literal(),
Some('f') => self.eat_format_string_or_alpha_numeric(),
Some('r') => self.eat_raw_string_or_alpha_numeric(),
Some('#') => self.eat_attribute(),
Some(ch) if ch.is_ascii_alphanumeric() || ch == '_' => self.eat_alpha_numeric(ch),
Some(ch) => {
Expand Down Expand Up @@ -400,6 +401,78 @@ impl<'a> Lexer<'a> {
}
}

/// Lexes a raw string literal: an optional run of `#`, an opening `"`, the
/// literal contents, and a closing `"` followed by the same number of `#`.
///
/// On success the result is a `Token::RawStr` holding the contents and the
/// hash count, spanning from the position at entry to the position after the
/// closing delimiter.
///
/// An `UnexpectedCharacter` error is returned when:
/// - more than 255 hashes open the literal (the count is stored in a `u8`),
/// - the opening hashes are not followed by `"`, or
/// - no closing `"` is found (presumably because the input ended).
fn eat_raw_string(&mut self) -> SpannedTokenResult {
    let start = self.position;

    // Opening delimiter: count the leading `#` characters.
    let opening_hashes = self.eat_while(None, |ch| ch == '#').chars().count();
    if opening_hashes > 255 {
        // Too many hashes (unlikely in practice); Rust also disallows 256+.
        return Err(LexerErrorKind::UnexpectedCharacter {
            span: Span::single_char(start + 255),
            found: Some('#'),
            expected: "\"".to_owned(),
        });
    }

    if !self.peek_char_is('"') {
        // Record the position before consuming the offending character.
        let span = Span::single_char(self.position);
        let found = self.next_char();
        return Err(LexerErrorKind::UnexpectedCharacter {
            span,
            found,
            expected: "\"".to_owned(),
        });
    }
    self.next_char();

    let mut contents = String::new();
    loop {
        // Everything up to the next `"` is taken verbatim.
        contents.push_str(&self.eat_while(None, |ch| ch != '"'));
        if !self.peek_char_is('"') {
            let span = Span::single_char(self.position);
            let found = self.next_char();
            return Err(LexerErrorKind::UnexpectedCharacter {
                span,
                found,
                expected: "\"".to_owned(),
            });
        }
        self.next_char();

        // Consume at most `opening_hashes` hashes after the quote; any further
        // `#` is left for the next token.
        let mut closing_hashes = 0;
        while closing_hashes < opening_hashes && self.peek_char() == Some('#') {
            self.next_char();
            closing_hashes += 1;
        }
        if closing_hashes == opening_hashes {
            break;
        }

        // Too few hashes: the quote and the hashes just consumed are part of
        // the contents, so put them back and keep scanning.
        contents.push('"');
        contents.extend(std::iter::repeat('#').take(closing_hashes));
    }

    // The cast cannot truncate: counts above 255 were rejected earlier.
    let token = Token::RawStr(contents, opening_hashes as u8);
    Ok(token.into_span(start, self.position))
}

/// Decides whether a token starting with `r` is a raw string literal or an
/// ordinary alphanumeric token, and lexes it accordingly.
///
/// The raw-string path is taken when the next character is `"`, or when the
/// next two characters are `##` or `#"`. Everything else is handed to
/// `eat_alpha_numeric` with `'r'` as the initial character.
fn eat_raw_string_or_alpha_numeric(&mut self) -> SpannedTokenResult {
    // Known limitation: the choice is made from at most two characters of
    // lookahead, so once they look like a raw string we are committed to that
    // path. Not clean, but fine in practice and it reuses the existing `Lexer`
    // helpers.
    let lookahead = (self.peek_char(), self.peek2_char());
    let starts_raw_string =
        matches!(lookahead, (Some('#'), Some('#' | '"')) | (Some('"'), _));

    if starts_raw_string {
        self.eat_raw_string()
    } else {
        self.eat_alpha_numeric('r')
    }
}

fn parse_comment(&mut self, start: u32) -> SpannedTokenResult {
let doc_style = match self.peek_char() {
Some('!') => {
Expand Down
11 changes: 10 additions & 1 deletion compiler/noirc_frontend/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ pub enum Token {
Int(FieldElement),
Bool(bool),
Str(String),
RawStr(String, u8),
FmtStr(String),
Keyword(Keyword),
IntType(IntType),
Expand Down Expand Up @@ -157,6 +158,10 @@ impl fmt::Display for Token {
Token::Bool(b) => write!(f, "{b}"),
Token::Str(ref b) => write!(f, "{b}"),
Token::FmtStr(ref b) => write!(f, "f{b}"),
Token::RawStr(ref b, hashes) => {
    // Rebuild the `#` delimiter run used on both sides of the literal.
    // `str::repeat` replaces the hand-rolled once/cycle/take/collect chain and
    // `usize::from` is a lossless widening of the `u8` count.
    let h = "#".repeat(usize::from(hashes));
    write!(f, "r{h}\"{b}\"{h}")
}
Token::Keyword(k) => write!(f, "{k}"),
Token::Attribute(ref a) => write!(f, "{a}"),
Token::LineComment(ref s, _style) => write!(f, "//{s}"),
Expand Down Expand Up @@ -227,7 +232,11 @@ impl Token {
pub fn kind(&self) -> TokenKind {
match *self {
Token::Ident(_) => TokenKind::Ident,
Token::Int(_) | Token::Bool(_) | Token::Str(_) | Token::FmtStr(_) => TokenKind::Literal,
Token::Int(_)
| Token::Bool(_)
| Token::Str(_)
| Token::RawStr(..)
| Token::FmtStr(_) => TokenKind::Literal,
Token::Keyword(_) => TokenKind::Keyword,
Token::Attribute(_) => TokenKind::Attribute,
ref tok => TokenKind::Token(tok.clone()),
Expand Down
76 changes: 76 additions & 0 deletions compiler/noirc_frontend/src/parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1657,6 +1657,7 @@ fn literal() -> impl NoirParser<ExpressionKind> {
Token::Int(x) => ExpressionKind::integer(x),
Token::Bool(b) => ExpressionKind::boolean(b),
Token::Str(s) => ExpressionKind::string(s),
Token::RawStr(s, hashes) => ExpressionKind::raw_string(s, hashes),
Token::FmtStr(s) => ExpressionKind::format_string(s),
unexpected => unreachable!("Non-literal {} parsed as a literal", unexpected),
})
Expand Down Expand Up @@ -2549,4 +2550,79 @@ mod test {

check_cases_with_errors(&cases[..], block(fresh_statement()));
}

// Runs raw string snippets through the `expression()` parser.
// Each `Case` gives the source text, the expected rendering of the parsed
// result, and the expected number of errors.
// NOTE(review): `"(none)"` presumably means nothing was parsed and `"plain::x"`
// presumably is how a bare identifier `x` is rendered - confirm against
// `check_cases_with_errors`.
#[test]
fn parse_raw_string_expr() {
let cases = vec![
// well-formed, with zero and one hash
Case { source: r##" r"foo" "##, expect: r##"r"foo""##, errors: 0 },
Case { source: r##" r#"foo"# "##, expect: r##"r#"foo"#"##, errors: 0 },
// backslash: kept verbatim, never treated as an escape
Case { source: r##" r"\\" "##, expect: r##"r"\\""##, errors: 0 },
Case { source: r##" r#"\"# "##, expect: r##"r#"\"#"##, errors: 0 },
Case { source: r##" r#"\\"# "##, expect: r##"r#"\\"#"##, errors: 0 },
Case { source: r##" r#"\\\"# "##, expect: r##"r#"\\\"#"##, errors: 0 },
// escape sequence: `\t`, `\n` and friends stay as written
Case {
source: r##" r#"\t\n\\t\\n\\\t\\\n\\\\"# "##,
expect: r##"r#"\t\n\\t\\n\\\t\\\n\\\\"#"##,
errors: 0,
},
Case { source: r##" r#"\\\\\\\\"# "##, expect: r##"r#"\\\\\\\\"#"##, errors: 0 },
// mismatch - errors: one closing hash too many, then one too few
Case { source: r###" r#"foo"## "###, expect: r###"r#"foo"#"###, errors: 1 },
Case { source: r###" r##"foo"# "###, expect: "(none)", errors: 2 },
// mismatch: short: same as above, but with zero hashes on one side
Case { source: r###" r"foo"# "###, expect: r###"r"foo""###, errors: 1 },
Case { source: r###" r#"foo" "###, expect: "(none)", errors: 2 },
// empty string
Case { source: r####"r"""####, expect: r####"r"""####, errors: 0 },
Case { source: r####"r###""###"####, expect: r####"r###""###"####, errors: 0 },
// miscellaneous: backslash-escaped quotes do not open a raw string
Case { source: r###" r#\"foo\"# "###, expect: "plain::r", errors: 2 },
Case { source: r###" r\"foo\" "###, expect: "plain::r", errors: 1 },
// NOTE(review): the next case duplicates the `r##"foo"#` mismatch case above
Case { source: r###" r##"foo"# "###, expect: "(none)", errors: 2 },
// missing 'r' letter
Case { source: r###" ##"foo"# "###, expect: r#""foo""#, errors: 2 },
Case { source: r###" #"foo" "###, expect: "plain::foo", errors: 2 },
// whitespace: between `r`, the hashes and the quotes
Case { source: r###" r #"foo"# "###, expect: "plain::r", errors: 2 },
Case { source: r###" r# "foo"# "###, expect: "plain::r", errors: 3 },
Case { source: r###" r#"foo" # "###, expect: "(none)", errors: 2 },
// after identifier: only a lone `r` starts a raw string
Case { source: r###" bar#"foo"# "###, expect: "plain::bar", errors: 2 },
// nested: shorter delimiters inside the literal do not terminate it
Case {
source: r###"r##"foo r#"bar"# r"baz" ### bye"##"###,
expect: r###"r##"foo r#"bar"# r"baz" ### bye"##"###,
errors: 0,
},
];

check_cases_with_errors(&cases[..], expression());
}

// Runs raw string snippets through the `literal()` parser.
// The cases are the same as the first ten of `parse_raw_string_expr`; here
// they are checked against the literal parser instead of the expression parser.
// Each `Case` gives the source text, the expected rendering of the parsed
// result, and the expected number of errors.
#[test]
fn parse_raw_string_lit() {
let lit_cases = vec![
// well-formed, with zero and one hash
Case { source: r##" r"foo" "##, expect: r##"r"foo""##, errors: 0 },
Case { source: r##" r#"foo"# "##, expect: r##"r#"foo"#"##, errors: 0 },
// backslash: kept verbatim, never treated as an escape
Case { source: r##" r"\\" "##, expect: r##"r"\\""##, errors: 0 },
Case { source: r##" r#"\"# "##, expect: r##"r#"\"#"##, errors: 0 },
Case { source: r##" r#"\\"# "##, expect: r##"r#"\\"#"##, errors: 0 },
Case { source: r##" r#"\\\"# "##, expect: r##"r#"\\\"#"##, errors: 0 },
// escape sequence: `\t`, `\n` and friends stay as written
Case {
source: r##" r#"\t\n\\t\\n\\\t\\\n\\\\"# "##,
expect: r##"r#"\t\n\\t\\n\\\t\\\n\\\\"#"##,
errors: 0,
},
Case { source: r##" r#"\\\\\\\\"# "##, expect: r##"r#"\\\\\\\\"#"##, errors: 0 },
// mismatch - errors: one closing hash too many, then one too few
Case { source: r###" r#"foo"## "###, expect: r###"r#"foo"#"###, errors: 1 },
Case { source: r###" r##"foo"# "###, expect: "(none)", errors: 2 },
];

check_cases_with_errors(&lit_cases[..], literal());
}
}
16 changes: 16 additions & 0 deletions docs/docs/language_concepts/data_types/03_strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,19 @@ Example:
let s = "Hello \"world"; // prints "Hello "world"
let s = "hey \tyou"; // prints "hey you"
```

## Raw strings

A raw string begins with the letter `r` and is optionally delimited by a number of hashes `#`.

Escape characters are *not* processed within raw strings. All contents are interpreted literally.

Example:

```rust
let s = r"Hello world";
let s = r#"Simon says "hello world""#;

// Any number of hashes (from 0 up to 255) may be used, as long as the string terminates with the same number of hashes
let s = r#####"One "#, Two "##, Three "###, Four "####, Five will end the string."#####;
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[package]
name = "raw_string_huge"
type = "bin"
authors = [""]
[dependencies]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
fn main() {
// Expected to fail to compile: the raw string below uses 256+ hashes, more than the lexer accepts (at most 255)
let _a = r##############################################################################################################################################################################################################################################################################"hello"##############################################################################################################################################################################################################################################################################;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[package]
name = "raw_string"
type = "bin"
authors = [""]

[dependencies]
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Checks that raw strings with different hash counts, and an escaped regular
// string, all compare equal when they spell the same text.

// Raw string with five hashes on each side.
global D = r#####"Hello "world""#####;

fn main() {
// Regular string: the inner quotes have to be escaped.
let a = "Hello \"world\"";
// Raw strings: the inner quotes are written as-is, with one and two hashes.
let b = r#"Hello "world""#;
let c = r##"Hello "world""##;
assert(a == b);
assert(b == c);
assert(c == D);
// A raw string with one hash and one with no hashes hold the same text.
let x = r#"Hello World"#;
let y = r"Hello World";
assert(x == y);
}
8 changes: 5 additions & 3 deletions tooling/nargo_fmt/src/rewrite/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,11 @@ pub(crate) fn rewrite(
NewlineMode::Normal,
),
ExpressionKind::Literal(literal) => match literal {
Literal::Integer(_) | Literal::Bool(_) | Literal::Str(_) | Literal::FmtStr(_) => {
visitor.slice(span).to_string()
}
Literal::Integer(_)
| Literal::Bool(_)
| Literal::Str(_)
| Literal::RawStr(..)
| Literal::FmtStr(_) => visitor.slice(span).to_string(),
Literal::Array(ArrayLiteral::Repeated { repeated_element, length }) => {
let repeated = rewrite_sub_expr(visitor, shape, *repeated_element);
let length = rewrite_sub_expr(visitor, shape, *length);
Expand Down

0 comments on commit 87a302f

Please sign in to comment.