noir-lang · jfecher · Nov 28, 2023 · Nov 23, 2023 · Nov 24, 2023 · Nov 24, 2023
diff --git a/compiler/noirc_frontend/src/ast/expression.rs b/compiler/noirc_frontend/src/ast/expression.rs
@@ -76,6 +76,10 @@ impl ExpressionKind {
         ExpressionKind::Literal(Literal::Str(contents))
     }
 
+    /// Builds a raw string literal expression from its verbatim `contents` and the
+    /// number of `#` characters (`hashes`) recorded for its delimiters.
+    pub fn raw_string(contents: String, hashes: u8) -> ExpressionKind {
+        let literal = Literal::RawStr(contents, hashes);
+        ExpressionKind::Literal(literal)
+    }
+
     pub fn format_string(contents: String) -> ExpressionKind {
         ExpressionKind::Literal(Literal::FmtStr(contents))
     }
@@ -312,6 +316,7 @@ pub enum Literal {
     Bool(bool),
     Integer(FieldElement),
     Str(String),
+    RawStr(String, u8),
     FmtStr(String),
     Unit,
 }
@@ -507,6 +512,11 @@ impl Display for Literal {
             Literal::Bool(boolean) => write!(f, "{}", if *boolean { "true" } else { "false" }),
             Literal::Integer(integer) => write!(f, "{}", integer.to_u128()),
             Literal::Str(string) => write!(f, "\"{string}\""),
+            Literal::RawStr(string, num_hashes) => {
+                let hashes: String =
+                    std::iter::once('#').cycle().take(*num_hashes as usize).collect();
+                write!(f, "r{hashes}\"{string}\"{hashes}")
+            }
             Literal::FmtStr(string) => write!(f, "f\"{string}\""),
             Literal::Unit => write!(f, "()"),
         }

diff --git a/compiler/noirc_frontend/src/hir/resolution/resolver.rs b/compiler/noirc_frontend/src/hir/resolution/resolver.rs
@@ -95,7 +95,7 @@

    /// True if the current module is a contract.
    /// This is usually determined by self.path_resolver.module_id(), but it can
    /// be overridden for impls. Impls are an odd case since the methods within resolve
    /// as if they're in the parent module, but should be placed in a child module.
    /// Since they should be within a child module, in_contract is manually set to false
    /// for these so we can still resolve them in the parent module without them being in a contract.
@@ -1203,6 +1203,7 @@
                 }
                 Literal::Integer(integer) => HirLiteral::Integer(integer),
                 Literal::Str(str) => HirLiteral::Str(str),
+                Literal::RawStr(str, _) => HirLiteral::Str(str),
                 Literal::FmtStr(str) => self.resolve_fmt_str_literal(str, expr.span),
                 Literal::Unit => HirLiteral::Unit,
             }),

diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs
@@ -16,7 +16,7 @@
    position: Position,
    done: bool,
    skip_comments: bool,
    skip_whitespaces: bool,
 }

 pub type SpannedTokenResult = Result<SpannedToken, LexerErrorKind>;
@@ -43,7 +43,7 @@
            position: 0,
            done: false,
            skip_comments: true,
            skip_whitespaces: true,
        }
    }

@@ -52,8 +52,8 @@
        self
    }

    pub fn skip_whitespaces(mut self, flag: bool) -> Self {
        self.skip_whitespaces = flag;
        self
    }

@@ -96,7 +96,7 @@
        match self.next_char() {
            Some(x) if x.is_whitespace() => {
                let spanned = self.eat_whitespace(x);
                if self.skip_whitespaces {
                    self.next_token()
                } else {
                    Ok(spanned)
@@ -126,6 +126,7 @@
             Some(']') => self.single_char_token(Token::RightBracket),
             Some('"') => self.eat_string_literal(),
             Some('f') => self.eat_format_string_or_alpha_numeric(),
+            Some('r') => self.eat_raw_string_or_alpha_numeric(),
             Some('#') => self.eat_attribute(),
             Some(ch) if ch.is_ascii_alphanumeric() || ch == '_' => self.eat_alpha_numeric(ch),
             Some(ch) => {
@@ -400,6 +401,80 @@
         }
     }
 
+    /// Lexes a raw string literal such as `r"..."` or `r#"..."#`.
+    ///
+    /// The leading `r` is expected to have been consumed already. The literal may open
+    /// with up to 255 `#` characters followed by `"`, and is closed by a `"` followed by
+    /// the same number of `#`. Everything in between is copied verbatim: a `"` followed
+    /// by fewer hashes than the opener is ordinary content.
+    ///
+    /// Returns a `Token::RawStr` holding the contents and the hash count, or
+    /// `LexerErrorKind::UnexpectedCharacter` when more than 255 hashes open the literal,
+    /// when the hashes are not followed by `"`, or when no closing `"` is found.
+    fn eat_raw_string(&mut self) -> SpannedTokenResult {
+        let start = self.position;
+
+        let beginning_hashes = self.eat_while(None, |ch| ch == '#');
+        let beginning_hashes_count = beginning_hashes.chars().count();
+        if beginning_hashes_count > 255 {
+            // Too many hashes (unlikely in practice); Rust disallows 256+ hashes as well.
+            // The hashes have already been consumed at this point, so the span is
+            // anchored to `start`; offsetting from the current position would point
+            // well past the hash run.
+            return Err(LexerErrorKind::UnexpectedCharacter {
+                span: Span::single_char(start + 255),
+                found: Some('#'),
+                expected: "\"".to_owned(),
+            });
+        }
+
+        // The opening quote must directly follow the hashes.
+        if !self.peek_char_is('"') {
+            return Err(LexerErrorKind::UnexpectedCharacter {
+                span: Span::single_char(self.position),
+                found: self.next_char(),
+                expected: "\"".to_owned(),
+            });
+        }
+        self.next_char();
+
+        let mut str_literal = String::new();
+        loop {
+            // Copy everything up to the next quote as-is.
+            let chars = self.eat_while(None, |ch| ch != '"');
+            str_literal.push_str(&chars);
+            if !self.peek_char_is('"') {
+                return Err(LexerErrorKind::UnexpectedCharacter {
+                    span: Span::single_char(self.position),
+                    found: self.next_char(),
+                    expected: "\"".to_owned(),
+                });
+            }
+            self.next_char();
+
+            // Consume at most as many hashes as opened the literal; any further `#`
+            // is left for the next token.
+            let mut ending_hashes_count = 0;
+            while let Some('#') = self.peek_char() {
+                if ending_hashes_count == beginning_hashes_count {
+                    break;
+                }
+                self.next_char();
+                ending_hashes_count += 1;
+            }
+            if ending_hashes_count == beginning_hashes_count {
+                break;
+            }
+
+            // Not the closing delimiter: the quote and the hashes just consumed are
+            // part of the string contents.
+            str_literal.push('"');
+            str_literal.extend(std::iter::repeat('#').take(ending_hashes_count));
+        }
+
+        // Lossless cast: the guard above ensures the count is at most 255.
+        let str_literal_token = Token::RawStr(str_literal, beginning_hashes_count as u8);
+
+        let end = self.position;
+        Ok(str_literal_token.into_span(start, end))
+    }
+
+    /// Decides, after an `r` has been read, whether a raw string or an ordinary
+    /// identifier/keyword follows, and lexes accordingly.
+    ///
+    /// Caveat: the decision is made from the next one or two characters only (`"`,
+    /// `##` or `#"`), after which we are committed to lexing a raw string. This is
+    /// unclean, but likely ok in all practical cases, and works with the existing
+    /// `Lexer` methods.
+    fn eat_raw_string_or_alpha_numeric(&mut self) -> SpannedTokenResult {
+        let second = self.peek2_char();
+        let starts_raw_string = if self.peek_char_is('#') {
+            // `r#` only opens a raw string when another `#` or the quote follows.
+            matches!(second, Some('#') | Some('"'))
+        } else {
+            self.peek_char_is('"')
+        };
+
+        if starts_raw_string {
+            return self.eat_raw_string();
+        }
+        self.eat_alpha_numeric('r')
+    }
+
     fn parse_comment(&mut self, start: u32) -> SpannedTokenResult {
         let doc_style = match self.peek_char() {
             Some('!') => {

diff --git a/compiler/noirc_frontend/src/lexer/token.rs b/compiler/noirc_frontend/src/lexer/token.rs
@@ -15,6 +15,7 @@ pub enum Token {
     Int(FieldElement),
     Bool(bool),
     Str(String),
+    RawStr(String, u8),
     FmtStr(String),
     Keyword(Keyword),
     IntType(IntType),
@@ -157,6 +158,10 @@ impl fmt::Display for Token {
             Token::Bool(b) => write!(f, "{b}"),
             Token::Str(ref b) => write!(f, "{b}"),
             Token::FmtStr(ref b) => write!(f, "f{b}"),
+            Token::RawStr(ref b, hashes) => {
+                let h: String = std::iter::once('#').cycle().take(hashes as usize).collect();
+                write!(f, "r{h}\"{b}\"{h}")
+            }
             Token::Keyword(k) => write!(f, "{k}"),
             Token::Attribute(ref a) => write!(f, "{a}"),
             Token::LineComment(ref s, _style) => write!(f, "//{s}"),
@@ -227,7 +232,11 @@ impl Token {
     pub fn kind(&self) -> TokenKind {
         match *self {
             Token::Ident(_) => TokenKind::Ident,
-            Token::Int(_) | Token::Bool(_) | Token::Str(_) | Token::FmtStr(_) => TokenKind::Literal,
+            Token::Int(_)
+            | Token::Bool(_)
+            | Token::Str(_)
+            | Token::RawStr(..)
+            | Token::FmtStr(_) => TokenKind::Literal,
             Token::Keyword(_) => TokenKind::Keyword,
             Token::Attribute(_) => TokenKind::Attribute,
             ref tok => TokenKind::Token(tok.clone()),

diff --git a/compiler/noirc_frontend/src/parser/parser.rs b/compiler/noirc_frontend/src/parser/parser.rs
@@ -1657,6 +1657,7 @@ fn literal() -> impl NoirParser<ExpressionKind> {
         Token::Int(x) => ExpressionKind::integer(x),
         Token::Bool(b) => ExpressionKind::boolean(b),
         Token::Str(s) => ExpressionKind::string(s),
+        Token::RawStr(s, hashes) => ExpressionKind::raw_string(s, hashes),
         Token::FmtStr(s) => ExpressionKind::format_string(s),
         unexpected => unreachable!("Non-literal {} parsed as a literal", unexpected),
     })
@@ -2549,4 +2550,23 @@ mod test {
 
         check_cases_with_errors(&cases[..], block(fresh_statement()));
     }
+
+    /// Raw string literals: balanced delimiters parse cleanly, backslashes are kept
+    /// as written, and mismatched closing hashes are reported as errors.
+    #[test]
+    fn parse_raw_string() {
+        let scenarios = [
+            // balanced delimiters, without and with hashes
+            Case { source: r##" r"foo" "##, expect: r##"r"foo""##, errors: 0 },
+            Case { source: r##" r#"foo"# "##, expect: r##"r#"foo"#"##, errors: 0 },
+            // backslash
+            Case { source: r##" r"\\" "##, expect: r##"r"\\""##, errors: 0 },
+            Case { source: r##" r#"\"# "##, expect: r##"r#"\"#"##, errors: 0 },
+            Case { source: r##" r#"\\"# "##, expect: r##"r#"\\"#"##, errors: 0 },
+            Case { source: r##" r#"\\\"# "##, expect: r##"r#"\\\"#"##, errors: 0 },
+            // mismatch, one closing hash too many: the literal is still produced, with one error
+            Case { source: r###" r#"foo"## "###, expect: r###"r#"foo"#"###, errors: 1 },
+            // mismatch, one closing hash too few: expected output is `(none)`, with two errors
+            Case { source: r###" r##"foo"# "###, expect: "(none)", errors: 2 },
+        ];
+
+        // The same expectations must hold for a full expression and for a bare literal.
+        check_cases_with_errors(&scenarios[..], expression());
+        check_cases_with_errors(&scenarios[..], literal());
+    }
 }
diff --git a/docs/docs/language_concepts/data_types/03_strings.md b/docs/docs/language_concepts/data_types/03_strings.md
@@ -16,13 +16,13 @@
 The string type is a fixed length value defined with `str<N>`.

 You can use strings in `assert()` functions or print them with
 `std::println()`. See more about [Logging](../../standard_library/logging).

 ```rust
 use dep::std;

 fn main(message : pub str<11>, hex_as_string : str<4>) {
    std::println(message);
    assert(message == "hello world");
    assert(hex_as_string == "0x41");
 }
@@ -61,3 +61,16 @@
 let s = "Hello \"world" // prints "Hello "world"
 let s = "hey \tyou"; // prints "hey   you"
 ```
+
+## Raw strings
+
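+A raw string begins with the letter `r`, optionally followed by a number of hashes `#`, and then the quoted contents. The closing quote must be followed by the same number of hashes as the opening one (at most 255 hashes are allowed).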
+
+Escape characters are *not* processed within raw strings. All contents are interpreted literally.
+
+Example:
+
+```rust
+let s = r"Hello world";
+let s = r#"Simon says "hello world""#;
+```
diff --git a/tooling/nargo_cli/tests/compile_failure/raw_string_huge/Nargo.toml b/tooling/nargo_cli/tests/compile_failure/raw_string_huge/Nargo.toml
@@ -0,0 +1,5 @@
+[package]
+name = "raw_string_huge"
+type = "bin"
+authors = [""]
+[dependencies]
diff --git a/tooling/nargo_cli/tests/compile_failure/raw_string_huge/src/main.nr b/tooling/nargo_cli/tests/compile_failure/raw_string_huge/src/main.nr
@@ -0,0 +1,3 @@
+fn main() {
+    let _a = r##############################################################################################################################################################################################################################################################################"hello"##############################################################################################################################################################################################################################################################################;
+}
diff --git a/tooling/nargo_cli/tests/compile_success_empty/raw_string/Nargo.toml b/tooling/nargo_cli/tests/compile_success_empty/raw_string/Nargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "raw_string"
+type = "bin"
+authors = [""]
+
+[dependencies]
diff --git a/tooling/nargo_cli/tests/compile_success_empty/raw_string/src/main.nr b/tooling/nargo_cli/tests/compile_success_empty/raw_string/src/main.nr
@@ -0,0 +1,10 @@
+global D = r#####"Hello "world""#####;
+
+// Checks that an escaped regular string and raw strings delimited by different
+// numbers of hashes all denote the same value.
+fn main() {
+    // Regular string: the inner quotes have to be escaped.
+    let a = "Hello \"world\"";
+    // Raw strings: the inner quotes are written as-is, with one and two delimiting hashes.
+    let b = r#"Hello "world""#;
+    let c = r##"Hello "world""##;
+    assert(a == b);
+    assert(b == c);
+    // `D` is the global raw string declared at the top of this file (five hashes).
+    assert(c == D);
+}