From f8bf8a5421b3c7964f5b145ab86a7ce4503ca88d Mon Sep 17 00:00:00 2001 From: Jan Solanti Date: Sat, 21 Oct 2017 17:57:20 +0300 Subject: [PATCH] Add subchapter about (byte)string literals --- src/std/str.md | 105 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/src/std/str.md b/src/std/str.md index 54261ee602..770b42a370 100644 --- a/src/std/str.md +++ b/src/std/str.md @@ -57,5 +57,110 @@ More `str`/`String` methods can be found under the [std::string][string] modules +## Literals and escapes + +There are multiple ways to write string literals with special characters in them. +All result in a similar `&str` so it's best to use the form that is the most +convenient to write. Similarly there are multiple ways to write byte string literals, +which all result in `&[u8; N]`. + +Generally special characters are escaped with a backslash character: `\`. +This way you can add any character to your string, even unprintable ones +and ones that you don't know how to type. If you want a literal backslash, +escape it with another one: `\\` + +String or character literal delimiters occurring within a literal must be escaped: `"\""`, `'\''`. + +```rust,editable +fn main() { + // You can use escapes to write bytes by their hexadecimal values... + let byte_escape = "I'm writing \x52\x75\x73\x74!"; + println!("What are you doing\x3F (\\x3F means ?) {}", byte_escape); + + // ...or Unicode code points. + let unicode_codepoint = "\u{211D}"; + let character_name = "\"DOUBLE-STRUCK CAPITAL R\""; + + println!("Unicode character {} (U+211D) is called {}", + unicode_codepoint, character_name ); + + + let long_string = "String literals + can span multiple lines. + The linebreak and indentation here ->\ + <- can be escaped too!"; + println!("{}", long_string); +} +``` + +Sometimes there are just too many characters that need to be escaped or it's just +much more convenient to write a string out as-is. 
This is where raw string literals come into play. + +```rust, editable +fn main() { + let raw_str = r"Escapes don't work here: \x3F \u{211D}"; + println!("{}", raw_str); + + // If you need quotes in a raw string, add a pair of #s + let quotes = r#"And then I said: "There is no escape!""#; + println!("{}", quotes); + + // If you need "# in your string, just use more #s in the delimiter. + // You can use up to 255 #s. + let longer_delimiter = r###"A string with "# in it. And even "##!"###; + println!("{}", longer_delimiter); +} +``` + +Want a string that's not UTF-8? (Remember, `str` and `String` must be valid UTF-8) +Or maybe you want an array of bytes that's mostly text? Byte strings to the rescue! + +```rust, editable +use std::str; + +fn main() { + // Note that this is not actually a &str + let bytestring: &[u8; 20] = b"this is a bytestring"; + + // Byte arrays don't have Display so printing them is a bit limited + println!("A bytestring: {:?}", bytestring); + + // Bytestrings can have byte escapes... 
+ let escaped = b"\x52\x75\x73\x74 as bytes"; + // ...but no unicode escapes + // let escaped = b"\u{211D} is not allowed"; + println!("Some escaped bytes: {:?}", escaped); + + + // Raw bytestrings work just like raw strings + let raw_bytestring = br"\u{211D} is not escaped here"; + println!("{:?}", raw_bytestring); + + // Converting a byte array to str can fail + if let Ok(my_str) = str::from_utf8(raw_bytestring) { + println!("And the same as text: '{}'", my_str); + } + + let quotes = br#"You can also use "fancier" formatting, \ + like with normal raw strings"#; + + // Bytestrings don't have to be UTF-8 + let shift_jis = b"\x82\xe6\x82\xa4\x82\xb1\x82\xbb"; // "ようこそ" in SHIFT-JIS + + // But then they can't always be converted to str + match str::from_utf8(shift_jis) { + Ok(my_str) => println!("Conversion successful: '{}'", my_str), + Err(e) => println!("Conversion failed: {:?}", e), + }; +} +``` + +For conversions between character encodings check out the [encoding][encoding-crate] crate. + +A more detailed listing of the ways to write string literals and escape characters +is given in the ['Tokens' chapter][tokens] of the Rust Reference. + [str]: https://doc.rust-lang.org/std/str/ [string]: https://doc.rust-lang.org/std/string/ +[tokens]: https://doc.rust-lang.org/reference/tokens.html +[encoding-crate]: https://crates.io/crates/encoding