Skip to content

Commit df73b26

Browse files
committed
convert \r\n -> \n in include_str! macro
Ideally, the meaning of the program should be independent of the line endings used, because, for example, git can change line endings during a checkout. We currently do line-ending conversion in almost all cases, with `include_str!` being an exception. This commit removes this exception, bringing `include_str!` closer in behavior to string literals.

Note that this is technically a breaking change. If you really mean to include a string with DOS line endings, you can use the `include_bytes!` macro, which is guaranteed not to do any translation, like this:

    pub fn my_text() -> &'static str {
        unsafe { std::str::from_utf8_unchecked(MY_TEXT_BYTES) }
    }

    const MY_TEXT_BYTES: &[u8] = include_bytes!("my_text.bin");

    #[test]
    fn test_encoding() {
        std::str::from_utf8(MY_TEXT_BYTES).unwrap();
    }
1 parent ef1ecbe commit df73b26

File tree

4 files changed

+12
-13
lines changed

4 files changed

+12
-13
lines changed

src/libcore/macros.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -978,7 +978,9 @@ pub(crate) mod builtin {
978978
/// modules are found)
979979
///
980980
/// This macro will yield an expression of type `&'static str` which is the
981-
/// contents of the file.
981+
/// contents of the file. The string is normalized:
982+
/// * Byte Order Mark (BOM), if any, is removed,
983+
/// * DOS line endings (`\r\n`) are converted to `\n`.
982984
///
983985
/// # Examples
984986
///

src/libsyntax_ext/source_util.rs

+6-10
Original file line numberDiff line numberDiff line change
@@ -112,16 +112,12 @@ pub fn expand_include_str(cx: &mut ExtCtxt<'_>, sp: Span, tts: &[tokenstream::To
112112
None => return DummyResult::any(sp)
113113
};
114114
let file = cx.resolve_path(file, sp);
115-
match cx.source_map().load_binary_file(&file) {
116-
Ok(bytes) => match std::str::from_utf8(&bytes) {
117-
Ok(src) => {
118-
let interned_src = Symbol::intern(&src);
119-
base::MacEager::expr(cx.expr_str(sp, interned_src))
120-
}
121-
Err(_) => {
122-
cx.span_err(sp, &format!("{} wasn't a utf-8 file", file.display()));
123-
DummyResult::any(sp)
124-
}
115+
match cx.source_map().load_file(&file) {
116+
Ok(source_file) => {
117+
let src = source_file.src.as_ref()
118+
.expect("freshly loaded file should have a source");
119+
let interned_src = Symbol::intern(src.as_str());
120+
base::MacEager::expr(cx.expr_str(sp, interned_src))
125121
},
126122
Err(e) => {
127123
cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e));

src/test/ui/include-macros/normalization.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@ fn main() {
77
);
88
assert_eq!(
99
include_str!("data.bin"),
10-
"\u{FEFF}This file starts with BOM.\r\nLines are separated by \\r\\n.\r\n",
10+
"This file starts with BOM.\nLines are separated by \\r\\n.\n",
1111
);
1212
}

src/test/ui/lexer-crlf-line-endings-string-literal-doc-comment.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ literal";
3636
assert_eq!(s, "byte string\nliteral".as_bytes());
3737

3838
// validate that our source file has CRLF endings
39-
let source = include_str!("lexer-crlf-line-endings-string-literal-doc-comment.rs");
39+
let source = include_bytes!("lexer-crlf-line-endings-string-literal-doc-comment.rs");
40+
let source = std::str::from_utf8(&source[..]).unwrap();
4041
assert!(source.contains("string\r\nliteral"));
4142
}

0 commit comments

Comments
 (0)