Skip to content

Commit ff79a44

Browse files
huonw authored and alexcrichton committed on Feb 25, 2014
syntax: record multibyte chars' positions absolutely, not relative to file.
Previously multibyte UTF-8 chars were being recorded as byte offsets from the start of the file, and then later compared against global byte positions, resulting in the compiler possibly thinking it had a byte position pointing inside a multibyte character, if there were multibyte characters in any non-crate files. (Although, sometimes the byte offsets line up just right to not ICE, but that was a coincidence.) Fixes #11136. Fixes #11178.
1 parent dad52cf commit ff79a44

File tree

3 files changed

+61
-2
lines changed

3 files changed

+61
-2
lines changed
 

‎src/libsyntax/parse/lexer.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -264,8 +264,7 @@ pub fn bump(rdr: &StringReader) {
264264
}
265265

266266
if byte_offset_diff > 1 {
267-
rdr.filemap.record_multibyte_char(
268-
Pos::from_uint(current_byte_offset), byte_offset_diff);
267+
rdr.filemap.record_multibyte_char(rdr.last_pos.get(), byte_offset_diff);
269268
}
270269
} else {
271270
rdr.curr.set(None);
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
-include ../tools.mk
2+
3+
all:
4+
# check that we don't ICE on unicode input, issue #11178
5+
$(RUSTC) multiple_files.rs
6+
$(call RUN,multiple_files) "$(RUSTC)" "$(TMPDIR)"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
use std::{char, os, run, str};
2+
use std::rand::{task_rng, Rng};
3+
use std::io::File;
4+
5+
// creates unicode_input_multiple_files_{main,chars}.rs, where the
6+
// former imports the latter. `_chars` just contains an identifier
7+
// made up of random characters, because the compiler will emit an error message
8+
// about the ident being in the wrong place, with a span (and creating
9+
// this span used to upset the compiler).
10+
11+
fn random_char() -> char {
12+
let mut rng = task_rng();
13+
// a subset of the XID_start unicode table (ensuring that the
14+
// compiler doesn't fail with an "unrecognised token" error)
15+
let (lo, hi): (u32, u32) = match rng.gen_range(1, 4 + 1) {
16+
1 => (0x41, 0x5a),
17+
2 => (0xf8, 0x1ba),
18+
3 => (0x1401, 0x166c),
19+
_ => (0x10400, 0x1044f)
20+
};
21+
22+
char::from_u32(rng.gen_range(lo, hi + 1)).unwrap()
23+
}
24+
25+
fn main() {
26+
let args = os::args();
27+
let rustc = args[1].as_slice();
28+
let tmpdir = Path::new(args[2].as_slice());
29+
30+
let main_file = tmpdir.join("unicode_input_multiple_files_main.rs");
31+
let main_file_str = main_file.as_str().unwrap();
32+
{
33+
let _ = File::create(&main_file).unwrap()
34+
.write_str("mod unicode_input_multiple_files_chars;");
35+
}
36+
37+
for _ in range(0, 100) {
38+
{
39+
let mut w = File::create(&tmpdir.join("unicode_input_multiple_files_chars.rs")).unwrap();
40+
for _ in range(0, 30) {
41+
let _ = w.write_char(random_char());
42+
}
43+
}
44+
45+
// rustc is passed to us with --out-dir and -L etc., so we
46+
// can't exec it directly
47+
let result = run::process_output("sh", [~"-c", rustc + " " + main_file_str]).unwrap();
48+
let err = str::from_utf8_lossy(result.error);
49+
50+
// positive test so that this test will be updated when the
51+
// compiler changes.
52+
assert!(err.as_slice().contains("expected item but found"))
53+
}
54+
}

2 commit comments

Comments
 (2)

lambda-fairy commented on Feb 25, 2014

@lambda-fairy
Contributor

I wish hugs worked over the internet. Thank you! ❤️

huonw commented on Feb 25, 2014

@huonw
MemberAuthor

No problem! :)

Please sign in to comment.