Skip to content

Commit 05fb797

Browse files
authored
Auto merge of #35979 - Manishearth:rollup, r=Manishearth
Rollup of 6 pull requests - Successful merges: #35238, #35867, #35885, #35916, #35947, #35955 - Failed merges:
2 parents f5499a0 + 15ca5d4 commit 05fb797

File tree

25 files changed

+641
-268
lines changed

25 files changed

+641
-268
lines changed

src/libcore/char.rs

+69-13
Original file line numberDiff line numberDiff line change
@@ -752,25 +752,81 @@ pub struct InvalidSequence(());
752752
impl<I: Iterator<Item = u8>> Iterator for DecodeUtf8<I> {
753753
type Item = Result<char, InvalidSequence>;
754754
#[inline]
755+
755756
fn next(&mut self) -> Option<Result<char, InvalidSequence>> {
756-
self.0.next().map(|b| {
757-
if b & 0x80 == 0 { Ok(b as char) } else {
758-
let l = (!b).leading_zeros() as usize; // number of bytes in UTF-8 representation
759-
if l < 2 || l > 6 { return Err(InvalidSequence(())) };
760-
let mut x = (b as u32) & (0x7F >> l);
761-
for _ in 0..l-1 {
757+
self.0.next().map(|first_byte| {
758+
// Emit InvalidSequence according to
759+
// Unicode §5.22 Best Practice for U+FFFD Substitution
760+
// http://www.unicode.org/versions/Unicode9.0.0/ch05.pdf#G40630
761+
762+
// Roughly: consume at least one byte,
763+
// then validate one byte at a time and stop before the first unexpected byte
764+
// (which might be the valid start of the next byte sequence).
765+
766+
let mut code_point;
767+
macro_rules! first_byte {
768+
($mask: expr) => {
769+
code_point = u32::from(first_byte & $mask)
770+
}
771+
}
772+
macro_rules! continuation_byte {
773+
() => { continuation_byte!(0x80...0xBF) };
774+
($range: pat) => {
762775
match self.0.peek() {
763-
Some(&b) if b & 0xC0 == 0x80 => {
776+
Some(&byte @ $range) => {
777+
code_point = (code_point << 6) | u32::from(byte & 0b0011_1111);
764778
self.0.next();
765-
x = (x << 6) | (b as u32) & 0x3F;
766-
},
767-
_ => return Err(InvalidSequence(())),
779+
}
780+
_ => return Err(InvalidSequence(()))
768781
}
769782
}
770-
match from_u32(x) {
771-
Some(x) if l == x.len_utf8() => Ok(x),
772-
_ => Err(InvalidSequence(())),
783+
}
784+
785+
match first_byte {
786+
0x00...0x7F => {
787+
first_byte!(0b1111_1111);
788+
}
789+
0xC2...0xDF => {
790+
first_byte!(0b0001_1111);
791+
continuation_byte!();
792+
}
793+
0xE0 => {
794+
first_byte!(0b0000_1111);
795+
continuation_byte!(0xA0...0xBF); // 0x80...0x9F here are overlong
796+
continuation_byte!();
773797
}
798+
0xE1...0xEC | 0xEE...0xEF => {
799+
first_byte!(0b0000_1111);
800+
continuation_byte!();
801+
continuation_byte!();
802+
}
803+
0xED => {
804+
first_byte!(0b0000_1111);
805+
continuation_byte!(0x80...0x9F); // 0xA0...0xBF here are surrogates
806+
continuation_byte!();
807+
}
808+
0xF0 => {
809+
first_byte!(0b0000_0111);
810+
continuation_byte!(0x90...0xBF); // 0x80...0x8F here are overlong
811+
continuation_byte!();
812+
continuation_byte!();
813+
}
814+
0xF1...0xF3 => {
815+
first_byte!(0b0000_0111);
816+
continuation_byte!();
817+
continuation_byte!();
818+
continuation_byte!();
819+
}
820+
0xF4 => {
821+
first_byte!(0b0000_0111);
822+
continuation_byte!(0x80...0x8F); // 0x90...0xBF here are beyond char::MAX
823+
continuation_byte!();
824+
continuation_byte!();
825+
}
826+
_ => return Err(InvalidSequence(())) // Illegal first byte, overlong, or beyond MAX
827+
}
828+
unsafe {
829+
Ok(from_u32_unchecked(code_point))
774830
}
775831
})
776832
}

src/libcoretest/char.rs

+45-24
Original file line numberDiff line numberDiff line change
@@ -358,29 +358,50 @@ fn eu_iterator_specializations() {
358358

359359
#[test]
360360
fn test_decode_utf8() {
361-
use core::char::*;
362-
use core::iter::FromIterator;
363-
364-
for &(str, bs) in [("", &[] as &[u8]),
365-
("A", &[0x41u8] as &[u8]),
366-
("�", &[0xC1u8, 0x81u8] as &[u8]),
367-
("♥", &[0xE2u8, 0x99u8, 0xA5u8]),
368-
("♥A", &[0xE2u8, 0x99u8, 0xA5u8, 0x41u8] as &[u8]),
369-
("�", &[0xE2u8, 0x99u8] as &[u8]),
370-
("�A", &[0xE2u8, 0x99u8, 0x41u8] as &[u8]),
371-
("�", &[0xC0u8] as &[u8]),
372-
("�A", &[0xC0u8, 0x41u8] as &[u8]),
373-
("�", &[0x80u8] as &[u8]),
374-
("�A", &[0x80u8, 0x41u8] as &[u8]),
375-
("�", &[0xFEu8] as &[u8]),
376-
("�A", &[0xFEu8, 0x41u8] as &[u8]),
377-
("�", &[0xFFu8] as &[u8]),
378-
("�A", &[0xFFu8, 0x41u8] as &[u8])].into_iter() {
379-
assert!(Iterator::eq(str.chars(),
380-
decode_utf8(bs.into_iter().map(|&b|b))
381-
.map(|r_b| r_b.unwrap_or('\u{FFFD}'))),
382-
"chars = {}, bytes = {:?}, decoded = {:?}", str, bs,
383-
Vec::from_iter(decode_utf8(bs.into_iter().map(|&b|b))
384-
.map(|r_b| r_b.unwrap_or('\u{FFFD}'))));
361+
macro_rules! assert_decode_utf8 {
362+
($input_bytes: expr, $expected_str: expr) => {
363+
let input_bytes: &[u8] = &$input_bytes;
364+
let s = char::decode_utf8(input_bytes.iter().cloned())
365+
.map(|r_b| r_b.unwrap_or('\u{FFFD}'))
366+
.collect::<String>();
367+
assert_eq!(s, $expected_str,
368+
"input bytes: {:?}, expected str: {:?}, result: {:?}",
369+
input_bytes, $expected_str, s);
370+
assert_eq!(String::from_utf8_lossy(&$input_bytes), $expected_str);
371+
}
385372
}
373+
374+
assert_decode_utf8!([], "");
375+
assert_decode_utf8!([0x41], "A");
376+
assert_decode_utf8!([0xC1, 0x81], "��");
377+
assert_decode_utf8!([0xE2, 0x99, 0xA5], "♥");
378+
assert_decode_utf8!([0xE2, 0x99, 0xA5, 0x41], "♥A");
379+
assert_decode_utf8!([0xE2, 0x99], "�");
380+
assert_decode_utf8!([0xE2, 0x99, 0x41], "�A");
381+
assert_decode_utf8!([0xC0], "�");
382+
assert_decode_utf8!([0xC0, 0x41], "�A");
383+
assert_decode_utf8!([0x80], "�");
384+
assert_decode_utf8!([0x80, 0x41], "�A");
385+
assert_decode_utf8!([0xFE], "�");
386+
assert_decode_utf8!([0xFE, 0x41], "�A");
387+
assert_decode_utf8!([0xFF], "�");
388+
assert_decode_utf8!([0xFF, 0x41], "�A");
389+
assert_decode_utf8!([0xC0, 0x80], "��");
390+
391+
// Surrogates
392+
assert_decode_utf8!([0xED, 0x9F, 0xBF], "\u{D7FF}");
393+
assert_decode_utf8!([0xED, 0xA0, 0x80], "���");
394+
assert_decode_utf8!([0xED, 0xBF, 0x80], "���");
395+
assert_decode_utf8!([0xEE, 0x80, 0x80], "\u{E000}");
396+
397+
// char::MAX
398+
assert_decode_utf8!([0xF4, 0x8F, 0xBF, 0xBF], "\u{10FFFF}");
399+
assert_decode_utf8!([0xF4, 0x8F, 0xBF, 0x41], "�A");
400+
assert_decode_utf8!([0xF4, 0x90, 0x80, 0x80], "����");
401+
402+
// 5- and 6-byte sequences
403+
// Part of the original design of UTF-8,
404+
// but invalid now that UTF-8 is artificially restricted to match the range of UTF-16.
405+
assert_decode_utf8!([0xF8, 0x80, 0x80, 0x80, 0x80], "�����");
406+
assert_decode_utf8!([0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], "������");
386407
}

src/librbml/lib.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -173,12 +173,12 @@ impl<'doc> Doc<'doc> {
173173
self.start == self.end
174174
}
175175

176-
pub fn as_str_slice(&self) -> &'doc str {
176+
pub fn as_str(&self) -> &'doc str {
177177
str::from_utf8(&self.data[self.start..self.end]).unwrap()
178178
}
179179

180-
pub fn as_str(&self) -> String {
181-
self.as_str_slice().to_string()
180+
pub fn to_string(&self) -> String {
181+
self.as_str().to_string()
182182
}
183183
}
184184

@@ -773,7 +773,7 @@ pub mod reader {
773773
Ok(char::from_u32(doc_as_u32(self.next_doc(EsChar)?)).unwrap())
774774
}
775775
fn read_str(&mut self) -> DecodeResult<String> {
776-
Ok(self.next_doc(EsStr)?.as_str())
776+
Ok(self.next_doc(EsStr)?.to_string())
777777
}
778778

779779
// Compound types:

src/librustc/lint/context.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -601,7 +601,7 @@ pub trait LintContext: Sized {
601601
for (lint_id, level, span) in v {
602602
let (now, now_source) = self.lints().get_level_source(lint_id);
603603
if now == Forbid && level != Forbid {
604-
let lint_name = lint_id.as_str();
604+
let lint_name = lint_id.to_string();
605605
let mut diag_builder = struct_span_err!(self.sess(), span, E0453,
606606
"{}({}) overruled by outer forbid({})",
607607
level.as_str(), lint_name,
@@ -1216,7 +1216,7 @@ pub fn check_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
12161216
for &(lint, span, ref msg) in v {
12171217
span_bug!(span,
12181218
"unprocessed lint {} at {}: {}",
1219-
lint.as_str(), tcx.map.node_to_string(*id), *msg)
1219+
lint.to_string(), tcx.map.node_to_string(*id), *msg)
12201220
}
12211221
}
12221222

@@ -1252,7 +1252,7 @@ pub fn check_ast_crate(sess: &Session, krate: &ast::Crate) {
12521252
// in the iteration code.
12531253
for (_, v) in sess.lints.borrow().iter() {
12541254
for &(lint, span, ref msg) in v {
1255-
span_bug!(span, "unprocessed lint {}: {}", lint.as_str(), *msg)
1255+
span_bug!(span, "unprocessed lint {}: {}", lint.to_string(), *msg)
12561256
}
12571257
}
12581258
}

src/librustc/lint/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ impl LintId {
263263
}
264264

265265
/// Get the name of the lint.
266-
pub fn as_str(&self) -> String {
266+
pub fn to_string(&self) -> String {
267267
self.lint.name_lower()
268268
}
269269
}

src/librustc/middle/region.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ impl CodeExtent {
237237
// (This is the special case alluded to in the
238238
// doc-comment for this method)
239239
let stmt_span = blk.stmts[r.first_statement_index as usize].span;
240-
Some(Span { lo: stmt_span.hi, ..blk.span })
240+
Some(Span { lo: stmt_span.hi, hi: blk.span.hi, expn_id: stmt_span.expn_id })
241241
}
242242
}
243243
}

src/librustc/session/config.rs

+2
Original file line numberDiff line numberDiff line change
@@ -891,6 +891,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
891891
"force overflow checks on or off"),
892892
trace_macros: bool = (false, parse_bool, [UNTRACKED],
893893
"for every macro invocation, print its name and arguments"),
894+
debug_macros: bool = (false, parse_bool, [TRACKED],
895+
"emit line numbers debug info inside macros"),
894896
enable_nonzeroing_move_hints: bool = (false, parse_bool, [TRACKED],
895897
"force nonzeroing move optimization on"),
896898
keep_hygiene_data: bool = (false, parse_bool, [UNTRACKED],

src/librustc_driver/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,7 @@ Available lint options:
861861
for (name, to) in lints {
862862
let name = name.to_lowercase().replace("_", "-");
863863
let desc = to.into_iter()
864-
.map(|x| x.as_str().replace("_", "-"))
864+
.map(|x| x.to_string().replace("_", "-"))
865865
.collect::<Vec<String>>()
866866
.join(", ");
867867
println!(" {} {}", padded(&name[..]), desc);

src/librustc_llvm/ffi.rs

+5
Original file line numberDiff line numberDiff line change
@@ -1796,6 +1796,11 @@ extern {
17961796
Col: c_uint)
17971797
-> DILexicalBlock;
17981798

1799+
pub fn LLVMRustDIBuilderCreateLexicalBlockFile(Builder: DIBuilderRef,
1800+
Scope: DIScope,
1801+
File: DIFile)
1802+
-> DILexicalBlock;
1803+
17991804
pub fn LLVMRustDIBuilderCreateStaticVariable(Builder: DIBuilderRef,
18001805
Context: DIScope,
18011806
Name: *const c_char,

0 commit comments

Comments
 (0)