From 94d73d4403ef98b3b38b8e3035c2eac87fb900f9 Mon Sep 17 00:00:00 2001 From: Christiaan Dirkx Date: Tue, 3 Nov 2020 19:26:31 +0100 Subject: [PATCH 01/11] Refactor `parse_prefix` on Windows Refactor `get_first_two_components` to `get_next_component`. Fixes the following behaviour of `parse_prefix`: - series of separator bytes in a prefix are correctly parsed as a single separator - device namespace prefixes correctly recognize both `\\` and `/` as separators --- library/std/src/ffi/os_str.rs | 4 +- library/std/src/path/tests.rs | 8 +- library/std/src/sys/windows/path.rs | 174 +++++++++++++--------- library/std/src/sys/windows/path/tests.rs | 39 ++++- 4 files changed, 141 insertions(+), 84 deletions(-) diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index 7e7a28be2b0e5..5d93016cadb37 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -667,10 +667,10 @@ impl OsStr { /// Gets the underlying byte representation. /// - /// Note: it is *crucial* that this API is private, to avoid + /// Note: it is *crucial* that this API is not externally public, to avoid /// revealing the internal, platform-specific encodings. #[inline] - fn bytes(&self) -> &[u8] { + pub(crate) fn bytes(&self) -> &[u8] { unsafe { &*(&self.inner as *const _ as *const [u8]) } } diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs index ff94fda5a227b..896d6c2a64c60 100644 --- a/library/std/src/path/tests.rs +++ b/library/std/src/path/tests.rs @@ -873,12 +873,12 @@ pub fn test_decompositions_windows() { ); t!("\\\\.\\foo/bar", - iter: ["\\\\.\\foo/bar", "\\"], + iter: ["\\\\.\\foo", "\\", "bar"], has_root: true, is_absolute: true, - parent: None, - file_name: None, - file_stem: None, + parent: Some("\\\\.\\foo/"), + file_name: Some("bar"), + file_stem: Some("bar"), extension: None ); diff --git a/library/std/src/sys/windows/path.rs b/library/std/src/sys/windows/path.rs index dda3ed68cfc95..c10c0df4a3a99 100644 --- a/library/std/src/sys/windows/path.rs +++ b/library/std/src/sys/windows/path.rs @@ -8,15 +8,12 @@ mod tests; pub const MAIN_SEP_STR: &str = "\\"; pub const MAIN_SEP: char = '\\'; -// The unsafety here stems from converting between `&OsStr` and `&[u8]` -// and back. This is safe to do because (1) we only look at ASCII -// contents of the encoding and (2) new &OsStr values are produced -// only from ASCII-bounded slices of existing &OsStr values. -fn os_str_as_u8_slice(s: &OsStr) -> &[u8] { - unsafe { mem::transmute(s) } -} -unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr { - mem::transmute(s) +// Safety: `bytes` must be a valid wtf8 encoded slice +#[inline] +unsafe fn bytes_as_os_str(bytes: &[u8]) -> &OsStr { + // &OsStr is layout compatible with &Slice, which is compatible with &Wtf8, + // which is compatible with &[u8]. + mem::transmute(bytes) } #[inline] @@ -29,79 +26,116 @@ pub fn is_verbatim_sep(b: u8) -> bool { b == b'\\' } -// In most DOS systems, it is not possible to have more than 26 drive letters. -// See . -pub fn is_valid_drive_letter(disk: u8) -> bool { - disk.is_ascii_alphabetic() -} - pub fn parse_prefix(path: &OsStr) -> Option> { use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC}; - let path = os_str_as_u8_slice(path); - - // \\ - if let Some(path) = path.strip_prefix(br"\\") { - // \\?\ - if let Some(path) = path.strip_prefix(br"?\") { - // \\?\UNC\server\share - if let Some(path) = path.strip_prefix(br"UNC\") { - let (server, share) = match get_first_two_components(path, is_verbatim_sep) { - Some((server, share)) => unsafe { - (u8_slice_as_os_str(server), u8_slice_as_os_str(share)) - }, - None => (unsafe { u8_slice_as_os_str(path) }, OsStr::new("")), - }; - return Some(VerbatimUNC(server, share)); + if let Some(path) = strip_prefix(path, r"\\") { + // \\ + if let Some(path) = strip_prefix(path, r"?\") { + // \\?\ + if let Some(path) = strip_prefix(path, r"UNC\") { + // \\?\UNC\server\share + + let (server, path) = parse_next_component(path, true); + let (share, _) = parse_next_component(path, true); + + Some(VerbatimUNC(server, share)) } else { - // \\?\path - match path { - // \\?\C:\path - [c, b':', b'\\', ..] if is_valid_drive_letter(*c) => { - return Some(VerbatimDisk(c.to_ascii_uppercase())); - } - // \\?\cat_pics - _ => { - let idx = path.iter().position(|&b| b == b'\\').unwrap_or(path.len()); - let slice = &path[..idx]; - return Some(Verbatim(unsafe { u8_slice_as_os_str(slice) })); - } + let (prefix, _) = parse_next_component(path, true); + + // in verbatim paths only recognize an exact drive prefix + if let Some(drive) = parse_drive_exact(prefix) { + // \\?\C: + Some(VerbatimDisk(drive)) + } else { + // \\?\prefix + Some(Verbatim(prefix)) } } - } else if let Some(path) = path.strip_prefix(b".\\") { + } else if let Some(path) = strip_prefix(path, r".\") { // \\.\COM42 - let idx = path.iter().position(|&b| b == b'\\').unwrap_or(path.len()); - let slice = &path[..idx]; - return Some(DeviceNS(unsafe { u8_slice_as_os_str(slice) })); - } - match get_first_two_components(path, is_sep_byte) { - Some((server, share)) if !server.is_empty() && !share.is_empty() => { + let (prefix, _) = parse_next_component(path, false); + Some(DeviceNS(prefix)) + } else { + let (server, path) = parse_next_component(path, false); + let (share, _) = parse_next_component(path, false); + + if !server.is_empty() && !share.is_empty() { // \\server\share - return Some(unsafe { UNC(u8_slice_as_os_str(server), u8_slice_as_os_str(share)) }); + Some(UNC(server, share)) + } else { + // no valid prefix beginning with "\\" recognized + None } - _ => {} } - } else if let [c, b':', ..] = path { + } else if let Some(drive) = parse_drive(path) { // C: - if is_valid_drive_letter(*c) { - return Some(Disk(c.to_ascii_uppercase())); - } + Some(Disk(drive)) + } else { + // no prefix + None } - None } -/// Returns the first two path components with predicate `f`. -/// -/// The two components returned will be use by caller -/// to construct `VerbatimUNC` or `UNC` Windows path prefix. -/// -/// Returns [`None`] if there are no separators in path. -fn get_first_two_components(path: &[u8], f: fn(u8) -> bool) -> Option<(&[u8], &[u8])> { - let idx = path.iter().position(|&x| f(x))?; - // Panic safe - // The max `idx+1` is `path.len()` and `path[path.len()..]` is a valid index. - let (first, path) = (&path[..idx], &path[idx + 1..]); - let idx = path.iter().position(|&x| f(x)).unwrap_or(path.len()); - let second = &path[..idx]; - Some((first, second)) +// Parses a drive prefix, e.g. "C:" and "C:\whatever" +fn parse_drive(prefix: &OsStr) -> Option { + // In most DOS systems, it is not possible to have more than 26 drive letters. + // See . + fn is_valid_drive_letter(drive: &u8) -> bool { + drive.is_ascii_alphabetic() + } + + match prefix.bytes() { + [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), + _ => None, + } +} + +// Parses a drive prefix exactly, e.g. "C:" +fn parse_drive_exact(prefix: &OsStr) -> Option { + // only parse two bytes: the drive letter and the drive separator + if prefix.len() == 2 { parse_drive(prefix) } else { None } +} + +fn strip_prefix<'a>(path: &'a OsStr, prefix: &str) -> Option<&'a OsStr> { + // `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]` + // is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice. + match path.bytes().strip_prefix(prefix.as_bytes()) { + Some(path) => unsafe { Some(bytes_as_os_str(path)) }, + None => None, + } +} + +// Parse the next path component. +// +// Returns the next component and the rest of the path excluding the component and separator. +// Does not recognize `/` as a separator character if `verbatim` is true. +fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { + let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; + + match path.bytes().iter().position(|&x| separator(x)) { + Some(separator_start) => { + let mut separator_end = separator_start + 1; + + // a series of multiple separator characters is treated as a single separator, + // except in verbatim paths + while !verbatim && separator_end < path.len() && separator(path.bytes()[separator_end]) + { + separator_end += 1; + } + + let component = &path.bytes()[..separator_start]; + + // Panic safe + // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. + let path = &path.bytes()[separator_end..]; + + // Safety: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') + // is encoded in a single byte, therefore `bytes[separator_start]` and + // `bytes[separator_end]` must be code point boundaries and thus + // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. + unsafe { (bytes_as_os_str(component), bytes_as_os_str(path)) } + } + None => (path, OsStr::new("")), + } } diff --git a/library/std/src/sys/windows/path/tests.rs b/library/std/src/sys/windows/path/tests.rs index fbac1dc1ca17a..9675da6ff883b 100644 --- a/library/std/src/sys/windows/path/tests.rs +++ b/library/std/src/sys/windows/path/tests.rs @@ -1,21 +1,44 @@ use super::*; #[test] -fn test_get_first_two_components() { +fn test_parse_next_component() { assert_eq!( - get_first_two_components(br"server\share", is_verbatim_sep), - Some((&b"server"[..], &b"share"[..])), + parse_next_component(OsStr::new(r"server\share"), true), + (OsStr::new(r"server"), OsStr::new(r"share")) ); assert_eq!( - get_first_two_components(br"server\", is_verbatim_sep), - Some((&b"server"[..], &b""[..])) + parse_next_component(OsStr::new(r"server/share"), true), + (OsStr::new(r"server/share"), OsStr::new(r"")) ); assert_eq!( - get_first_two_components(br"\server\", is_verbatim_sep), - Some((&b""[..], &b"server"[..])) + parse_next_component(OsStr::new(r"server/share"), false), + (OsStr::new(r"server"), OsStr::new(r"share")) ); - assert_eq!(get_first_two_components(br"there are no separators here", is_verbatim_sep), None,); + assert_eq!( + parse_next_component(OsStr::new(r"server\"), false), + (OsStr::new(r"server"), OsStr::new(r"")) + ); + + assert_eq!( + parse_next_component(OsStr::new(r"\server\"), false), + (OsStr::new(r""), OsStr::new(r"server\")) + ); + + assert_eq!( + parse_next_component(OsStr::new(r"servershare"), false), + (OsStr::new(r"servershare"), OsStr::new("")) + ); + + assert_eq!( + parse_next_component(OsStr::new(r"server/\//\/\\\\/////\/share"), false), + (OsStr::new(r"server"), OsStr::new(r"share")) + ); + + assert_eq!( + parse_next_component(OsStr::new(r"server\\\\\\\\\\\\\\share"), true), + (OsStr::new(r"server"), OsStr::new(r"\\\\\\\\\\\\\share")) + ); } From 9cf2516251c5379d3e8429342e8a392df81b9aaa Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Thu, 10 Dec 2020 17:47:28 -0500 Subject: [PATCH 02/11] doc(array,vec): add notes about side effects when empty-initializing --- library/alloc/src/macros.rs | 3 +++ library/std/src/primitive_docs.rs | 3 +++ 2 files changed, 6 insertions(+) diff --git a/library/alloc/src/macros.rs b/library/alloc/src/macros.rs index a992d768d6314..bbff1d3dc4d2e 100644 --- a/library/alloc/src/macros.rs +++ b/library/alloc/src/macros.rs @@ -29,6 +29,9 @@ /// to the same boxed integer value, not five references pointing to independently /// boxed integers. /// +/// Also, note that `[T; 0]` is a valid initializer. This will initialize (or call) +/// `T` but not populate the vector with it, so be mindful of side effects. +/// /// [`Vec`]: crate::vec::Vec #[cfg(not(test))] #[macro_export] diff --git a/library/std/src/primitive_docs.rs b/library/std/src/primitive_docs.rs index 55171ef2292d7..b22adbbe3d356 100644 --- a/library/std/src/primitive_docs.rs +++ b/library/std/src/primitive_docs.rs @@ -489,6 +489,9 @@ mod prim_pointer {} /// * A repeat expression `[x; N]`, which produces an array with `N` copies of `x`. /// The type of `x` must be [`Copy`]. /// +/// Note that `[x; 0]` is a valid repeat expression. This will produce an empty array +/// but will also initialize (or call) `x`, which may produce side effects. +/// /// Arrays of *any* size implement the following traits if the element type allows it: /// /// - [`Copy`] From d986924eb13a103fc79c474eded47a663c91bb7f Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Fri, 11 Dec 2020 10:09:40 -0500 Subject: [PATCH 03/11] doc: apply suggestions --- library/alloc/src/macros.rs | 5 +++-- library/std/src/primitive_docs.rs | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/library/alloc/src/macros.rs b/library/alloc/src/macros.rs index bbff1d3dc4d2e..7d4eff6185dbe 100644 --- a/library/alloc/src/macros.rs +++ b/library/alloc/src/macros.rs @@ -29,8 +29,9 @@ /// to the same boxed integer value, not five references pointing to independently /// boxed integers. /// -/// Also, note that `[T; 0]` is a valid initializer. This will initialize (or call) -/// `T` but not populate the vector with it, so be mindful of side effects. +/// Also, note that `vec![expr; 0]` is allowed, and produces an empty vector. +/// This will still evaluate `expr`, however, and immediately drop the resulting value, so +/// be mindful of side effects. /// /// [`Vec`]: crate::vec::Vec #[cfg(not(test))] diff --git a/library/std/src/primitive_docs.rs b/library/std/src/primitive_docs.rs index b22adbbe3d356..7aca5451ebc20 100644 --- a/library/std/src/primitive_docs.rs +++ b/library/std/src/primitive_docs.rs @@ -489,8 +489,9 @@ mod prim_pointer {} /// * A repeat expression `[x; N]`, which produces an array with `N` copies of `x`. /// The type of `x` must be [`Copy`]. /// -/// Note that `[x; 0]` is a valid repeat expression. This will produce an empty array -/// but will also initialize (or call) `x`, which may produce side effects. +/// Note that `[expr; 0]` is allowed, and produces an empty array. +/// This will still evaluate `expr`, however, and immediately drop the resulting value, so +/// be mindful of side effects. /// /// Arrays of *any* size implement the following traits if the element type allows it: /// From 201a833eef3c3cf8beeab0461e3ffc772b07c83e Mon Sep 17 00:00:00 2001 From: Arlie Davis Date: Fri, 11 Dec 2020 11:32:48 -0800 Subject: [PATCH 04/11] Improve error handling in `symbols` proc-macro This improves how the `symbols` proc-macro handles errors. If it finds an error in its input, the macro does not panic. Instead, it still produces an output token stream. That token stream will contain `compile_error!(...)` macro invocations. This will still cause compilation to fail (which is what we want), but it will prevent meaningless errors caused by the output not containing symbols that the macro normally generates. This solves a small (but annoying) problem. When you're editing rustc_span/src/symbol.rs, and you get something wrong (dup symbol name, misordered symbol), you want to get only the errors that are relevant, not a burst of errors that are irrelevant. This change also uses the correct Span when reporting errors, so you get errors that point to the correct place in rustc_span/src/symbol.rs where something is wrong. This also adds several unit tests which test the `symbols` proc-macro. This commit also makes it easy to run the `symbols` proc-macro as an ordinary Cargo test. Just run `cargo test`. This makes it easier to do development on the macro itself, such as running it under a debugger. This commit also uses the `Punctuated` type in `syn` for parsing comma-separated lists, rather than doing it manually. The output of the macro is not changed at all by this commit, so rustc should be completely unchanged. This just improves quality of life during development. --- compiler/rustc_macros/src/lib.rs | 2 +- compiler/rustc_macros/src/symbols.rs | 154 ++++++++++++++------- compiler/rustc_macros/src/symbols/tests.rs | 111 +++++++++++++++ 3 files changed, 213 insertions(+), 54 deletions(-) create mode 100644 compiler/rustc_macros/src/symbols/tests.rs diff --git a/compiler/rustc_macros/src/lib.rs b/compiler/rustc_macros/src/lib.rs index 5c28839c9b7e4..152ae159aed44 100644 --- a/compiler/rustc_macros/src/lib.rs +++ b/compiler/rustc_macros/src/lib.rs @@ -21,7 +21,7 @@ pub fn rustc_queries(input: TokenStream) -> TokenStream { #[proc_macro] pub fn symbols(input: TokenStream) -> TokenStream { - symbols::symbols(input) + symbols::symbols(input.into()).into() } decl_derive!([HashStable, attributes(stable_hasher)] => hash_stable::hash_stable_derive); diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs index 94d4ad78e8d90..f449900d5c245 100644 --- a/compiler/rustc_macros/src/symbols.rs +++ b/compiler/rustc_macros/src/symbols.rs @@ -1,8 +1,35 @@ -use proc_macro::TokenStream; +//! Proc macro which builds the Symbol table +//! +//! # Debugging +//! +//! Since this proc-macro does some non-trivial work, debugging it is important. +//! This proc-macro can be invoked as an ordinary unit test, like so: +//! +//! ```bash +//! cd compiler/rustc_macros +//! cargo test symbols::test_symbols -- --nocapture +//! ``` +//! +//! This unit test finds the `symbols!` invocation in `compiler/rustc_span/src/symbol.rs` +//! and runs it. It verifies that the output token stream can be parsed as valid module +//! items and that no errors were produced. +//! +//! You can also view the generated code by using `cargo expand`: +//! +//! ```bash +//! cargo install cargo-expand # this is necessary only once +//! cd compiler/rustc_span +//! cargo expand > /tmp/rustc_span.rs # it's a big file +//! ``` + +use proc_macro2::{Span, TokenStream}; use quote::quote; -use std::collections::HashSet; +use std::collections::HashMap; use syn::parse::{Parse, ParseStream, Result}; -use syn::{braced, parse_macro_input, Ident, LitStr, Token}; +use syn::{braced, punctuated::Punctuated, Ident, LitStr, Token}; + +#[cfg(test)] +mod tests; mod kw { syn::custom_keyword!(Keywords); @@ -19,7 +46,6 @@ impl Parse for Keyword { let name = input.parse()?; input.parse::()?; let value = input.parse()?; - input.parse::()?; Ok(Keyword { name, value }) } @@ -37,28 +63,14 @@ impl Parse for Symbol { Ok(_) => Some(input.parse()?), Err(_) => None, }; - input.parse::()?; Ok(Symbol { name, value }) } } -/// A type used to greedily parse another type until the input is empty. -struct List(Vec); - -impl Parse for List { - fn parse(input: ParseStream<'_>) -> Result { - let mut list = Vec::new(); - while !input.is_empty() { - list.push(input.parse()?); - } - Ok(List(list)) - } -} - struct Input { - keywords: List, - symbols: List, + keywords: Punctuated, + symbols: Punctuated, } impl Parse for Input { @@ -66,49 +78,86 @@ impl Parse for Input { input.parse::()?; let content; braced!(content in input); - let keywords = content.parse()?; + let keywords = Punctuated::parse_terminated(&content)?; input.parse::()?; let content; braced!(content in input); - let symbols = content.parse()?; + let symbols = Punctuated::parse_terminated(&content)?; Ok(Input { keywords, symbols }) } } +#[derive(Default)] +struct Errors { + list: Vec, +} + +impl Errors { + fn error(&mut self, span: Span, message: String) { + self.list.push(syn::Error::new(span, message)); + } +} + pub fn symbols(input: TokenStream) -> TokenStream { - let input = parse_macro_input!(input as Input); + let (mut output, errors) = symbols_with_errors(input); + + // If we generated any errors, then report them as compiler_error!() macro calls. + // This lets the errors point back to the most relevant span. It also allows us + // to report as many errors as we can during a single run. + output.extend(errors.into_iter().map(|e| e.to_compile_error())); + + output +} + +fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { + let mut errors = Errors::default(); + + let input: Input = match syn::parse2(input) { + Ok(input) => input, + Err(e) => { + // This allows us to display errors at the proper span, while minimizing + // unrelated errors caused by bailing out (and not generating code). + errors.list.push(e); + Input { keywords: Default::default(), symbols: Default::default() } + } + }; let mut keyword_stream = quote! {}; let mut symbols_stream = quote! {}; let mut digits_stream = quote! {}; let mut prefill_stream = quote! {}; let mut counter = 0u32; - let mut keys = HashSet::::new(); - let mut prev_key: Option = None; - let mut errors = Vec::::new(); - - let mut check_dup = |str: &str, errors: &mut Vec| { - if !keys.insert(str.to_string()) { - errors.push(format!("Symbol `{}` is duplicated", str)); + let mut keys = + HashMap::::with_capacity(input.keywords.len() + input.symbols.len() + 10); + let mut prev_key: Option<(Span, String)> = None; + + let mut check_dup = |span: Span, str: &str, errors: &mut Errors| { + if let Some(prev_span) = keys.get(str) { + errors.error(span, format!("Symbol `{}` is duplicated", str)); + errors.error(*prev_span, format!("location of previous definition")); + } else { + keys.insert(str.to_string(), span); } }; - let mut check_order = |str: &str, errors: &mut Vec| { - if let Some(ref prev_str) = prev_key { + let mut check_order = |span: Span, str: &str, errors: &mut Errors| { + if let Some((prev_span, ref prev_str)) = prev_key { if str < prev_str { - errors.push(format!("Symbol `{}` must precede `{}`", str, prev_str)); + errors.error(span, format!("Symbol `{}` must precede `{}`", str, prev_str)); + errors.error(prev_span, format!("location of previous symbol `{}`", prev_str)); } } - prev_key = Some(str.to_string()); + prev_key = Some((span, str.to_string())); }; // Generate the listed keywords. - for keyword in &input.keywords.0 { + for keyword in input.keywords.iter() { let name = &keyword.name; let value = &keyword.value; - check_dup(&value.value(), &mut errors); + let value_string = value.value(); + check_dup(keyword.name.span(), &value_string, &mut errors); prefill_stream.extend(quote! { #value, }); @@ -120,14 +169,15 @@ pub fn symbols(input: TokenStream) -> TokenStream { } // Generate the listed symbols. - for symbol in &input.symbols.0 { + for symbol in input.symbols.iter() { let name = &symbol.name; let value = match &symbol.value { Some(value) => value.value(), None => name.to_string(), }; - check_dup(&value, &mut errors); - check_order(&name.to_string(), &mut errors); + check_dup(symbol.name.span(), &value, &mut errors); + check_order(symbol.name.span(), &name.to_string(), &mut errors); + prefill_stream.extend(quote! { #value, }); @@ -142,7 +192,7 @@ pub fn symbols(input: TokenStream) -> TokenStream { // Generate symbols for the strings "0", "1", ..., "9". for n in 0..10 { let n = n.to_string(); - check_dup(&n, &mut errors); + check_dup(Span::call_site(), &n, &mut errors); prefill_stream.extend(quote! { #n, }); @@ -152,14 +202,7 @@ pub fn symbols(input: TokenStream) -> TokenStream { counter += 1; } - if !errors.is_empty() { - for error in errors.into_iter() { - eprintln!("error: {}", error) - } - panic!("errors in `Keywords` and/or `Symbols`"); - } - - let tt = TokenStream::from(quote! { + let output = quote! { macro_rules! keywords { () => { #keyword_stream @@ -184,11 +227,16 @@ pub fn symbols(input: TokenStream) -> TokenStream { ]) } } - }); + }; - // To see the generated code generated, uncomment this line, recompile, and - // run the resulting output through `rustfmt`. - //eprintln!("{}", tt); + (output, errors.list) - tt + // To see the generated code, use the "cargo expand" command. + // Do this once to install: + // cargo install cargo-expand + // + // Then, cd to rustc_span and run: + // cargo expand > /tmp/rustc_span_expanded.rs + // + // and read that file. } diff --git a/compiler/rustc_macros/src/symbols/tests.rs b/compiler/rustc_macros/src/symbols/tests.rs new file mode 100644 index 0000000000000..90a28b518020e --- /dev/null +++ b/compiler/rustc_macros/src/symbols/tests.rs @@ -0,0 +1,111 @@ +use super::*; + +// This test is mainly here for interactive development. Use this test while +// you're working on the proc-macro defined in this file. +#[test] +fn test_symbols() { + // We textually include the symbol.rs file, which contains the list of all + // symbols, keywords, and common words. Then we search for the + // `symbols! { ... }` call. + + static SYMBOL_RS_FILE: &str = include_str!("../../../rustc_span/src/symbol.rs"); + + let file = syn::parse_file(SYMBOL_RS_FILE).unwrap(); + let symbols_path: syn::Path = syn::parse_quote!(symbols); + + let m: &syn::ItemMacro = file + .items + .iter() + .filter_map(|i| { + if let syn::Item::Macro(m) = i { + if m.mac.path == symbols_path { Some(m) } else { None } + } else { + None + } + }) + .next() + .expect("did not find `symbols!` macro invocation."); + + let body_tokens = m.mac.tokens.clone(); + + test_symbols_macro(body_tokens, &[]); +} + +fn test_symbols_macro(input: TokenStream, expected_errors: &[&str]) { + let (output, found_errors) = symbols_with_errors(input); + + // It should always parse. + let _parsed_file = syn::parse2::(output).unwrap(); + + assert_eq!( + found_errors.len(), + expected_errors.len(), + "Macro generated a different number of errors than expected" + ); + + for (found_error, &expected_error) in found_errors.iter().zip(expected_errors.iter()) { + let found_error_str = format!("{}", found_error); + assert_eq!(found_error_str, expected_error); + } +} + +#[test] +fn check_dup_keywords() { + let input = quote! { + Keywords { + Crate: "crate", + Crate: "crate", + } + Symbols {} + }; + test_symbols_macro( + input, + &["Symbol `crate` is duplicated", "location of previous definition"], + ); +} + +#[test] +fn check_dup_symbol() { + let input = quote! { + Keywords {} + Symbols { + splat, + splat, + } + }; + test_symbols_macro( + input, + &["Symbol `splat` is duplicated", "location of previous definition"], + ); +} + +#[test] +fn check_dup_symbol_and_keyword() { + let input = quote! { + Keywords { + Splat: "splat", + } + Symbols { + splat, + } + }; + test_symbols_macro( + input, + &["Symbol `splat` is duplicated", "location of previous definition"], + ); +} + +#[test] +fn check_symbol_order() { + let input = quote! { + Keywords {} + Symbols { + zebra, + aardvark, + } + }; + test_symbols_macro( + input, + &["Symbol `aardvark` must precede `zebra`", "location of previous symbol `zebra`"], + ); +} From 0f30b7dd87b8555698ca7e7de360b037230e1f23 Mon Sep 17 00:00:00 2001 From: Justus K Date: Sun, 13 Dec 2020 10:02:36 +0100 Subject: [PATCH 05/11] fix panic if converting ZST Vec to VecDeque --- library/alloc/src/collections/vec_deque/mod.rs | 8 ++++++-- library/alloc/tests/vec_deque.rs | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/library/alloc/src/collections/vec_deque/mod.rs b/library/alloc/src/collections/vec_deque/mod.rs index 57807bc545390..1303b58e31c98 100644 --- a/library/alloc/src/collections/vec_deque/mod.rs +++ b/library/alloc/src/collections/vec_deque/mod.rs @@ -2793,8 +2793,12 @@ impl From> for VecDeque { let len = other.len(); // We need to extend the buf if it's not a power of two, too small - // or doesn't have at least one free space - if !buf.capacity().is_power_of_two() + // or doesn't have at least one free space. + // We check if `T` is a ZST in the first condition, + // because `usize::MAX` (the capacity returned by `capacity()` for ZST) + // is not a power of zero and thus it'll always try + // to reserve more memory which will panic for ZST (rust-lang/rust#78532) + if (!buf.capacity().is_power_of_two() && mem::size_of::() != 0) || (buf.capacity() < (MINIMUM_CAPACITY + 1)) || (buf.capacity() == len) { diff --git a/library/alloc/tests/vec_deque.rs b/library/alloc/tests/vec_deque.rs index 705f0d62fbb7a..a962a5494a9a4 100644 --- a/library/alloc/tests/vec_deque.rs +++ b/library/alloc/tests/vec_deque.rs @@ -1728,3 +1728,10 @@ fn test_zero_sized_push() { } } } + +#[test] +fn test_from_zero_sized_vec() { + let v = vec![(); 100]; + let queue = VecDeque::from(v); + assert!(queue.len(), 100); +} From d75618e7a2517d4b39c4e50ff1644f595a86e92b Mon Sep 17 00:00:00 2001 From: Justus K Date: Sun, 13 Dec 2020 10:21:24 +0100 Subject: [PATCH 06/11] replace assert! with assert_eq! --- library/alloc/tests/vec_deque.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/alloc/tests/vec_deque.rs b/library/alloc/tests/vec_deque.rs index a962a5494a9a4..0919b1325bceb 100644 --- a/library/alloc/tests/vec_deque.rs +++ b/library/alloc/tests/vec_deque.rs @@ -1733,5 +1733,5 @@ fn test_zero_sized_push() { fn test_from_zero_sized_vec() { let v = vec![(); 100]; let queue = VecDeque::from(v); - assert!(queue.len(), 100); + assert_eq!(queue.len(), 100); } From 94fd1d325c2f36b12b8d62d8a7b06a431375e55e Mon Sep 17 00:00:00 2001 From: Stein Somers Date: Mon, 23 Nov 2020 14:41:53 +0100 Subject: [PATCH 07/11] BTreeMap: more expressive local variables in merge --- library/alloc/src/collections/btree/node.rs | 55 ++++++++++----------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/library/alloc/src/collections/btree/node.rs b/library/alloc/src/collections/btree/node.rs index ae5831d514067..a8fffb389efa8 100644 --- a/library/alloc/src/collections/btree/node.rs +++ b/library/alloc/src/collections/btree/node.rs @@ -1352,66 +1352,65 @@ impl<'a, K: 'a, V: 'a> BalancingContext<'a, K, V> { /// /// Panics unless we `.can_merge()`. pub fn merge( - mut self, + self, track_edge_idx: Option>, ) -> Handle, K, V, marker::LeafOrInternal>, marker::Edge> { + let Handle { node: mut parent_node, idx: parent_idx, _marker } = self.parent; + let old_parent_len = parent_node.len(); let mut left_node = self.left_child; - let left_len = left_node.len(); + let old_left_len = left_node.len(); let right_node = self.right_child; let right_len = right_node.len(); + let new_left_len = old_left_len + 1 + right_len; - assert!(left_len + right_len < CAPACITY); + assert!(new_left_len <= CAPACITY); assert!(match track_edge_idx { None => true, - Some(LeftOrRight::Left(idx)) => idx <= left_len, + Some(LeftOrRight::Left(idx)) => idx <= old_left_len, Some(LeftOrRight::Right(idx)) => idx <= right_len, }); unsafe { - *left_node.reborrow_mut().into_len_mut() += right_len as u16 + 1; + *left_node.reborrow_mut().into_len_mut() = new_left_len as u16; - let parent_key = slice_remove( - self.parent.node.reborrow_mut().into_key_area_slice(), - self.parent.idx, - ); - left_node.reborrow_mut().into_key_area_mut_at(left_len).write(parent_key); + let parent_key = + slice_remove(parent_node.reborrow_mut().into_key_area_slice(), parent_idx); + left_node.reborrow_mut().into_key_area_mut_at(old_left_len).write(parent_key); ptr::copy_nonoverlapping( right_node.reborrow().key_area().as_ptr(), - left_node.reborrow_mut().into_key_area_slice().as_mut_ptr().add(left_len + 1), + left_node.reborrow_mut().into_key_area_slice().as_mut_ptr().add(old_left_len + 1), right_len, ); - let parent_val = slice_remove( - self.parent.node.reborrow_mut().into_val_area_slice(), - self.parent.idx, - ); - left_node.reborrow_mut().into_val_area_mut_at(left_len).write(parent_val); + let parent_val = + slice_remove(parent_node.reborrow_mut().into_val_area_slice(), parent_idx); + left_node.reborrow_mut().into_val_area_mut_at(old_left_len).write(parent_val); ptr::copy_nonoverlapping( right_node.reborrow().val_area().as_ptr(), - left_node.reborrow_mut().into_val_area_slice().as_mut_ptr().add(left_len + 1), + left_node.reborrow_mut().into_val_area_slice().as_mut_ptr().add(old_left_len + 1), right_len, ); - slice_remove( - &mut self.parent.node.reborrow_mut().into_edge_area_slice(), - self.parent.idx + 1, - ); - let parent_old_len = self.parent.node.len(); - self.parent.node.correct_childrens_parent_links(self.parent.idx + 1..parent_old_len); - *self.parent.node.reborrow_mut().into_len_mut() -= 1; + slice_remove(&mut parent_node.reborrow_mut().into_edge_area_slice(), parent_idx + 1); + parent_node.correct_childrens_parent_links(parent_idx + 1..old_parent_len); + *parent_node.reborrow_mut().into_len_mut() -= 1; - if self.parent.node.height > 1 { + if parent_node.height > 1 { // SAFETY: the height of the nodes being merged is one below the height // of the node of this edge, thus above zero, so they are internal. let mut left_node = left_node.reborrow_mut().cast_to_internal_unchecked(); let right_node = right_node.cast_to_internal_unchecked(); ptr::copy_nonoverlapping( right_node.reborrow().edge_area().as_ptr(), - left_node.reborrow_mut().into_edge_area_slice().as_mut_ptr().add(left_len + 1), + left_node + .reborrow_mut() + .into_edge_area_slice() + .as_mut_ptr() + .add(old_left_len + 1), right_len + 1, ); - left_node.correct_childrens_parent_links(left_len + 1..=left_len + 1 + right_len); + left_node.correct_childrens_parent_links(old_left_len + 1..new_left_len + 1); Global.deallocate(right_node.node.cast(), Layout::new::>()); } else { @@ -1421,7 +1420,7 @@ impl<'a, K: 'a, V: 'a> BalancingContext<'a, K, V> { let new_idx = match track_edge_idx { None => 0, Some(LeftOrRight::Left(idx)) => idx, - Some(LeftOrRight::Right(idx)) => left_len + 1 + idx, + Some(LeftOrRight::Right(idx)) => old_left_len + 1 + idx, }; Handle::new_edge(left_node, new_idx) } From 09d528ec155f77349001fa7eb6c3ae3363f41412 Mon Sep 17 00:00:00 2001 From: Justus K Date: Sun, 13 Dec 2020 15:18:38 +0100 Subject: [PATCH 08/11] fix typo --- library/alloc/src/collections/vec_deque/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/alloc/src/collections/vec_deque/mod.rs b/library/alloc/src/collections/vec_deque/mod.rs index 1303b58e31c98..9e54c15ea6a0b 100644 --- a/library/alloc/src/collections/vec_deque/mod.rs +++ b/library/alloc/src/collections/vec_deque/mod.rs @@ -2796,7 +2796,7 @@ impl From> for VecDeque { // or doesn't have at least one free space. // We check if `T` is a ZST in the first condition, // because `usize::MAX` (the capacity returned by `capacity()` for ZST) - // is not a power of zero and thus it'll always try + // is not a power of two and thus it'll always try // to reserve more memory which will panic for ZST (rust-lang/rust#78532) if (!buf.capacity().is_power_of_two() && mem::size_of::() != 0) || (buf.capacity() < (MINIMUM_CAPACITY + 1)) From ec0f1d70c955643a77e89d67eb2469ff5d7d6eba Mon Sep 17 00:00:00 2001 From: Alexis Bourget Date: Sun, 13 Dec 2020 17:47:46 +0100 Subject: [PATCH 09/11] Refactor test_lang_string_parse to make it clearer --- src/librustdoc/html/markdown/tests.rs | 143 +++++++++++++------------- 1 file changed, 69 insertions(+), 74 deletions(-) diff --git a/src/librustdoc/html/markdown/tests.rs b/src/librustdoc/html/markdown/tests.rs index 9807d8632c75d..75ff3c5af2fd2 100644 --- a/src/librustdoc/html/markdown/tests.rs +++ b/src/librustdoc/html/markdown/tests.rs @@ -51,82 +51,77 @@ fn test_unique_id() { #[test] fn test_lang_string_parse() { - fn t( - s: &str, - should_panic: bool, - no_run: bool, - ignore: Ignore, - rust: bool, - test_harness: bool, - compile_fail: bool, - allow_fail: bool, - error_codes: Vec, - edition: Option, - ) { - assert_eq!( - LangString::parse(s, ErrorCodes::Yes, true, None), - LangString { - should_panic, - no_run, - ignore, - rust, - test_harness, - compile_fail, - error_codes, - original: s.to_owned(), - allow_fail, - edition, - } - ) + fn t(lg: LangString) { + let s = &lg.original; + assert_eq!(LangString::parse(s, ErrorCodes::Yes, true, None), lg) } - let ignore_foo = Ignore::Some(vec!["foo".to_string()]); - fn v() -> Vec { - Vec::new() - } - - // marker | should_panic | no_run | ignore | rust | test_harness - // | compile_fail | allow_fail | error_codes | edition - t("", false, false, Ignore::None, true, false, false, false, v(), None); - t("rust", false, false, Ignore::None, true, false, false, false, v(), None); - t("sh", false, false, Ignore::None, false, false, false, false, v(), None); - t("ignore", false, false, Ignore::All, true, false, false, false, v(), None); - t("ignore-foo", false, false, ignore_foo, true, false, false, false, v(), None); - t("should_panic", true, false, Ignore::None, true, false, false, false, v(), None); - t("no_run", false, true, Ignore::None, true, false, false, false, v(), None); - t("test_harness", false, false, Ignore::None, true, true, false, false, v(), None); - t("compile_fail", false, true, Ignore::None, true, false, true, false, v(), None); - t("allow_fail", false, false, Ignore::None, true, false, false, true, v(), None); - t("{.no_run .example}", false, true, Ignore::None, true, false, false, false, v(), None); - t("{.sh .should_panic}", true, false, Ignore::None, false, false, false, false, v(), None); - t("{.example .rust}", false, false, Ignore::None, true, false, false, false, v(), None); - t("{.test_harness .rust}", false, false, Ignore::None, true, true, false, false, v(), None); - t("text, no_run", false, true, Ignore::None, false, false, false, false, v(), None); - t("text,no_run", false, true, Ignore::None, false, false, false, false, v(), None); - t( - "edition2015", - false, - false, - Ignore::None, - true, - false, - false, - false, - v(), - Some(Edition::Edition2015), - ); - t( - "edition2018", - false, - false, - Ignore::None, - true, - false, - false, - false, - v(), - Some(Edition::Edition2018), - ); + t(LangString::all_false()); + t(LangString { original: "rust".into(), ..LangString::all_false() }); + t(LangString { original: "sh".into(), rust: false, ..LangString::all_false() }); + t(LangString { original: "ignore".into(), ignore: Ignore::All, ..LangString::all_false() }); + t(LangString { + original: "ignore-foo".into(), + ignore: Ignore::Some(vec!["foo".to_string()]), + ..LangString::all_false() + }); + t(LangString { + original: "should_panic".into(), + should_panic: true, + ..LangString::all_false() + }); + t(LangString { original: "no_run".into(), no_run: true, ..LangString::all_false() }); + t(LangString { + original: "test_harness".into(), + test_harness: true, + ..LangString::all_false() + }); + t(LangString { + original: "compile_fail".into(), + no_run: true, + compile_fail: true, + ..LangString::all_false() + }); + t(LangString { original: "allow_fail".into(), allow_fail: true, ..LangString::all_false() }); + t(LangString { + original: "{.no_run .example}".into(), + no_run: true, + ..LangString::all_false() + }); + t(LangString { + original: "{.sh .should_panic}".into(), + should_panic: true, + rust: false, + ..LangString::all_false() + }); + t(LangString { original: "{.example .rust}".into(), ..LangString::all_false() }); + t(LangString { + original: "{.test_harness .rust}".into(), + test_harness: true, + ..LangString::all_false() + }); + t(LangString { + original: "text, no_run".into(), + no_run: true, + rust: false, + ..LangString::all_false() + }); + t(LangString { + original: "text,no_run".into(), + no_run: true, + rust: false, + ..LangString::all_false() + }); + t(LangString { + original: "edition2015".into(), + edition: Some(Edition::Edition2015), + ..LangString::all_false() + }); + t(LangString { + original: "edition2018".into(), + edition: Some(Edition::Edition2018), + ..LangString::all_false() + }); } #[test] From 1a5b9b037e0fb8cf204e44e604ff4ad09612b8b6 Mon Sep 17 00:00:00 2001 From: Arlie Davis Date: Sun, 13 Dec 2020 13:36:01 -0800 Subject: [PATCH 10/11] ./x.py fmt --- compiler/rustc_macros/src/symbols/tests.rs | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/compiler/rustc_macros/src/symbols/tests.rs b/compiler/rustc_macros/src/symbols/tests.rs index 90a28b518020e..82b4b876978f2 100644 --- a/compiler/rustc_macros/src/symbols/tests.rs +++ b/compiler/rustc_macros/src/symbols/tests.rs @@ -58,10 +58,7 @@ fn check_dup_keywords() { } Symbols {} }; - test_symbols_macro( - input, - &["Symbol `crate` is duplicated", "location of previous definition"], - ); + test_symbols_macro(input, &["Symbol `crate` is duplicated", "location of previous definition"]); } #[test] @@ -73,10 +70,7 @@ fn check_dup_symbol() { splat, } }; - test_symbols_macro( - input, - &["Symbol `splat` is duplicated", "location of previous definition"], - ); + test_symbols_macro(input, &["Symbol `splat` is duplicated", "location of previous definition"]); } #[test] @@ -89,10 +83,7 @@ fn check_dup_symbol_and_keyword() { splat, } }; - test_symbols_macro( - input, - &["Symbol `splat` is duplicated", "location of previous definition"], - ); + test_symbols_macro(input, &["Symbol `splat` is duplicated", "location of previous definition"]); } #[test] From adda964bb5229a7e7fa21d28c77f7ad71afb9b15 Mon Sep 17 00:00:00 2001 From: Yuki Okushi Date: Sat, 12 Dec 2020 15:15:06 +0900 Subject: [PATCH 11/11] Check the number of entries in UI test --- src/tools/tidy/src/ui_tests.rs | 44 +++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/src/tools/tidy/src/ui_tests.rs b/src/tools/tidy/src/ui_tests.rs index 03f4efea983bb..72ffdabd5222f 100644 --- a/src/tools/tidy/src/ui_tests.rs +++ b/src/tools/tidy/src/ui_tests.rs @@ -1,9 +1,51 @@ -//! Tidy check to ensure that there are no stray `.stderr` files in UI test directories. +//! Tidy check to ensure below in UI test directories: +//! - the number of entries in each directory must be less than `ENTRY_LIMIT` +//! - there are no stray `.stderr` files use std::fs; use std::path::Path; +const ENTRY_LIMIT: usize = 1000; +// FIXME: The following limits should be reduced eventually. +const ROOT_ENTRY_LIMIT: usize = 1580; +const ISSUES_ENTRY_LIMIT: usize = 2830; + +fn check_entries(path: &Path, bad: &mut bool) { + let dirs = walkdir::WalkDir::new(&path.join("test/ui")) + .into_iter() + .filter_entry(|e| e.file_type().is_dir()); + for dir in dirs { + if let Ok(dir) = dir { + let dir_path = dir.path(); + + // Use special values for these dirs. + let is_root = path.join("test/ui") == dir_path; + let is_issues_dir = path.join("test/ui/issues") == dir_path; + let limit = if is_root { + ROOT_ENTRY_LIMIT + } else if is_issues_dir { + ISSUES_ENTRY_LIMIT + } else { + ENTRY_LIMIT + }; + + let count = std::fs::read_dir(dir_path).unwrap().count(); + if count >= limit { + tidy_error!( + bad, + "following path contains more than {} entries, \ + you should move the test to some relevant subdirectory (current: {}): {}", + limit, + count, + dir_path.display() + ); + } + } + } +} + pub fn check(path: &Path, bad: &mut bool) { + check_entries(&path, bad); for path in &[&path.join("test/ui"), &path.join("test/ui-fulldeps")] { super::walk_no_read(path, &mut |_| false, &mut |entry| { let file_path = entry.path();