diff --git a/compiler/rustc_macros/src/lib.rs b/compiler/rustc_macros/src/lib.rs index 5c28839c9b7e4..152ae159aed44 100644 --- a/compiler/rustc_macros/src/lib.rs +++ b/compiler/rustc_macros/src/lib.rs @@ -21,7 +21,7 @@ pub fn rustc_queries(input: TokenStream) -> TokenStream { #[proc_macro] pub fn symbols(input: TokenStream) -> TokenStream { - symbols::symbols(input) + symbols::symbols(input.into()).into() } decl_derive!([HashStable, attributes(stable_hasher)] => hash_stable::hash_stable_derive); diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs index 94d4ad78e8d90..f449900d5c245 100644 --- a/compiler/rustc_macros/src/symbols.rs +++ b/compiler/rustc_macros/src/symbols.rs @@ -1,8 +1,35 @@ -use proc_macro::TokenStream; +//! Proc macro which builds the Symbol table +//! +//! # Debugging +//! +//! Since this proc-macro does some non-trivial work, debugging it is important. +//! This proc-macro can be invoked as an ordinary unit test, like so: +//! +//! ```bash +//! cd compiler/rustc_macros +//! cargo test symbols::test_symbols -- --nocapture +//! ``` +//! +//! This unit test finds the `symbols!` invocation in `compiler/rustc_span/src/symbol.rs` +//! and runs it. It verifies that the output token stream can be parsed as valid module +//! items and that no errors were produced. +//! +//! You can also view the generated code by using `cargo expand`: +//! +//! ```bash +//! cargo install cargo-expand # this is necessary only once +//! cd compiler/rustc_span +//! cargo expand > /tmp/rustc_span.rs # it's a big file +//! ``` + +use proc_macro2::{Span, TokenStream}; use quote::quote; -use std::collections::HashSet; +use std::collections::HashMap; use syn::parse::{Parse, ParseStream, Result}; -use syn::{braced, parse_macro_input, Ident, LitStr, Token}; +use syn::{braced, punctuated::Punctuated, Ident, LitStr, Token}; + +#[cfg(test)] +mod tests; mod kw { syn::custom_keyword!(Keywords); @@ -19,7 +46,6 @@ impl Parse for Keyword { let name = input.parse()?; input.parse::()?; let value = input.parse()?; - input.parse::()?; Ok(Keyword { name, value }) } @@ -37,28 +63,14 @@ impl Parse for Symbol { Ok(_) => Some(input.parse()?), Err(_) => None, }; - input.parse::()?; Ok(Symbol { name, value }) } } -/// A type used to greedily parse another type until the input is empty. -struct List(Vec); - -impl Parse for List { - fn parse(input: ParseStream<'_>) -> Result { - let mut list = Vec::new(); - while !input.is_empty() { - list.push(input.parse()?); - } - Ok(List(list)) - } -} - struct Input { - keywords: List, - symbols: List, + keywords: Punctuated, + symbols: Punctuated, } impl Parse for Input { @@ -66,49 +78,86 @@ impl Parse for Input { input.parse::()?; let content; braced!(content in input); - let keywords = content.parse()?; + let keywords = Punctuated::parse_terminated(&content)?; input.parse::()?; let content; braced!(content in input); - let symbols = content.parse()?; + let symbols = Punctuated::parse_terminated(&content)?; Ok(Input { keywords, symbols }) } } +#[derive(Default)] +struct Errors { + list: Vec, +} + +impl Errors { + fn error(&mut self, span: Span, message: String) { + self.list.push(syn::Error::new(span, message)); + } +} + pub fn symbols(input: TokenStream) -> TokenStream { - let input = parse_macro_input!(input as Input); + let (mut output, errors) = symbols_with_errors(input); + + // If we generated any errors, then report them as compiler_error!() macro calls. + // This lets the errors point back to the most relevant span. It also allows us + // to report as many errors as we can during a single run. + output.extend(errors.into_iter().map(|e| e.to_compile_error())); + + output +} + +fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { + let mut errors = Errors::default(); + + let input: Input = match syn::parse2(input) { + Ok(input) => input, + Err(e) => { + // This allows us to display errors at the proper span, while minimizing + // unrelated errors caused by bailing out (and not generating code). + errors.list.push(e); + Input { keywords: Default::default(), symbols: Default::default() } + } + }; let mut keyword_stream = quote! {}; let mut symbols_stream = quote! {}; let mut digits_stream = quote! {}; let mut prefill_stream = quote! {}; let mut counter = 0u32; - let mut keys = HashSet::::new(); - let mut prev_key: Option = None; - let mut errors = Vec::::new(); - - let mut check_dup = |str: &str, errors: &mut Vec| { - if !keys.insert(str.to_string()) { - errors.push(format!("Symbol `{}` is duplicated", str)); + let mut keys = + HashMap::::with_capacity(input.keywords.len() + input.symbols.len() + 10); + let mut prev_key: Option<(Span, String)> = None; + + let mut check_dup = |span: Span, str: &str, errors: &mut Errors| { + if let Some(prev_span) = keys.get(str) { + errors.error(span, format!("Symbol `{}` is duplicated", str)); + errors.error(*prev_span, format!("location of previous definition")); + } else { + keys.insert(str.to_string(), span); } }; - let mut check_order = |str: &str, errors: &mut Vec| { - if let Some(ref prev_str) = prev_key { + let mut check_order = |span: Span, str: &str, errors: &mut Errors| { + if let Some((prev_span, ref prev_str)) = prev_key { if str < prev_str { - errors.push(format!("Symbol `{}` must precede `{}`", str, prev_str)); + errors.error(span, format!("Symbol `{}` must precede `{}`", str, prev_str)); + errors.error(prev_span, format!("location of previous symbol `{}`", prev_str)); } } - prev_key = Some(str.to_string()); + prev_key = Some((span, str.to_string())); }; // Generate the listed keywords. - for keyword in &input.keywords.0 { + for keyword in input.keywords.iter() { let name = &keyword.name; let value = &keyword.value; - check_dup(&value.value(), &mut errors); + let value_string = value.value(); + check_dup(keyword.name.span(), &value_string, &mut errors); prefill_stream.extend(quote! { #value, }); @@ -120,14 +169,15 @@ pub fn symbols(input: TokenStream) -> TokenStream { } // Generate the listed symbols. - for symbol in &input.symbols.0 { + for symbol in input.symbols.iter() { let name = &symbol.name; let value = match &symbol.value { Some(value) => value.value(), None => name.to_string(), }; - check_dup(&value, &mut errors); - check_order(&name.to_string(), &mut errors); + check_dup(symbol.name.span(), &value, &mut errors); + check_order(symbol.name.span(), &name.to_string(), &mut errors); + prefill_stream.extend(quote! { #value, }); @@ -142,7 +192,7 @@ pub fn symbols(input: TokenStream) -> TokenStream { // Generate symbols for the strings "0", "1", ..., "9". for n in 0..10 { let n = n.to_string(); - check_dup(&n, &mut errors); + check_dup(Span::call_site(), &n, &mut errors); prefill_stream.extend(quote! { #n, }); @@ -152,14 +202,7 @@ pub fn symbols(input: TokenStream) -> TokenStream { counter += 1; } - if !errors.is_empty() { - for error in errors.into_iter() { - eprintln!("error: {}", error) - } - panic!("errors in `Keywords` and/or `Symbols`"); - } - - let tt = TokenStream::from(quote! { + let output = quote! { macro_rules! keywords { () => { #keyword_stream @@ -184,11 +227,16 @@ pub fn symbols(input: TokenStream) -> TokenStream { ]) } } - }); + }; - // To see the generated code generated, uncomment this line, recompile, and - // run the resulting output through `rustfmt`. - //eprintln!("{}", tt); + (output, errors.list) - tt + // To see the generated code, use the "cargo expand" command. + // Do this once to install: + // cargo install cargo-expand + // + // Then, cd to rustc_span and run: + // cargo expand > /tmp/rustc_span_expanded.rs + // + // and read that file. } diff --git a/compiler/rustc_macros/src/symbols/tests.rs b/compiler/rustc_macros/src/symbols/tests.rs new file mode 100644 index 0000000000000..82b4b876978f2 --- /dev/null +++ b/compiler/rustc_macros/src/symbols/tests.rs @@ -0,0 +1,102 @@ +use super::*; + +// This test is mainly here for interactive development. Use this test while +// you're working on the proc-macro defined in this file. +#[test] +fn test_symbols() { + // We textually include the symbol.rs file, which contains the list of all + // symbols, keywords, and common words. Then we search for the + // `symbols! { ... }` call. + + static SYMBOL_RS_FILE: &str = include_str!("../../../rustc_span/src/symbol.rs"); + + let file = syn::parse_file(SYMBOL_RS_FILE).unwrap(); + let symbols_path: syn::Path = syn::parse_quote!(symbols); + + let m: &syn::ItemMacro = file + .items + .iter() + .filter_map(|i| { + if let syn::Item::Macro(m) = i { + if m.mac.path == symbols_path { Some(m) } else { None } + } else { + None + } + }) + .next() + .expect("did not find `symbols!` macro invocation."); + + let body_tokens = m.mac.tokens.clone(); + + test_symbols_macro(body_tokens, &[]); +} + +fn test_symbols_macro(input: TokenStream, expected_errors: &[&str]) { + let (output, found_errors) = symbols_with_errors(input); + + // It should always parse. + let _parsed_file = syn::parse2::(output).unwrap(); + + assert_eq!( + found_errors.len(), + expected_errors.len(), + "Macro generated a different number of errors than expected" + ); + + for (found_error, &expected_error) in found_errors.iter().zip(expected_errors.iter()) { + let found_error_str = format!("{}", found_error); + assert_eq!(found_error_str, expected_error); + } +} + +#[test] +fn check_dup_keywords() { + let input = quote! { + Keywords { + Crate: "crate", + Crate: "crate", + } + Symbols {} + }; + test_symbols_macro(input, &["Symbol `crate` is duplicated", "location of previous definition"]); +} + +#[test] +fn check_dup_symbol() { + let input = quote! { + Keywords {} + Symbols { + splat, + splat, + } + }; + test_symbols_macro(input, &["Symbol `splat` is duplicated", "location of previous definition"]); +} + +#[test] +fn check_dup_symbol_and_keyword() { + let input = quote! { + Keywords { + Splat: "splat", + } + Symbols { + splat, + } + }; + test_symbols_macro(input, &["Symbol `splat` is duplicated", "location of previous definition"]); +} + +#[test] +fn check_symbol_order() { + let input = quote! { + Keywords {} + Symbols { + zebra, + aardvark, + } + }; + test_symbols_macro( + input, + &["Symbol `aardvark` must precede `zebra`", "location of previous symbol `zebra`"], + ); +} diff --git a/library/alloc/src/collections/btree/node.rs b/library/alloc/src/collections/btree/node.rs index 0fec2b70a855e..22e179af4a932 100644 --- a/library/alloc/src/collections/btree/node.rs +++ b/library/alloc/src/collections/btree/node.rs @@ -1355,66 +1355,65 @@ impl<'a, K: 'a, V: 'a> BalancingContext<'a, K, V> { /// /// Panics unless we `.can_merge()`. pub fn merge( - mut self, + self, track_edge_idx: Option>, ) -> Handle, K, V, marker::LeafOrInternal>, marker::Edge> { + let Handle { node: mut parent_node, idx: parent_idx, _marker } = self.parent; + let old_parent_len = parent_node.len(); let mut left_node = self.left_child; - let left_len = left_node.len(); + let old_left_len = left_node.len(); let right_node = self.right_child; let right_len = right_node.len(); + let new_left_len = old_left_len + 1 + right_len; - assert!(left_len + right_len < CAPACITY); + assert!(new_left_len <= CAPACITY); assert!(match track_edge_idx { None => true, - Some(LeftOrRight::Left(idx)) => idx <= left_len, + Some(LeftOrRight::Left(idx)) => idx <= old_left_len, Some(LeftOrRight::Right(idx)) => idx <= right_len, }); unsafe { - *left_node.reborrow_mut().into_len_mut() += right_len as u16 + 1; + *left_node.reborrow_mut().into_len_mut() = new_left_len as u16; - let parent_key = slice_remove( - self.parent.node.reborrow_mut().into_key_area_slice(), - self.parent.idx, - ); - left_node.reborrow_mut().into_key_area_mut_at(left_len).write(parent_key); + let parent_key = + slice_remove(parent_node.reborrow_mut().into_key_area_slice(), parent_idx); + left_node.reborrow_mut().into_key_area_mut_at(old_left_len).write(parent_key); ptr::copy_nonoverlapping( right_node.reborrow().key_area().as_ptr(), - left_node.reborrow_mut().into_key_area_slice().as_mut_ptr().add(left_len + 1), + left_node.reborrow_mut().into_key_area_slice().as_mut_ptr().add(old_left_len + 1), right_len, ); - let parent_val = slice_remove( - self.parent.node.reborrow_mut().into_val_area_slice(), - self.parent.idx, - ); - left_node.reborrow_mut().into_val_area_mut_at(left_len).write(parent_val); + let parent_val = + slice_remove(parent_node.reborrow_mut().into_val_area_slice(), parent_idx); + left_node.reborrow_mut().into_val_area_mut_at(old_left_len).write(parent_val); ptr::copy_nonoverlapping( right_node.reborrow().val_area().as_ptr(), - left_node.reborrow_mut().into_val_area_slice().as_mut_ptr().add(left_len + 1), + left_node.reborrow_mut().into_val_area_slice().as_mut_ptr().add(old_left_len + 1), right_len, ); - slice_remove( - &mut self.parent.node.reborrow_mut().into_edge_area_slice(), - self.parent.idx + 1, - ); - let parent_old_len = self.parent.node.len(); - self.parent.node.correct_childrens_parent_links(self.parent.idx + 1..parent_old_len); - *self.parent.node.reborrow_mut().into_len_mut() -= 1; + slice_remove(&mut parent_node.reborrow_mut().into_edge_area_slice(), parent_idx + 1); + parent_node.correct_childrens_parent_links(parent_idx + 1..old_parent_len); + *parent_node.reborrow_mut().into_len_mut() -= 1; - if self.parent.node.height > 1 { + if parent_node.height > 1 { // SAFETY: the height of the nodes being merged is one below the height // of the node of this edge, thus above zero, so they are internal. let mut left_node = left_node.reborrow_mut().cast_to_internal_unchecked(); let right_node = right_node.cast_to_internal_unchecked(); ptr::copy_nonoverlapping( right_node.reborrow().edge_area().as_ptr(), - left_node.reborrow_mut().into_edge_area_slice().as_mut_ptr().add(left_len + 1), + left_node + .reborrow_mut() + .into_edge_area_slice() + .as_mut_ptr() + .add(old_left_len + 1), right_len + 1, ); - left_node.correct_childrens_parent_links(left_len + 1..=left_len + 1 + right_len); + left_node.correct_childrens_parent_links(old_left_len + 1..new_left_len + 1); Global.deallocate(right_node.node.cast(), Layout::new::>()); } else { @@ -1424,7 +1423,7 @@ impl<'a, K: 'a, V: 'a> BalancingContext<'a, K, V> { let new_idx = match track_edge_idx { None => 0, Some(LeftOrRight::Left(idx)) => idx, - Some(LeftOrRight::Right(idx)) => left_len + 1 + idx, + Some(LeftOrRight::Right(idx)) => old_left_len + 1 + idx, }; Handle::new_edge(left_node, new_idx) } diff --git a/library/alloc/src/collections/vec_deque/mod.rs b/library/alloc/src/collections/vec_deque/mod.rs index 57807bc545390..9e54c15ea6a0b 100644 --- a/library/alloc/src/collections/vec_deque/mod.rs +++ b/library/alloc/src/collections/vec_deque/mod.rs @@ -2793,8 +2793,12 @@ impl From> for VecDeque { let len = other.len(); // We need to extend the buf if it's not a power of two, too small - // or doesn't have at least one free space - if !buf.capacity().is_power_of_two() + // or doesn't have at least one free space. + // We check if `T` is a ZST in the first condition, + // because `usize::MAX` (the capacity returned by `capacity()` for ZST) + // is not a power of two and thus it'll always try + // to reserve more memory which will panic for ZST (rust-lang/rust#78532) + if (!buf.capacity().is_power_of_two() && mem::size_of::() != 0) || (buf.capacity() < (MINIMUM_CAPACITY + 1)) || (buf.capacity() == len) { diff --git a/library/alloc/src/macros.rs b/library/alloc/src/macros.rs index a992d768d6314..7d4eff6185dbe 100644 --- a/library/alloc/src/macros.rs +++ b/library/alloc/src/macros.rs @@ -29,6 +29,10 @@ /// to the same boxed integer value, not five references pointing to independently /// boxed integers. /// +/// Also, note that `vec![expr; 0]` is allowed, and produces an empty vector. +/// This will still evaluate `expr`, however, and immediately drop the resulting value, so +/// be mindful of side effects. +/// /// [`Vec`]: crate::vec::Vec #[cfg(not(test))] #[macro_export] diff --git a/library/alloc/tests/vec_deque.rs b/library/alloc/tests/vec_deque.rs index 705f0d62fbb7a..0919b1325bceb 100644 --- a/library/alloc/tests/vec_deque.rs +++ b/library/alloc/tests/vec_deque.rs @@ -1728,3 +1728,10 @@ fn test_zero_sized_push() { } } } + +#[test] +fn test_from_zero_sized_vec() { + let v = vec![(); 100]; + let queue = VecDeque::from(v); + assert_eq!(queue.len(), 100); +} diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index 7e7a28be2b0e5..5d93016cadb37 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -667,10 +667,10 @@ impl OsStr { /// Gets the underlying byte representation. /// - /// Note: it is *crucial* that this API is private, to avoid + /// Note: it is *crucial* that this API is not externally public, to avoid /// revealing the internal, platform-specific encodings. #[inline] - fn bytes(&self) -> &[u8] { + pub(crate) fn bytes(&self) -> &[u8] { unsafe { &*(&self.inner as *const _ as *const [u8]) } } diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs index ff94fda5a227b..896d6c2a64c60 100644 --- a/library/std/src/path/tests.rs +++ b/library/std/src/path/tests.rs @@ -873,12 +873,12 @@ pub fn test_decompositions_windows() { ); t!("\\\\.\\foo/bar", - iter: ["\\\\.\\foo/bar", "\\"], + iter: ["\\\\.\\foo", "\\", "bar"], has_root: true, is_absolute: true, - parent: None, - file_name: None, - file_stem: None, + parent: Some("\\\\.\\foo/"), + file_name: Some("bar"), + file_stem: Some("bar"), extension: None ); diff --git a/library/std/src/primitive_docs.rs b/library/std/src/primitive_docs.rs index 55171ef2292d7..7aca5451ebc20 100644 --- a/library/std/src/primitive_docs.rs +++ b/library/std/src/primitive_docs.rs @@ -489,6 +489,10 @@ mod prim_pointer {} /// * A repeat expression `[x; N]`, which produces an array with `N` copies of `x`. /// The type of `x` must be [`Copy`]. /// +/// Note that `[expr; 0]` is allowed, and produces an empty array. +/// This will still evaluate `expr`, however, and immediately drop the resulting value, so +/// be mindful of side effects. +/// /// Arrays of *any* size implement the following traits if the element type allows it: /// /// - [`Copy`] diff --git a/library/std/src/sys/windows/path.rs b/library/std/src/sys/windows/path.rs index dda3ed68cfc95..c10c0df4a3a99 100644 --- a/library/std/src/sys/windows/path.rs +++ b/library/std/src/sys/windows/path.rs @@ -8,15 +8,12 @@ mod tests; pub const MAIN_SEP_STR: &str = "\\"; pub const MAIN_SEP: char = '\\'; -// The unsafety here stems from converting between `&OsStr` and `&[u8]` -// and back. This is safe to do because (1) we only look at ASCII -// contents of the encoding and (2) new &OsStr values are produced -// only from ASCII-bounded slices of existing &OsStr values. -fn os_str_as_u8_slice(s: &OsStr) -> &[u8] { - unsafe { mem::transmute(s) } -} -unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr { - mem::transmute(s) +// Safety: `bytes` must be a valid wtf8 encoded slice +#[inline] +unsafe fn bytes_as_os_str(bytes: &[u8]) -> &OsStr { + // &OsStr is layout compatible with &Slice, which is compatible with &Wtf8, + // which is compatible with &[u8]. + mem::transmute(bytes) } #[inline] @@ -29,79 +26,116 @@ pub fn is_verbatim_sep(b: u8) -> bool { b == b'\\' } -// In most DOS systems, it is not possible to have more than 26 drive letters. -// See . -pub fn is_valid_drive_letter(disk: u8) -> bool { - disk.is_ascii_alphabetic() -} - pub fn parse_prefix(path: &OsStr) -> Option> { use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC}; - let path = os_str_as_u8_slice(path); - - // \\ - if let Some(path) = path.strip_prefix(br"\\") { - // \\?\ - if let Some(path) = path.strip_prefix(br"?\") { - // \\?\UNC\server\share - if let Some(path) = path.strip_prefix(br"UNC\") { - let (server, share) = match get_first_two_components(path, is_verbatim_sep) { - Some((server, share)) => unsafe { - (u8_slice_as_os_str(server), u8_slice_as_os_str(share)) - }, - None => (unsafe { u8_slice_as_os_str(path) }, OsStr::new("")), - }; - return Some(VerbatimUNC(server, share)); + if let Some(path) = strip_prefix(path, r"\\") { + // \\ + if let Some(path) = strip_prefix(path, r"?\") { + // \\?\ + if let Some(path) = strip_prefix(path, r"UNC\") { + // \\?\UNC\server\share + + let (server, path) = parse_next_component(path, true); + let (share, _) = parse_next_component(path, true); + + Some(VerbatimUNC(server, share)) } else { - // \\?\path - match path { - // \\?\C:\path - [c, b':', b'\\', ..] if is_valid_drive_letter(*c) => { - return Some(VerbatimDisk(c.to_ascii_uppercase())); - } - // \\?\cat_pics - _ => { - let idx = path.iter().position(|&b| b == b'\\').unwrap_or(path.len()); - let slice = &path[..idx]; - return Some(Verbatim(unsafe { u8_slice_as_os_str(slice) })); - } + let (prefix, _) = parse_next_component(path, true); + + // in verbatim paths only recognize an exact drive prefix + if let Some(drive) = parse_drive_exact(prefix) { + // \\?\C: + Some(VerbatimDisk(drive)) + } else { + // \\?\prefix + Some(Verbatim(prefix)) } } - } else if let Some(path) = path.strip_prefix(b".\\") { + } else if let Some(path) = strip_prefix(path, r".\") { // \\.\COM42 - let idx = path.iter().position(|&b| b == b'\\').unwrap_or(path.len()); - let slice = &path[..idx]; - return Some(DeviceNS(unsafe { u8_slice_as_os_str(slice) })); - } - match get_first_two_components(path, is_sep_byte) { - Some((server, share)) if !server.is_empty() && !share.is_empty() => { + let (prefix, _) = parse_next_component(path, false); + Some(DeviceNS(prefix)) + } else { + let (server, path) = parse_next_component(path, false); + let (share, _) = parse_next_component(path, false); + + if !server.is_empty() && !share.is_empty() { // \\server\share - return Some(unsafe { UNC(u8_slice_as_os_str(server), u8_slice_as_os_str(share)) }); + Some(UNC(server, share)) + } else { + // no valid prefix beginning with "\\" recognized + None } - _ => {} } - } else if let [c, b':', ..] = path { + } else if let Some(drive) = parse_drive(path) { // C: - if is_valid_drive_letter(*c) { - return Some(Disk(c.to_ascii_uppercase())); - } + Some(Disk(drive)) + } else { + // no prefix + None } - None } -/// Returns the first two path components with predicate `f`. -/// -/// The two components returned will be use by caller -/// to construct `VerbatimUNC` or `UNC` Windows path prefix. -/// -/// Returns [`None`] if there are no separators in path. -fn get_first_two_components(path: &[u8], f: fn(u8) -> bool) -> Option<(&[u8], &[u8])> { - let idx = path.iter().position(|&x| f(x))?; - // Panic safe - // The max `idx+1` is `path.len()` and `path[path.len()..]` is a valid index. - let (first, path) = (&path[..idx], &path[idx + 1..]); - let idx = path.iter().position(|&x| f(x)).unwrap_or(path.len()); - let second = &path[..idx]; - Some((first, second)) +// Parses a drive prefix, e.g. "C:" and "C:\whatever" +fn parse_drive(prefix: &OsStr) -> Option { + // In most DOS systems, it is not possible to have more than 26 drive letters. + // See . + fn is_valid_drive_letter(drive: &u8) -> bool { + drive.is_ascii_alphabetic() + } + + match prefix.bytes() { + [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), + _ => None, + } +} + +// Parses a drive prefix exactly, e.g. "C:" +fn parse_drive_exact(prefix: &OsStr) -> Option { + // only parse two bytes: the drive letter and the drive separator + if prefix.len() == 2 { parse_drive(prefix) } else { None } +} + +fn strip_prefix<'a>(path: &'a OsStr, prefix: &str) -> Option<&'a OsStr> { + // `path` and `prefix` are valid wtf8 and utf8 encoded slices respectively, `path[prefix.len()]` + // is thus a code point boundary and `path[prefix.len()..]` is a valid wtf8 encoded slice. + match path.bytes().strip_prefix(prefix.as_bytes()) { + Some(path) => unsafe { Some(bytes_as_os_str(path)) }, + None => None, + } +} + +// Parse the next path component. +// +// Returns the next component and the rest of the path excluding the component and separator. +// Does not recognize `/` as a separator character if `verbatim` is true. +fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { + let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; + + match path.bytes().iter().position(|&x| separator(x)) { + Some(separator_start) => { + let mut separator_end = separator_start + 1; + + // a series of multiple separator characters is treated as a single separator, + // except in verbatim paths + while !verbatim && separator_end < path.len() && separator(path.bytes()[separator_end]) + { + separator_end += 1; + } + + let component = &path.bytes()[..separator_start]; + + // Panic safe + // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. + let path = &path.bytes()[separator_end..]; + + // Safety: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') + // is encoded in a single byte, therefore `bytes[separator_start]` and + // `bytes[separator_end]` must be code point boundaries and thus + // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. + unsafe { (bytes_as_os_str(component), bytes_as_os_str(path)) } + } + None => (path, OsStr::new("")), + } } diff --git a/library/std/src/sys/windows/path/tests.rs b/library/std/src/sys/windows/path/tests.rs index fbac1dc1ca17a..9675da6ff883b 100644 --- a/library/std/src/sys/windows/path/tests.rs +++ b/library/std/src/sys/windows/path/tests.rs @@ -1,21 +1,44 @@ use super::*; #[test] -fn test_get_first_two_components() { +fn test_parse_next_component() { assert_eq!( - get_first_two_components(br"server\share", is_verbatim_sep), - Some((&b"server"[..], &b"share"[..])), + parse_next_component(OsStr::new(r"server\share"), true), + (OsStr::new(r"server"), OsStr::new(r"share")) ); assert_eq!( - get_first_two_components(br"server\", is_verbatim_sep), - Some((&b"server"[..], &b""[..])) + parse_next_component(OsStr::new(r"server/share"), true), + (OsStr::new(r"server/share"), OsStr::new(r"")) ); assert_eq!( - get_first_two_components(br"\server\", is_verbatim_sep), - Some((&b""[..], &b"server"[..])) + parse_next_component(OsStr::new(r"server/share"), false), + (OsStr::new(r"server"), OsStr::new(r"share")) ); - assert_eq!(get_first_two_components(br"there are no separators here", is_verbatim_sep), None,); + assert_eq!( + parse_next_component(OsStr::new(r"server\"), false), + (OsStr::new(r"server"), OsStr::new(r"")) + ); + + assert_eq!( + parse_next_component(OsStr::new(r"\server\"), false), + (OsStr::new(r""), OsStr::new(r"server\")) + ); + + assert_eq!( + parse_next_component(OsStr::new(r"servershare"), false), + (OsStr::new(r"servershare"), OsStr::new("")) + ); + + assert_eq!( + parse_next_component(OsStr::new(r"server/\//\/\\\\/////\/share"), false), + (OsStr::new(r"server"), OsStr::new(r"share")) + ); + + assert_eq!( + parse_next_component(OsStr::new(r"server\\\\\\\\\\\\\\share"), true), + (OsStr::new(r"server"), OsStr::new(r"\\\\\\\\\\\\\share")) + ); } diff --git a/src/librustdoc/html/markdown/tests.rs b/src/librustdoc/html/markdown/tests.rs index 9807d8632c75d..75ff3c5af2fd2 100644 --- a/src/librustdoc/html/markdown/tests.rs +++ b/src/librustdoc/html/markdown/tests.rs @@ -51,82 +51,77 @@ fn test_unique_id() { #[test] fn test_lang_string_parse() { - fn t( - s: &str, - should_panic: bool, - no_run: bool, - ignore: Ignore, - rust: bool, - test_harness: bool, - compile_fail: bool, - allow_fail: bool, - error_codes: Vec, - edition: Option, - ) { - assert_eq!( - LangString::parse(s, ErrorCodes::Yes, true, None), - LangString { - should_panic, - no_run, - ignore, - rust, - test_harness, - compile_fail, - error_codes, - original: s.to_owned(), - allow_fail, - edition, - } - ) + fn t(lg: LangString) { + let s = &lg.original; + assert_eq!(LangString::parse(s, ErrorCodes::Yes, true, None), lg) } - let ignore_foo = Ignore::Some(vec!["foo".to_string()]); - fn v() -> Vec { - Vec::new() - } - - // marker | should_panic | no_run | ignore | rust | test_harness - // | compile_fail | allow_fail | error_codes | edition - t("", false, false, Ignore::None, true, false, false, false, v(), None); - t("rust", false, false, Ignore::None, true, false, false, false, v(), None); - t("sh", false, false, Ignore::None, false, false, false, false, v(), None); - t("ignore", false, false, Ignore::All, true, false, false, false, v(), None); - t("ignore-foo", false, false, ignore_foo, true, false, false, false, v(), None); - t("should_panic", true, false, Ignore::None, true, false, false, false, v(), None); - t("no_run", false, true, Ignore::None, true, false, false, false, v(), None); - t("test_harness", false, false, Ignore::None, true, true, false, false, v(), None); - t("compile_fail", false, true, Ignore::None, true, false, true, false, v(), None); - t("allow_fail", false, false, Ignore::None, true, false, false, true, v(), None); - t("{.no_run .example}", false, true, Ignore::None, true, false, false, false, v(), None); - t("{.sh .should_panic}", true, false, Ignore::None, false, false, false, false, v(), None); - t("{.example .rust}", false, false, Ignore::None, true, false, false, false, v(), None); - t("{.test_harness .rust}", false, false, Ignore::None, true, true, false, false, v(), None); - t("text, no_run", false, true, Ignore::None, false, false, false, false, v(), None); - t("text,no_run", false, true, Ignore::None, false, false, false, false, v(), None); - t( - "edition2015", - false, - false, - Ignore::None, - true, - false, - false, - false, - v(), - Some(Edition::Edition2015), - ); - t( - "edition2018", - false, - false, - Ignore::None, - true, - false, - false, - false, - v(), - Some(Edition::Edition2018), - ); + t(LangString::all_false()); + t(LangString { original: "rust".into(), ..LangString::all_false() }); + t(LangString { original: "sh".into(), rust: false, ..LangString::all_false() }); + t(LangString { original: "ignore".into(), ignore: Ignore::All, ..LangString::all_false() }); + t(LangString { + original: "ignore-foo".into(), + ignore: Ignore::Some(vec!["foo".to_string()]), + ..LangString::all_false() + }); + t(LangString { + original: "should_panic".into(), + should_panic: true, + ..LangString::all_false() + }); + t(LangString { original: "no_run".into(), no_run: true, ..LangString::all_false() }); + t(LangString { + original: "test_harness".into(), + test_harness: true, + ..LangString::all_false() + }); + t(LangString { + original: "compile_fail".into(), + no_run: true, + compile_fail: true, + ..LangString::all_false() + }); + t(LangString { original: "allow_fail".into(), allow_fail: true, ..LangString::all_false() }); + t(LangString { + original: "{.no_run .example}".into(), + no_run: true, + ..LangString::all_false() + }); + t(LangString { + original: "{.sh .should_panic}".into(), + should_panic: true, + rust: false, + ..LangString::all_false() + }); + t(LangString { original: "{.example .rust}".into(), ..LangString::all_false() }); + t(LangString { + original: "{.test_harness .rust}".into(), + test_harness: true, + ..LangString::all_false() + }); + t(LangString { + original: "text, no_run".into(), + no_run: true, + rust: false, + ..LangString::all_false() + }); + t(LangString { + original: "text,no_run".into(), + no_run: true, + rust: false, + ..LangString::all_false() + }); + t(LangString { + original: "edition2015".into(), + edition: Some(Edition::Edition2015), + ..LangString::all_false() + }); + t(LangString { + original: "edition2018".into(), + edition: Some(Edition::Edition2018), + ..LangString::all_false() + }); } #[test] diff --git a/src/tools/tidy/src/ui_tests.rs b/src/tools/tidy/src/ui_tests.rs index 03f4efea983bb..72ffdabd5222f 100644 --- a/src/tools/tidy/src/ui_tests.rs +++ b/src/tools/tidy/src/ui_tests.rs @@ -1,9 +1,51 @@ -//! Tidy check to ensure that there are no stray `.stderr` files in UI test directories. +//! Tidy check to ensure below in UI test directories: +//! - the number of entries in each directory must be less than `ENTRY_LIMIT` +//! - there are no stray `.stderr` files use std::fs; use std::path::Path; +const ENTRY_LIMIT: usize = 1000; +// FIXME: The following limits should be reduced eventually. +const ROOT_ENTRY_LIMIT: usize = 1580; +const ISSUES_ENTRY_LIMIT: usize = 2830; + +fn check_entries(path: &Path, bad: &mut bool) { + let dirs = walkdir::WalkDir::new(&path.join("test/ui")) + .into_iter() + .filter_entry(|e| e.file_type().is_dir()); + for dir in dirs { + if let Ok(dir) = dir { + let dir_path = dir.path(); + + // Use special values for these dirs. + let is_root = path.join("test/ui") == dir_path; + let is_issues_dir = path.join("test/ui/issues") == dir_path; + let limit = if is_root { + ROOT_ENTRY_LIMIT + } else if is_issues_dir { + ISSUES_ENTRY_LIMIT + } else { + ENTRY_LIMIT + }; + + let count = std::fs::read_dir(dir_path).unwrap().count(); + if count >= limit { + tidy_error!( + bad, + "following path contains more than {} entries, \ + you should move the test to some relevant subdirectory (current: {}): {}", + limit, + count, + dir_path.display() + ); + } + } + } +} + pub fn check(path: &Path, bad: &mut bool) { + check_entries(&path, bad); for path in &[&path.join("test/ui"), &path.join("test/ui-fulldeps")] { super::walk_no_read(path, &mut |_| false, &mut |entry| { let file_path = entry.path();