From a93d31603f80e16a185cda3377c328ae85273325 Mon Sep 17 00:00:00 2001 From: Russell Cohen Date: Thu, 23 Apr 2020 15:51:12 -0400 Subject: [PATCH] Fix bug in shebang handling Shebang handling was too agressive in stripping out the first line in cases where it is actually _not_ a shebang, but instead, valid rust (#70528). This is a second attempt at resolving this issue (the first attempt was flawed, for, among other reasons, causing an ICE in certain cases (#71372, #71471). The behavior is now codified by a number of UI tests, but simply: For the first line to be a shebang, the following must all be true: 1. The line must start with `#!` 2. The line must contain a non whitespace character after `#!` 3. The next character in the file, ignoring comments & whitespace must not be `[` I believe this is a strict superset of what we used to allow, so perhaps a crater run is unnecessary, but probably not a terrible idea. --- src/librustc_lexer/src/lib.rs | 26 ++++++--- src/librustc_lexer/src/tests.rs | 57 +++++++++++++++++++ .../parser/shebang/issue-71471-ignore-tidy.rs | 2 + .../shebang/issue-71471-ignore-tidy.stderr | 8 +++ .../ui/parser/shebang/multiline-attrib.rs | 7 +++ src/test/ui/parser/shebang/regular-attrib.rs | 5 ++ .../ui/parser/shebang/shebang-and-attrib.rs | 9 +++ src/test/ui/parser/shebang/shebang-comment.rs | 6 ++ .../parser/shebang/shebang-must-start-file.rs | 6 ++ .../shebang/shebang-must-start-file.stderr | 8 +++ src/test/ui/parser/shebang/sneaky-attrib.rs | 16 ++++++ src/test/ui/parser/shebang/valid-shebang.rs | 6 ++ src/tools/tidy/src/style.rs | 5 ++ 13 files changed, 154 insertions(+), 7 deletions(-) create mode 100644 src/test/ui/parser/shebang/issue-71471-ignore-tidy.rs create mode 100644 src/test/ui/parser/shebang/issue-71471-ignore-tidy.stderr create mode 100644 src/test/ui/parser/shebang/multiline-attrib.rs create mode 100644 src/test/ui/parser/shebang/regular-attrib.rs create mode 100644 src/test/ui/parser/shebang/shebang-and-attrib.rs create mode 100644 src/test/ui/parser/shebang/shebang-comment.rs create mode 100644 src/test/ui/parser/shebang/shebang-must-start-file.rs create mode 100644 src/test/ui/parser/shebang/shebang-must-start-file.stderr create mode 100644 src/test/ui/parser/shebang/sneaky-attrib.rs create mode 100644 src/test/ui/parser/shebang/valid-shebang.rs diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs index e44feee96607a..fe6785de009a1 100644 --- a/src/librustc_lexer/src/lib.rs +++ b/src/librustc_lexer/src/lib.rs @@ -236,16 +236,28 @@ pub enum Base { } /// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun", -/// but shebang isn't a part of rust syntax, so this function -/// skips the line if it starts with a shebang ("#!"). -/// Line won't be skipped if it represents a valid Rust syntax -/// (e.g. "#![deny(missing_docs)]"). +/// but shebang isn't a part of rust syntax. pub fn strip_shebang(input: &str) -> Option { - debug_assert!(!input.is_empty()); - if !input.starts_with("#!") || input.starts_with("#![") { + let first_line = input.lines().next()?; + // A shebang is intentionally loosely defined as `#! [non whitespace]` on the first line. + let could_be_shebang = + first_line.starts_with("#!") && first_line[2..].contains(|c| !is_whitespace(c)); + if !could_be_shebang { return None; } - Some(input.find('\n').unwrap_or(input.len())) + let non_whitespace_tokens = tokenize(input).map(|tok| tok.kind).filter(|tok| + !matches!(tok, TokenKind::LineComment | TokenKind::BlockComment { .. } | TokenKind::Whitespace) + ); + let prefix = [TokenKind::Pound, TokenKind::Not, TokenKind::OpenBracket]; + let starts_with_attribute = non_whitespace_tokens.take(3).eq(prefix.iter().copied()); + if starts_with_attribute { + // If the file starts with #![ then it's definitely not a shebang -- it couldn't be + // a rust program since a Rust program can't start with `[` + None + } else { + // It's a #!... and there isn't a `[` in sight, must be a shebang + Some(first_line.len()) + } } /// Parses the first token from the provided input string. diff --git a/src/librustc_lexer/src/tests.rs b/src/librustc_lexer/src/tests.rs index 06fc159fe2516..725799374fc64 100644 --- a/src/librustc_lexer/src/tests.rs +++ b/src/librustc_lexer/src/tests.rs @@ -145,4 +145,61 @@ mod tests { }), ); } + + #[test] + fn test_valid_shebang() { + // https://github.com/rust-lang/rust/issues/70528 + let input = "#!/usr/bin/rustrun\nlet x = 5;"; + assert_eq!(strip_shebang(input), Some(18)); + } + + #[test] + fn test_invalid_shebang_valid_rust_syntax() { + // https://github.com/rust-lang/rust/issues/70528 + let input = "#! [bad_attribute]"; + assert_eq!(strip_shebang(input), None); + } + + #[test] + fn test_shebang_second_line() { + // Because shebangs are interpreted by the kernel, they must be on the first line + let input = "\n#!/bin/bash"; + assert_eq!(strip_shebang(input), None); + } + + #[test] + fn test_shebang_space() { + let input = "#! /bin/bash"; + assert_eq!(strip_shebang(input), Some(input.len())); + } + + #[test] + fn test_shebang_empty_shebang() { + let input = "#! \n[attribute(foo)]"; + assert_eq!(strip_shebang(input), None); + } + + #[test] + fn test_invalid_shebang_comment() { + let input = "#!//bin/ami/a/comment\n["; + assert_eq!(strip_shebang(input), None) + } + + #[test] + fn test_invalid_shebang_another_comment() { + let input = "#!/*bin/ami/a/comment*/\n[attribute"; + assert_eq!(strip_shebang(input), None) + } + + #[test] + fn test_shebang_valid_rust_after() { + let input = "#!/*bin/ami/a/comment*/\npub fn main() {}"; + assert_eq!(strip_shebang(input), Some(23)) + } + + #[test] + fn test_shebang_followed_by_attrib() { + let input = "#!/bin/rust-scripts\n#![allow_unused(true)]"; + assert_eq!(strip_shebang(input), Some(19)); + } } diff --git a/src/test/ui/parser/shebang/issue-71471-ignore-tidy.rs b/src/test/ui/parser/shebang/issue-71471-ignore-tidy.rs new file mode 100644 index 0000000000000..a2505180884aa --- /dev/null +++ b/src/test/ui/parser/shebang/issue-71471-ignore-tidy.rs @@ -0,0 +1,2 @@ + +#!B //~ expected `[`, found `B` diff --git a/src/test/ui/parser/shebang/issue-71471-ignore-tidy.stderr b/src/test/ui/parser/shebang/issue-71471-ignore-tidy.stderr new file mode 100644 index 0000000000000..896a9dc83d8b9 --- /dev/null +++ b/src/test/ui/parser/shebang/issue-71471-ignore-tidy.stderr @@ -0,0 +1,8 @@ +error: expected `[`, found `B` + --> $DIR/issue-71471-ignore-tidy.rs:2:3 + | +LL | #!B + | ^ expected `[` + +error: aborting due to previous error + diff --git a/src/test/ui/parser/shebang/multiline-attrib.rs b/src/test/ui/parser/shebang/multiline-attrib.rs new file mode 100644 index 0000000000000..931c94c7fba03 --- /dev/null +++ b/src/test/ui/parser/shebang/multiline-attrib.rs @@ -0,0 +1,7 @@ +#! +[allow(unused_variables)] +// check-pass + +fn main() { + let x = 5; +} diff --git a/src/test/ui/parser/shebang/regular-attrib.rs b/src/test/ui/parser/shebang/regular-attrib.rs new file mode 100644 index 0000000000000..ca8fb0830ffb1 --- /dev/null +++ b/src/test/ui/parser/shebang/regular-attrib.rs @@ -0,0 +1,5 @@ +#![allow(unused_variables)] +// check-pass +fn main() { + let x = 5; +} diff --git a/src/test/ui/parser/shebang/shebang-and-attrib.rs b/src/test/ui/parser/shebang/shebang-and-attrib.rs new file mode 100644 index 0000000000000..61b89c655a3fc --- /dev/null +++ b/src/test/ui/parser/shebang/shebang-and-attrib.rs @@ -0,0 +1,9 @@ +#!/usr/bin/env run-cargo-script + +// check-pass +#![allow(unused_variables)] + + +fn main() { + let x = 5; +} diff --git a/src/test/ui/parser/shebang/shebang-comment.rs b/src/test/ui/parser/shebang/shebang-comment.rs new file mode 100644 index 0000000000000..2b1ab0c574d26 --- /dev/null +++ b/src/test/ui/parser/shebang/shebang-comment.rs @@ -0,0 +1,6 @@ +#!//bin/bash + +// check-pass +fn main() { + println!("a valid shebang (that is also a rust comment)") +} diff --git a/src/test/ui/parser/shebang/shebang-must-start-file.rs b/src/test/ui/parser/shebang/shebang-must-start-file.rs new file mode 100644 index 0000000000000..e0392572dc81d --- /dev/null +++ b/src/test/ui/parser/shebang/shebang-must-start-file.rs @@ -0,0 +1,6 @@ +// something on the first line for tidy +#!/bin/bash //~ expected `[`, found `/` + +fn main() { + println!("ok!"); +} diff --git a/src/test/ui/parser/shebang/shebang-must-start-file.stderr b/src/test/ui/parser/shebang/shebang-must-start-file.stderr new file mode 100644 index 0000000000000..50543e8bdb816 --- /dev/null +++ b/src/test/ui/parser/shebang/shebang-must-start-file.stderr @@ -0,0 +1,8 @@ +error: expected `[`, found `/` + --> $DIR/shebang-must-start-file.rs:2:3 + | +LL | #!/bin/bash + | ^ expected `[` + +error: aborting due to previous error + diff --git a/src/test/ui/parser/shebang/sneaky-attrib.rs b/src/test/ui/parser/shebang/sneaky-attrib.rs new file mode 100644 index 0000000000000..b406cc3aa13c7 --- /dev/null +++ b/src/test/ui/parser/shebang/sneaky-attrib.rs @@ -0,0 +1,16 @@ +#!//bin/bash + + +// This could not possibly be a shebang & also a valid rust file, since a Rust file +// can't start with `[` +/* + [ (mixing comments to also test that we ignore both types of comments) + + */ + +[allow(unused_variables)] + +// check-pass +fn main() { + let x = 5; +} diff --git a/src/test/ui/parser/shebang/valid-shebang.rs b/src/test/ui/parser/shebang/valid-shebang.rs new file mode 100644 index 0000000000000..e480d3da3fc8d --- /dev/null +++ b/src/test/ui/parser/shebang/valid-shebang.rs @@ -0,0 +1,6 @@ +#!/usr/bin/env run-cargo-script + +// check-pass +fn main() { + println!("Hello World!"); +} diff --git a/src/tools/tidy/src/style.rs b/src/tools/tidy/src/style.rs index 4247fcb3b7f53..396d6c0cfcdef 100644 --- a/src/tools/tidy/src/style.rs +++ b/src/tools/tidy/src/style.rs @@ -174,6 +174,11 @@ pub fn check(path: &Path, bad: &mut bool) { let can_contain = contents.contains("// ignore-tidy-") || contents.contains("# ignore-tidy-"); + // Enable testing ICE's that require specific (untidy) + // file formats easily eg. `issue-1234-ignore-tidy.rs` + if filename.contains("ignore-tidy") { + return; + } let mut skip_cr = contains_ignore_directive(can_contain, &contents, "cr"); let mut skip_undocumented_unsafe = contains_ignore_directive(can_contain, &contents, "undocumented-unsafe");