feat: implement shellcheck lints in command blocks (stjude-rust-labs#191

)
thatRichman · Dec 8, 2024 · 6cccaf7 · 6cccaf7
1 parent 995946b
commit 6cccaf7
Show file tree

Hide file tree

Showing 9 changed files with 539 additions and 0 deletions.
diff --git a/wdl-lint/Cargo.toml b/wdl-lint/Cargo.toml
@@ -16,6 +16,9 @@ wdl-ast = { path = "../wdl-ast", version = "0.9.0" }
 convert_case = { workspace = true }
 indexmap = { workspace = true }
 rowan = { workspace = true }
+serde_json = { workspace = true }
+anyhow = { workspace = true }
+serde = { workspace = true }
 
 [dev-dependencies]
 codespan-reporting = { workspace = true }

diff --git a/wdl-lint/src/lib.rs b/wdl-lint/src/lib.rs
@@ -84,6 +84,7 @@ pub trait Rule: Visitor<State = Diagnostics> {
 /// Gets the default rule set.
 pub fn rules() -> Vec<Box<dyn Rule>> {
     let rules: Vec<Box<dyn Rule>> = vec![
+        Box::<rules::CommandSectionShellCheck>::default(),
         Box::<rules::DoubleQuotesRule>::default(),
         Box::<rules::NoCurlyCommandsRule>::default(),
         Box::<rules::SnakeCaseRule>::default(),

diff --git a/wdl-lint/src/rules.rs b/wdl-lint/src/rules.rs
@@ -3,6 +3,7 @@
 mod blank_lines_between_elements;
 mod call_input_spacing;
 mod command_mixed_indentation;
+mod command_shellcheck;
 mod comment_whitespace;
 mod container_value;
 mod deprecated_object;
@@ -45,6 +46,7 @@ mod whitespace;
 pub use blank_lines_between_elements::*;
 pub use call_input_spacing::*;
 pub use command_mixed_indentation::*;
+pub use command_shellcheck::*;
 pub use comment_whitespace::*;
 pub use container_value::*;
 pub use deprecated_object::*;

diff --git a/wdl-lint/src/rules/command_shellcheck.rs b/wdl-lint/src/rules/command_shellcheck.rs
@@ -0,0 +1,357 @@
+//! A lint rule for checking mixed indentation in command text.
+use std::collections::HashMap;
+use std::collections::HashSet;
+use std::io::Write;
+use std::process;
+use std::process::Stdio;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::bail;
+use serde::Deserialize;
+use serde_json;
+
+use wdl_ast::AstNode;
+use wdl_ast::AstToken;
+use wdl_ast::Diagnostic;
+use wdl_ast::Diagnostics;
+use wdl_ast::Document;
+use wdl_ast::Span;
+use wdl_ast::SupportedVersion;
+use wdl_ast::SyntaxElement;
+use wdl_ast::SyntaxKind;
+use wdl_ast::ToSpan;
+use wdl_ast::VisitReason;
+use wdl_ast::Visitor;
+use wdl_ast::support;
+use wdl_ast::v1::CommandPart;
+use wdl_ast::v1::CommandSection;
+use wdl_ast::v1::Placeholder;
+use wdl_ast::v1::StrippedCommandPart;
+use wdl_ast::v1::TaskDefinition;
+
+use crate::Rule;
+use crate::Tag;
+use crate::TagSet;
+use crate::util::{count_leading_whitespace, lines_with_offset, program_exists};
+
+/// The shellcheck executable
+const SHELLCHECK_BIN: &str = "shellcheck";
+
+/// shellcheck lints that we want to suppress
+const SHELLCHECK_SUPPRESS: &[&str] = &[
+    "1009", // the mentioned parser error was in...
+    "1072", // Unexpected..
+    "1083", // this {/} is literal
+    "2043", // this loop will only ever run once for constant value
+    "2050", // This expression is constant
+    "2157", // Argument to -n is always true due to literal strings
+    "2193", // The arguments to this comparison can never be equal
+];
+
+/// The identifier for the command section ShellCheck rule.
+const ID: &str = "CommandSectionShellCheck";
+
+/// A ShellCheck comment
+///
+/// The file and fix fields are ommitted as we have no use for them.
+#[derive(Clone, Debug, Deserialize)]
+struct ShellCheckComment {
+    /// line number comment starts on
+    pub line: usize,
+    /// line number comment ends on
+    #[serde(rename = "endLine")]
+    pub end_line: usize,
+    /// column comment starts on
+    pub column: usize,
+    /// column comment ends on
+    #[serde(rename = "endColumn")]
+    pub end_column: usize,
+    /// severity of the comment
+    pub level: String,
+    /// shellcheck error code
+    pub code: usize,
+    /// message associated with the comment
+    pub message: String,
+}
+
+/// Run shellcheck
+///
+/// writes command text to stdin of shellcheck process
+/// and returns parsed ShellCheckComments
+fn run_shellcheck(command: &str) -> Result<Vec<ShellCheckComment>> {
+    let mut sc_proc = process::Command::new(SHELLCHECK_BIN)
+        .args([
+            "-s",
+            "bash",
+            "-f",
+            "json",
+            "-e",
+            &SHELLCHECK_SUPPRESS.join(","),
+            "-",
+        ])
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .spawn()
+        .context("error calling shellcheck")?;
+    {
+        let mut proc_stdin = sc_proc
+            .stdin
+            .take()
+            .context("error obtaining stdin of shellcheck")?;
+        proc_stdin.write_all(command.as_bytes())?;
+    }
+
+    let output = sc_proc
+        .wait_with_output()
+        .context("error calling shellcheck")?;
+
+    // shellcheck returns exit code 1 if
+    // any checked files result in comments
+    // so cannot check with status.success()
+    match output.status.code() {
+        Some(0) | Some(1) => serde_json::from_slice::<Vec<ShellCheckComment>>(&output.stdout)
+            .context("failed to parse shellcheck output"),
+        Some(code) => bail!("error calling shellcheck: exit code {}", code),
+        None => bail!("error calling check: process interrupted"),
+    }
+}
+
+/// Runs ShellCheck on a command section and reports violations
+#[derive(Default, Debug, Clone, Copy)]
+pub struct CommandSectionShellCheck;
+
+impl Rule for CommandSectionShellCheck {
+    fn id(&self) -> &'static str {
+        ID
+    }
+
+    fn description(&self) -> &'static str {
+        "Ensures that command blocks are free of ShellCheck violations."
+    }
+
+    fn explanation(&self) -> &'static str {
+        "ShellCheck is a static analysis tool and linter for sh / bash. \
+        The lints provided by ShellCheck help prevent common errors and \
+        pitfalls in your scripts. Following its recommendations will increase \
+        the robustness of your command blocks."
+    }
+
+    fn tags(&self) -> TagSet {
+        TagSet::new(&[Tag::Correctness, Tag::Style, Tag::Portability])
+    }
+
+    fn exceptable_nodes(&self) -> Option<&'static [SyntaxKind]> {
+        Some(&[
+            SyntaxKind::VersionStatementNode,
+            SyntaxKind::TaskDefinitionNode,
+            SyntaxKind::CommandSectionNode,
+        ])
+    }
+}
+
+/// Convert a WDL placeholder to a bash variable
+fn to_bash_var(placeholder: &Placeholder) -> String {
+    let placeholder_text = placeholder.syntax().text().to_string();
+    let mut bash_var = String::from("_WL");
+    bash_var.push_str(
+        &placeholder_text
+            .chars()
+            .map(|c| {
+                if c.is_ascii_alphanumeric() || c == '_' {
+                    c
+                } else {
+                    '_'
+                }
+            })
+            .collect::<String>(),
+    );
+    bash_var
+}
+
+/// retrieve all input and private declations for a task
+fn task_declarations(task: &TaskDefinition) -> HashSet<String> {
+    let mut decls = HashSet::new();
+    if let Some(input) = task.input() {
+        for decl in input.declarations() {
+            decls.insert(decl.name().as_str().to_owned());
+        }
+    }
+
+    for decl in task.declarations() {
+        decls.insert(decl.name().as_str().to_owned());
+    }
+    decls
+}
+
+/// Creates a "ShellCheck lint" diagnostic from a ShellCheckComment
+fn shellcheck_lint(comment: &ShellCheckComment, span: Span) -> Diagnostic {
+    dbg!(
+        Diagnostic::note("shellcheck violations within a command")
+            .with_rule(ID)
+            .with_label(
+                format!("SC{}[{}]: {}", comment.code, comment.level, comment.message),
+                span,
+            )
+            .with_fix("address ShellCheck violations")
+    )
+}
+
+impl Visitor for CommandSectionShellCheck {
+    type State = Diagnostics;
+
+    fn document(
+        &mut self,
+        _: &mut Self::State,
+        reason: VisitReason,
+        _: &Document,
+        _: SupportedVersion,
+    ) {
+        if reason == VisitReason::Exit {
+            return;
+        }
+
+        // Reset the visitor upon document entry
+        *self = Default::default();
+    }
+
+    fn command_section(
+        &mut self,
+        state: &mut Self::State,
+        reason: VisitReason,
+        section: &CommandSection,
+    ) {
+        if reason == VisitReason::Exit {
+            return;
+        }
+
+        if !program_exists(SHELLCHECK_BIN) {
+            return;
+        }
+
+        // collect declarations so we can ignore placeholder variables
+        let parent_task = section.parent().into_task().expect("parent is a task");
+        let mut decls = task_declarations(&parent_task);
+
+        // replace all placeholders in the command with
+        // dummy bash variables
+        let mut sanitized_command = String::new();
+        if let Some(cmd_parts) = section.strip_whitespace() {
+            cmd_parts.iter().for_each(|part| match part {
+                StrippedCommandPart::Text(text) => {
+                    sanitized_command.push_str(text);
+                }
+                StrippedCommandPart::Placeholder(placeholder) => {
+                    let bash_var = to_bash_var(placeholder);
+                    // we need to save the var so we can suppress later
+                    decls.insert(bash_var.clone());
+                    let mut expansion = String::from("\"$");
+                    expansion.push_str(&bash_var);
+                    expansion.push('"');
+                    sanitized_command.push_str(&expansion);
+                }
+            });
+        } else {
+            // TODO add a diagnostic explaining why this is being skipped?
+            return;
+        }
+
+        // get leading whitespace so we can add it to each span
+        // TODO there may be a more elegant way to do this
+        let leading_whitespace = {
+            if let Some(first_part) = section.parts().next() {
+                match first_part {
+                    CommandPart::Text(text) => {
+                        let text_str = text
+                            .as_str()
+                            .strip_prefix("\n")
+                            .unwrap_or_else(|| text.as_str());
+                        count_leading_whitespace(text_str)
+                    }
+                    CommandPart::Placeholder(pholder) => {
+                        let pholder_str = &pholder.syntax().text().to_string();
+                        count_leading_whitespace(
+                            pholder_str.strip_prefix("\n").unwrap_or(pholder_str),
+                        )
+                    }
+                }
+            } else {
+                0
+            }
+        };
+
+        // Map each actual line of the command to its corresponding
+        // CommandPart and start position
+        let mut line_map = HashMap::new();
+        let mut on_same_line = false;
+        let mut line_num = 0usize;
+        for part in section.parts() {
+            match part {
+                CommandPart::Text(ref text) => {
+                    let mut last_line = 0usize;
+                    for (_, start, _) in lines_with_offset(text.as_str()) {
+                        // Check to see if we are still on the previous line
+                        // This happens after we encounter a placeholder
+                        if on_same_line {
+                            on_same_line = false;
+                            last_line = last_line.saturating_sub(1);
+                        }
+                        line_map.insert(line_num + last_line, (part.clone(), start));
+                        last_line += 1;
+                    }
+                    line_num += last_line;
+                }
+                CommandPart::Placeholder(_) => {
+                    on_same_line = true;
+                }
+            }
+        }
+
+        match run_shellcheck(&sanitized_command) {
+            Ok(comments) => {
+                for comment in comments {
+                    // skip declarations that shellcheck is unaware of
+                    if comment.code == 2154
+                        && decls.contains(comment.message.split_whitespace().next().unwrap_or(""))
+                    {
+                        continue;
+                    }
+                    let (rel_part, start) = line_map
+                        .get(&comment.line)
+                        .expect("shellcheck line corresponds to command line");
+                    let rel_token = match rel_part {
+                        CommandPart::Text(text) => text.syntax(),
+                        CommandPart::Placeholder(phold) => {
+                            &support::token(phold.syntax(), SyntaxKind::PlaceholderNode)
+                                .expect("should have a placeholder node token")
+                        }
+                    };
+                    let inner_span = {
+                        let outer_span = rel_token.text_range().to_span();
+                        Span::new(
+                            outer_span.start() + leading_whitespace + start + comment.column - 1,
+                            comment.end_column - comment.column,
+                        )
+                    };
+                    state.exceptable_add(
+                        shellcheck_lint(&comment, inner_span),
+                        SyntaxElement::from(rel_token.clone()),
+                        &self.exceptable_nodes(),
+                    )
+                }
+            }
+            Err(e) => {
+                let command_keyword = support::token(section.syntax(), SyntaxKind::CommandKeyword)
+                    .expect("should have a command keyword token");
+                state.exceptable_add(
+                    Diagnostic::error("failed to run shellcheck on command block")
+                        .with_label(e.to_string(), command_keyword.text_range().to_span())
+                        .with_rule(ID)
+                        .with_fix("ensure shellcheck is installed or disable this lint"),
+                    SyntaxElement::from(section.syntax().clone()),
+                    &self.exceptable_nodes(),
+                );
+            }
+        }
+    }
+}