From e48747c0666436b13524242610161b3ec1a641f2 Mon Sep 17 00:00:00 2001 From: Clay McLeod Date: Thu, 16 Nov 2023 09:52:25 -0600 Subject: [PATCH] feat: adds validation and linting --- Cargo.lock | 7 + Gauntlet.toml | 96 ++++++-- rustfmt.toml | 1 + wdl-grammar/Cargo.toml | 5 +- wdl-grammar/src/commands/create_test.rs | 18 +- wdl-grammar/src/commands/gauntlet.rs | 60 +++-- wdl-grammar/src/commands/gauntlet/config.rs | 1 + .../src/commands/gauntlet/config/inner.rs | 2 +- wdl-grammar/src/commands/gauntlet/report.rs | 26 ++- wdl-grammar/src/commands/parse.rs | 18 +- wdl-grammar/src/core.rs | 9 + wdl-grammar/src/core/code.rs | 120 ++++++++++ wdl-grammar/src/core/code/identity.rs | 31 +++ wdl-grammar/src/core/lint.rs | 45 ++++ wdl-grammar/src/core/lint/group.rs | 16 ++ wdl-grammar/src/core/lint/level.rs | 25 ++ wdl-grammar/src/core/lint/linter.rs | 63 +++++ wdl-grammar/src/core/lint/warning.rs | 174 ++++++++++++++ wdl-grammar/src/core/lint/warning/builder.rs | 218 ++++++++++++++++++ wdl-grammar/src/core/tree.rs | 82 +++++++ wdl-grammar/src/core/validation.rs | 38 +++ wdl-grammar/src/core/validation/error.rs | 97 ++++++++ .../src/core/validation/error/builder.rs | 122 ++++++++++ wdl-grammar/src/core/validation/validator.rs | 54 +++++ wdl-grammar/src/lib.rs | 45 ++-- wdl-grammar/src/main.rs | 11 + wdl-grammar/src/v1.rs | 82 ++++++- wdl-grammar/src/v1/lint.rs | 13 ++ wdl-grammar/src/v1/lint/whitespace.rs | 181 +++++++++++++++ wdl-grammar/src/v1/parse.rs | 15 ++ wdl-grammar/src/v1/tests/expression/core.rs | 78 ++++--- .../v1/tests/expression/core/array_literal.rs | 192 +++++++++------ wdl-grammar/src/v1/tests/literal.rs | 56 ++++- wdl-grammar/src/v1/tests/primitives/char.rs | 80 +++++-- .../src/v1/tests/primitives/char/unicode.rs | 32 ++- wdl-grammar/src/v1/tests/primitives/string.rs | 60 ++++- .../primitives/string/double_quoted_string.rs | 62 ++--- .../primitives/string/single_quoted_string.rs | 66 +++--- wdl-grammar/src/v1/validation.rs | 13 ++ 
.../v1/validation/invalid_escape_character.rs | 63 +++++ wdl-grammar/src/v1/wdl.pest | 147 ++++++------ wdl-grammar/src/version.rs | 33 +++ 42 files changed, 2209 insertions(+), 348 deletions(-) create mode 100644 rustfmt.toml create mode 100644 wdl-grammar/src/core.rs create mode 100644 wdl-grammar/src/core/code.rs create mode 100644 wdl-grammar/src/core/code/identity.rs create mode 100644 wdl-grammar/src/core/lint.rs create mode 100644 wdl-grammar/src/core/lint/group.rs create mode 100644 wdl-grammar/src/core/lint/level.rs create mode 100644 wdl-grammar/src/core/lint/linter.rs create mode 100644 wdl-grammar/src/core/lint/warning.rs create mode 100644 wdl-grammar/src/core/lint/warning/builder.rs create mode 100644 wdl-grammar/src/core/tree.rs create mode 100644 wdl-grammar/src/core/validation.rs create mode 100644 wdl-grammar/src/core/validation/error.rs create mode 100644 wdl-grammar/src/core/validation/error/builder.rs create mode 100644 wdl-grammar/src/core/validation/validator.rs create mode 100644 wdl-grammar/src/v1/lint.rs create mode 100644 wdl-grammar/src/v1/lint/whitespace.rs create mode 100644 wdl-grammar/src/v1/parse.rs create mode 100644 wdl-grammar/src/v1/validation.rs create mode 100644 wdl-grammar/src/v1/validation/invalid_escape_character.rs create mode 100644 wdl-grammar/src/version.rs diff --git a/Cargo.lock b/Cargo.lock index 2c7608ad..bf4506da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1787,6 +1787,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "to_snake_case" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "890233d8f267716f1ec0f272330498ed42f186b458ad8c9868fa6cb40fea87ad" + [[package]] name = "tokio" version = "1.34.0" @@ -2151,6 +2157,7 @@ dependencies = [ "reqwest", "serde", "serde_with", + "to_snake_case", "tokio", "toml", ] diff --git a/Gauntlet.toml 
b/Gauntlet.toml index 79d92964..46d9ca21 100644 --- a/Gauntlet.toml +++ b/Gauntlet.toml @@ -1,47 +1,101 @@ version = "v1" +[[repositories]] +organization = "stjudecloud" +name = "workflows" + [[repositories]] organization = "PacificBiosciences" name = "HiFi-human-WGS-WDL" [[repositories]] -organization = "biowdl" -name = "tasks" +organization = "chanzuckerberg" +name = "czid-workflows" [[repositories]] -organization = "stjudecloud" -name = "workflows" +organization = "biowdl" +name = "tasks" -[[repositories]] -organization = "chanzuckerberg" -name = "czid-workflows" +[[ignored_errors]] +document = "biowdl/tasks:bcftools.wdl" +error = '''validation error: [v1::001] invalid escape character '\_' in string at line 114:75''' [[ignored_errors]] document = "biowdl/tasks:bedtools.wdl" -error = """ - --> 29:67 - | -29 | String memory = \"~{512 + ceil(size([inputBed, faidx], \"MiB\"))}MiB\" - | ^--- - | - = expected WHITESPACE or OPTION""" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 27:48''' [[ignored_errors]] document = "biowdl/tasks:bowtie.wdl" -error = """ - --> 40:58 - | -40 | String memory = \"~{5 + ceil(size(indexFiles, \"GiB\"))}GiB\" - | ^--- - | - = expected WHITESPACE or OPTION""" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 63:32''' + +[[ignored_errors]] +document = "biowdl/tasks:centrifuge.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 122:57''' + +[[ignored_errors]] +document = "biowdl/tasks:common.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 275:45''' + +[[ignored_errors]] +document = "biowdl/tasks:fastqc.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 59:42''' + +[[ignored_errors]] +document = "biowdl/tasks:gatk.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' 
in string at line 127:57''' + +[[ignored_errors]] +document = "biowdl/tasks:hisat2.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 60:34''' + +[[ignored_errors]] +document = "biowdl/tasks:multiqc.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 133:45''' + +[[ignored_errors]] +document = "biowdl/tasks:picard.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 643:51''' + +[[ignored_errors]] +document = "biowdl/tasks:sambamba.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 91:44''' + +[[ignored_errors]] +document = "biowdl/tasks:samtools.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 80:42''' + +[[ignored_errors]] +document = "biowdl/tasks:umi-tools.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 95:49''' + +[[ignored_errors]] +document = "biowdl/tasks:umi.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 39:60''' [[ignored_errors]] document = "stjudecloud/workflows:template/task-templates.wdl" error = """ +parse error: + --> 17:25 | 17 | Int memory_gb = <> | ^--- | = expected WHITESPACE, COMMENT, or expression""" + +[[ignored_errors]] +document = "stjudecloud/workflows:tools/bwa.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 34:17''' + +[[ignored_errors]] +document = "stjudecloud/workflows:tools/fq.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' in string at line 123:17''' + +[[ignored_errors]] +document = "stjudecloud/workflows:tools/kraken2.wdl" +error = '''validation error: [v1::001] invalid escape character '\.' 
in string at line 335:17''' + +[[ignored_errors]] +document = "stjudecloud/workflows:workflows/rnaseq/rnaseq-standard-fastq.wdl" +error = '''validation error: [v1::001] invalid escape character '\*' in string at line 55:241''' diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 00000000..08e342cb --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1 @@ +format_code_in_doc_comments = true \ No newline at end of file diff --git a/wdl-grammar/Cargo.toml b/wdl-grammar/Cargo.toml index 9c664fb0..6d46bb4e 100644 --- a/wdl-grammar/Cargo.toml +++ b/wdl-grammar/Cargo.toml @@ -19,7 +19,8 @@ pest_derive = { workspace = true } reqwest = { version = "0.11.22", optional = true } serde = { workspace = true } serde_with = { workspace = true, optional = true } -tokio = { version = "1.33.0", features = ["full"], optional = true} +to_snake_case = "0.1.1" +tokio = { version = "1.33.0", features = ["full"], optional = true } toml = { workspace = true, optional = true } [features] @@ -36,7 +37,7 @@ binaries = [ "reqwest", "serde_with", "tokio", - "toml" + "toml", ] [[bin]] diff --git a/wdl-grammar/src/commands/create_test.rs b/wdl-grammar/src/commands/create_test.rs index 54d8e23f..d6884d6e 100644 --- a/wdl-grammar/src/commands/create_test.rs +++ b/wdl-grammar/src/commands/create_test.rs @@ -1,8 +1,8 @@ //! `wdl-grammar create-test` use clap::Parser; +use log::warn; use pest::iterators::Pair; -use pest::Parser as _; use pest::RuleType; use wdl_grammar as grammar; @@ -18,8 +18,8 @@ pub enum Error { /// Multiple root nodes parsed. MultipleRootNodes, - /// A parsing error from Pest. - Parse(Box), + /// An error parsing the grammar. + GrammarV1(grammar::Error), /// Unknown rule name. 
UnknownRule { @@ -35,11 +35,11 @@ impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Error::Common(err) => write!(f, "{err}"), + Error::GrammarV1(err) => write!(f, "grammar parse error: {err}"), Error::MultipleRootNodes => write!(f, "multiple root nodes found"), Error::UnknownRule { name, grammar } => { write!(f, "unknown rule '{name}' for grammar {grammar}") } - Error::Parse(err) => write!(f, "parse error: {err}"), } } } @@ -84,11 +84,15 @@ pub fn create_test(args: Args) -> Result<()> { .unwrap_or_else(|| get_contents_stdin().map_err(Error::Common))?; let mut parse_tree = match args.specification_version { - grammar::Version::V1 => { - grammar::v1::Parser::parse(rule, &input).map_err(|err| Error::Parse(Box::new(err)))? - } + grammar::Version::V1 => grammar::v1::parse(rule, &input).map_err(Error::GrammarV1)?, }; + if let Some(warnings) = parse_tree.warnings() { + for warning in warnings { + warn!("{}", warning); + } + } + let root = match parse_tree.len() { // SAFETY: this should not be possible, as parsing just successfully // completed. As such, we should always have at least one parsed diff --git a/wdl-grammar/src/commands/gauntlet.rs b/wdl-grammar/src/commands/gauntlet.rs index 1ece77f4..bd2f3694 100644 --- a/wdl-grammar/src/commands/gauntlet.rs +++ b/wdl-grammar/src/commands/gauntlet.rs @@ -8,7 +8,6 @@ use clap::Parser; use colored::Colorize as _; use log::debug; use log::trace; -use pest::Parser as _; pub mod config; pub mod document; @@ -81,12 +80,16 @@ pub struct Args { #[arg(short, long)] config_file: Option, + /// Don't load any configuration from the cache. + #[arg(short, long, global = true)] + no_cache: bool, + /// Only errors are printed to the stderr stream. #[arg(short, long, global = true)] quiet: bool, /// Overwrites the configuration file. - #[arg(short, long, global = true)] + #[arg(long, global = true)] save_config: bool, /// Silences printing detailed error information. 
@@ -97,6 +100,10 @@ pub struct Args { #[arg(long, global = true)] skip_remote: bool, + /// Displays warnings as part of the report output. + #[arg(long, global = true)] + show_warnings: bool, + /// The Workflow Description Language (WDL) specification version to use. #[arg(value_name = "VERSION", short = 's', long, default_value_t, value_enum)] specification_version: grammar::Version, @@ -108,9 +115,16 @@ pub struct Args { /// Main function for this subcommand. pub async fn gauntlet(args: Args) -> Result<()> { - let path = args.config_file.unwrap_or(Config::default_path()); - let mut config = - Config::load_or_new(path, args.specification_version).map_err(Error::Config)?; + let mut config = match args.no_cache { + true => { + debug!("Skipping loading from cache."); + Config::default() + } + false => { + let path = args.config_file.unwrap_or(Config::default_path()); + Config::load_or_new(path, args.specification_version).map_err(Error::Config)? + } + }; if let Some(repositories) = args.repositories { config.repositories_mut().extend( @@ -157,13 +171,33 @@ pub async fn gauntlet(args: Args) -> Result<()> { match config.version() { grammar::Version::V1 => { - match grammar::v1::Parser::parse(grammar::v1::Rule::document, &content) { - Ok(_) => { - trace!("{}: successfully parsed.", document_identifier); - report - .register(document_identifier, Status::Success) - .map_err(Error::InputOutput)?; - } + match grammar::v1::parse(grammar::v1::Rule::document, &content) { + Ok(tree) => match tree.warnings() { + Some(warnings) => { + trace!( + "{}: successfully parsed with {} warnings.", + document_identifier, + warnings.len() + ); + report + .register(document_identifier, Status::Warning) + .map_err(Error::InputOutput)?; + + if args.show_warnings { + for warning in warnings { + report + .report_warning(warning) + .map_err(Error::InputOutput)?; + } + } + } + None => { + trace!("{}: succesfully parsed.", document_identifier,); + report + .register(document_identifier, 
Status::Success) + .map_err(Error::InputOutput)?; + } + }, Err(err) => { let actual_error = err.to_string(); @@ -273,7 +307,7 @@ pub async fn gauntlet(args: Args) -> Result<()> { println!( "\n{}\n", "Undetected expected errors: you should remove these from your \ - Config.toml or run this command with the `-s` option!" + Config.toml or run this command with the `--save-config` option!" .red() .bold() ); diff --git a/wdl-grammar/src/commands/gauntlet/config.rs b/wdl-grammar/src/commands/gauntlet/config.rs index 4c98daa4..9ac986f4 100644 --- a/wdl-grammar/src/commands/gauntlet/config.rs +++ b/wdl-grammar/src/commands/gauntlet/config.rs @@ -58,6 +58,7 @@ type Result = std::result::Result; /// configuration itself. Notably, the path to the configuration file should /// _not_ be part of the serialized configuration value. Thus, I split the /// concept of the path and the actual configuration into two different structs. +#[derive(Default)] pub struct Config { /// The path to the configuration file. path: PathBuf, diff --git a/wdl-grammar/src/commands/gauntlet/config/inner.rs b/wdl-grammar/src/commands/gauntlet/config/inner.rs index fdc76ec4..0bb7b274 100644 --- a/wdl-grammar/src/commands/gauntlet/config/inner.rs +++ b/wdl-grammar/src/commands/gauntlet/config/inner.rs @@ -27,7 +27,7 @@ pub type Repositories = HashSet; /// /// This object stores the actual configuration values for this subcommand. #[serde_as] -#[derive(Debug, Deserialize, Serialize)] +#[derive(Debug, Default, Deserialize, Serialize)] pub struct Inner { /// The WDL version. 
pub(super) version: grammar::Version, diff --git a/wdl-grammar/src/commands/gauntlet/report.rs b/wdl-grammar/src/commands/gauntlet/report.rs index cb0f1f90..ce5687b6 100644 --- a/wdl-grammar/src/commands/gauntlet/report.rs +++ b/wdl-grammar/src/commands/gauntlet/report.rs @@ -2,9 +2,12 @@ use std::collections::HashMap; +use colored::Colorize as _; use indexmap::IndexMap; -use colored::Colorize as _; +use wdl_grammar as grammar; + +use grammar::core::lint; use crate::commands::gauntlet::repository; use crate::gauntlet::document; @@ -198,6 +201,21 @@ impl Report { Ok(()) } + /// Report a warning for a registered result. + pub fn report_warning(&mut self, warning: &lint::Warning) -> std::io::Result<()> { + if self.section != Section::Summary { + panic!( + "cannot report a warning when the report phase is {:?}", + self.section + ); + } + + writeln!(self.inner, " ↳ {}", warning)?; + self.printed = true; + + Ok(()) + } + /// Reports all unexpected errors for a repository report. pub fn report_unexpected_errors_for_repository( &mut self, @@ -294,12 +312,12 @@ impl Report { { 0 => {} 1 => with.push(String::from("1 mismatch error")), - v => with.push(format!("{} mismatched errors", v)), + v => with.push(format!("{} mismatch errors", v)), }; match results.get(&Status::Warning).copied() { - Some(1) => with.push(String::from("1 error with warnings")), - Some(v) => with.push(format!("{} errors with warnings", v)), + Some(1) => with.push(String::from("1 test containing warnings")), + Some(v) => with.push(format!("{} tests containing warnings", v)), None => {} } diff --git a/wdl-grammar/src/commands/parse.rs b/wdl-grammar/src/commands/parse.rs index 891154e1..0846273d 100644 --- a/wdl-grammar/src/commands/parse.rs +++ b/wdl-grammar/src/commands/parse.rs @@ -1,7 +1,7 @@ //! `wdl-grammar parse` use clap::Parser; -use pest::Parser as _; +use log::warn; use wdl_grammar as grammar; @@ -16,8 +16,8 @@ pub enum Error { /// A common error. 
Common(super::Error), - /// A parsing error from Pest. - Parse(Box), + /// An error parsing the grammar. + GrammarV1(grammar::Error), /// Unknown rule name. UnknownRule { @@ -36,10 +36,10 @@ impl std::fmt::Display for Error { write!(f, "cannot print children with empty parse tree") } Error::Common(err) => write!(f, "{err}"), + Error::GrammarV1(err) => write!(f, "grammar parse error: {err}"), Error::UnknownRule { name, grammar } => { write!(f, "unknown rule '{name}' for grammar {grammar}") } - Error::Parse(err) => write!(f, "parse error: {err}"), } } } @@ -88,11 +88,15 @@ pub fn parse(args: Args) -> Result<()> { .unwrap_or_else(|| get_contents_stdin().map_err(Error::Common))?; let mut parse_tree = match args.specification_version { - grammar::Version::V1 => { - grammar::v1::Parser::parse(rule, &input).map_err(|err| Error::Parse(Box::new(err)))? - } + grammar::Version::V1 => grammar::v1::parse(rule, &input).map_err(Error::GrammarV1)?, }; + if let Some(warnings) = parse_tree.warnings() { + for warning in warnings { + warn!("{}", warning); + } + } + if args.children_only { let children = match parse_tree.next() { Some(root) => root.into_inner(), diff --git a/wdl-grammar/src/core.rs b/wdl-grammar/src/core.rs new file mode 100644 index 00000000..53dce80b --- /dev/null +++ b/wdl-grammar/src/core.rs @@ -0,0 +1,9 @@ +//! Core functionality used across all grammar versions. + +mod code; +pub mod lint; +mod tree; +pub mod validation; + +pub use code::Code; +pub use tree::Tree; diff --git a/wdl-grammar/src/core/code.rs b/wdl-grammar/src/core/code.rs new file mode 100644 index 00000000..01c8e920 --- /dev/null +++ b/wdl-grammar/src/core/code.rs @@ -0,0 +1,120 @@ +//! Codes for validation errors and lint warnings. + +use std::num::NonZeroUsize; + +use crate::Version; + +/// An error related to a [`Code`]. +#[derive(Debug)] +pub enum Error { + /// Attempted to make a code with an invalid index. 
+ InvalidIndex(usize), +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::InvalidIndex(index) => write!(f, "invalid index: {index}"), + } + } +} + +impl std::error::Error for Error {} + +/// A [`Result`](std::result::Result) with an [`Error`]. +type Result = std::result::Result; + +/// A code. +#[derive(Clone, Debug)] +pub struct Code { + /// The grammar for this code. + grammar: Version, + + /// The index for this code. + index: NonZeroUsize, +} + +impl Code { + /// Attempts to create a new [`Code`]. + /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// assert_eq!(code.grammar(), &Version::V1); + /// assert_eq!(code.index().get(), 1); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn try_new(grammar: Version, index: usize) -> Result { + let index = NonZeroUsize::try_from(index).map_err(|_| Error::InvalidIndex(index))?; + + Ok(Self { grammar, index }) + } + + /// Gets the grammar [`Version`] for this [`Code`] by reference. + /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// assert_eq!(code.grammar(), &Version::V1); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn grammar(&self) -> &Version { + &self.grammar + } + + /// Gets the index of this [`Code`] by reference. 
+ /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// assert_eq!(code.index().get(), 1); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn index(&self) -> NonZeroUsize { + self.index + } +} + +impl std::fmt::Display for Code { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}::{:03}", self.grammar.short_name(), self.index) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn zero_index() { + let err = Code::try_new(Version::V1, 0).unwrap_err(); + assert!(matches!(err, Error::InvalidIndex(0))); + } + + #[test] + fn display() { + let identity = Code::try_new(Version::V1, 1).unwrap(); + assert_eq!(identity.to_string(), String::from("v1::001")); + } +} diff --git a/wdl-grammar/src/core/code/identity.rs b/wdl-grammar/src/core/code/identity.rs new file mode 100644 index 00000000..686c02ad --- /dev/null +++ b/wdl-grammar/src/core/code/identity.rs @@ -0,0 +1,31 @@ +use std::num::NonZeroUsize; + +use crate::Version; + +#[derive(Debug)] +pub enum Error { + InvalidIndex(usize), +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::InvalidIndex(index) => write!(f, "invalid index: {index}"), + } + } +} + +impl std::error::Error for Error {} + +type Result = std::result::Result; + +pub struct Identity { + pub index: NonZeroUsize, + pub grammar: Version, +} + +impl Identity { + pub fn try_new(grammar: Version, index: usize) -> Result { + Ok(Self { index, grammar }) + } +} diff --git a/wdl-grammar/src/core/lint.rs b/wdl-grammar/src/core/lint.rs new file mode 100644 index 00000000..f658d0c5 --- /dev/null +++ b/wdl-grammar/src/core/lint.rs @@ -0,0 +1,45 @@ +//! Linting. 
+ +use pest::iterators::Pairs; +use pest::RuleType; +use to_snake_case::ToSnakeCase as _; + +mod group; +mod level; +mod linter; +pub mod warning; + +pub use group::Group; +pub use level::Level; +pub use linter::Linter; +pub use warning::Warning; + +use crate::core::Code; + +/// A [`Result`](std::result::Result) returned from a lint check. +pub type Result = std::result::Result>, Box>; + +/// A lint rule. +pub trait Rule: std::fmt::Debug { + /// The name of the lint rule. + /// + /// This is what will show up in style guides, it is required to be snake + /// case (even though the rust struct is camel case). + fn name(&self) -> String { + format!("{:?}", self).to_snake_case() + } + + /// Get the code for this lint rule. + fn code(&self) -> Code; + + /// Get the lint group for this lint rule. + fn group(&self) -> Group; + + /// Checks the parse tree according to the implemented lint rule. + /// + /// **Note:** it would be much better to pass a reference to the parse tree + /// (`&Pairs<'a, R>`) here to avoid unnecessary cloning of the tree. + /// Unfortunately, the [`Pest`](https://pest.rs) library does not support a + /// reference to [`Pairs`] being turned into an iterator at the moment. + fn check(&self, tree: Pairs<'_, R>) -> Result; +} diff --git a/wdl-grammar/src/core/lint/group.rs b/wdl-grammar/src/core/lint/group.rs new file mode 100644 index 00000000..949af12f --- /dev/null +++ b/wdl-grammar/src/core/lint/group.rs @@ -0,0 +1,16 @@ +//! Lint groups. + +/// A lint group. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum Group { + /// Rules associated with the style of an input. 
+ Style, +} + +impl std::fmt::Display for Group { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Group::Style => write!(f, "Style"), + } + } +} diff --git a/wdl-grammar/src/core/lint/level.rs b/wdl-grammar/src/core/lint/level.rs new file mode 100644 index 00000000..2dd0a953 --- /dev/null +++ b/wdl-grammar/src/core/lint/level.rs @@ -0,0 +1,25 @@ +//! Lint levels. + +/// A lint level. +#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] +pub enum Level { + /// The lowest priority lint level. + Low, + + /// A moderate lint level. + Medium, + + /// The highest priority lint level. + High, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn order() { + assert!(Level::Low < Level::Medium); + assert!(Level::Medium < Level::High); + } +} diff --git a/wdl-grammar/src/core/lint/linter.rs b/wdl-grammar/src/core/lint/linter.rs new file mode 100644 index 00000000..4418ec02 --- /dev/null +++ b/wdl-grammar/src/core/lint/linter.rs @@ -0,0 +1,63 @@ +//! Linters. + +use pest::iterators::Pairs; +use pest::RuleType; + +use crate::core::lint; +use crate::core::lint::Rule; +use crate::core::lint::Warning; + +/// A [`Result`](std::result::Result) for the [`Linter::lint`] function. +pub type Result = std::result::Result>, Box>; + +/// A linter for a WDL parse tree. +#[derive(Debug)] +pub struct Linter; + +impl Linter { + /// Lints a WDL parse tree according to a set of lint rules. + pub fn lint(tree: Pairs<'_, R>, rules: &[Box>]) -> Result { + let warnings = rules + .iter() + .map(|rule| rule.check(tree.clone())) + .collect::>>, Box>>()? 
+ .into_iter() + .flatten() + .flatten() + .collect::>(); + + match warnings.is_empty() { + true => Ok(None), + false => Ok(Some(warnings)), + } + } +} + +#[cfg(test)] +mod tests { + use pest::Parser as _; + + use crate::v1::Parser; + use crate::v1::Rule; + + use super::*; + + #[test] + fn baseline() -> std::result::Result<(), Box> { + let tree = Parser::parse(Rule::document, "version 1.1 \n \n")?; + let rules = crate::v1::lint::rules(); + let mut results = Linter::lint(tree, rules.as_ref())?.unwrap(); + + assert_eq!(results.len(), 2); + assert_eq!( + results.pop().unwrap().to_string(), + String::from("[v1::001::Style/Low] line 2 is empty but contains spaces") + ); + assert_eq!( + results.pop().unwrap().to_string(), + String::from("[v1::001::Style/Low] trailing space at the end of line 1") + ); + + Ok(()) + } +} diff --git a/wdl-grammar/src/core/lint/warning.rs b/wdl-grammar/src/core/lint/warning.rs new file mode 100644 index 00000000..2ca33bc0 --- /dev/null +++ b/wdl-grammar/src/core/lint/warning.rs @@ -0,0 +1,174 @@ +//! Lint warnings. + +use crate::core::lint::Group; +use crate::core::lint::Level; +use crate::core::Code; + +mod builder; + +pub use builder::Builder; + +/// A lint warning. +#[derive(Clone, Debug)] +pub struct Warning { + /// The code. + code: Code, + + /// The lint level. + level: Level, + + /// The lint group. + group: Group, + + /// The message. + message: String, +} + +impl Warning { + /// Gets the code for this [`Warning`]. 
+ /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::lint::warning::Builder; + /// use grammar::core::lint::Group; + /// use grammar::core::lint::Level; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .level(Level::High) + /// .group(Group::Style) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.code().grammar(), &Version::V1); + /// assert_eq!(warning.code().index().get(), 1); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn code(&self) -> &Code { + &self.code + } + + /// Gets the lint level for this [`Warning`]. + /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::lint::warning::Builder; + /// use grammar::core::lint::Group; + /// use grammar::core::lint::Level; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .level(Level::High) + /// .group(Group::Style) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.level(), &Level::High); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn level(&self) -> &Level { + &self.level + } + + /// Gets the lint group for this [`Warning`]. 
+ /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::lint::warning::Builder; + /// use grammar::core::lint::Group; + /// use grammar::core::lint::Level; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .level(Level::High) + /// .group(Group::Style) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.group(), &Group::Style); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn group(&self) -> &Group { + &self.group + } + + /// Gets the message for this [`Warning`]. + /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::lint::warning::Builder; + /// use grammar::core::lint::Group; + /// use grammar::core::lint::Level; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .level(Level::High) + /// .group(Group::Style) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.message(), "Hello, world!"); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn message(&self) -> &str { + self.message.as_ref() + } +} + +impl std::fmt::Display for Warning { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "[{}::{}/{:?}] {}", + self.code, self.group, self.level, self.message + ) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn display() -> Result<(), Box> { + let code = Code::try_new(crate::Version::V1, 1)?; + let warning = Builder::default() + .code(code) + .level(Level::Medium) + .group(Group::Style) + .message("Hello, world!") + .try_build()?; + + assert_eq!(warning.to_string(), "[v1::001::Style/Medium] Hello, world!"); + + Ok(()) + } +} diff --git a/wdl-grammar/src/core/lint/warning/builder.rs 
b/wdl-grammar/src/core/lint/warning/builder.rs new file mode 100644 index 00000000..83a98f4c --- /dev/null +++ b/wdl-grammar/src/core/lint/warning/builder.rs @@ -0,0 +1,218 @@ +//! A builder for a lint [`Warning`]. + +use crate::core::lint::Group; +use crate::core::lint::Level; +use crate::core::lint::Warning; +use crate::core::Code; + +/// An error related to building a lint warning. +#[derive(Debug)] +pub enum MissingError { + /// A code was not provided. + Code, + + /// A lint level was not provided. + Level, + + /// A lint group was not provided. + Group, + + /// A message was not provided. + Message, +} + +impl std::fmt::Display for MissingError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + MissingError::Code => write!(f, "missing code"), + MissingError::Level => write!(f, "missing level"), + MissingError::Group => write!(f, "missing group"), + MissingError::Message => write!(f, "missing message"), + } + } +} + +impl std::error::Error for MissingError {} + +/// A [`Result`](std::result::Result) with a [`MissingError`]. +pub type Result = std::result::Result; + +/// A builder for a [`Warning`]. +#[derive(Debug, Default)] +pub struct Builder { + /// The code. + code: Option, + + /// The lint level. + level: Option, + + /// The lint group. + group: Option, + + /// The message. + message: Option, +} + +impl Builder { + /// Sets the code for this [`Builder`]. 
+ /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::lint::warning::Builder; + /// use grammar::core::lint::Group; + /// use grammar::core::lint::Level; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .level(Level::High) + /// .group(Group::Style) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.code().grammar(), &Version::V1); + /// assert_eq!(warning.code().index().get(), 1); + /// + /// # Ok::<(), Box>(()) + /// ``` + pub fn code(mut self, code: Code) -> Self { + self.code = Some(code); + self + } + + /// Sets the lint level for this [`Builder`]. + /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::lint::warning::Builder; + /// use grammar::core::lint::Group; + /// use grammar::core::lint::Level; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .level(Level::High) + /// .group(Group::Style) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.level(), &Level::High); + /// + /// # Ok::<(), Box>(()) + pub fn level(mut self, level: Level) -> Self { + self.level = Some(level); + self + } + + /// Sets the lint group for this [`Builder`]. 
+ /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::lint::warning::Builder; + /// use grammar::core::lint::Group; + /// use grammar::core::lint::Level; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .level(Level::High) + /// .group(Group::Style) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.group(), &Group::Style); + /// + /// # Ok::<(), Box>(()) + pub fn group(mut self, group: Group) -> Self { + self.group = Some(group); + self + } + + /// Sets the message for this [`Builder`]. + /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::lint::warning::Builder; + /// use grammar::core::lint::Group; + /// use grammar::core::lint::Level; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .level(Level::High) + /// .group(Group::Style) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.message(), "Hello, world!"); + /// + /// # Ok::<(), Box>(()) + pub fn message(mut self, message: impl Into) -> Self { + let message = message.into(); + self.message = Some(message); + self + } + + /// Consumes `self` to attempt to build a [`Warning`]. 
+ /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::lint::warning::Builder; + /// use grammar::core::lint::Group; + /// use grammar::core::lint::Level; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .level(Level::High) + /// .group(Group::Style) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.code().grammar(), &Version::V1); + /// assert_eq!(warning.code().index().get(), 1); + /// assert_eq!(warning.level(), &Level::High); + /// assert_eq!(warning.group(), &Group::Style); + /// assert_eq!(warning.message(), "Hello, world!"); + /// assert_eq!(warning.to_string(), "[v1::001::Style/High] Hello, world!"); + /// + /// # Ok::<(), Box>(()) + pub fn try_build(self) -> Result { + let code = self.code.map(Ok).unwrap_or(Err(MissingError::Code))?; + let level = self.level.map(Ok).unwrap_or(Err(MissingError::Level))?; + let group = self.group.map(Ok).unwrap_or(Err(MissingError::Group))?; + let message = self.message.map(Ok).unwrap_or(Err(MissingError::Message))?; + + Ok(Warning { + code, + level, + group, + message, + }) + } +} diff --git a/wdl-grammar/src/core/tree.rs b/wdl-grammar/src/core/tree.rs new file mode 100644 index 00000000..90445866 --- /dev/null +++ b/wdl-grammar/src/core/tree.rs @@ -0,0 +1,82 @@ +//! A parse tree. + +use pest::iterators::Pairs; +use pest::RuleType; + +use crate::core::lint; + +/// A parse tree with a set of lint [`Warning`](lint::Warning)s. +/// +/// **Note:** this struct implements [`std::ops::Deref`] for the native Pest +/// parse tree ([`Pairs`]), so you can treat this exactly as if you were +/// working with [`Pairs`] directly. +#[derive(Debug)] +pub struct Tree<'a, R: RuleType> { + /// The inner Pest parse tree. + inner: Pairs<'a, R>, + + /// The lint warnings associated with the parse tree.
+ warnings: Option>, +} + +impl<'a, R: RuleType> Tree<'a, R> { + /// Creates a new [`Tree`]. + pub fn new(inner: Pairs<'a, R>, warnings: Option>) -> Self { + Self { inner, warnings } + } + + /// Gets the inner [Pest parse tree](Pairs) for the [`Tree`] by reference. + pub fn inner(&self) -> &Pairs<'a, R> { + &self.inner + } + + /// Consumes `self` to return the inner [Pest parse tree](Pairs) from the + /// [`Tree`]. + pub fn into_inner(self) -> Pairs<'a, R> { + self.inner + } + + /// Gets the [`Warning`](lint::Warning)s from the [`Tree`] by reference. + pub fn warnings(&self) -> Option<&Vec> { + self.warnings.as_ref() + } +} + +impl<'a, R: RuleType> std::ops::Deref for Tree<'a, R> { + type Target = Pairs<'a, R>; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl<'a, R: RuleType> std::ops::DerefMut for Tree<'a, R> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +#[cfg(test)] +mod tests { + use pest::Parser as _; + + use super::*; + use crate::core::lint::Linter; + use crate::v1::Parser; + use crate::v1::Rule; + + #[test] + fn new() -> Result<(), Box> { + let tree = Parser::parse(Rule::document, "version 1.1\n \n")?; + let lints = Linter::lint(tree.clone(), &crate::v1::lint::rules())?; + + let tree = Tree::new(tree, lints); + assert_eq!( + tree.warnings().unwrap().first().unwrap().to_string(), + String::from("[v1::001::Style/Low] line 2 is empty but contains spaces") + ); + assert_eq!(tree.into_inner().len(), 1); + + Ok(()) + } +} diff --git a/wdl-grammar/src/core/validation.rs b/wdl-grammar/src/core/validation.rs new file mode 100644 index 00000000..7bbf81e1 --- /dev/null +++ b/wdl-grammar/src/core/validation.rs @@ -0,0 +1,38 @@ +//! Validation. + +use pest::iterators::Pairs; +use pest::RuleType; +use to_snake_case::ToSnakeCase as _; + +pub mod error; +pub mod validator; + +pub use error::Error; +pub use validator::Validator; + +use crate::core::Code; + +/// A [`Result`](std::result::Result) with a validation [`Error`]. 
+pub type Result = std::result::Result<(), Error>; + +/// A validation rule. +pub trait Rule: std::fmt::Debug { + /// The name of the validation rule. + /// + /// This is what will show up in style guides, it is required to be snake + /// case (even though the rust struct is camel case). + fn name(&self) -> String { + format!("{:?}", self).to_snake_case() + } + + /// Get the code for this validation rule. + fn code(&self) -> Code; + + /// Checks the parse tree according to the implemented validation rule. + /// + /// **Note:** it would be much better to pass a reference to the parse tree + /// (`&Pairs<'a, R>`) here to avoid unnecessary cloning of the tree. + /// Unfortunately, the [`Pest`](https://pest.rs) library does not support a + /// reference to [`Pairs`] being turned into an iterator at the moment. + fn validate(&self, tree: Pairs<'_, R>) -> Result; +} diff --git a/wdl-grammar/src/core/validation/error.rs b/wdl-grammar/src/core/validation/error.rs new file mode 100644 index 00000000..1281b469 --- /dev/null +++ b/wdl-grammar/src/core/validation/error.rs @@ -0,0 +1,97 @@ +//! Validation errors. + +mod builder; + +pub use builder::Builder; + +use crate::core::Code; + +/// A validation error. +#[derive(Clone, Debug)] +pub struct Error { + /// The code. + code: Code, + + /// The message. + message: String, +} + +impl Error { + /// Gets the code for this [`Error`]. + /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::validation::error::Builder; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.code().grammar(), &Version::V1); + /// assert_eq!(warning.code().index().get(), 1); + /// + /// # Ok::<(), Box>(()) + pub fn code(&self) -> &Code { + &self.code + } + + /// Gets the message for this [`Error`]. 
+ /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::validation::error::Builder; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.message(), "Hello, world!"); + /// + /// # Ok::<(), Box>(()) + pub fn message(&self) -> &str { + self.message.as_ref() + } +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[{}] {}", self.code, self.message) + } +} + +impl std::error::Error for Error {} + +/// A [`Result`](std::result::Result) with zero or more validation [`Error`]s. +pub type Result = std::result::Result<(), Vec>; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn display() -> std::result::Result<(), Box> { + let code = Code::try_new(crate::Version::V1, 1)?; + let error = Builder::default() + .code(code) + .message("Hello, world!") + .try_build()?; + + assert_eq!(error.to_string(), "[v1::001] Hello, world!"); + + Ok(()) + } +} diff --git a/wdl-grammar/src/core/validation/error/builder.rs b/wdl-grammar/src/core/validation/error/builder.rs new file mode 100644 index 00000000..c6ef0089 --- /dev/null +++ b/wdl-grammar/src/core/validation/error/builder.rs @@ -0,0 +1,122 @@ +//! A builder for a validation [`Error`](super::Error). + +use crate::core::validation; +use crate::core::Code; + +/// An error related to building a validation error. +#[derive(Debug)] +pub enum MissingError { + /// A code was not provided. + Code, + + /// A message was not provided.
+ Message, +} + +impl std::fmt::Display for MissingError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + MissingError::Code => write!(f, "missing code"), + MissingError::Message => write!(f, "missing message"), + } + } +} + +impl std::error::Error for MissingError {} + +/// A [`Result`](std::result::Result) with a [`MissingError`]. +pub type Result = std::result::Result; + +/// A builder for an [`Error`](validation::Error). +#[derive(Debug, Default)] +pub struct Builder { + /// The code. + code: Option, + + /// The message. + message: Option, +} + +impl Builder { + /// Sets the code for this [`Builder`]. + /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::validation::error::Builder; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.code().grammar(), &Version::V1); + /// assert_eq!(warning.code().index().get(), 1); + /// + /// # Ok::<(), Box>(()) + pub fn code(mut self, code: Code) -> Self { + self.code = Some(code); + self + } + + /// Sets the message for this [`Builder`]. + /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::validation::error::Builder; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.message(), "Hello, world!"); + /// + /// # Ok::<(), Box>(()) + pub fn message(mut self, message: impl Into) -> Self { + let message = message.into(); + self.message = Some(message); + self + } + + /// Consumes `self` to attempt to build an [`Error`](validation::Error). 
+ /// + /// # Examples + /// + /// ``` + /// use wdl_grammar as grammar; + /// + /// use grammar::core::validation::error::Builder; + /// use grammar::core::Code; + /// use grammar::Version; + /// + /// let code = Code::try_new(Version::V1, 1)?; + /// let warning = Builder::default() + /// .code(code) + /// .message("Hello, world!") + /// .try_build()?; + /// + /// assert_eq!(warning.code().grammar(), &Version::V1); + /// assert_eq!(warning.code().index().get(), 1); + /// assert_eq!(warning.message(), "Hello, world!"); + /// assert_eq!(warning.to_string(), "[v1::001] Hello, world!"); + /// + /// # Ok::<(), Box>(()) + pub fn try_build(self) -> Result { + let code = self.code.map(Ok).unwrap_or(Err(MissingError::Code))?; + let message = self.message.map(Ok).unwrap_or(Err(MissingError::Message))?; + + Ok(validation::Error { code, message }) + } +} diff --git a/wdl-grammar/src/core/validation/validator.rs b/wdl-grammar/src/core/validation/validator.rs new file mode 100644 index 00000000..3cdf4ebb --- /dev/null +++ b/wdl-grammar/src/core/validation/validator.rs @@ -0,0 +1,54 @@ +//! Validators. + +use pest::iterators::Pairs; +use pest::RuleType; + +use crate::core::validation; +use crate::core::validation::Rule; + +/// A validator for a WDL parse tree. +#[derive(Debug)] +pub struct Validator; + +impl Validator { + /// Validates a WDL parse tree according to a set of validation rules. 
+ pub fn validate( + tree: Pairs<'_, R>, + rules: &[Box>], + ) -> validation::Result { + rules + .iter() + .try_for_each(|rule| rule.validate(tree.clone())) + } +} + +#[cfg(test)] +mod tests { + use pest::Parser as _; + + use crate::v1::Parser; + use crate::v1::Rule; + + use super::*; + + #[test] + fn baseline() -> std::result::Result<(), Box> { + let tree = Parser::parse( + Rule::document, + "version 1.1 +task test { + output { + String hello = \"\\.\" + } +}", + )?; + let rules = crate::v1::validation::rules(); + let err = Validator::validate(tree, rules.as_ref()).unwrap_err(); + assert_eq!( + err.to_string(), + String::from("[v1::001] invalid escape character '\\.' in string at line 4:25") + ); + + Ok(()) + } +} diff --git a/wdl-grammar/src/lib.rs b/wdl-grammar/src/lib.rs index c7b1ea6d..13ef1022 100644 --- a/wdl-grammar/src/lib.rs +++ b/wdl-grammar/src/lib.rs @@ -2,32 +2,51 @@ //! (WDL) using [`pest`](https://pest.rs). #![feature(let_chains)] +#![warn(missing_docs)] #![warn(rust_2018_idioms)] #![warn(rust_2021_compatibility)] #![warn(missing_debug_implementations)] #![warn(clippy::missing_docs_in_private_items)] #![warn(rustdoc::broken_intra_doc_links)] -#[cfg(feature = "binaries")] -use clap::ValueEnum; -use serde::Deserialize; -use serde::Serialize; +use pest::RuleType; +pub mod core; pub mod v1; +mod version; -#[derive(Clone, Debug, Default, Deserialize, Serialize)] -#[cfg_attr(feature = "binaries", derive(ValueEnum))] -#[serde(rename_all = "lowercase")] -pub enum Version { - /// Version 1.x of the WDL specification. - #[default] - V1, +pub use version::Version; + +/// An error that can occur when parsing. +/// +/// **Note:** the contents of these errors are all boxed because they have +/// relatively large struct sizes (and, thus, are unwieldy to pass around on the +/// stack). As such, they are boxed so that only a pointer to the heap is stored. +#[derive(Debug)] +pub enum Error { + /// An error occurred while linting a parse tree.
+ Lint(Box), + + /// An error occurred while Pest was parsing the parse tree. + Parse(Box>), + + /// An error occurred while validating a parse tree. + Validation(Box), } -impl std::fmt::Display for Version { +impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Version::V1 => write!(f, "WDL v1.x"), + Error::Lint(err) => { + write!(f, "lint error: {err}") + } + Error::Parse(err) => write!(f, "parse error:\n\n{err}"), + Error::Validation(err) => write!(f, "validation error: {err}"), } } } + +impl std::error::Error for Error {} + +/// A [`Result`](std::result::Result) with an [`Error`]. +pub type Result = std::result::Result>; diff --git a/wdl-grammar/src/main.rs b/wdl-grammar/src/main.rs index a650d4de..67e99729 100644 --- a/wdl-grammar/src/main.rs +++ b/wdl-grammar/src/main.rs @@ -106,3 +106,14 @@ async fn main() { Err(err) => eprintln!("error: {}", err), } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn verify_arguments() { + use clap::CommandFactory; + Args::command().debug_assert() + } +} diff --git a/wdl-grammar/src/v1.rs b/wdl-grammar/src/v1.rs index cfa4c3ca..fd2d686c 100644 --- a/wdl-grammar/src/v1.rs +++ b/wdl-grammar/src/v1.rs @@ -1,11 +1,85 @@ -use pest_derive::Parser; +//! WDL 1.x +use pest::Parser as _; + +use crate::core::lint::Linter; +use crate::core::validation::Validator; +use crate::core::Tree; +use crate::Error; +use crate::Result; + +pub(crate) mod lint; +mod parse; #[cfg(test)] mod tests; +pub(crate) mod validation; + +pub(crate) use parse::Parser; +pub use parse::Rule; + +/// Parses a WDL 1.x input according to the specified [Rule]. +/// +/// # Examples +/// +/// ``` +/// use wdl_grammar as grammar; +/// +/// use grammar::v1::Rule; +/// use grammar::Error; +/// +/// // A valid grammar tree. 
+/// +/// let tree = grammar::v1::parse(Rule::document, "version 1.1\n \n")?; +/// +/// let warnings = tree.warnings().unwrap(); +/// assert_eq!(warnings.len(), 1); +/// +/// let warning = warnings.first().unwrap(); +/// assert_eq!( +/// warning.to_string(), +/// String::from("[v1::001::Style/Low] line 2 is empty but contains spaces") +/// ); +/// +/// let pair = tree.into_inner().next().unwrap(); +/// assert!(matches!(pair.as_rule(), Rule::document)); +/// +/// // An invalid grammar tree due to pest parsing. +/// +/// let err = grammar::v1::parse(Rule::document, "Hello, world!").unwrap_err(); +/// assert!(matches!(err, Error::Parse(_))); +/// +/// // An invalid grammar tree due to our additional validation. +/// +/// let err = grammar::v1::parse( +/// Rule::document, +/// "version 1.1 +/// task test { +/// output { +/// String hello = \"\\.\" +/// } +/// }", +/// ) +/// .unwrap_err(); +/// +/// assert!(matches!(err, Error::Validation(_))); +/// +/// # Ok::<(), Box>(()) +/// ``` +pub fn parse(rule: Rule, input: &str) -> Result, Rule> { + let tree = Parser::parse(rule, input) + .map_err(Box::new) + .map_err(Error::Parse)?; -#[derive(Debug, Parser)] -#[grammar = "v1/wdl.pest"] -pub struct Parser; + let validations = validation::rules(); + Validator::validate(tree.clone(), validations.as_ref()) + .map_err(Box::new) + .map_err(Error::Validation)?; + + let lints = lint::rules(); + let warnings = Linter::lint(tree.clone(), lints.as_ref()).map_err(Error::Lint)?; + + Ok(Tree::new(tree, warnings)) +} /// Gets a rule by name. /// diff --git a/wdl-grammar/src/v1/lint.rs b/wdl-grammar/src/v1/lint.rs new file mode 100644 index 00000000..617a13f7 --- /dev/null +++ b/wdl-grammar/src/v1/lint.rs @@ -0,0 +1,13 @@ +//! Lint rules for WDL 1.x. + +mod whitespace; + +pub use whitespace::Whitespace; + +use crate::core::lint::Rule; +use crate::v1; + +/// Gets all lint rules available for WDL 1.x. 
+pub fn rules() -> Vec>> { + vec![Box::new(Whitespace)] +} diff --git a/wdl-grammar/src/v1/lint/whitespace.rs b/wdl-grammar/src/v1/lint/whitespace.rs new file mode 100644 index 00000000..a4d9556d --- /dev/null +++ b/wdl-grammar/src/v1/lint/whitespace.rs @@ -0,0 +1,181 @@ +//! Various lints for invalid whitespace. + +use std::num::NonZeroUsize; + +use pest::iterators::Pairs; + +use crate::core::lint; +use crate::core::lint::Group; +use crate::core::lint::Rule; +use crate::core::Code; +use crate::v1; +use crate::Version; + +/// Various lints for invalid whitespace. +#[derive(Debug)] +pub struct Whitespace; + +impl Whitespace { + /// Creates an error corresponding to a line being filled only with blank + /// spaces. + fn empty_line(&self, line_no: NonZeroUsize) -> lint::Warning + where + Self: Rule, + { + // SAFETY: this error is written so that it will always unwrap. + lint::warning::Builder::default() + .code(self.code()) + .level(lint::Level::Low) + .group(lint::Group::Style) + .message(format!("line {} is empty but contains spaces", line_no)) + .try_build() + .unwrap() + } + + /// Creates an error corresponding to a line with a trailing space. + fn trailing_space(&self, line_no: NonZeroUsize) -> lint::Warning + where + Self: Rule, + { + // SAFETY: this error is written so that it will always unwrap. + lint::warning::Builder::default() + .code(self.code()) + .level(lint::Level::Low) + .group(lint::Group::Style) + .message(format!("trailing space at the end of line {}", line_no)) + .try_build() + .unwrap() + } + + /// Creates an error corresponding to a line with a trailing tab. + fn trailing_tab(&self, line_no: NonZeroUsize) -> lint::Warning + where + Self: Rule, + { + // SAFETY: this error is written so that it will always unwrap. 
+ lint::warning::Builder::default() + .code(self.code()) + .level(lint::Level::Low) + .group(lint::Group::Style) + .message(format!("trailing tab at the end of line {}", line_no)) + .try_build() + .unwrap() + } +} + +impl Rule for Whitespace { + fn code(&self) -> Code { + // SAFETY: this is manually crafted to unwrap successfully every time. + Code::try_new(Version::V1, 1).unwrap() + } + + fn group(&self) -> lint::Group { + Group::Style + } + + fn check(&self, tree: Pairs<'_, v1::Rule>) -> lint::Result { + let mut results = Vec::new(); + + for (i, line) in tree.as_str().lines().enumerate() { + // SAFETY: this will always unwrap because we add one to the current + // enumeration index. Technically it will not unwrap for usize::MAX + // - 1, but we don't expect that any WDL document will have that + // many lines. + let line_no = NonZeroUsize::try_from(i + 1).unwrap(); + let trimmed_line = line.trim(); + + if trimmed_line.is_empty() && line != trimmed_line { + results.push(self.empty_line(line_no)); + } else if line.ends_with(' ') { + results.push(self.trailing_space(line_no)); + } else if line.ends_with('\t') { + results.push(self.trailing_tab(line_no)); + } + } + + match results.is_empty() { + true => Ok(None), + false => Ok(Some(results)), + } + } +} + +#[cfg(test)] +mod tests { + use pest::Parser as _; + + use crate::core::lint::Rule as _; + use crate::v1::parse::Parser; + use crate::v1::Rule; + + use super::*; + + #[test] + fn it_catches_an_empty_line() -> Result<(), Box> { + let tree = Parser::parse(Rule::document, "version 1.1\n \n")?; + let warning = Whitespace.check(tree)?.unwrap(); + + assert_eq!(warning.len(), 1); + assert_eq!( + warning.first().unwrap().to_string(), + "[v1::001::Style/Low] line 2 is empty but contains spaces" + ); + + Ok(()) + } + + #[test] + fn it_catches_a_trailing_space() -> Result<(), Box> { + let tree = Parser::parse(Rule::document, "version 1.1 ")?; + let warning = Whitespace.check(tree)?.unwrap(); + + assert_eq!(warning.len(), 1); +
assert_eq!( + warning.first().unwrap().to_string(), + "[v1::001::Style/Low] trailing space at the end of line 1" + ); + + Ok(()) + } + + #[test] + fn it_catches_a_trailing_tab() -> Result<(), Box> { + let tree = Parser::parse(Rule::document, "version 1.1\t")?; + let warning = Whitespace.check(tree)?.unwrap(); + + assert_eq!(warning.len(), 1); + assert_eq!( + warning.first().unwrap().to_string(), + "[v1::001::Style/Low] trailing tab at the end of line 1" + ); + + Ok(()) + } + + #[test] + fn it_unwraps_a_trailing_space_error() { + let warning = Whitespace.trailing_space(NonZeroUsize::try_from(1).unwrap()); + assert_eq!( + warning.to_string(), + "[v1::001::Style/Low] trailing space at the end of line 1" + ) + } + + #[test] + fn it_unwraps_a_trailing_tab_error() { + let warning = Whitespace.trailing_tab(NonZeroUsize::try_from(1).unwrap()); + assert_eq!( + warning.to_string(), + "[v1::001::Style/Low] trailing tab at the end of line 1" + ) + } + + #[test] + fn it_unwraps_an_empty_line_error() { + let warning = Whitespace.empty_line(NonZeroUsize::try_from(1).unwrap()); + assert_eq!( + warning.to_string(), + "[v1::001::Style/Low] line 1 is empty but contains spaces" + ) + } +} diff --git a/wdl-grammar/src/v1/parse.rs b/wdl-grammar/src/v1/parse.rs new file mode 100644 index 00000000..e79acd8c --- /dev/null +++ b/wdl-grammar/src/v1/parse.rs @@ -0,0 +1,15 @@ +//! WDL 1.x parsing. +#![allow(missing_docs)] + +use pest_derive::Parser; + +/// A Pest [`pest::Parser`] for the WDL 1.x grammar. +/// +/// **Note:** this [`Parser`] is not exposed directly to the user. Instead, you +/// should use the provided [`parse`] method, which performs additional +/// validation outside of the PEG grammar itself (the choice was made to do some +/// validation outside of the PEG grammar to give users better error messages in +/// some use cases). 
+#[derive(Debug, Parser)] +#[grammar = "v1/wdl.pest"] +pub(crate) struct Parser; diff --git a/wdl-grammar/src/v1/tests/expression/core.rs b/wdl-grammar/src/v1/tests/expression/core.rs index 17ca8451..1a6443c1 100644 --- a/wdl-grammar/src/v1/tests/expression/core.rs +++ b/wdl-grammar/src/v1/tests/expression/core.rs @@ -18,45 +18,49 @@ fn it_successfully_parses_an_array_literal_with_spaces_inside() { parser: WdlParser, input: "[if a then b else c, \"Hello, world!\"]", rule: Rule::core, - tokens: [array_literal(0, 37, [ - expression(1, 19, [ - r#if(1, 19, [ - WHITESPACE(3, 4, [ - SPACE(3, 4), - ]), - expression(4, 5, [ - identifier(4, 5), - ]), - WHITESPACE(5, 6, [ - SPACE(5, 6), - ]), - WHITESPACE(10, 11, [ - SPACE(10, 11), - ]), - expression(11, 12, [ - identifier(11, 12), - ]), - WHITESPACE(12, 13, [ - SPACE(12, 13), - ]), - WHITESPACE(17, 18, [ - SPACE(17, 18), + tokens: [ + // `[if a then b else c, "Hello, world!"]` + array_literal(0, 37, [ + // `if a then b else c` + expression(1, 19, [ + // `if a then b else c` + r#if(1, 19, [ + WHITESPACE(3, 4, [SPACE(3, 4)]), + // `a` + expression(4, 5, [ + // `a` + identifier(4, 5), + ]), + WHITESPACE(5, 6, [SPACE(5, 6)]), + WHITESPACE(10, 11, [SPACE(10, 11)]), + // `b` + expression(11, 12, [ + // `b` + identifier(11, 12), + ]), + WHITESPACE(12, 13, [SPACE(12, 13)]), + WHITESPACE(17, 18, [SPACE(17, 18)]), + // `c` + expression(18, 19, [ + // `c` + identifier(18, 19), + ]), + ]), ]), - expression(18, 19, [ - identifier(18, 19), + // `,` + COMMA(19, 20), + WHITESPACE(20, 21, [SPACE(20, 21)]), + // `"Hello, world!"` + expression(21, 36, [ + // `"Hello, world!"` + string(21, 36, [ + // `"` + double_quote(21, 22), + // `Hello, world!` + string_literal_contents(22, 35), + ]), ]), - ]), - ]), - COMMA(19, 20), - WHITESPACE(20, 21, [ - SPACE(20, 21), - ]), - expression(21, 36, [ - string(21, 36, [ - double_quoted_string(21, 36), - ]), - ]), - ]) + ]) ] } } diff --git a/wdl-grammar/src/v1/tests/expression/core/array_literal.rs 
b/wdl-grammar/src/v1/tests/expression/core/array_literal.rs index d19bc404..8753ceaf 100644 --- a/wdl-grammar/src/v1/tests/expression/core/array_literal.rs +++ b/wdl-grammar/src/v1/tests/expression/core/array_literal.rs @@ -33,44 +33,60 @@ fn it_fails_to_parse_an_array_literal_with_spaces_outside_the_input() { fn it_successfully_parses_an_array_literal() { parses_to! { parser: WdlParser, - input: "[if a then b else c,\"Hello, world!\"] ", + input: "[if a then b else c,\"Hello, world!\"]", rule: Rule::array_literal, - tokens: [array_literal(0, 36, [ - expression(1, 19, [ - r#if(1, 19, [ - WHITESPACE(3, 4, [ - SPACE(3, 4), - ]), - expression(4, 5, [ - identifier(4, 5), - ]), - WHITESPACE(5, 6, [ - SPACE(5, 6), - ]), - WHITESPACE(10, 11, [ - SPACE(10, 11), - ]), - expression(11, 12, [ - identifier(11, 12), - ]), - WHITESPACE(12, 13, [ - SPACE(12, 13), - ]), - WHITESPACE(17, 18, [ - SPACE(17, 18), - ]), - expression(18, 19, [ - identifier(18, 19), + tokens: [ + // `[if a then b else c,"Hello, world!"]` + array_literal(0, 36, [ + // `if a then b else c` + expression(1, 19, [ + // `if a then b else c` + r#if(1, 19, [ + WHITESPACE(3, 4, [ + SPACE(3, 4), + ]), + // `a` + expression(4, 5, [ + // `a` + identifier(4, 5), + ]), + WHITESPACE(5, 6, [ + SPACE(5, 6), + ]), + WHITESPACE(10, 11, [ + SPACE(10, 11), + ]), + // `b` + expression(11, 12, [ + // `b` + identifier(11, 12), + ]), + WHITESPACE(12, 13, [ + SPACE(12, 13), + ]), + WHITESPACE(17, 18, [ + SPACE(17, 18), + ]), + // `c` + expression(18, 19, [ + // `c` + identifier(18, 19), + ]), ]), ]), - ]), - COMMA(19, 20), - expression(20, 35, [ - string(20, 35, [ - double_quoted_string(20, 35), + // `,` + COMMA(19, 20), + // `"Hello, world!"` + expression(20, 35, [ + // `"Hello, world!"` + string(20, 35, [ + // `"` + double_quote(20, 21), + // `Hello, world!` + string_literal_contents(21, 34), + ]), ]), - ]), - ]) + ]) ] } } @@ -81,13 +97,19 @@ fn it_successfully_parses_an_array_literal_without_the_trailing_space() { parser: 
WdlParser, input: "[if a then b else c, \"Hello, world!\"] ", rule: Rule::array_literal, - tokens: [array_literal(0, 37, [ + tokens: [ + // `[if a then b else c, "Hello, world!"]` + array_literal(0, 37, [ + // `if a then b else c` expression(1, 19, [ + // `if a then b else c` r#if(1, 19, [ WHITESPACE(3, 4, [ SPACE(3, 4), ]), + // `a` expression(4, 5, [ + // `a` identifier(4, 5), ]), WHITESPACE(5, 6, [ @@ -96,7 +118,9 @@ fn it_successfully_parses_an_array_literal_without_the_trailing_space() { WHITESPACE(10, 11, [ SPACE(10, 11), ]), + // `b` expression(11, 12, [ + // `b` identifier(11, 12), ]), WHITESPACE(12, 13, [ @@ -105,18 +129,26 @@ fn it_successfully_parses_an_array_literal_without_the_trailing_space() { WHITESPACE(17, 18, [ SPACE(17, 18), ]), + // `c` expression(18, 19, [ + // `c` identifier(18, 19), ]), ]), ]), + // `,` COMMA(19, 20), WHITESPACE(20, 21, [ SPACE(20, 21), ]), + // `"Hello, world!"` expression(21, 36, [ + // `"Hello, world!"` string(21, 36, [ - double_quoted_string(21, 36), + // `"` + double_quote(21, 22), + // `Hello, world!` + string_literal_contents(22, 35), ]), ]), ]) @@ -130,45 +162,61 @@ fn it_successfully_parses_an_array_literal_with_spaces_inside() { parser: WdlParser, input: "[if a then b else c, \"Hello, world!\"]", rule: Rule::array_literal, - tokens: [array_literal(0, 37, [ - expression(1, 19, [ - r#if(1, 19, [ - WHITESPACE(3, 4, [ - SPACE(3, 4), - ]), - expression(4, 5, [ - identifier(4, 5), - ]), - WHITESPACE(5, 6, [ - SPACE(5, 6), - ]), - WHITESPACE(10, 11, [ - SPACE(10, 11), - ]), - expression(11, 12, [ - identifier(11, 12), - ]), - WHITESPACE(12, 13, [ - SPACE(12, 13), - ]), - WHITESPACE(17, 18, [ - SPACE(17, 18), - ]), - expression(18, 19, [ - identifier(18, 19), + tokens: [ + // `[if a then b else c, "Hello, world!"]` + array_literal(0, 37, [ + // `if a then b else c` + expression(1, 19, [ + // `if a then b else c` + r#if(1, 19, [ + WHITESPACE(3, 4, [ + SPACE(3, 4), + ]), + // `a` + expression(4, 5, [ + // `a` + identifier(4, 
5), + ]), + WHITESPACE(5, 6, [ + SPACE(5, 6), + ]), + WHITESPACE(10, 11, [ + SPACE(10, 11), + ]), + // `b` + expression(11, 12, [ + // `b` + identifier(11, 12), + ]), + WHITESPACE(12, 13, [ + SPACE(12, 13), + ]), + WHITESPACE(17, 18, [ + SPACE(17, 18), + ]), + // `c` + expression(18, 19, [ + // `c` + identifier(18, 19), + ]), ]), ]), - ]), - COMMA(19, 20), - WHITESPACE(20, 21, [ - SPACE(20, 21), - ]), - expression(21, 36, [ - string(21, 36, [ - double_quoted_string(21, 36), + // `,` + COMMA(19, 20), + WHITESPACE(20, 21, [ + SPACE(20, 21), ]), - ]), - ]) + // `"Hello, world!"` + expression(21, 36, [ + // `"Hello, world!"` + string(21, 36, [ + // `"` + double_quote(21, 22), + // `Hello, world!` + string_literal_contents(22, 35), + ]), + ]), + ]) ] } } diff --git a/wdl-grammar/src/v1/tests/literal.rs b/wdl-grammar/src/v1/tests/literal.rs index d93c1192..51f2841a 100644 --- a/wdl-grammar/src/v1/tests/literal.rs +++ b/wdl-grammar/src/v1/tests/literal.rs @@ -134,7 +134,13 @@ fn it_successfully_parses_an_empty_double_quoted_string() { parser: WdlParser, input: "\"\"", rule: Rule::literal, - tokens: [string(0, 2, [double_quoted_string(0, 2)])] + tokens: [ + // `""` + string(0, 2, [ + // `"` + double_quote(0, 1), + ]) + ] } } @@ -144,7 +150,13 @@ fn it_successfully_parses_an_empty_single_quoted_string() { parser: WdlParser, input: "''", rule: Rule::literal, - tokens: [string(0, 2, [single_quoted_string(0, 2)])] + tokens: [ + // `''` + string(0, 2, [ + // `'` + single_quote(0, 1), + ]) + ] } } @@ -154,7 +166,15 @@ fn it_successfully_parses_a_double_quoted_string_with_a_unicode_character() { parser: WdlParser, input: "\"😀\"", rule: Rule::literal, - tokens: [string(0, 6, [double_quoted_string(0, 6)])] + tokens: [ + // `"😀"` + string(0, 6, [ + // `"` + double_quote(0, 1), + // `😀` + string_literal_contents(1, 5), + ]) + ] } } @@ -164,7 +184,15 @@ fn it_successfully_parses_a_single_quoted_string_with_a_unicode_character() { parser: WdlParser, input: "'😀'", rule:
Rule::literal, - tokens: [string(0, 6, [single_quoted_string(0, 6)])] + tokens: [ + // `'😀'` + string(0, 6, [ + // `'` + single_quote(0, 1), + // `😀` + string_literal_contents(1, 5), + ]) + ] } } @@ -174,7 +202,15 @@ fn it_successfully_parses_a_double_quoted_string() { parser: WdlParser, input: "\"Hello, world!\"", rule: Rule::literal, - tokens: [string(0, 15, [double_quoted_string(0, 15)])] + tokens: [ + // `"Hello, world!"` + string(0, 15, [ + // `"` + double_quote(0, 1), + // `Hello, world!` + string_literal_contents(1, 14), + ]) + ] } } @@ -184,7 +220,15 @@ fn it_successfully_parses_a_single_quoted_string() { parser: WdlParser, input: "'Hello, world!'", rule: Rule::literal, - tokens: [string(0, 15, [single_quoted_string(0, 15)])] + tokens: [ + // `'Hello, world!'` + string(0, 15, [ + // `'` + single_quote(0, 1), + // `Hello, world!` + string_literal_contents(1, 14), + ]) + ] } } diff --git a/wdl-grammar/src/v1/tests/primitives/char.rs b/wdl-grammar/src/v1/tests/primitives/char.rs index ad7c7841..e9d20507 100644 --- a/wdl-grammar/src/v1/tests/primitives/char.rs +++ b/wdl-grammar/src/v1/tests/primitives/char.rs @@ -16,13 +16,7 @@ fn it_fails_to_parse_an_empty_char_special() { parser: WdlParser, input: "", rule: Rule::char_special, - positives: vec![ - Rule::char_escaped_invalid, - Rule::char_escaped, - Rule::char_octal, - Rule::char_hex, - Rule::char_unicode, - ], + positives: vec![Rule::char_special], negatives: vec![], pos: 0 } @@ -34,7 +28,13 @@ fn it_successfully_parses_char_escaped() { parser: WdlParser, input: "\\\\", rule: Rule::char_special, - tokens: [char_escaped(0, 2)] + tokens: [ + // `\\` + char_special(0, 2, [ + // `\\` + char_escaped(0, 2), + ]) + ] } } @@ -44,7 +44,13 @@ fn it_successfully_parses_char_hex() { parser: WdlParser, input: "\\xFF", rule: Rule::char_special, - tokens: [char_hex(0, 4)] + tokens: [ + // `\xFF` + char_special(0, 4, [ + // `\xFF` + char_hex(0, 4), + ]) + ] } } @@ -54,7 +60,13 @@ fn it_successfully_parses_char_octal()
{ parser: WdlParser, input: "\\123", rule: Rule::char_special, - tokens: [char_octal(0, 4)] + tokens: [ + // `\123` + char_special(0, 4, [ + // `\123` + char_octal(0, 4), + ]) + ] } } @@ -64,27 +76,63 @@ fn it_successfully_parses_char_unicode() { parser: WdlParser, input: "\\uFFFF", rule: Rule::char_special, - tokens: [char_unicode(0, 6)] + tokens: [ + // `\uFFFF` + char_special(0, 6, [ + // `\uFFFF` + char_unicode(0, 6, [ + // `\uFFFF` + char_unicode_four(0, 6), + ]), + ]) + ] } parses_to! { parser: WdlParser, - input: "\\uFFFF", + input: "\\UFFFF", rule: Rule::char_special, - tokens: [char_unicode(0, 6)] + tokens: [ + // `\UFFFF` + char_special(0, 6, [ + // `\UFFFF` + char_unicode(0, 6, [ + // `\UFFFF` + char_unicode_four(0, 6), + ]), + ]) + ] } parses_to! { parser: WdlParser, input: "\\uFFFFFFFF", rule: Rule::char_special, - tokens: [char_unicode(0, 10)] + tokens: [ + // `\uFFFFFFFF` + char_special(0, 10, [ + // `\uFFFFFFFF` + char_unicode(0, 10, [ + // `\uFFFFFFFF` + char_unicode_eight(0, 10), + ]), + ]) + ] } parses_to! { parser: WdlParser, - input: "\\uFFFFFFFF", + input: "\\UFFFFFFFF", rule: Rule::char_special, - tokens: [char_unicode(0, 10)] + tokens: [ + // `\UFFFFFFFF` + char_special(0, 10, [ + // `\UFFFFFFFF` + char_unicode(0, 10, [ + // `\UFFFFFFFF` + char_unicode_eight(0, 10), + ]), + ]) + ] } } diff --git a/wdl-grammar/src/v1/tests/primitives/char/unicode.rs b/wdl-grammar/src/v1/tests/primitives/char/unicode.rs index d3ae95ec..b520d309 100644 --- a/wdl-grammar/src/v1/tests/primitives/char/unicode.rs +++ b/wdl-grammar/src/v1/tests/primitives/char/unicode.rs @@ -80,14 +80,26 @@ fn it_successfully_parses_char_unicode_with_four_hex_characters() { parser: WdlParser, input: "\\uFFFF", rule: Rule::char_unicode, - tokens: [char_unicode(0, 6)] + tokens: [ + // `\uFFFF` + char_unicode(0, 6, [ + // `\uFFFF` + char_unicode_four(0, 6), + ]) + ] } parses_to! 
{ parser: WdlParser, input: "\\UFFFF", rule: Rule::char_unicode, - tokens: [char_unicode(0, 6)] + tokens: [ + // `\UFFFF` + char_unicode(0, 6, [ + // `\UFFFF` + char_unicode_four(0, 6), + ]) + ] } } @@ -97,14 +109,26 @@ fn it_successfully_parses_char_unicode_with_eight_hex_characters() { parser: WdlParser, input: "\\uFFFFFFFF", rule: Rule::char_unicode, - tokens: [char_unicode(0, 10)] + tokens: [ + // `\uFFFFFFFF` + char_unicode(0, 10, [ + // `\uFFFFFFFF` + char_unicode_eight(0, 10), + ]) + ] } parses_to! { parser: WdlParser, input: "\\UFFFFFFFF", rule: Rule::char_unicode, - tokens: [char_unicode(0, 10)] + tokens: [ + // `\UFFFFFFFF` + char_unicode(0, 10, [ + // `\UFFFFFFFF` + char_unicode_eight(0, 10), + ]) + ] } } diff --git a/wdl-grammar/src/v1/tests/primitives/string.rs b/wdl-grammar/src/v1/tests/primitives/string.rs index 8c948533..28db8fef 100644 --- a/wdl-grammar/src/v1/tests/primitives/string.rs +++ b/wdl-grammar/src/v1/tests/primitives/string.rs @@ -38,7 +38,7 @@ fn it_fails_to_parse_a_single_double_quote() { parser: WdlParser, input: "\"", rule: Rule::string, - positives: vec![Rule::char_escaped], + positives: vec![Rule::char_special, Rule::string_expression_placeholder_start], negatives: vec![], pos: 1 } @@ -50,7 +50,7 @@ fn it_fails_to_parse_a_single_single_quote() { parser: WdlParser, input: "\'", rule: Rule::string, - positives: vec![Rule::char_escaped], + positives: vec![Rule::char_special, Rule::string_expression_placeholder_start], negatives: vec![], pos: 1 } @@ -62,7 +62,13 @@ fn it_successfully_parses_an_empty_double_quoted_string() { parser: WdlParser, input: "\"\"", rule: Rule::string, - tokens: [string(0, 2, [double_quoted_string(0, 2)])] + tokens: [ + // `""` + string(0, 2, [ + // `"` + double_quote(0, 1), + ]) + ] } } @@ -72,7 +78,13 @@ fn it_successfully_parses_an_empty_single_quoted_string() { parser: WdlParser, input: "''", rule: Rule::string, - tokens: [string(0, 2, [single_quoted_string(0, 2)])] + tokens: [ + // `''` + string(0, 2, [ + 
// `'` + single_quote(0, 1), + ]) + ] } } @@ -82,7 +94,15 @@ fn it_successfully_parses_a_double_quoted_string_with_a_unicode_character() { parser: WdlParser, input: "\"😀\"", rule: Rule::string, - tokens: [string(0, 6, [double_quoted_string(0, 6)])] + tokens: [ + // `"😀"` + string(0, 6, [ + // `"` + double_quote(0, 1), + // `😀` + string_literal_contents(1, 5), + ]) + ] } } @@ -92,7 +112,15 @@ fn it_successfully_parses_a_single_quoted_string_with_a_unicode_character() { parser: WdlParser, input: "'😀'", rule: Rule::string, - tokens: [string(0, 6, [single_quoted_string(0, 6)])] + tokens: [ + // `'😀'` + string(0, 6, [ + // `'` + single_quote(0, 1), + // `😀` + string_literal_contents(1, 5), + ]) + ] } } @@ -102,7 +130,15 @@ fn it_successfully_parses_a_double_quoted_string() { parser: WdlParser, input: "\"Hello, world!\"", rule: Rule::string, - tokens: [string(0, 15, [double_quoted_string(0, 15)])] + tokens: [ + // `"Hello, world!"` + string(0, 15, [ + // `"` + double_quote(0, 1), + // `Hello, world!` + string_literal_contents(1, 14), + ]) + ] } } @@ -112,6 +148,14 @@ fn it_successfully_parses_a_single_quoted_string() { parser: WdlParser, input: "'Hello, world!'", rule: Rule::string, - tokens: [string(0, 15, [single_quoted_string(0, 15)])] + tokens: [ + // `'Hello, world!'` + string(0, 15, [ + // `'` + single_quote(0, 1), + // `Hello, world!` + string_literal_contents(1, 14), + ]) + ] } } diff --git a/wdl-grammar/src/v1/tests/primitives/string/double_quoted_string.rs b/wdl-grammar/src/v1/tests/primitives/string/double_quoted_string.rs index 23e3bcfb..5a827c78 100644 --- a/wdl-grammar/src/v1/tests/primitives/string/double_quoted_string.rs +++ b/wdl-grammar/src/v1/tests/primitives/string/double_quoted_string.rs @@ -10,20 +10,8 @@ fn it_fails_to_parse_an_empty_double_quoted_string() { fails_with!
{ parser: WdlParser, input: "", - rule: Rule::double_quoted_string, - positives: vec![Rule::double_quoted_string], - negatives: vec![], - pos: 0 - } -} - -#[test] -fn it_fails_to_parse_single_quoted_string() { - fails_with! { - parser: WdlParser, - input: "'hello, world'", - rule: Rule::double_quoted_string, - positives: vec![Rule::double_quoted_string], + rule: Rule::string, + positives: vec![Rule::string], negatives: vec![], pos: 0 } @@ -34,10 +22,8 @@ fn it_fails_to_parse_a_single_double_quote() { fails_with! { parser: WdlParser, input: "\"", - rule: Rule::double_quoted_string, - positives: vec![ - Rule::char_escaped - ], + rule: Rule::string, + positives: vec![Rule::char_special, Rule::string_expression_placeholder_start], negatives: vec![], pos: 1 } @@ -48,10 +34,8 @@ fn it_fails_to_parse_a_string_with_a_newline() { fails_with! { parser: WdlParser, input: "\"Hello,\nworld!\"", - rule: Rule::double_quoted_string, - positives: vec![ - Rule::char_escaped - ], + rule: Rule::string, + positives: vec![Rule::char_special, Rule::string_expression_placeholder_start], negatives: vec![], pos: 7 } @@ -62,22 +46,34 @@ fn it_parses_an_empty_double_quoted_string() { parses_to! { parser: WdlParser, input: "\"\"", - rule: Rule::double_quoted_string, - tokens: [double_quoted_string(0, 2)] + rule: Rule::string, + tokens: [ + // `""` + string(0, 2, [ + // `"` + double_quote(0, 1), + ]) + ] } } #[test] fn it_successfully_parses_the_first_two_double_quotes() { - // This test will succeed, as `""`` matches the pattern, but the last double + // This test will succeed, as `""` matches the pattern, but the last double // quote will not be included. This is fine for parsing, as the now // unmatched `"` will throw an error. parses_to! 
{ parser: WdlParser, input: "\"\"\"", - rule: Rule::double_quoted_string, - tokens: [double_quoted_string(0, 2)] + rule: Rule::string, + tokens: [ + // `""` + string(0, 2, [ + // `"` + double_quote(0, 1), + ]) + ] } } @@ -86,7 +82,15 @@ fn it_parses_a_double_quoted_string() { parses_to! { parser: WdlParser, input: "\"Hello, world!\"", - rule: Rule::double_quoted_string, - tokens: [double_quoted_string(0, 15)] + rule: Rule::string, + tokens: [ + // `"Hello, world!"` + string(0, 15, [ + // `"` + double_quote(0, 1), + // `Hello, world!` + string_literal_contents(1, 14), + ]) + ] } } diff --git a/wdl-grammar/src/v1/tests/primitives/string/single_quoted_string.rs b/wdl-grammar/src/v1/tests/primitives/string/single_quoted_string.rs index 8eec5950..4f6037b0 100644 --- a/wdl-grammar/src/v1/tests/primitives/string/single_quoted_string.rs +++ b/wdl-grammar/src/v1/tests/primitives/string/single_quoted_string.rs @@ -10,34 +10,20 @@ fn it_fails_to_parse_an_empty_single_quoted_string() { fails_with! { parser: WdlParser, input: "", - rule: Rule::single_quoted_string, - positives: vec![Rule::single_quoted_string], + rule: Rule::string, + positives: vec![Rule::string], negatives: vec![], pos: 0 } } #[test] -fn it_fails_to_parse_single_quoted_string() { - fails_with! { - parser: WdlParser, - input: "\"hello, world\"", - rule: Rule::single_quoted_string, - positives: vec![Rule::single_quoted_string], - negatives: vec![], - pos: 0 - } -} - -#[test] -fn it_fails_to_parse_a_single_double_quote() { +fn it_fails_to_parse_a_single_single_quote() { fails_with! { parser: WdlParser, input: "\'", - rule: Rule::single_quoted_string, - positives: vec![ - Rule::char_escaped - ], + rule: Rule::string, + positives: vec![Rule::char_special, Rule::string_expression_placeholder_start], negatives: vec![], pos: 1 } @@ -48,10 +34,8 @@ fn it_fails_to_parse_a_string_with_a_newline() { fails_with! 
{ parser: WdlParser, input: "'Hello,\nworld!'", - rule: Rule::single_quoted_string, - positives: vec![ - Rule::char_escaped - ], + rule: Rule::string, + positives: vec![Rule::char_special, Rule::string_expression_placeholder_start], negatives: vec![], pos: 7 } @@ -62,22 +46,34 @@ fn it_parses_an_empty_single_quoted_string() { parses_to! { parser: WdlParser, input: "''", - rule: Rule::single_quoted_string, - tokens: [single_quoted_string(0, 2)] + rule: Rule::string, + tokens: [ + // `''` + string(0, 2, [ + // `'` + single_quote(0, 1), + ]) + ] } } #[test] fn it_successfully_parses_the_first_two_double_quotes() { - // This test will succeed, as `""`` matches the pattern, but the last double + // This test will succeed, as `''` matches the pattern, but the last single // quote will not be included. This is fine for parsing, as the now - // unmatched `"` will throw an error. + // unmatched `'` will throw an error. parses_to! { parser: WdlParser, input: "'''", - rule: Rule::single_quoted_string, - tokens: [single_quoted_string(0, 2)] + rule: Rule::string, + tokens: [ + // `''` + string(0, 2, [ + // `'` + single_quote(0, 1), + ]) + ] } } @@ -86,7 +82,15 @@ fn it_parses_a_single_quoted_string() { parses_to! { parser: WdlParser, input: "'Hello, world!'", - rule: Rule::single_quoted_string, - tokens: [single_quoted_string(0, 15)] + rule: Rule::string, + tokens: [ + // `'Hello, world!'` + string(0, 15, [ + // `'` + single_quote(0, 1), + // `Hello, world!` + string_literal_contents(1, 14), + ]) + ] } } diff --git a/wdl-grammar/src/v1/validation.rs b/wdl-grammar/src/v1/validation.rs new file mode 100644 index 00000000..e5353708 --- /dev/null +++ b/wdl-grammar/src/v1/validation.rs @@ -0,0 +1,13 @@ +//! Validation rules for WDL 1.x. + +mod invalid_escape_character; + +pub use invalid_escape_character::InvalidEscapeCharacter; + +use crate::core::validation::Rule; +use crate::v1; + +/// Gets all validation rules available for WDL 1.x. 
+pub fn rules() -> Vec<Box<dyn Rule<v1::Rule>>> { + vec![Box::new(InvalidEscapeCharacter)] +} diff --git a/wdl-grammar/src/v1/validation/invalid_escape_character.rs b/wdl-grammar/src/v1/validation/invalid_escape_character.rs new file mode 100644 index 00000000..4eb364e1 --- /dev/null +++ b/wdl-grammar/src/v1/validation/invalid_escape_character.rs @@ -0,0 +1,63 @@ +//! Invalid escape character(s) within a string. + +use pest::iterators::Pairs; + +use crate::core::validation; +use crate::core::validation::Rule; +use crate::core::Code; +use crate::v1; +use crate::Version; + +/// An invalid escape character within a string. +#[derive(Debug)] +pub struct InvalidEscapeCharacter; + +impl Rule<v1::Rule> for InvalidEscapeCharacter { + fn code(&self) -> Code { + // SAFETY: this manually crafted to unwrap successfully every time. + Code::try_new(Version::V1, 1).unwrap() + } + + fn validate(&self, tree: Pairs<'_, v1::Rule>) -> validation::Result { + tree.flatten().try_for_each(|node| match node.as_rule() { + v1::Rule::char_escaped_invalid => { + let (line_no, col) = node.line_col(); + Err(validation::error::Builder::default() + .code(self.code()) + .message(format!( + "invalid escape character '{}' in string at line {}:{}", + node.as_str(), + line_no, + col + )) + .try_build() + .unwrap()) + } + _ => Ok(()), + }) + } +} + +#[cfg(test)] +mod tests { + use pest::Parser as _; + + use crate::core::validation::Rule as _; + use crate::v1::parse::Parser; + use crate::v1::Rule; + + use super::*; + + #[test] + fn it_catches_an_invalid_escape_character() -> Result<(), Box<dyn std::error::Error>> { + let tree = Parser::parse(Rule::string, "\"\\.\"")?; + let error = InvalidEscapeCharacter.validate(tree).unwrap_err(); + + assert_eq!( + error.to_string(), + String::from("[v1::001] invalid escape character '\\.'
in string at line 1:2") + ); + + Ok(()) + } +} diff --git a/wdl-grammar/src/v1/wdl.pest b/wdl-grammar/src/v1/wdl.pest index 3c1020e6..37c074f2 100644 --- a/wdl-grammar/src/v1/wdl.pest +++ b/wdl-grammar/src/v1/wdl.pest @@ -28,9 +28,9 @@ COMMENT = { "#" ~ (!LINE_ENDING ~ ANY)* } // Atoms // // =======// -OPTION = { "?" } -ONE_OR_MORE = { "+" } -COMMA = { "," } +OPTION = { "?" } +ONE_OR_MORE = { "+" } +COMMA = { "," } // ==========// // Literals // @@ -74,37 +74,47 @@ number = _{ float | integer } // parsing of these invalid string characters so that we can return lint errors // for them (rather than fail parsing, which returns a relatively unhelpful // error message at the time of writing). -// +// // If you wish to remove this leniency, you can remove the // `char_escaped_invalid` rule and its inclusion in the `char_escaped` rule. -char_escaped_invalid = @{ "\\" ~ ANY } -char_escaped = ${ "\\" ~ ("\\" | "\"" | "\'" | "n" | "r" | "b" | "t" | "f" | "a" | "v" | "?") } -char_octal = @{ "\\" ~ ASCII_OCT_DIGIT{1, 3} ~ !ASCII_OCT_DIGIT } -char_hex = @{ "\\x" ~ ASCII_HEX_DIGIT+ } -char_unicode_four = @{ "\\" ~ ("u" | "U") ~ ASCII_HEX_DIGIT{4} ~ !ASCII_HEX_DIGIT } -char_unicode_eight = @{ "\\" ~ ("u" | "U") ~ ASCII_HEX_DIGIT{8} ~ !ASCII_HEX_DIGIT } -char_unicode = @{ char_unicode_four | char_unicode_eight } -char_special = _{ char_escaped | char_hex | char_unicode | char_octal | char_escaped_invalid } -char_other_double_quote = @{ !("\\" | "\"" | "\n") ~ ANY } -char_other_single_quote = @{ !("\\" | "\'" | "\n") ~ ANY } +char_escaped_invalid = @{ "\\" ~ ANY } +char_escaped = ${ "\\" ~ ("\\" | "\"" | "\'" | "n" | "r" | "b" | "t" | "f" | "a" | "v" | "?") } +char_octal = @{ "\\" ~ ASCII_OCT_DIGIT{1, 3} ~ !ASCII_OCT_DIGIT } +char_hex = @{ "\\x" ~ ASCII_HEX_DIGIT+ } +char_unicode_four = @{ "\\" ~ ("u" | "U") ~ ASCII_HEX_DIGIT{4} ~ !ASCII_HEX_DIGIT } +char_unicode_eight = @{ "\\" ~ ("u" | "U") ~ ASCII_HEX_DIGIT{8} ~ !ASCII_HEX_DIGIT } +char_unicode = ${ char_unicode_four | 
char_unicode_eight } +char_special = ${ char_escaped | char_hex | char_unicode | char_octal | char_escaped_invalid } +char_other = @{ !("\\" | "\n") ~ ANY } // String. -double_quoted_string = @{ - "\"" ~ (char_special | char_other_double_quote)* ~ "\"" + +double_quote = { "\"" } +single_quote = { "\'" } +string_expression_placeholder_start = { "~{" | "${" } +string_expression_placeholder_end = { "}" } + +string_literal_contents = @{ + // NOTE: the `PEEK` here looks at what quoting is being used (double or + // single) and denies any use of the character in literal contents. The + // presence of `PEEK` implies that this rule must be embedded within a rule + // that `PUSH`es these tokens on the stack. + (!string_expression_placeholder_start ~ !PEEK ~ (char_special | char_other))+ } -single_quoted_string = @{ - "\'" ~ (char_special | char_other_single_quote)* ~ "\'" + +string_expression_placeholder_expression = { string_placeholder | expression } + +string_placeholder = ${ + string_expression_placeholder_start ~ placeholder_options* ~ string_expression_placeholder_expression ~ string_expression_placeholder_end } -// For strings, whether the string is double-quoted or single-quote is not -// important context when parsing. HOWEVER, we would like to return whether the -// strings are single or double quoted for linting purposes. Thus, the type of -// quoting is retained. -// // NOTE: all rules included in `string` must be marked as atomic (`@`) or -// compound-atomic (`$`). This is not checked by the compiler, so you must +// compound-atomic (`$`). This is because we don't want rules eating up +// whitespace within a string. This is not checked by the compiler, so you must // ensure it remains true. -string = { (double_quoted_string | single_quoted_string) } +string = ${ + PUSH(double_quote | single_quote) ~ (string_placeholder | string_literal_contents)* ~ POP +} // Identifier. 
identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* } @@ -196,10 +206,7 @@ core = _{ // As such, you will see that none of the permutations of the rule below end in // an optional token. That is by design to avoid the problem above. expression = ${ - prefix* ~ (WHITESPACE | COMMENT)* ~ core ~ (WHITESPACE | COMMENT)* ~ postfix* ~ ( - (WHITESPACE | COMMENT)* ~ infix ~ (WHITESPACE | COMMENT)* ~ prefix* ~ (WHITESPACE | COMMENT)* ~ core ~ (WHITESPACE | COMMENT)* ~ postfix+ | - (WHITESPACE | COMMENT)* ~ infix ~ (WHITESPACE | COMMENT)* ~ prefix* ~ (WHITESPACE | COMMENT)* ~ core - )+ + prefix* ~ (WHITESPACE | COMMENT)* ~ core ~ (WHITESPACE | COMMENT)* ~ postfix* ~ ((WHITESPACE | COMMENT)* ~ infix ~ (WHITESPACE | COMMENT)* ~ prefix* ~ (WHITESPACE | COMMENT)* ~ core ~ (WHITESPACE | COMMENT)* ~ postfix+ | (WHITESPACE | COMMENT)* ~ infix ~ (WHITESPACE | COMMENT)* ~ prefix* ~ (WHITESPACE | COMMENT)* ~ core)+ | prefix* ~ (WHITESPACE | COMMENT)* ~ core ~ (WHITESPACE | COMMENT)* ~ postfix+ | prefix* ~ (WHITESPACE | COMMENT)* ~ core } @@ -209,19 +216,14 @@ expression = ${ // NOTE: techically the spec calls the optional `+` the "non-empty" operator. // Since we have already defined this as the "one or more" operator and they // mean effectively the same thing, I've just kept this nomeclature. -array_type = ${ - "Array" ~ - (WHITESPACE | COMMENT)* ~ - ( - ("[" ~ (WHITESPACE | COMMENT)* ~ wdl_type_inner ~ (WHITESPACE | COMMENT)* ~ "]" ~ ONE_OR_MORE) | - ("[" ~ (WHITESPACE | COMMENT)* ~ wdl_type_inner ~ (WHITESPACE | COMMENT)* ~ "]") - ) +array_type = ${ + "Array" ~ (WHITESPACE | COMMENT)* ~ (("[" ~ (WHITESPACE | COMMENT)* ~ wdl_type_inner ~ (WHITESPACE | COMMENT)* ~ "]" ~ ONE_OR_MORE) | ("[" ~ (WHITESPACE | COMMENT)* ~ wdl_type_inner ~ (WHITESPACE | COMMENT)* ~ "]")) } // NOTE: The `map_type` and `pair_type` rules **must** be marked as non-atomic, as the // `unbound_declaration` and `bound_declaration` rules that use them are marked // as compound-atomic. 
+map_type = !{ "Map" ~ "[" ~ wdl_type_inner ~ COMMA ~ wdl_type_inner ~ "]" } +pair_type = !{ "Pair" ~ "[" ~ wdl_type_inner ~ COMMA ~ wdl_type_inner ~ "]" } string_type = { "String" } file_type = { "File" } bool_type = { "Boolean" } @@ -260,7 +262,7 @@ wdl_type_inner = ${ // **must** be a whitespace between the `wdl_type` and the `identifier` for // `bound_declaration`s and `unbound_declaration`s. Else, you get weird things // happening in these rules. -// +// // For example, when considering `IntermediateFiles`, `Int` matching the integer // `wdl_type` and `ermediateFiles` matching the `identifier`. wdl_type = ${ @@ -379,74 +381,73 @@ task_input = { common_input } // Task output. task_output = { common_output } -// Task expression placeholders. -// +// Expression placeholder options. +// +// Expression placeholder options can be used anywhere where expression +// placeholders are evaluated, including (but not limited to): +// +// * Strings +// // DIVERGE: the specification states that any expression placeholder conforms to // the pattern `option="value"`. However, it is clear from the examples in the // spec that single quoted strings are also allowed. Thus, we allow for either // single or double quoted strings here—we will leave the selection of which to // use up to a linting question. -// +// // LENIENT: the specification is pretty clear that no spaces are allowed between // the option and the equals sign or the equals sign and the value. However, // many tools choose to allow spaces here. As such, we will allow spaces in // between these elements, but we will throw a lint warning for these cases.
-expression_placeholder_sep = { "sep" ~ "=" ~ string } -expression_placeholder_boolean = { boolean ~ "=" ~ string } -expression_placeholder_default = { "default" ~ "=" ~ string } - -expression_placeholder = { - expression_placeholder_sep | - expression_placeholder_boolean | - expression_placeholder_default +placeholder_option_sep = { "sep" ~ "=" ~ string } +placeholder_option_boolean = { boolean ~ "=" ~ string } +placeholder_option_default = { "default" ~ "=" ~ string } + +placeholder_option = { + placeholder_option_sep + | placeholder_option_boolean + | placeholder_option_default } -expression_placeholders = { expression_placeholder+ } +placeholder_options = { placeholder_option+ } // Task commands, curly. command_curly_begin = { "command" ~ "{" } command_curly_end = { "}" } -command_curly_interpolation_start = _{ "~{" | "${" } -command_curly_interpolation_end = _{ "}" } +command_curly_expression_placeholder_start = _{ "~{" | "${" } +command_curly_expression_placeholder_end = _{ "}" } command_curly_literal_contents = _{ - (!command_curly_begin ~ !command_curly_end ~ !command_curly_interpolation_start ~ ANY)+ + (!command_curly_begin ~ !command_curly_end ~ !command_curly_expression_placeholder_start ~ ANY)+ } -command_curly_interpolated_expression = { command_curly_interpolated_contents | expression } +command_curly_expression_placeholder_expression = { command_curly_placeholder | expression } -command_curly_interpolated_contents = { - command_curly_interpolation_start ~ - expression_placeholders* ~ - command_curly_interpolated_expression ~ - command_curly_interpolation_end +command_curly_placeholder = { + command_curly_expression_placeholder_start ~ placeholder_options* ~ command_curly_expression_placeholder_expression ~ command_curly_expression_placeholder_end } command_curly = { - command_curly_begin ~ (command_curly_interpolated_contents | command_curly_literal_contents)* ~ command_curly_end + command_curly_begin ~ (command_curly_placeholder | 
command_curly_literal_contents)* ~ command_curly_end } // Task commands, heredoc. -command_heredoc_begin = { "command" ~ "<<<" } -command_heredoc_end = { ">>>" } -command_heredoc_interpolation_start = _{ "~{" } -command_heredoc_interpolation_end = _{ "}" } +command_heredoc_begin = { "command" ~ "<<<" } +command_heredoc_end = { ">>>" } +command_heredoc_expression_placeholder_start = _{ "~{" } +command_heredoc_expression_placeholder_end = _{ "}" } command_heredoc_literal_contents = _{ - (!command_heredoc_begin ~ !command_heredoc_end ~ !command_heredoc_interpolation_start ~ ANY)+ + (!command_heredoc_begin ~ !command_heredoc_end ~ !command_heredoc_expression_placeholder_start ~ ANY)+ } -command_heredoc_interpolated_expression = { command_heredoc_interpolated_contents | expression } +command_heredoc_expression_placeholder_expression = { command_heredoc_placeholder | expression } -command_heredoc_interpolated_contents = { - command_heredoc_interpolation_start ~ - expression_placeholders* ~ - command_heredoc_interpolated_expression ~ - command_heredoc_interpolation_end +command_heredoc_placeholder = { + command_heredoc_expression_placeholder_start ~ placeholder_options* ~ command_heredoc_expression_placeholder_expression ~ command_heredoc_expression_placeholder_end } command_heredoc = { - command_heredoc_begin ~ (command_heredoc_interpolated_contents | command_heredoc_literal_contents)* ~ command_heredoc_end + command_heredoc_begin ~ (command_heredoc_placeholder | command_heredoc_literal_contents)* ~ command_heredoc_end } task_command = { (command_heredoc | command_curly) } diff --git a/wdl-grammar/src/version.rs b/wdl-grammar/src/version.rs new file mode 100644 index 00000000..6dbcc881 --- /dev/null +++ b/wdl-grammar/src/version.rs @@ -0,0 +1,33 @@ +//! Workflow Description Language (WDL) grammar versions. + +#[cfg(feature = "binaries")] +use clap::ValueEnum; +use serde::Deserialize; +use serde::Serialize; + +/// A Workflow Description Language (WDL) grammar version. 
+#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +#[cfg_attr(feature = "binaries", derive(ValueEnum))] +#[serde(rename_all = "lowercase")] +pub enum Version { + /// Version 1.x of the WDL specification. + #[default] + V1, +} + +impl Version { + /// Gets a short, displayable name for this [`Version`]. + pub fn short_name(&self) -> &'static str { + match self { + Version::V1 => "v1", + } + } +} + +impl std::fmt::Display for Version { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Version::V1 => write!(f, "WDL v1.x"), + } + } +}