diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 91829fef..ad090830 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -10,7 +10,7 @@ jobs:
   format:
     runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Update Rust
         run: rustup update nightly && rustup default nightly
       - name: Install rustfmt
@@ -20,7 +20,7 @@ jobs:
   lint:
     runs-on: ubuntu-22.04
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Update Rust
         run: rustup update stable && rustup default stable
       - name: Install clippy
@@ -30,7 +30,7 @@ jobs:
   test:
     runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Update Rust
         run: rustup update stable && rustup default stable
      - run: cargo test --all-features
@@ -38,7 +38,7 @@ jobs:
   test-examples:
     runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Update Rust
         run: rustup update stable && rustup default stable
       - run: cargo test --all-features --examples
@@ -46,7 +46,7 @@ jobs:
   docs:
     runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Update Rust
         run: rustup update stable && rustup default stable
       - run: cargo doc
@@ -54,7 +54,7 @@ jobs:
   gauntlet:
     runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Update Rust
         run: rustup update stable && rustup default stable
       - run: cargo run --release --bin gauntlet
@@ -62,21 +62,30 @@ jobs:
   arena:
     runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Update Rust
         run: rustup update stable && rustup default stable
       - run: cargo run --release --bin gauntlet -- --arena

+  workspace-lints-enabled:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+      - name: Update Rust
+        run: rustup update stable && rustup default stable
+      - run: cargo install cargo-workspace-lints --locked
+      - run: cargo workspace-lints
+
   msrv:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v4
-      - name: Update Rust
-        run: rustup update stable && rustup default stable
-      - name: Install cargo-binstall
-        run: curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash
-      - name: Install cargo-msrv
-        run: cargo binstall -y --version 0.16.0-beta.23 cargo-msrv
-      - name: Verify the MSRV
-        working-directory: ./wdl
-        run: cargo msrv verify --output-format minimal --all-features
+      - uses: actions/checkout@v4
+      - name: Update Rust
+        run: rustup update stable && rustup default stable
+      - name: Install cargo-binstall
+        run: curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash
+      - name: Install cargo-msrv
+        run: cargo binstall -y --version 0.16.0-beta.23 cargo-msrv
+      - name: Verify the MSRV
+        working-directory: ./wdl
+        run: cargo msrv verify --output-format minimal --all-features
diff --git a/Cargo.toml b/Cargo.toml
index c77a6f9d..1b125c67 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,11 @@
 [workspace]
-members = [ "ci",
+members = [
+    "ci",
+    "gauntlet",
     "wdl",
     "wdl-analysis",
     "wdl-ast",
-    "gauntlet",
+    "wdl-format",
     "wdl-grammar",
     "wdl-lint",
     "wdl-lsp",
@@ -19,40 +21,56 @@ repository = "https://github.com/stjude-rust-labs/wdl"
 rust-version = "1.80.0"
 
 [workspace.dependencies]
+anyhow = "1.0.86"
+approx = "0.5.1"
 clap = { version = "4.5.7", features = ["derive"] }
+clap-verbosity-flag = "2.2.1"
+codespan-reporting = "0.11.1"
 colored = "2.1.0"
 convert_case = "0.6.0"
+dirs = "5.0.1"
+faster-hex = "0.9.0"
+futures = "0.3.30"
+git2 = "0.18.3"
+id-arena = "2.2.1"
 indexmap = { version = "2.2.6", features = ["serde"] }
+indicatif = "0.17.8"
+itertools = "0.13.0"
+line-index = "0.1.1"
+logos = "0.14.0"
+nonempty = "0.10.0"
+parking_lot = "0.12.3"
+path-clean = "1.0.1"
+petgraph = "0.6.5"
+pretty_assertions = "1.4.0"
+rayon = "1.10.0"
+reqwest = "0.12.5"
+rowan = "0.15.15"
 serde = { version = "1", features = ["derive"] }
+serde_json = "1.0.120"
 serde_with = "3.8.1"
+tempfile = "3.10.1"
 tokio = { version = "1.38.0", features = ["full"] }
 toml = "0.8.14"
+tower-lsp = "0.20.0"
 tracing = "0.1.40"
+tracing-log = "0.2.0"
 tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
-logos = "0.14.0"
-rowan = "0.15.15"
-pretty_assertions = "1.4.0"
-rayon = "1.10.0"
-approx = "0.5.1"
-codespan-reporting = "0.11.1"
-anyhow = "1.0.86"
-dirs = "5.0.1"
-faster-hex = "0.9.0"
-git2 = "0.18.3"
-tempfile = "3.10.1"
 url = "2.5.2"
 urlencoding = "2.1.3"
-parking_lot = "0.12.3"
-reqwest = "0.12.5"
-petgraph = "0.6.5"
-futures = "0.3.30"
-walkdir = "2.5.0"
-path-clean = "1.0.1"
-indicatif = "0.17.8"
-tower-lsp = "0.20.0"
-line-index = "0.1.1"
-serde_json = "1.0.120"
 uuid = "1.10.0"
-id-arena = "2.2.1"
-clap-verbosity-flag = "2.2.1"
-tracing-log = "0.2.0"
+walkdir = "2.5.0"
+
+[workspace.lints.rust]
+missing_docs = "warn"
+nonstandard-style = "warn"
+rust-2018-idioms = "warn"
+rust-2021-compatibility = "warn"
+rust-2024-compatibility = "warn"
+edition_2024_expr_fragment_specifier = "allow"
+
+[workspace.lints.rustdoc]
+broken_intra_doc_links = "warn"
+
+[workspace.lints.clippy]
+missing_docs_in_private_items = "warn"
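The `[workspace.lints.*]` tables above take effect in any member crate that opts in with `[lints] workspace = true` (added to each member later in this diff, and enforced by the new `workspace-lints-enabled` CI job). A minimal sketch of what `missing_docs` plus `clippy::missing_docs_in_private_items` demand of member code; the names here are hypothetical and exist only for illustration:

```rust
//! A hypothetical module illustrating the documentation the new lints require.

/// A manifest on disk (public items need docs to satisfy `missing_docs`).
pub struct Manifest {
    /// The manifest's path (private items need docs to satisfy
    /// `clippy::missing_docs_in_private_items`).
    path: std::path::PathBuf,
}

impl Manifest {
    /// Creates a new [`Manifest`] from a path.
    pub fn new(path: std::path::PathBuf) -> Self {
        Self { path }
    }

    /// Returns the manifest's path.
    pub fn path(&self) -> &std::path::Path {
        &self.path
    }
}
```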
"2.2.1" +codespan-reporting = "0.11.1" colored = "2.1.0" convert_case = "0.6.0" +dirs = "5.0.1" +faster-hex = "0.9.0" +futures = "0.3.30" +git2 = "0.18.3" +id-arena = "2.2.1" indexmap = { version = "2.2.6", features = ["serde"] } +indicatif = "0.17.8" +itertools = "0.13.0" +line-index = "0.1.1" +logos = "0.14.0" +nonempty = "0.10.0" +parking_lot = "0.12.3" +path-clean = "1.0.1" +petgraph = "0.6.5" +pretty_assertions = "1.4.0" +rayon = "1.10.0" +reqwest = "0.12.5" +rowan = "0.15.15" serde = { version = "1", features = ["derive"] } +serde_json = "1.0.120" serde_with = "3.8.1" +tempfile = "3.10.1" tokio = { version = "1.38.0", features = ["full"] } toml = "0.8.14" +tower-lsp = "0.20.0" tracing = "0.1.40" +tracing-log = "0.2.0" tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } -logos = "0.14.0" -rowan = "0.15.15" -pretty_assertions = "1.4.0" -rayon = "1.10.0" -approx = "0.5.1" -codespan-reporting = "0.11.1" -anyhow = "1.0.86" -dirs = "5.0.1" -faster-hex = "0.9.0" -git2 = "0.18.3" -tempfile = "3.10.1" url = "2.5.2" urlencoding = "2.1.3" -parking_lot = "0.12.3" -reqwest = "0.12.5" -petgraph = "0.6.5" -futures = "0.3.30" -walkdir = "2.5.0" -path-clean = "1.0.1" -indicatif = "0.17.8" -tower-lsp = "0.20.0" -line-index = "0.1.1" -serde_json = "1.0.120" uuid = "1.10.0" -id-arena = "2.2.1" -clap-verbosity-flag = "2.2.1" -tracing-log = "0.2.0" +walkdir = "2.5.0" + +[workspace.lints.rust] +missing_docs = "warn" +nonstandard-style = "warn" +rust-2018-idioms = "warn" +rust-2021-compatibility = "warn" +rust-2024-compatibility = "warn" +edition_2024_expr_fragment_specifier = "allow" + +[workspace.lints.rustdoc] +broken_intra_doc_links = "warn" + +[workspace.lints.clippy] +missing_docs_in_private_items = "warn" diff --git a/README.md b/README.md index b6b1cc57..642eeeab 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ cargo run --bin wdl --features cli -- $ARGS Where `$ARGS` are the command line arguments to the `wdl` CLI tool. -The `wdl` CLI tool currently supports three subcommands: +The `wdl` CLI tool currently supports the following subcommands: * `parse` - Parses a WDL document and prints both the parse diagnostics and the resulting Concrete Syntax Tree (CST). @@ -137,6 +137,8 @@ The `wdl` CLI tool currently supports three subcommands: document scopes and exits with a status code of `0` if the documents are valid; otherwise, prints the validation diagnostics and exits with a status code of `1`. +* `format` - Parses, validates, and then formats a single WDL document, printing + the result to STDOUT. Each of the subcommands supports passing `-` as the file path to denote reading from STDIN instead of a file on disk. diff --git a/ci/Cargo.toml b/ci/Cargo.toml index 17fb3540..eab3dc40 100644 --- a/ci/Cargo.toml +++ b/ci/Cargo.toml @@ -15,3 +15,6 @@ clap.workspace = true reqwest = { workspace = true, features = ["blocking", "rustls-tls"] } toml.workspace = true toml_edit = { version = "0.22.21", features = ["serde"] } + +[lints] +workspace = true diff --git a/ci/src/main.rs b/ci/src/main.rs index ee4de15a..206cd11d 100644 --- a/ci/src/main.rs +++ b/ci/src/main.rs @@ -48,55 +48,82 @@ use std::time::Duration; use clap::Parser; use toml_edit::DocumentMut; -// note that this list must be topologically sorted by dependencies +/// Crates names to publish. +// Note that this list must be topologically sorted by dependencies. const SORTED_CRATES_TO_PUBLISH: &[&str] = &[ "wdl-grammar", "wdl-ast", "wdl-lint", + "wdl-format", "wdl-analysis", "wdl-lsp", "wdl", ]; +/// Paths to ignore. 
diff --git a/ci/Cargo.toml b/ci/Cargo.toml
index 17fb3540..eab3dc40 100644
--- a/ci/Cargo.toml
+++ b/ci/Cargo.toml
@@ -15,3 +15,6 @@ clap.workspace = true
 reqwest = { workspace = true, features = ["blocking", "rustls-tls"] }
 toml.workspace = true
 toml_edit = { version = "0.22.21", features = ["serde"] }
+
+[lints]
+workspace = true
diff --git a/ci/src/main.rs b/ci/src/main.rs
index ee4de15a..206cd11d 100644
--- a/ci/src/main.rs
+++ b/ci/src/main.rs
@@ -48,55 +48,82 @@ use std::time::Duration;
 use clap::Parser;
 use toml_edit::DocumentMut;
 
-// note that this list must be topologically sorted by dependencies
+/// Crate names to publish.
+// Note that this list must be topologically sorted by dependencies.
 const SORTED_CRATES_TO_PUBLISH: &[&str] = &[
     "wdl-grammar",
     "wdl-ast",
     "wdl-lint",
+    "wdl-format",
     "wdl-analysis",
     "wdl-lsp",
     "wdl",
 ];
 
+/// Paths to ignore.
 const IGNORE_PATHS: &[&str] = &["target", "tests", "examples", "benches", "book", "docs"];
 
+/// An in-memory representation of a crate.
 #[derive(Debug, Clone)]
 struct Crate {
+    /// The manifest file.
     manifest: DocumentMut,
+
+    /// The path to the manifest.
     manifest_path: PathBuf,
+
+    /// The path to the changelog.
     changelog_path: Option<PathBuf>,
+
+    /// The name of the crate.
     name: String,
+
+    /// The version of the crate.
     version: String,
+
+    /// Whether the version should be bumped.
     should_bump: bool,
 }
 
+/// The command line arguments.
 #[derive(Parser)]
-struct Opts {
+struct Args {
+    /// The subcommand.
     #[clap(subcommand)]
-    subcmd: SubCommand,
+    command: Subcommand,
 }
 
+/// The subcommand to use.
 #[derive(Parser)]
-enum SubCommand {
+enum Subcommand {
+    /// Request to bump a crate/crates.
     Bump(Bump),
+
+    /// Publishes a crate/crates.
     Publish(Publish),
 }
 
+/// The arguments to the `bump` subcommand.
 #[derive(Parser)]
 struct Bump {
+    /// Whether or not the bump should be a patch version increase.
     #[clap(short, long)]
     patch: bool,
 
+    /// The list of crate names to bump.
     #[clap(short, long)]
     crates_to_bump: Vec<String>,
 }
 
+/// The arguments to the `publish` subcommand.
 #[derive(Parser)]
 struct Publish {
+    /// Whether or not to perform a dry-run of the publishing.
     #[clap(short, long)]
     dry_run: bool,
 }
 
+/// The main function.
 fn main() {
     let mut all_crates: Vec<Rc<RefCell<Crate>>> = Vec::new();
     find_crates(".".as_ref(), &mut all_crates);
@@ -108,9 +135,9 @@ fn main() {
         .collect::<HashMap<_, _>>();
     all_crates.sort_by_key(|krate| publish_order.get(&krate.borrow().name[..]));
 
-    let opts = Opts::parse();
-    match opts.subcmd {
-        SubCommand::Bump(Bump {
+    let opts = Args::parse();
+    match opts.command {
+        Subcommand::Bump(Bump {
             patch,
             crates_to_bump,
         }) => {
@@ -150,7 +177,7 @@ fn main() {
                     .success()
             );
         }
-        SubCommand::Publish(Publish { dry_run }) => {
+        Subcommand::Publish(Publish { dry_run }) => {
            // We have so many crates to publish we're frequently either
            // rate-limited or we run into issues where crates can't publish
            // successfully because they're waiting on the index entries of
@@ -177,6 +204,7 @@ fn main() {
     }
 }
 
+/// Finds crates in a particular directory.
 fn find_crates(dir: &Path, dst: &mut Vec<Rc<RefCell<Crate>>>) {
     if dir.join("Cargo.toml").exists() {
         if let Some(krate) = read_crate(&dir.join("Cargo.toml")) {
@@ -195,6 +223,7 @@ fn find_crates(dir: &Path, dst: &mut Vec<Rc<RefCell<Crate>>>) {
     }
 }
 
+/// Reads a crate from a manifest.
 fn read_crate(manifest_path: &Path) -> Option<Crate> {
     let contents = fs::read_to_string(manifest_path).expect("failed to read manifest");
     let mut manifest =
@@ -223,6 +252,7 @@ fn read_crate(manifest_path: &Path) -> Option<Crate> {
     })
 }
 
+/// Bumps the version of a crate.
 fn bump_version(krate: &Crate, crates: &[Rc<RefCell<Crate>>], patch: bool) {
     let mut new_manifest = krate.manifest.clone();
 
@@ -289,6 +319,7 @@ fn bump(version: &str, patch_bump: bool) -> String {
     }
 }
 
+/// Publishes a crate.
 fn publish(krate: &Crate, dry_run: bool) -> bool {
     if !SORTED_CRATES_TO_PUBLISH.iter().any(|s| *s == krate.name) {
         return true;
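Why the ordering of `SORTED_CRATES_TO_PUBLISH` matters: `main` above builds a name-to-position map and sorts the discovered crates with it, so dependencies always publish before their dependents. A condensed, standalone sketch of that pattern (illustrative crate names, not the tool's exact code):

```rust
use std::collections::HashMap;

/// The publish order; must stay topologically sorted by dependencies.
const SORTED_CRATES_TO_PUBLISH: &[&str] = &["wdl-grammar", "wdl-ast", "wdl"];

fn main() {
    // Map each crate name to its position in the publish order.
    let publish_order = SORTED_CRATES_TO_PUBLISH
        .iter()
        .enumerate()
        .map(|(i, c)| (*c, i))
        .collect::<HashMap<_, _>>();

    // Crates discovered on disk, in arbitrary order.
    let mut crates = vec!["wdl", "wdl-grammar", "wdl-ast"];

    // Unknown crates sort first (`None < Some(_)`); known crates sort by
    // their topological position.
    crates.sort_by_key(|name| publish_order.get(name).copied());

    assert_eq!(crates, ["wdl-grammar", "wdl-ast", "wdl"]);
}
```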
diff --git a/gauntlet/Cargo.toml b/gauntlet/Cargo.toml
index b6b4bc11..58e2f857 100644
--- a/gauntlet/Cargo.toml
+++ b/gauntlet/Cargo.toml
@@ -26,3 +26,6 @@ tracing.workspace = true
 tracing-subscriber.workspace = true
 anyhow.workspace = true
 codespan-reporting.workspace = true
+
+[lints]
+workspace = true
diff --git a/rustfmt.toml b/rustfmt.toml
index 3e50da09..259cb677 100644
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -10,6 +10,6 @@ newline_style = "Unix"
 normalize_comments = true
 normalize_doc_attributes = true
 reorder_impl_items = true
+style_edition = "2024"
 use_field_init_shorthand = true
 wrap_comments = true
-style_edition = "2024"
\ No newline at end of file
diff --git a/wdl-analysis/Cargo.toml b/wdl-analysis/Cargo.toml
index a7d09f0d..0dc4a725 100644
--- a/wdl-analysis/Cargo.toml
+++ b/wdl-analysis/Cargo.toml
@@ -40,6 +40,9 @@ tempfile = { workspace = true }
 default = []
 codespan = ["wdl-ast/codespan"]
 
+[lints]
+workspace = true
+
 [[test]]
 name = "analysis"
 required-features = ["codespan"]
diff --git a/wdl-analysis/src/analyzer.rs b/wdl-analysis/src/analyzer.rs
index aca481d8..c9a7fa7e 100644
--- a/wdl-analysis/src/analyzer.rs
+++ b/wdl-analysis/src/analyzer.rs
@@ -27,7 +27,6 @@ use tokio::sync::mpsc;
 use tokio::sync::oneshot;
 use url::Url;
 use walkdir::WalkDir;
-use wdl_ast::AstNode;
 use wdl_ast::Diagnostic;
 use wdl_ast::Severity;
 use wdl_ast::SyntaxNode;
diff --git a/wdl-analysis/src/graph.rs b/wdl-analysis/src/graph.rs
index 8257e8a2..142a8916 100644
--- a/wdl-analysis/src/graph.rs
+++ b/wdl-analysis/src/graph.rs
@@ -27,7 +27,6 @@ use tracing::debug;
 use tracing::info;
 use url::Url;
 use uuid::Uuid;
-use wdl_ast::AstNode;
 use wdl_ast::Diagnostic;
 use wdl_ast::SyntaxNode;
 use wdl_ast::Validator;
diff --git a/wdl-analysis/tests/analysis.rs b/wdl-analysis/tests/analysis.rs
index f5e77bb6..5ac567fd 100644
--- a/wdl-analysis/tests/analysis.rs
+++ b/wdl-analysis/tests/analysis.rs
@@ -38,6 +38,7 @@ use wdl_analysis::rules;
 use wdl_ast::Diagnostic;
 use wdl_ast::SyntaxNode;
 
+/// Finds tests to run as part of the analysis test suite.
 fn find_tests() -> Vec<PathBuf> {
     // Check for filter arguments consisting of test names
     let mut filter = HashSet::new();
@@ -65,6 +66,7 @@ fn find_tests() -> Vec<PathBuf> {
     tests
 }
 
+/// Normalizes a result.
 fn normalize(s: &str, is_error: bool) -> String {
     if is_error {
         // Normalize paths in any error messages
@@ -75,6 +77,7 @@ fn normalize(s: &str, is_error: bool) -> String {
     s.replace("\r\n", "\n")
 }
 
+/// Compares a single result.
 fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<()> {
     let result = normalize(result, is_error);
     if env::var_os("BLESS").is_some() {
@@ -101,6 +104,7 @@ fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<()> {
     Ok(())
 }
 
+/// Compares the provided results.
 fn compare_results(test: &Path, results: Vec<AnalysisResult>) -> Result<()> {
     let mut buffer = Buffer::no_color();
     let cwd = std::env::current_dir().expect("must have a CWD");
@@ -150,6 +154,7 @@ fn compare_results(test: &Path, results: Vec<AnalysisResult>) -> Result<()> {
 async fn main() {
     // These are the tests that require single document analysis as they are
     // sensitive to parse order
+    /// The tests that require single document analysis.
     const SINGLE_DOCUMENT_TESTS: &[&str] = &["import-dependency-cycle"];
 
     let tests = find_tests();
@@ -177,6 +182,9 @@ async fn main() {
     // Discover the results that are relevant only to this test
     let base = clean(absolute(test).expect("should be made absolute"));
 
+    // NOTE: clippy appears to be incorrect that this can be modified to use
+    // `filter_map`. Perhaps this should be revisited in the future.
+    #[allow(clippy::filter_map_bool_then)]
     let results = results
         .iter()
         .filter_map(|r| {
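`compare_result` above follows a baseline ("bless") testing pattern: setting the `BLESS` environment variable rewrites the expected output files, while a normal run compares normalized output against them. A minimal, self-contained sketch of the pattern, not the suite's exact code:

```rust
use std::fs;
use std::path::Path;

/// Compares `result` against the baseline at `path`, rewriting the baseline
/// when the `BLESS` environment variable is set.
fn compare_result(path: &Path, result: &str) -> Result<(), String> {
    if std::env::var_os("BLESS").is_some() {
        // Bless mode: accept the current output as the new baseline.
        fs::write(path, result).map_err(|e| e.to_string())?;
        return Ok(());
    }

    // Normal mode: normalize line endings and compare against the baseline.
    let expected = fs::read_to_string(path)
        .map_err(|e| format!("failed to read baseline: {e}"))?
        .replace("\r\n", "\n");
    if expected != result {
        return Err(format!("result mismatch for `{path}`", path = path.display()));
    }
    Ok(())
}
```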
diff --git a/wdl-ast/CHANGELOG.md b/wdl-ast/CHANGELOG.md
index e631c27b..935e5656 100644
--- a/wdl-ast/CHANGELOG.md
+++ b/wdl-ast/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+### Changed
+
+* Introduce a guarantee that each CST element (node or token) has one and only one analogous AST element ([#133](https://github.com/stjude-rust-labs/wdl/pull/133))
+
 ### Fixed
 
 * Detect duplicate call inputs ([#199](https://github.com/stjude-rust-labs/wdl/pull/199)).
diff --git a/wdl-ast/Cargo.toml b/wdl-ast/Cargo.toml
index 37c3b9b8..fe6698db 100644
--- a/wdl-ast/Cargo.toml
+++ b/wdl-ast/Cargo.toml
@@ -12,6 +12,8 @@ documentation = "https://docs.rs/wdl-ast"
 
 [dependencies]
 wdl-grammar = { path = "../wdl-grammar", version = "0.8.0" }
+macropol = "0.1.3"
+paste = "1.0.15"
 rowan = { workspace = true }
 url = { workspace = true }
 urlencoding = { workspace = true }
@@ -26,6 +28,9 @@ codespan-reporting = { workspace = true }
 [features]
 codespan = ["wdl-grammar/codespan"]
 
+[lints]
+workspace = true
+
 [[test]]
 name = "validation"
 required-features = ["codespan"]
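The new `macropol` and `paste` dependencies power the `ast_element_impl!` macro in `element.rs` below: `macropol` interpolates `${...}` metavariables inside doc comments and string literals, while `paste`'s `[<...>]` syntax concatenates identifiers to mint method names such as `as_command_section`. A tiny standalone sketch of the `paste` mechanism alone (the `Wrapper` type is hypothetical, for illustration only):

```rust
use paste::paste;

/// Generates an `as_<name>` accessor on a hypothetical `Wrapper` type.
macro_rules! accessor {
    ($name:ident: $ty:ty) => {
        paste! {
            /// A hypothetical wrapper used only for this sketch.
            pub struct Wrapper(pub $ty);

            impl Wrapper {
                #[doc = "Returns a reference to the inner `" $name "` value."]
                pub fn [<as_ $name>](&self) -> &$ty {
                    &self.0
                }
            }
        }
    };
}

accessor!(ident: String);

fn main() {
    let w = Wrapper("hello".to_string());
    // The macro minted `as_ident` by concatenating `as_` and `ident`.
    assert_eq!(w.as_ident(), "hello");
}
```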
diff --git a/wdl-ast/src/element.rs b/wdl-ast/src/element.rs
new file mode 100644
index 00000000..e578d469
--- /dev/null
+++ b/wdl-ast/src/element.rs
@@ -0,0 +1,764 @@
+//! Elements (nodes or tokens) within the AST.
+
+use rowan::NodeOrToken;
+
+use crate::AstNode;
+use crate::AstToken;
+use crate::Comment;
+use crate::Ident;
+use crate::SyntaxElement;
+use crate::SyntaxKind;
+use crate::SyntaxNode;
+use crate::SyntaxToken;
+use crate::Version;
+use crate::VersionStatement;
+use crate::Whitespace;
+use crate::v1::*;
+
+#[macropol::macropol]
+macro_rules! ast_element_impl {
+    (
+        // The name of the impl to create (e.g., `Node`).
+        $name:ident,
+        // The improper name of the impl to be displayed (e.g., `node`).
+        $display:ident,
+        // The prefix of the syntax element (e.g., `SyntaxNode`).
+        $syntax_prefix:ty,
+        // A mapping of all of the elements to map from syntax elements to AST
+        // elements.
+        //
+        // E.g., `command_section(): CommandSectionNode => CommandSection => CommandSection`.
+        [$($suffix:ident(): $syntax_kind:ident => $inner:ident => $variant:ident),*]
+    ) => {
+        paste::paste! {
+            impl $name {
+                #[doc = "Attempts to cast a [`SyntaxElement`] to a [`" $name "`]."]
+                pub fn cast(element: SyntaxElement) -> Option<Self> {
+                    match element.kind() {
+                        $(
+                            SyntaxKind::$syntax_kind => {
+                                let $display = element
+                                    .[<into_ $display>]()
+                                    .expect(
+                                        "`SyntaxElement` with kind \
+                                        `SyntaxKind::${stringify!($syntax_kind)}` could not \
+                                        be turned into a `${stringify!($syntax_prefix)}`"
+                                    );
+
+                                let inner = $inner::cast($display)
+                                    .expect(
+                                        "couldn't cast ${stringify!($display)} to \
+                                        `${stringify!($inner)}`"
+                                    );
+
+                                Some($name::$variant(inner))
+                            },
+                        )*
+                        _ => None
+                    }
+                }
+
+                #[doc = "Returns whether or not a particular [`SyntaxKind`] can cast to a [`" $name "`]."]
+                pub fn can_cast(kind: &SyntaxKind) -> bool {
+                    match kind {
+                        $(
+                            SyntaxKind::$syntax_kind => true,
+                        )*
+                        _ => false
+                    }
+                }
+
+
+                #[doc = "Gets the inner [`" $syntax_prefix "`] from the [`" $name "`]."]
+                pub fn syntax(&self) -> &$syntax_prefix {
+                    match self {
+                        $(
+                            $name::$variant(inner) => inner.syntax(),
+                        )*
+                        // NOTE: a wildcard pattern (`_`) should not be required
+                        // here. If one is suggested by the compiler, that means
+                        // you're probably missing a pattern in the macros
+                        // below.
+                    }
+                }
+
+                $(
+                    /// Attempts to get a reference to the inner [`${stringify!($inner)}`].
+                    ///
+                    /// * If `self` is a [`${stringify!($variant)}`], then a reference to the
+                    ///   inner [`${stringify!($inner)}`] wrapped in [`Some`] is returned.
+                    /// * Else, [`None`] is returned.
+                    pub fn [<as_ $suffix>](&self) -> Option<&$inner> {
+                        match self {
+                            $name::$variant($suffix) => Some($suffix),
+                            _ => None,
+                        }
+                    }
+
+                    /// Consumes `self` and attempts to return the inner
+                    /// [`${stringify!($inner)}`].
+                    ///
+                    /// * If `self` is a [`${stringify!($variant)}`], then the inner
+                    ///   [`${stringify!($inner)}`] wrapped in [`Some`] is returned.
+                    /// * Else, [`None`] is returned.
+                    pub fn [<into_ $suffix>](self) -> Option<$inner> {
+                        match self {
+                            $name::$variant($suffix) => Some($suffix),
+                            _ => None,
+                        }
+                    }
+
+                    /// Consumes `self` and returns the inner [`${stringify!($inner)}`].
+                    ///
+                    /// # Panics
+                    ///
+                    /// If `self` is not a [`${stringify!($variant)}`].
+                    pub fn [<unwrap_ $suffix>](self) -> $inner {
+                        self.[<into_ $suffix>]().expect(
+                            "expected `${stringify!($variant)}` but got a different variant"
+                        )
+                    }
+                )*
+            }
+        }
+    };
+}
+
+/// An abstract syntax tree node.
+///
+/// This enum has a variant for each struct implementing the [`AstNode`] trait.
+#[derive(Clone, Debug)]
+pub enum Node {
+    /// An access expression.
+    AccessExpr(AccessExpr),
+    /// An addition expression.
+    AdditionExpr(AdditionExpr),
+    /// An array type.
+    ArrayType(ArrayType),
+    /// A V1 abstract syntax tree.
+    Ast(Ast),
+    /// A bound declaration.
+    BoundDecl(BoundDecl),
+    /// An after clause in a call statement.
+    CallAfter(CallAfter),
+    /// An alias clause in a call statement.
+    CallAlias(CallAlias),
+    /// A call expression.
+    CallExpr(CallExpr),
+    /// A call input item.
+    CallInputItem(CallInputItem),
+    /// A call statement.
+    CallStatement(CallStatement),
+    /// A target within a call statement.
+    CallTarget(CallTarget),
+    /// A command section.
+    CommandSection(CommandSection),
+    /// A conditional statement.
+    ConditionalStatement(ConditionalStatement),
+    /// The `default` placeholder option.
+    DefaultOption(DefaultOption),
+    /// A division expression.
+    DivisionExpr(DivisionExpr),
+    /// An equality expression.
+    EqualityExpr(EqualityExpr),
+    /// An exponentiation expression.
+    ExponentiationExpr(ExponentiationExpr),
+    /// A greater than or equal to expression.
+    GreaterEqualExpr(GreaterEqualExpr),
+    /// A greater than expression.
+    GreaterExpr(GreaterExpr),
+    /// An if expression.
+    IfExpr(IfExpr),
+    /// An import alias.
+    ImportAlias(ImportAlias),
+    /// An import statement.
+    ImportStatement(ImportStatement),
+    /// An index expression.
+    IndexExpr(IndexExpr),
+    /// An inequality expression.
+    InequalityExpr(InequalityExpr),
+    /// An input section.
+    InputSection(InputSection),
+    /// A less than or equal to expression.
+    LessEqualExpr(LessEqualExpr),
+    /// A less than expression.
+    LessExpr(LessExpr),
+    /// A literal array.
+    LiteralArray(LiteralArray),
+    /// A literal boolean.
+    LiteralBoolean(LiteralBoolean),
+    /// A literal float.
+    LiteralFloat(LiteralFloat),
+    /// A literal hints.
+    LiteralHints(LiteralHints),
+    /// A literal hints item.
+    LiteralHintsItem(LiteralHintsItem),
+    /// A literal input.
+    LiteralInput(LiteralInput),
+    /// A literal input item.
+    LiteralInputItem(LiteralInputItem),
+    /// A literal integer.
+    LiteralInteger(LiteralInteger),
+    /// A literal map.
+    LiteralMap(LiteralMap),
+    /// A literal map item.
+    LiteralMapItem(LiteralMapItem),
+    /// A literal none.
+    LiteralNone(LiteralNone),
+    /// A literal null.
+    LiteralNull(LiteralNull),
+    /// A literal object.
+    LiteralObject(LiteralObject),
+    /// A literal object item.
+    LiteralObjectItem(LiteralObjectItem),
+    /// A literal output.
+    LiteralOutput(LiteralOutput),
+    /// A literal output item.
+    LiteralOutputItem(LiteralOutputItem),
+    /// A literal pair.
+    LiteralPair(LiteralPair),
+    /// A literal string.
+    LiteralString(LiteralString),
+    /// A literal struct.
+    LiteralStruct(LiteralStruct),
+    /// A literal struct item.
+    LiteralStructItem(LiteralStructItem),
+    /// A logical and expression.
+    LogicalAndExpr(LogicalAndExpr),
+    /// A logical not expression.
+    LogicalNotExpr(LogicalNotExpr),
+    /// A logical or expression.
+    LogicalOrExpr(LogicalOrExpr),
+    /// A map type.
+    MapType(MapType),
+    /// A metadata array.
+    MetadataArray(MetadataArray),
+    /// A metadata object.
+    MetadataObject(MetadataObject),
+    /// A metadata object item.
+    MetadataObjectItem(MetadataObjectItem),
+    /// A metadata section.
+    MetadataSection(MetadataSection),
+    /// A modulo expression.
+    ModuloExpr(ModuloExpr),
+    /// A multiplication expression.
+    MultiplicationExpr(MultiplicationExpr),
+    /// A reference to a name.
+    NameRef(NameRef),
+    /// A negation expression.
+    NegationExpr(NegationExpr),
+    /// An output section.
+    OutputSection(OutputSection),
+    /// A pair type.
+    PairType(PairType),
+    /// An object type.
+    ObjectType(ObjectType),
+    /// A parameter metadata section.
+    ParameterMetadataSection(ParameterMetadataSection),
+    /// A parenthesized expression.
+    ParenthesizedExpr(ParenthesizedExpr),
+    /// A placeholder.
+    Placeholder(Placeholder),
+    /// A primitive type.
+    PrimitiveType(PrimitiveType),
+    /// A requirements item.
+    RequirementsItem(RequirementsItem),
+    /// A requirements section.
+    RequirementsSection(RequirementsSection),
+    /// A runtime item.
+    RuntimeItem(RuntimeItem),
+    /// A runtime section.
+    RuntimeSection(RuntimeSection),
+    /// A scatter statement.
+    ScatterStatement(ScatterStatement),
+    /// The `sep` placeholder option.
+    SepOption(SepOption),
+    /// A struct definition.
+    StructDefinition(StructDefinition),
+    /// A subtraction expression.
+    SubtractionExpr(SubtractionExpr),
+    /// A task definition.
+    TaskDefinition(TaskDefinition),
+    /// A task item within a hints section.
+    TaskHintsItem(TaskHintsItem),
+    /// A hints section within a task.
+    TaskHintsSection(TaskHintsSection),
+    /// A `true`/`false` placeholder option.
+    TrueFalseOption(TrueFalseOption),
+    /// A reference to a type.
+    TypeRef(TypeRef),
+    /// An unbound declaration.
+    UnboundDecl(UnboundDecl),
+    /// A version statement.
+    VersionStatement(VersionStatement),
+    /// A workflow definition.
+    WorkflowDefinition(WorkflowDefinition),
+    /// An array within a workflow hints section.
+    WorkflowHintsArray(WorkflowHintsArray),
+    /// A hints item within a workflow hints section.
+    WorkflowHintsItem(WorkflowHintsItem),
+    /// An object within a workflow hints section.
+    WorkflowHintsObject(WorkflowHintsObject),
+    /// An item within an object within a workflow hints section.
+    WorkflowHintsObjectItem(WorkflowHintsObjectItem),
+    /// A hints section within a workflow.
+    WorkflowHintsSection(WorkflowHintsSection),
+}
+
+ast_element_impl!(
+    Node,
+    node,
+    SyntaxNode,
+    [
+        access_expr(): AccessExprNode => AccessExpr => AccessExpr,
+        addition_expr(): AdditionExprNode => AdditionExpr => AdditionExpr,
+        array_type(): ArrayTypeNode => ArrayType => ArrayType,
+        ast(): RootNode => Ast => Ast,
+        bound_decl(): BoundDeclNode => BoundDecl => BoundDecl,
+        call_after(): CallAfterNode => CallAfter => CallAfter,
+        call_alias(): CallAliasNode => CallAlias => CallAlias,
+        call_expr(): CallExprNode => CallExpr => CallExpr,
+        call_input_item(): CallInputItemNode => CallInputItem => CallInputItem,
+        call_statement(): CallStatementNode => CallStatement => CallStatement,
+        call_target(): CallTargetNode => CallTarget => CallTarget,
+        command_section(): CommandSectionNode => CommandSection => CommandSection,
+        conditional_statement(): ConditionalStatementNode => ConditionalStatement => ConditionalStatement,
+        default_option(): PlaceholderDefaultOptionNode => DefaultOption => DefaultOption,
+        division_expr(): DivisionExprNode => DivisionExpr => DivisionExpr,
+        equality_expr(): EqualityExprNode => EqualityExpr => EqualityExpr,
+        exponentiation_expr(): ExponentiationExprNode => ExponentiationExpr => ExponentiationExpr,
+        greater_equal_expr(): GreaterEqualExprNode => GreaterEqualExpr => GreaterEqualExpr,
+        greater_expr(): GreaterExprNode => GreaterExpr => GreaterExpr,
+        if_expr(): IfExprNode => IfExpr => IfExpr,
+        import_alias(): ImportAliasNode => ImportAlias => ImportAlias,
+        import_statement(): ImportStatementNode => ImportStatement => ImportStatement,
+        index_expr(): IndexExprNode => IndexExpr => IndexExpr,
+        inequality_expr(): InequalityExprNode => InequalityExpr => InequalityExpr,
+        input_section(): InputSectionNode => InputSection => InputSection,
+        less_equal_expr(): LessEqualExprNode => LessEqualExpr => LessEqualExpr,
+        less_expr(): LessExprNode => LessExpr => LessExpr,
+        literal_array(): LiteralArrayNode => LiteralArray => LiteralArray,
+        literal_boolean(): LiteralBooleanNode => LiteralBoolean => LiteralBoolean,
+        literal_float(): LiteralFloatNode => LiteralFloat => LiteralFloat,
+        literal_hints(): LiteralHintsNode => LiteralHints => LiteralHints,
+        literal_hints_item(): LiteralHintsItemNode => LiteralHintsItem => LiteralHintsItem,
+        literal_input(): LiteralInputNode => LiteralInput => LiteralInput,
+        literal_input_item(): LiteralInputItemNode => LiteralInputItem => LiteralInputItem,
+        literal_integer(): LiteralIntegerNode => LiteralInteger => LiteralInteger,
+        literal_map(): LiteralMapNode => LiteralMap => LiteralMap,
+        literal_map_item(): LiteralMapItemNode => LiteralMapItem => LiteralMapItem,
+        literal_none(): LiteralNoneNode => LiteralNone => LiteralNone,
+        literal_null(): LiteralNullNode => LiteralNull => LiteralNull,
+        literal_object(): LiteralObjectNode => LiteralObject => LiteralObject,
+        literal_object_item(): LiteralObjectItemNode => LiteralObjectItem => LiteralObjectItem,
+        literal_output(): LiteralOutputNode => LiteralOutput => LiteralOutput,
+        literal_output_item(): LiteralOutputItemNode => LiteralOutputItem => LiteralOutputItem,
+        literal_pair(): LiteralPairNode => LiteralPair => LiteralPair,
+        literal_string(): LiteralStringNode => LiteralString => LiteralString,
+        literal_struct(): LiteralStructNode => LiteralStruct => LiteralStruct,
+        literal_struct_item(): LiteralStructItemNode => LiteralStructItem => LiteralStructItem,
+        logical_and_expr(): LogicalAndExprNode => LogicalAndExpr => LogicalAndExpr,
+        logical_not_expr(): LogicalNotExprNode => LogicalNotExpr => LogicalNotExpr,
+        logical_or_expr(): LogicalOrExprNode => LogicalOrExpr => LogicalOrExpr,
+        map_type(): MapTypeNode => MapType => MapType,
+        metadata_array(): MetadataArrayNode => MetadataArray => MetadataArray,
+        metadata_object(): MetadataObjectNode => MetadataObject => MetadataObject,
+        metadata_object_item(): MetadataObjectItemNode => MetadataObjectItem => MetadataObjectItem,
+        metadata_section(): MetadataSectionNode => MetadataSection => MetadataSection,
+        modulo_expr(): ModuloExprNode => ModuloExpr => ModuloExpr,
+        multiplication_expr(): MultiplicationExprNode => MultiplicationExpr => MultiplicationExpr,
+        name_ref(): NameRefNode => NameRef => NameRef,
+        negation_expr(): NegationExprNode => NegationExpr => NegationExpr,
+        object_type(): ObjectTypeNode => ObjectType => ObjectType,
+        output_section(): OutputSectionNode => OutputSection => OutputSection,
+        pair_type(): PairTypeNode => PairType => PairType,
+        parameter_metadata_section(): ParameterMetadataSectionNode => ParameterMetadataSection => ParameterMetadataSection,
+        parenthesized_expr(): ParenthesizedExprNode => ParenthesizedExpr => ParenthesizedExpr,
+        placeholder(): PlaceholderNode => Placeholder => Placeholder,
+        primitive_type(): PrimitiveTypeNode => PrimitiveType => PrimitiveType,
+        requirements_item(): RequirementsItemNode => RequirementsItem => RequirementsItem,
+        requirements_section(): RequirementsSectionNode => RequirementsSection => RequirementsSection,
+        runtime_item(): RuntimeItemNode => RuntimeItem => RuntimeItem,
+        runtime_section(): RuntimeSectionNode => RuntimeSection => RuntimeSection,
+        scatter_statement(): ScatterStatementNode => ScatterStatement => ScatterStatement,
+        sep_option(): PlaceholderSepOptionNode => SepOption => SepOption,
+        struct_definition(): StructDefinitionNode => StructDefinition => StructDefinition,
+        subtraction_expr(): SubtractionExprNode => SubtractionExpr => SubtractionExpr,
+        task_definition(): TaskDefinitionNode => TaskDefinition => TaskDefinition,
+        task_hints_item(): TaskHintsItemNode => TaskHintsItem => TaskHintsItem,
+        task_hints_section(): TaskHintsSectionNode => TaskHintsSection => TaskHintsSection,
+        true_false_option(): PlaceholderTrueFalseOptionNode => TrueFalseOption => TrueFalseOption,
+        type_ref(): TypeRefNode => TypeRef => TypeRef,
+        unbound_decl(): UnboundDeclNode => UnboundDecl => UnboundDecl,
+        version_statement(): VersionStatementNode => VersionStatement => VersionStatement,
+        workflow_definition(): WorkflowDefinitionNode => WorkflowDefinition => WorkflowDefinition,
+        workflow_hints_array(): WorkflowHintsArrayNode => WorkflowHintsArray => WorkflowHintsArray,
+        workflow_hints_item(): WorkflowHintsItemNode => WorkflowHintsItem => WorkflowHintsItem,
+        workflow_hints_object(): WorkflowHintsObjectNode => WorkflowHintsObject => WorkflowHintsObject,
+        workflow_hints_object_item(): WorkflowHintsObjectItemNode => WorkflowHintsObjectItem => WorkflowHintsObjectItem,
+        workflow_hints_section(): WorkflowHintsSectionNode => WorkflowHintsSection => WorkflowHintsSection
+    ]
+);
+
+/// An abstract syntax tree token.
+///
+/// This enum has a variant for each struct implementing the [`AstToken`] trait.
+#[derive(Clone, Debug)]
+pub enum Token {
+    /// The `after` keyword.
+    AfterKeyword(AfterKeyword),
+    /// The `alias` keyword.
+    AliasKeyword(AliasKeyword),
+    /// The `Array` type keyword.
+    ArrayTypeKeyword(ArrayTypeKeyword),
+    /// The `as` keyword.
+    AsKeyword(AsKeyword),
+    /// The `=` symbol.
+    Assignment(Assignment),
+    /// The `*` symbol.
+    Asterisk(Asterisk),
+    /// The `Boolean` type keyword.
+    BooleanTypeKeyword(BooleanTypeKeyword),
+    /// The `call` keyword.
+    CallKeyword(CallKeyword),
+    /// The `}` symbol.
+    CloseBrace(CloseBrace),
+    /// The `]` symbol.
+    CloseBracket(CloseBracket),
+    /// The `>>>` symbol.
+    CloseHeredoc(CloseHeredoc),
+    /// The `)` symbol.
+    CloseParen(CloseParen),
+    /// The `:` symbol.
+    Colon(Colon),
+    /// The `,` symbol.
+    Comma(Comma),
+    /// The `command` keyword.
+    CommandKeyword(CommandKeyword),
+    /// The text within a command section.
+    CommandText(CommandText),
+    /// A comment.
+    Comment(Comment),
+    /// The `Directory` type keyword.
+    DirectoryTypeKeyword(DirectoryTypeKeyword),
+    /// The `.` symbol.
+    Dot(Dot),
+    /// The `"` symbol.
+    DoubleQuote(DoubleQuote),
+    /// The `else` keyword.
+    ElseKeyword(ElseKeyword),
+    /// The `==` symbol.
+    Equal(Equal),
+    /// The `!` symbol.
+    Exclamation(Exclamation),
+    /// The `**` symbol.
+    Exponentiation(Exponentiation),
+    /// The `false` keyword.
+    FalseKeyword(FalseKeyword),
+    /// The `File` type keyword.
+    FileTypeKeyword(FileTypeKeyword),
+    /// A float.
+    Float(Float),
+    /// The `Float` type keyword.
+    FloatTypeKeyword(FloatTypeKeyword),
+    /// The `>` symbol.
+    Greater(Greater),
+    /// The `>=` symbol.
+    GreaterEqual(GreaterEqual),
+    /// The `hints` keyword.
+    HintsKeyword(HintsKeyword),
+    /// An identifier.
+    Ident(Ident),
+    /// The `if` keyword.
+    IfKeyword(IfKeyword),
+    /// The `import` keyword.
+    ImportKeyword(ImportKeyword),
+    /// The `in` keyword.
+    InKeyword(InKeyword),
+    /// The `input` keyword.
+    InputKeyword(InputKeyword),
+    /// An integer.
+    Integer(Integer),
+    /// The `Int` type keyword.
+    IntTypeKeyword(IntTypeKeyword),
+    /// The `<` symbol.
+    Less(Less),
+    /// The `<=` symbol.
+    LessEqual(LessEqual),
+    /// The `&&` symbol.
+    LogicalAnd(LogicalAnd),
+    /// The `||` symbol.
+    LogicalOr(LogicalOr),
+    /// The `Map` type keyword.
+    MapTypeKeyword(MapTypeKeyword),
+    /// The `meta` keyword.
+    MetaKeyword(MetaKeyword),
+    /// The `-` symbol.
+    Minus(Minus),
+    /// The `None` keyword.
+    NoneKeyword(NoneKeyword),
+    /// The `!=` symbol.
+    NotEqual(NotEqual),
+    /// The `null` keyword.
+    NullKeyword(NullKeyword),
+    /// The `object` keyword.
+    ObjectKeyword(ObjectKeyword),
+    /// The `Object` type keyword.
+    ObjectTypeKeyword(ObjectTypeKeyword),
+    /// The `{` symbol.
+    OpenBrace(OpenBrace),
+    /// The `[` symbol.
+    OpenBracket(OpenBracket),
+    /// The `<<<` symbol.
+    OpenHeredoc(OpenHeredoc),
+    /// The `(` symbol.
+    OpenParen(OpenParen),
+    /// The `output` keyword.
+    OutputKeyword(OutputKeyword),
+    /// The `Pair` type keyword.
+    PairTypeKeyword(PairTypeKeyword),
+    /// The `parameter_meta` keyword.
+    ParameterMetaKeyword(ParameterMetaKeyword),
+    /// The `%` symbol.
+    Percent(Percent),
+    /// One of the placeholder open symbols.
+    PlaceholderOpen(PlaceholderOpen),
+    /// The `+` symbol.
+    Plus(Plus),
+    /// The `?` symbol.
+    QuestionMark(QuestionMark),
+    /// The `requirements` keyword.
+    RequirementsKeyword(RequirementsKeyword),
+    /// The `runtime` keyword.
+    RuntimeKeyword(RuntimeKeyword),
+    /// The `scatter` keyword.
+    ScatterKeyword(ScatterKeyword),
+    /// The `'` symbol.
+    SingleQuote(SingleQuote),
+    /// The `/` symbol.
+    Slash(Slash),
+    /// The textual part of a string.
+    StringText(StringText),
+    /// The `String` type keyword.
+    StringTypeKeyword(StringTypeKeyword),
+    /// The `struct` keyword.
+    StructKeyword(StructKeyword),
+    /// The `task` keyword.
+    TaskKeyword(TaskKeyword),
+    /// The `then` keyword.
+    ThenKeyword(ThenKeyword),
+    /// The `true` keyword.
+    TrueKeyword(TrueKeyword),
+    /// A version.
+    Version(Version),
+    /// The `version` keyword.
+    VersionKeyword(VersionKeyword),
+    /// Whitespace.
+    Whitespace(Whitespace),
+    /// The `workflow` keyword.
+    WorkflowKeyword(WorkflowKeyword),
+}
+
+ast_element_impl!(
+    Token,
+    token,
+    SyntaxToken,
+    [
+        after_keyword(): AfterKeyword => AfterKeyword => AfterKeyword,
+        alias_keyword(): AliasKeyword => AliasKeyword => AliasKeyword,
+        array_type_keyword(): ArrayTypeKeyword => ArrayTypeKeyword => ArrayTypeKeyword,
+        as_keyword(): AsKeyword => AsKeyword => AsKeyword,
+        assignment(): Assignment => Assignment => Assignment,
+        asterisk(): Asterisk => Asterisk => Asterisk,
+        boolean_type_keyword(): BooleanTypeKeyword => BooleanTypeKeyword => BooleanTypeKeyword,
+        call_keyword(): CallKeyword => CallKeyword => CallKeyword,
+        close_brace(): CloseBrace => CloseBrace => CloseBrace,
+        close_bracket(): CloseBracket => CloseBracket => CloseBracket,
+        close_heredoc(): CloseHeredoc => CloseHeredoc => CloseHeredoc,
+        close_paren(): CloseParen => CloseParen => CloseParen,
+        colon(): Colon => Colon => Colon,
+        comma(): Comma => Comma => Comma,
+        command_keyword(): CommandKeyword => CommandKeyword => CommandKeyword,
+        command_text(): LiteralCommandText => CommandText => CommandText,
+        comment(): Comment => Comment => Comment,
+        directory_type_keyword(): DirectoryTypeKeyword => DirectoryTypeKeyword => DirectoryTypeKeyword,
+        dot(): Dot => Dot => Dot,
+        double_quote(): DoubleQuote => DoubleQuote => DoubleQuote,
+        else_keyword(): ElseKeyword => ElseKeyword => ElseKeyword,
+        equal(): Equal => Equal => Equal,
+        exclamation(): Exclamation => Exclamation => Exclamation,
+        exponentiation(): Exponentiation => Exponentiation => Exponentiation,
+        false_keyword(): FalseKeyword => FalseKeyword => FalseKeyword,
+        file_type_keyword(): FileTypeKeyword => FileTypeKeyword => FileTypeKeyword,
+        float(): Float => Float => Float,
+        float_type_keyword(): FloatTypeKeyword => FloatTypeKeyword => FloatTypeKeyword,
+        greater(): Greater => Greater => Greater,
+        greater_equal(): GreaterEqual => GreaterEqual => GreaterEqual,
+        hints_keyword(): HintsKeyword => HintsKeyword => HintsKeyword,
+        ident(): Ident => Ident => Ident,
+        if_keyword(): IfKeyword => IfKeyword => IfKeyword,
+        import_keyword(): ImportKeyword => ImportKeyword => ImportKeyword,
+        in_keyword(): InKeyword => InKeyword => InKeyword,
+        input_keyword(): InputKeyword => InputKeyword => InputKeyword,
+        integer(): Integer => Integer => Integer,
+        int_type_keyword(): IntTypeKeyword => IntTypeKeyword => IntTypeKeyword,
+        less(): Less => Less => Less,
+        less_equal(): LessEqual => LessEqual => LessEqual,
+        logical_and(): LogicalAnd => LogicalAnd => LogicalAnd,
+        logical_or(): LogicalOr => LogicalOr => LogicalOr,
+        map_type_keyword(): MapTypeKeyword => MapTypeKeyword => MapTypeKeyword,
+        meta_keyword(): MetaKeyword => MetaKeyword => MetaKeyword,
+        minus(): Minus => Minus => Minus,
+        none_keyword(): NoneKeyword => NoneKeyword => NoneKeyword,
+        not_equal(): NotEqual => NotEqual => NotEqual,
+        null_keyword(): NullKeyword => NullKeyword => NullKeyword,
+        object_keyword(): ObjectKeyword => ObjectKeyword => ObjectKeyword,
+        object_type_keyword(): ObjectTypeKeyword => ObjectTypeKeyword => ObjectTypeKeyword,
+        open_brace(): OpenBrace => OpenBrace => OpenBrace,
+        open_bracket(): OpenBracket => OpenBracket => OpenBracket,
+        open_heredoc(): OpenHeredoc => OpenHeredoc => OpenHeredoc,
+        open_paren(): OpenParen => OpenParen => OpenParen,
+        output_keyword(): OutputKeyword => OutputKeyword => OutputKeyword,
+        pair_type_keyword(): PairTypeKeyword => PairTypeKeyword => PairTypeKeyword,
+        parameter_meta_keyword(): ParameterMetaKeyword => ParameterMetaKeyword => ParameterMetaKeyword,
+        percent(): Percent => Percent => Percent,
+        placeholder_open(): PlaceholderOpen => PlaceholderOpen => PlaceholderOpen,
+        plus(): Plus => Plus => Plus,
+        question_mark(): QuestionMark => QuestionMark => QuestionMark,
+        requirements_keyword(): RequirementsKeyword => RequirementsKeyword => RequirementsKeyword,
+        runtime_keyword(): RuntimeKeyword => RuntimeKeyword => RuntimeKeyword,
+        scatter_keyword(): ScatterKeyword => ScatterKeyword => ScatterKeyword,
+        single_quote(): SingleQuote => SingleQuote => SingleQuote,
+        slash(): Slash => Slash => Slash,
+        string_text(): LiteralStringText => StringText => StringText,
+        string_type_keyword(): StringTypeKeyword => StringTypeKeyword => StringTypeKeyword,
+        struct_keyword(): StructKeyword => StructKeyword => StructKeyword,
+        task_keyword(): TaskKeyword => TaskKeyword => TaskKeyword,
+        then_keyword(): ThenKeyword => ThenKeyword => ThenKeyword,
+        true_keyword(): TrueKeyword => TrueKeyword => TrueKeyword,
+        version_keyword(): VersionKeyword => VersionKeyword => VersionKeyword,
+        version(): Version => Version => Version,
+        whitespace(): Whitespace => Whitespace => Whitespace,
+        workflow_keyword(): WorkflowKeyword => WorkflowKeyword => WorkflowKeyword
+    ]
+);
+
+/// An abstract syntax tree element.
+#[derive(Clone, Debug)]
+pub enum Element {
+    /// An abstract syntax tree node.
+    Node(Node),
+
+    /// An abstract syntax tree token.
+    Token(Token),
+}
+
+impl Element {
+    /// Attempts to get a reference to the inner [`Node`].
+    ///
+    /// * If `self` is a [`Element::Node`], then a reference to the inner
+    ///   [`Node`] wrapped in [`Some`] is returned.
+    /// * Else, [`None`] is returned.
+    pub fn as_node(&self) -> Option<&Node> {
+        match self {
+            Self::Node(node) => Some(node),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`Node`].
+    ///
+    /// * If `self` is a [`Element::Node`], then the inner [`Node`] wrapped in
+    ///   [`Some`] is returned.
+    /// * Else, [`None`] is returned.
+    pub fn into_node(self) -> Option<Node> {
+        match self {
+            Self::Node(node) => Some(node),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and returns the inner [`Node`].
+    ///
+    /// # Panics
+    ///
+    /// If `self` is not a [`Element::Node`].
+    pub fn unwrap_node(self) -> Node {
+        self.into_node()
+            .expect("expected `Element::Node` but got a different variant")
+    }
+
+    /// Attempts to get a reference to the inner [`Token`].
+    ///
+    /// * If `self` is a [`Element::Token`], then a reference to the inner
+    ///   [`Token`] wrapped in [`Some`] is returned.
+    /// * Else, [`None`] is returned.
+    pub fn as_token(&self) -> Option<&Token> {
+        match self {
+            Self::Token(token) => Some(token),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`Token`].
+    ///
+    /// * If `self` is a [`Element::Token`], then the inner [`Token`] wrapped in
+    ///   [`Some`] is returned.
+    /// * Else, [`None`] is returned.
+    pub fn into_token(self) -> Option<Token> {
+        match self {
+            Self::Token(token) => Some(token),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and returns the inner [`Token`].
+    ///
+    /// # Panics
+    ///
+    /// If `self` is not a [`Element::Token`].
+    pub fn unwrap_token(self) -> Token {
+        self.into_token()
+            .expect("expected `Element::Token` but got a different variant")
+    }
+
+    /// Gets the underlying [`SyntaxElement`] from the [`Element`].
+    pub fn syntax(&self) -> SyntaxElement {
+        match self {
+            Element::Node(node) => SyntaxElement::Node(node.syntax().clone()),
+            Element::Token(token) => SyntaxElement::Token(token.syntax().clone()),
+        }
+    }
+
+    /// Gets the underlying [`SyntaxKind`] from the [`Element`].
+    pub fn kind(&self) -> SyntaxKind {
+        match self {
+            Element::Node(node) => node.syntax().kind(),
+            Element::Token(token) => token.syntax().kind(),
+        }
+    }
+
+    /// Returns whether the [`SyntaxElement`] represents trivia.
+    pub fn is_trivia(&self) -> bool {
+        match self {
+            Element::Node(node) => node.syntax().kind().is_trivia(),
+            Element::Token(token) => token.syntax().kind().is_trivia(),
+        }
+    }
+
+    /// Casts a [`SyntaxElement`] to an [`Element`].
+    ///
+    /// This is expected to always succeed, as any [`SyntaxElement`] _should_
+    /// have a corresponding [`Element`] (and, if it doesn't, that's very
+    /// likely a bug).
+    pub fn cast(element: SyntaxElement) -> Self {
+        match &element {
+            NodeOrToken::Node(_) => {
+                Self::Node(Node::cast(element).expect("a syntax node should cast to a Node"))
+            }
+            NodeOrToken::Token(_) => {
+                Self::Token(Token::cast(element).expect("a syntax token should cast to a Token"))
+            }
+        }
+    }
+}
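Taken together, `Element::cast` realizes the guarantee recorded in the CHANGELOG above: every CST node or token has exactly one AST analogue. A sketch of walking a parsed document through the new API; it assumes `Document::parse` returns the document plus its parse diagnostics (the signature itself is not shown in this diff):

```rust
use wdl_ast::Document;
use wdl_ast::Element;

fn main() {
    // Assumption: `Document::parse` yields `(Document, Vec<Diagnostic>)`.
    let (document, _diagnostics) = Document::parse("version 1.1\n");

    for child in document.syntax().descendants_with_tokens() {
        // `Element::cast` is total over valid CST elements; a failed cast
        // indicates a missing AST analogue (a bug, per its docs).
        let element = Element::cast(child);
        println!("{:?} (trivia: {})", element.kind(), element.is_trivia());
    }
}
```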
diff --git a/wdl-ast/src/lib.rs b/wdl-ast/src/lib.rs
index 460e0fb4..5618ebe9 100644
--- a/wdl-ast/src/lib.rs
+++ b/wdl-ast/src/lib.rs
@@ -49,9 +49,11 @@ pub use wdl_grammar::Severity;
 pub use wdl_grammar::Span;
 pub use wdl_grammar::SupportedVersion;
 pub use wdl_grammar::SyntaxElement;
+pub use wdl_grammar::SyntaxExt;
 pub use wdl_grammar::SyntaxKind;
 pub use wdl_grammar::SyntaxNode;
 pub use wdl_grammar::SyntaxToken;
+pub use wdl_grammar::SyntaxTokenExt;
 pub use wdl_grammar::SyntaxTree;
 pub use wdl_grammar::ToSpan;
 pub use wdl_grammar::WorkflowDescriptionLanguage;
@@ -59,9 +61,11 @@ pub use wdl_grammar::version;
 
 pub mod v1;
 
+mod element;
 mod validation;
 mod visitor;
 
+pub use element::*;
 pub use validation::*;
 pub use visitor::*;
 
@@ -182,6 +186,21 @@ pub trait AstToken {
     }
 }
 
+/// Finds the first child that casts to a particular [`AstToken`].
+pub fn token_child<T: AstToken>(parent: &SyntaxNode) -> Option<T> {
+    parent
+        .children_with_tokens()
+        .filter_map(|c| c.into_token())
+        .find_map(T::cast)
+}
+
+/// Finds all children that cast to a particular [`AstToken`].
+pub fn token_children<T: AstToken>(parent: &SyntaxNode) -> impl Iterator<Item = T> {
+    parent
+        .children_with_tokens()
+        .filter_map(|c| c.into_token().and_then(T::cast))
+}
+
 /// Represents the AST of a [Document].
 ///
 /// See [Document::ast].
@@ -230,6 +249,27 @@ impl Ast {
 pub struct Document(SyntaxNode);
 
 impl Document {
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`Document`].
+    pub fn can_cast(kind: SyntaxKind) -> bool {
+        kind == SyntaxKind::RootNode
+    }
+
+    /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+    /// within the [`Document`].
+    pub fn cast(syntax: SyntaxNode) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            Some(Self(syntax))
+        } else {
+            None
+        }
+    }
+
+    /// Gets a reference to the underlying [`SyntaxNode`].
+    pub fn syntax(&self) -> &SyntaxNode {
+        &self.0
+    }
+
     /// Parses a document from the given source.
     ///
     /// A document and its AST elements are trivially cloned.
@@ -291,26 +331,6 @@ impl Document {
     }
 }
 
-impl AstNode for Document {
-    type Language = WorkflowDescriptionLanguage;
-
-    fn can_cast(kind: SyntaxKind) -> bool {
-        kind == SyntaxKind::RootNode
-    }
-
-    fn cast(syntax: SyntaxNode) -> Option<Self> {
-        if Self::can_cast(syntax.kind()) {
-            Some(Self(syntax))
-        } else {
-            None
-        }
-    }
-
-    fn syntax(&self) -> &SyntaxNode {
-        &self.0
-    }
-}
-
 impl fmt::Debug for Document {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         self.0.fmt(f)
@@ -318,7 +338,7 @@ impl fmt::Debug for Document {
 }
 
 /// Represents a whitespace token in the AST.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
 pub struct Whitespace(SyntaxToken);
 
 impl AstToken for Whitespace {
@@ -380,6 +400,11 @@ impl VersionStatement {
     pub fn version(&self) -> Version {
         token(&self.0).expect("version statement must have a version token")
     }
+
+    /// Gets the version keyword of the version statement.
+    pub fn keyword(&self) -> v1::VersionKeyword {
+        token(&self.0).expect("version statement must have a version keyword")
+    }
 }
 
 impl AstNode for VersionStatement {
@@ -408,7 +433,7 @@ impl AstNode for VersionStatement {
 }
 
 /// Represents a version in the AST.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
 pub struct Version(SyntaxToken);
 
 impl AstToken for Version {
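A sketch of the new `VersionStatement::keyword` accessor alongside the existing `version`; it assumes `Document::parse` and a `version_statement` accessor on `Document`, neither of whose signatures appears in this diff:

```rust
use wdl_ast::AstToken;
use wdl_ast::Document;

fn main() {
    // Assumptions: `Document::parse` yields the document plus diagnostics, and
    // `version_statement` returns the optional version statement.
    let (document, _diagnostics) = Document::parse("version 1.1\n");
    let stmt = document
        .version_statement()
        .expect("document should have a version statement");

    // `keyword` (added above) and `version` resolve different tokens from the
    // same underlying syntax node.
    assert_eq!(stmt.keyword().syntax().to_string(), "version");
    assert_eq!(stmt.version().syntax().to_string(), "1.1");
}
```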
diff --git a/wdl-ast/src/v1.rs b/wdl-ast/src/v1.rs
index d434f1db..93dfcc5c 100644
--- a/wdl-ast/src/v1.rs
+++ b/wdl-ast/src/v1.rs
@@ -12,6 +12,7 @@ mod expr;
 mod import;
 mod r#struct;
 mod task;
+mod tokens;
 mod workflow;
 
 pub use decls::*;
@@ -19,6 +20,7 @@ pub use expr::*;
 pub use import::*;
 pub use r#struct::*;
 pub use task::*;
+pub use tokens::*;
 pub use workflow::*;
 
 /// Represents a WDL V1 Abstract Syntax Tree (AST).
@@ -39,8 +41,8 @@ pub struct Ast(SyntaxNode);
 
 impl Ast {
     /// Gets all of the document items in the AST.
-    pub fn items(&self) -> AstChildren<DocumentItem> {
-        children(&self.0)
+    pub fn items(&self) -> impl Iterator<Item = DocumentItem> {
+        DocumentItem::children(&self.0)
     }
 
     /// Gets the import statements in the AST.
@@ -102,10 +104,10 @@ pub enum DocumentItem {
     Workflow(WorkflowDefinition),
 }
 
-impl AstNode for DocumentItem {
-    type Language = WorkflowDescriptionLanguage;
-
-    fn can_cast(kind: SyntaxKind) -> bool
+impl DocumentItem {
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`DocumentItem`].
+    pub fn can_cast(kind: SyntaxKind) -> bool
     where
         Self: Sized,
     {
@@ -118,25 +120,150 @@ impl AstNode for DocumentItem {
         )
     }
 
-    fn cast(syntax: SyntaxNode) -> Option<Self>
+    /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+    /// within the [`DocumentItem`].
+    pub fn cast(syntax: SyntaxNode) -> Option<Self>
     where
         Self: Sized,
     {
         match syntax.kind() {
-            SyntaxKind::ImportStatementNode => Some(Self::Import(ImportStatement(syntax))),
-            SyntaxKind::StructDefinitionNode => Some(Self::Struct(StructDefinition(syntax))),
-            SyntaxKind::TaskDefinitionNode => Some(Self::Task(TaskDefinition(syntax))),
-            SyntaxKind::WorkflowDefinitionNode => Some(Self::Workflow(WorkflowDefinition(syntax))),
+            SyntaxKind::ImportStatementNode => Some(Self::Import(
+                ImportStatement::cast(syntax).expect("import statement to cast"),
+            )),
+            SyntaxKind::StructDefinitionNode => Some(Self::Struct(
+                StructDefinition::cast(syntax).expect("struct definition to cast"),
+            )),
+            SyntaxKind::TaskDefinitionNode => Some(Self::Task(
+                TaskDefinition::cast(syntax).expect("task definition to cast"),
+            )),
+            SyntaxKind::WorkflowDefinitionNode => Some(Self::Workflow(
+                WorkflowDefinition::cast(syntax).expect("workflow definition to cast"),
+            )),
             _ => None,
         }
     }
 
-    fn syntax(&self) -> &SyntaxNode {
+    /// Gets a reference to the underlying [`SyntaxNode`].
+    pub fn syntax(&self) -> &SyntaxNode {
+        match self {
+            Self::Import(element) => element.syntax(),
+            Self::Struct(element) => element.syntax(),
+            Self::Task(element) => element.syntax(),
+            Self::Workflow(element) => element.syntax(),
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`ImportStatement`].
+    ///
+    /// * If `self` is a [`DocumentItem::Import`], then a reference to the inner
+    ///   [`ImportStatement`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_import_statement(&self) -> Option<&ImportStatement> {
+        match self {
+            DocumentItem::Import(import) => Some(import),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`ImportStatement`].
+    ///
+    /// * If `self` is a [`DocumentItem::Import`], then the inner
+    ///   [`ImportStatement`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_import_statement(self) -> Option<ImportStatement> {
+        match self {
+            DocumentItem::Import(import) => Some(import),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`StructDefinition`].
+    ///
+    /// * If `self` is a [`DocumentItem::Struct`], then a reference to the inner
+    ///   [`StructDefinition`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_struct_definition(&self) -> Option<&StructDefinition> {
+        match self {
+            DocumentItem::Struct(r#struct) => Some(r#struct),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`StructDefinition`].
+    ///
+    /// * If `self` is a [`DocumentItem::Struct`], then the inner
+    ///   [`StructDefinition`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_struct_definition(self) -> Option<StructDefinition> {
         match self {
-            Self::Import(i) => &i.0,
-            Self::Struct(s) => &s.0,
-            Self::Task(t) => &t.0,
-            Self::Workflow(w) => &w.0,
+            DocumentItem::Struct(r#struct) => Some(r#struct),
+            _ => None,
         }
     }
+
+    /// Attempts to get a reference to the inner [`TaskDefinition`].
+    ///
+    /// * If `self` is a [`DocumentItem::Task`], then a reference to the inner
+    ///   [`TaskDefinition`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_task_definition(&self) -> Option<&TaskDefinition> {
+        match self {
+            DocumentItem::Task(task) => Some(task),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`TaskDefinition`].
+    ///
+    /// * If `self` is a [`DocumentItem::Task`], then the inner
+    ///   [`TaskDefinition`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_task_definition(self) -> Option<TaskDefinition> {
+        match self {
+            DocumentItem::Task(task) => Some(task),
+            _ => None,
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`WorkflowDefinition`].
+    ///
+    /// * If `self` is a [`DocumentItem::Workflow`], then a reference to the
+    ///   inner [`WorkflowDefinition`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_workflow_definition(&self) -> Option<&WorkflowDefinition> {
+        match self {
+            DocumentItem::Workflow(workflow) => Some(workflow),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`WorkflowDefinition`].
+    ///
+    /// * If `self` is a [`DocumentItem::Workflow`], then the inner
+    ///   [`WorkflowDefinition`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_workflow_definition(self) -> Option<WorkflowDefinition> {
+        match self {
+            DocumentItem::Workflow(workflow) => Some(workflow),
+            _ => None,
+        }
+    }
+
+    /// Finds the first child that can be cast to a [`DocumentItem`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::child`] without requiring [`DocumentItem`] to
+    /// implement the `AstNode` trait.
+    pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+        syntax.children().find_map(Self::cast)
+    }
+
+    /// Finds all children that can be cast to a [`DocumentItem`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::children`] without requiring [`DocumentItem`] to
+    /// implement the `AstNode` trait.
+    pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = DocumentItem> {
+        syntax.children().filter_map(Self::cast)
+    }
 }
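Since `DocumentItem` no longer implements `AstNode`, iteration goes through the inherent `children` helper (wrapped by `Ast::items` above), and the new `as_*`/`into_*` accessors replace explicit matching when only one variant is of interest. A sketch, assuming `Document::parse` and an `Ast::V1` variant (neither shown in this diff):

```rust
use wdl_ast::Ast;
use wdl_ast::AstNode;
use wdl_ast::Document;

fn main() {
    let source = "version 1.1\nworkflow w {}\n";
    // Assumption: `Document::parse` yields the document plus diagnostics.
    let (document, _diagnostics) = Document::parse(source);

    // Assumption: `Document::ast` returns an `Ast` with a `V1` variant.
    if let Ast::V1(ast) = document.ast() {
        for item in ast.items() {
            // The accessor returns `Some` only for workflow definitions.
            if let Some(workflow) = item.as_workflow_definition() {
                println!("workflow at {:?}", workflow.syntax().text_range());
            }
        }
    }
}
```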
diff --git a/wdl-ast/src/v1/decls.rs b/wdl-ast/src/v1/decls.rs
index 45e78f09..f35d35cc 100644
--- a/wdl-ast/src/v1/decls.rs
+++ b/wdl-ast/src/v1/decls.rs
@@ -10,7 +10,6 @@ use crate::SyntaxKind;
 use crate::SyntaxNode;
 use crate::WorkflowDescriptionLanguage;
 use crate::support;
-use crate::support::child;
 use crate::token;
 
 /// Represents a `Map` type.
@@ -87,7 +86,7 @@ pub struct ArrayType(SyntaxNode);
 impl ArrayType {
     /// Gets the element type of the array.
     pub fn element_type(&self) -> Type {
-        child(&self.0).expect("array should have an element type")
+        Type::child(&self.0).expect("array should have an element type")
     }
 
     /// Determines if the type has the "non-empty" qualifier.
@@ -444,6 +443,61 @@ pub enum Type {
 }
 
 impl Type {
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`Type`].
+    pub fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(
+            kind,
+            SyntaxKind::MapTypeNode
+                | SyntaxKind::ArrayTypeNode
+                | SyntaxKind::PairTypeNode
+                | SyntaxKind::ObjectTypeNode
+                | SyntaxKind::TypeRefNode
+                | SyntaxKind::PrimitiveTypeNode
+        )
+    }
+
+    /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+    /// within the [`Type`].
+    pub fn cast(syntax: SyntaxNode) -> Option<Self> {
+        match syntax.kind() {
+            SyntaxKind::MapTypeNode => {
+                Some(Self::Map(MapType::cast(syntax).expect("map type to cast")))
+            }
+            SyntaxKind::ArrayTypeNode => Some(Self::Array(
+                ArrayType::cast(syntax).expect("array type to cast"),
+            )),
+            SyntaxKind::PairTypeNode => Some(Self::Pair(
+                PairType::cast(syntax).expect("pair type to cast"),
+            )),
+            SyntaxKind::ObjectTypeNode => Some(Self::Object(
+                ObjectType::cast(syntax).expect("object type to cast"),
+            )),
+            SyntaxKind::TypeRefNode => {
+                Some(Self::Ref(TypeRef::cast(syntax).expect("type ref to cast")))
+            }
+            SyntaxKind::PrimitiveTypeNode => Some(Self::Primitive(
+                PrimitiveType::cast(syntax).expect("primitive type to cast"),
+            )),
+            _ => None,
+        }
+    }
+
+    /// Gets a reference to the underlying [`SyntaxNode`].
+    pub fn syntax(&self) -> &SyntaxNode {
+        match self {
+            Type::Map(element) => element.syntax(),
+            Type::Array(element) => element.syntax(),
+            Type::Pair(element) => element.syntax(),
+            Type::Object(element) => element.syntax(),
+            Type::Ref(element) => element.syntax(),
+            Type::Primitive(element) => element.syntax(),
+        }
+    }
+
     /// Determines if the type is optional.
     pub fn is_optional(&self) -> bool {
         match self {
@@ -456,6 +510,30 @@ impl Type {
         }
     }
 
+    /// Attempts to get a reference to the inner [`MapType`].
+    ///
+    /// * If `self` is a [`Type::Map`], then a reference to the inner
+    ///   [`MapType`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_map_type(&self) -> Option<&MapType> {
+        match self {
+            Self::Map(map) => Some(map),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`MapType`].
+    ///
+    /// * If `self` is a [`Type::Map`], then the inner [`MapType`] is returned
+    ///   wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_map_type(self) -> Option<MapType> {
+        match self {
+            Self::Map(map) => Some(map),
+            _ => None,
+        }
+    }
+
     /// Unwraps the type into a map type.
     ///
     /// # Panics
@@ -468,6 +546,30 @@ impl Type {
         }
     }
 
+    /// Attempts to get a reference to the inner [`ArrayType`].
+    ///
+    /// * If `self` is a [`Type::Array`], then a reference to the inner
+    ///   [`ArrayType`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_array_type(&self) -> Option<&ArrayType> {
+        match self {
+            Self::Array(array) => Some(array),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`ArrayType`].
+    ///
+    /// * If `self` is a [`Type::Array`], then the inner [`ArrayType`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_array_type(self) -> Option<ArrayType> {
+        match self {
+            Self::Array(array) => Some(array),
+            _ => None,
+        }
+    }
+
     /// Unwraps the type into an array type.
     ///
     /// # Panics
@@ -480,6 +582,30 @@ impl Type {
         }
     }
 
+    /// Attempts to get a reference to the inner [`PairType`].
+    ///
+    /// * If `self` is a [`Type::Pair`], then a reference to the inner
+    ///   [`PairType`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_pair_type(&self) -> Option<&PairType> {
+        match self {
+            Self::Pair(pair) => Some(pair),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`PairType`].
+    ///
+    /// * If `self` is a [`Type::Pair`], then the inner [`PairType`] is returned
+    ///   wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_pair_type(self) -> Option<PairType> {
+        match self {
+            Self::Pair(pair) => Some(pair),
+            _ => None,
+        }
+    }
+
     /// Unwraps the type into a pair type.
     ///
     /// # Panics
@@ -492,6 +618,30 @@ impl Type {
         }
     }
 
+    /// Attempts to get a reference to the inner [`ObjectType`].
+    ///
+    /// * If `self` is a [`Type::Object`], then a reference to the inner
+    ///   [`ObjectType`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_object_type(&self) -> Option<&ObjectType> {
+        match self {
+            Self::Object(object) => Some(object),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`ObjectType`].
+    ///
+    /// * If `self` is a [`Type::Object`], then the inner [`ObjectType`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_object_type(self) -> Option<ObjectType> {
+        match self {
+            Self::Object(object) => Some(object),
+            _ => None,
+        }
+    }
+
     /// Unwraps the type into an object type.
     ///
     /// # Panics
@@ -504,6 +654,30 @@ impl Type {
         }
     }
 
+    /// Attempts to get a reference to the inner [`TypeRef`].
+    ///
+    /// * If `self` is a [`Type::Ref`], then a reference to the inner
+    ///   [`TypeRef`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_type_ref(&self) -> Option<&TypeRef> {
+        match self {
+            Self::Ref(type_ref) => Some(type_ref),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`TypeRef`].
+    ///
+    /// * If `self` is a [`Type::Ref`], then the inner [`TypeRef`] is returned
+    ///   wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_type_ref(self) -> Option<TypeRef> {
+        match self {
+            Self::Ref(type_ref) => Some(type_ref),
+            _ => None,
+        }
+    }
+
     /// Unwraps the type into a type reference.
     ///
     /// # Panics
@@ -516,6 +690,30 @@ impl Type {
         }
     }
 
+    /// Attempts to get a reference to the inner [`PrimitiveType`].
+    ///
+    /// * If `self` is a [`Type::Primitive`], then a reference to the inner
+    ///   [`PrimitiveType`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_primitive_type(&self) -> Option<&PrimitiveType> {
+        match self {
+            Self::Primitive(primitive) => Some(primitive),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`PrimitiveType`].
+    ///
+    /// * If `self` is a [`Type::Primitive`], then the inner [`PrimitiveType`]
+    ///   is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_primitive_type(self) -> Option<PrimitiveType> {
+        match self {
+            Self::Primitive(primitive) => Some(primitive),
+            _ => None,
+        }
+    }
+
     /// Unwraps the type into a primitive type.
     ///
     /// # Panics
@@ -527,50 +725,23 @@ impl Type {
             _ => panic!("not a primitive type"),
         }
     }
-}
-
-impl AstNode for Type {
-    type Language = WorkflowDescriptionLanguage;
-
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        matches!(
-            kind,
-            SyntaxKind::MapTypeNode
-                | SyntaxKind::ArrayTypeNode
-                | SyntaxKind::PairTypeNode
-                | SyntaxKind::ObjectTypeNode
-                | SyntaxKind::TypeRefNode
-                | SyntaxKind::PrimitiveTypeNode
-        )
-    }
-
-    fn cast(syntax: SyntaxNode) -> Option<Self>
-    where
-        Self: Sized,
-    {
-        match syntax.kind() {
-            SyntaxKind::MapTypeNode => Some(Self::Map(MapType(syntax))),
-            SyntaxKind::ArrayTypeNode => Some(Self::Array(ArrayType(syntax))),
-            SyntaxKind::PairTypeNode => Some(Self::Pair(PairType(syntax))),
-            SyntaxKind::ObjectTypeNode => Some(Self::Object(ObjectType(syntax))),
-            SyntaxKind::TypeRefNode => Some(Self::Ref(TypeRef(syntax))),
-            SyntaxKind::PrimitiveTypeNode => Some(Self::Primitive(PrimitiveType(syntax))),
-            _ => None,
-        }
+    /// Finds the first child that can be cast to a [`Type`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::child`] without requiring [`Type`] to implement
+    /// the `AstNode` trait.
+    pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+        syntax.children().find_map(Self::cast)
     }
 
-    fn syntax(&self) -> &SyntaxNode {
-        match self {
-            Type::Map(m) => &m.0,
-            Type::Array(a) => &a.0,
-            Type::Pair(p) => &p.0,
-            Type::Object(o) => &o.0,
-            Type::Ref(r) => &r.0,
-            Type::Primitive(t) => &t.0,
-        }
+    /// Finds all children that can be cast to a [`Type`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::children`] without requiring [`Type`] to
+    /// implement the `AstNode` trait.
+    pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = Type> {
+        syntax.children().filter_map(Self::cast)
     }
 }
 
@@ -594,7 +765,7 @@ pub struct UnboundDecl(pub(crate) SyntaxNode);
 impl UnboundDecl {
     /// Gets the type of the declaration.
     pub fn ty(&self) -> Type {
-        child(&self.0).expect("unbound declaration should have a type")
+        Type::child(&self.0).expect("unbound declaration should have a type")
     }
 
     /// Gets the name of the declaration.
@@ -635,7 +806,7 @@ pub struct BoundDecl(pub(crate) SyntaxNode);
 impl BoundDecl {
     /// Gets the type of the declaration.
     pub fn ty(&self) -> Type {
-        child(&self.0).expect("bound declaration should have a type")
+        Type::child(&self.0).expect("bound declaration should have a type")
     }
 
     /// Gets the name of the declaration.
@@ -645,7 +816,7 @@ impl BoundDecl {
 
     /// Gets the expression the declaration is bound to.
     pub fn expr(&self) -> Expr {
-        child(&self.0).expect("bound declaration should have an expression")
+        Expr::child(&self.0).expect("bound declaration should have an expression")
     }
 }
 
@@ -684,6 +855,40 @@ pub enum Decl {
 }
 
 impl Decl {
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`Decl`].
+    pub fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        kind == SyntaxKind::BoundDeclNode || kind == SyntaxKind::UnboundDeclNode
+    }
+
+    /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+    /// within the [`Decl`].
+    pub fn cast(syntax: SyntaxNode) -> Option<Self>
+    where
+        Self: Sized,
+    {
+        match syntax.kind() {
+            SyntaxKind::BoundDeclNode => Some(Self::Bound(
+                BoundDecl::cast(syntax).expect("bound decl to cast"),
+            )),
+            SyntaxKind::UnboundDeclNode => Some(Self::Unbound(
+                UnboundDecl::cast(syntax).expect("unbound decl to cast"),
+            )),
+            _ => None,
+        }
+    }
+
+    /// Gets a reference to the underlying [`SyntaxNode`].
+    pub fn syntax(&self) -> &SyntaxNode {
+        match self {
+            Self::Bound(element) => element.syntax(),
+            Self::Unbound(element) => element.syntax(),
+        }
+    }
+
     /// Gets the type of the declaration.
     pub fn ty(&self) -> Type {
         match self {
@@ -710,6 +915,30 @@ impl Decl {
         }
     }
 
+    /// Attempts to get a reference to the inner [`BoundDecl`].
+    ///
+    /// * If `self` is a [`Decl::Bound`], then a reference to the inner
+    ///   [`BoundDecl`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_bound_decl(&self) -> Option<&BoundDecl> {
+        match self {
+            Self::Bound(bound_decl) => Some(bound_decl),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`BoundDecl`].
+    ///
+    /// * If `self` is a [`Decl::Bound`], then the inner [`BoundDecl`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
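+    ///
+    /// # Examples
+    ///
+    /// A sketch of collecting only the bound declarations under a node
+    /// (`syntax` is assumed to be a [`SyntaxNode`]):
+    ///
+    /// ```ignore
+    /// let bound: Vec<BoundDecl> = Decl::children(&syntax)
+    ///     .filter_map(Decl::into_bound_decl)
+    ///     .collect();
+    /// ```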
+    pub fn into_bound_decl(self) -> Option<BoundDecl> {
+        match self {
+            Self::Bound(bound_decl) => Some(bound_decl),
+            _ => None,
+        }
+    }
+
     /// Unwraps the declaration into a bound declaration.
     ///
     /// # Panics
@@ -722,6 +951,30 @@ impl Decl {
         }
     }
 
+    /// Attempts to get a reference to the inner [`UnboundDecl`].
+    ///
+    /// * If `self` is a [`Decl::Unbound`], then a reference to the inner
+    ///   [`UnboundDecl`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn as_unbound_decl(&self) -> Option<&UnboundDecl> {
+        match self {
+            Self::Unbound(unbound_decl) => Some(unbound_decl),
+            _ => None,
+        }
+    }
+
+    /// Consumes `self` and attempts to return the inner [`UnboundDecl`].
+    ///
+    /// * If `self` is a [`Decl::Unbound`], then the inner [`UnboundDecl`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
+    pub fn into_unbound_decl(self) -> Option<UnboundDecl> {
+        match self {
+            Self::Unbound(unbound_decl) => Some(unbound_decl),
+            _ => None,
+        }
+    }
+
     /// Unwraps the declaration into an unbound declaration.
     ///
     /// # Panics
@@ -733,34 +986,23 @@ impl Decl {
             _ => panic!("not an unbound declaration"),
         }
     }
-}
-
-impl AstNode for Decl {
-    type Language = WorkflowDescriptionLanguage;
-
-    fn can_cast(kind: SyntaxKind) -> bool
-    where
-        Self: Sized,
-    {
-        kind == SyntaxKind::BoundDeclNode || kind == SyntaxKind::UnboundDeclNode
-    }
-
-    fn cast(syntax: SyntaxNode) -> Option<Self>
-    where
-        Self: Sized,
-    {
-        match syntax.kind() {
-            SyntaxKind::BoundDeclNode => Some(Self::Bound(BoundDecl(syntax))),
-            SyntaxKind::UnboundDeclNode => Some(Self::Unbound(UnboundDecl(syntax))),
-            _ => None,
-        }
+    /// Finds the first child that can be cast to a [`Decl`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::child`] without requiring [`Decl`] to implement
+    /// the `AstNode` trait.
+    pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+        syntax.children().find_map(Self::cast)
     }
 
-    fn syntax(&self) -> &SyntaxNode {
-        match self {
-            Self::Bound(b) => &b.0,
-            Self::Unbound(u) => &u.0,
-        }
+    /// Finds all children that can be cast to a [`Decl`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::children`] without requiring [`Decl`] to
+    /// implement the `AstNode` trait.
+    pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = Decl> {
+        syntax.children().filter_map(Self::cast)
     }
 }
diff --git a/wdl-ast/src/v1/expr.rs b/wdl-ast/src/v1/expr.rs
index f9178309..8d492b73 100644
--- a/wdl-ast/src/v1/expr.rs
+++ b/wdl-ast/src/v1/expr.rs
@@ -13,6 +13,7 @@ use crate::support;
 use crate::support::child;
 use crate::support::children;
 use crate::token;
+use crate::token_child;
 
 /// Represents an expression.
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -66,24 +67,172 @@ pub enum Expr {
 }
 
 impl Expr {
-    /// Attempts to reference a literal expression.
-    ///
-    /// - If the value is a literal expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`Expr`].
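+    ///
+    /// Literal kinds are delegated to [`LiteralExpr::can_cast`]; a sketch of
+    /// the expected behavior (`SyntaxKind` variants taken from this crate):
+    ///
+    /// ```ignore
+    /// assert!(Expr::can_cast(SyntaxKind::LiteralBooleanNode));
+    /// assert!(Expr::can_cast(SyntaxKind::AdditionExprNode));
+    /// ```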
+    pub fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        if LiteralExpr::can_cast(kind) {
+            return true;
+        }
+
+        matches!(
+            kind,
+            SyntaxKind::NameRefNode
+                | SyntaxKind::ParenthesizedExprNode
+                | SyntaxKind::IfExprNode
+                | SyntaxKind::LogicalNotExprNode
+                | SyntaxKind::NegationExprNode
+                | SyntaxKind::LogicalOrExprNode
+                | SyntaxKind::LogicalAndExprNode
+                | SyntaxKind::EqualityExprNode
+                | SyntaxKind::InequalityExprNode
+                | SyntaxKind::LessExprNode
+                | SyntaxKind::LessEqualExprNode
+                | SyntaxKind::GreaterExprNode
+                | SyntaxKind::GreaterEqualExprNode
+                | SyntaxKind::AdditionExprNode
+                | SyntaxKind::SubtractionExprNode
+                | SyntaxKind::MultiplicationExprNode
+                | SyntaxKind::DivisionExprNode
+                | SyntaxKind::ModuloExprNode
+                | SyntaxKind::ExponentiationExprNode
+                | SyntaxKind::CallExprNode
+                | SyntaxKind::IndexExprNode
+                | SyntaxKind::AccessExprNode
+        )
+    }
+
+    /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+    /// within the [`Expr`].
+    pub fn cast(syntax: SyntaxNode) -> Option<Self> {
+        if LiteralExpr::can_cast(syntax.kind()) {
+            return Some(Self::Literal(
+                LiteralExpr::cast(syntax).expect("literal expr should cast"),
+            ));
+        }
+
+        match syntax.kind() {
+            SyntaxKind::NameRefNode => Some(Self::Name(
+                NameRef::cast(syntax).expect("name ref should cast"),
+            )),
+            SyntaxKind::ParenthesizedExprNode => Some(Self::Parenthesized(
+                ParenthesizedExpr::cast(syntax).expect("parenthesized expr should cast"),
+            )),
+            SyntaxKind::IfExprNode => {
+                Some(Self::If(IfExpr::cast(syntax).expect("if expr should cast")))
+            }
+            SyntaxKind::LogicalNotExprNode => Some(Self::LogicalNot(
+                LogicalNotExpr::cast(syntax).expect("logical not expr should cast"),
+            )),
+            SyntaxKind::NegationExprNode => Some(Self::Negation(
+                NegationExpr::cast(syntax).expect("negation expr should cast"),
+            )),
+            SyntaxKind::LogicalOrExprNode => Some(Self::LogicalOr(
+                LogicalOrExpr::cast(syntax).expect("logical or expr should cast"),
+            )),
+            SyntaxKind::LogicalAndExprNode => Some(Self::LogicalAnd(
+                LogicalAndExpr::cast(syntax).expect("logical and expr should cast"),
+            )),
+            SyntaxKind::EqualityExprNode => Some(Self::Equality(
+                EqualityExpr::cast(syntax).expect("equality expr should cast"),
+            )),
+            SyntaxKind::InequalityExprNode => Some(Self::Inequality(
+                InequalityExpr::cast(syntax).expect("inequality expr should cast"),
+            )),
+            SyntaxKind::LessExprNode => Some(Self::Less(
+                LessExpr::cast(syntax).expect("less expr should cast"),
+            )),
+            SyntaxKind::LessEqualExprNode => Some(Self::LessEqual(
+                LessEqualExpr::cast(syntax).expect("less equal expr should cast"),
+            )),
+            SyntaxKind::GreaterExprNode => Some(Self::Greater(
+                GreaterExpr::cast(syntax).expect("greater expr should cast"),
+            )),
+            SyntaxKind::GreaterEqualExprNode => Some(Self::GreaterEqual(
+                GreaterEqualExpr::cast(syntax).expect("greater equal expr should cast"),
+            )),
+            SyntaxKind::AdditionExprNode => Some(Self::Addition(
+                AdditionExpr::cast(syntax).expect("addition expr should cast"),
+            )),
+            SyntaxKind::SubtractionExprNode => Some(Self::Subtraction(
+                SubtractionExpr::cast(syntax).expect("subtraction expr should cast"),
+            )),
+            SyntaxKind::MultiplicationExprNode => Some(Self::Multiplication(
+                MultiplicationExpr::cast(syntax).expect("multiplication expr should cast"),
+            )),
+            SyntaxKind::DivisionExprNode => Some(Self::Division(
+                DivisionExpr::cast(syntax).expect("division expr should cast"),
+            )),
+            SyntaxKind::ModuloExprNode => Some(Self::Modulo(
+                ModuloExpr::cast(syntax).expect("modulo expr should cast"),
+            )),
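+            // The remaining unary, binary, and postfix expression kinds are
+            // handled identically: delegate to the member's `cast` and wrap
+            // the result in the corresponding variant.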
+            SyntaxKind::ExponentiationExprNode => Some(Self::Exponentiation(
+                ExponentiationExpr::cast(syntax).expect("exponentiation expr should cast"),
+            )),
+            SyntaxKind::CallExprNode => Some(Self::Call(
+                CallExpr::cast(syntax).expect("call expr should cast"),
+            )),
+            SyntaxKind::IndexExprNode => Some(Self::Index(
+                IndexExpr::cast(syntax).expect("index expr should cast"),
+            )),
+            SyntaxKind::AccessExprNode => Some(Self::Access(
+                AccessExpr::cast(syntax).expect("access expr should cast"),
+            )),
+            _ => None,
+        }
+    }
+
+    /// Gets a reference to the underlying [`SyntaxNode`].
+    pub fn syntax(&self) -> &SyntaxNode {
+        match self {
+            Expr::Literal(element) => element.syntax(),
+            Expr::Name(element) => element.syntax(),
+            Expr::Parenthesized(element) => element.syntax(),
+            Expr::If(element) => element.syntax(),
+            Expr::LogicalNot(element) => element.syntax(),
+            Expr::Negation(element) => element.syntax(),
+            Expr::LogicalOr(element) => element.syntax(),
+            Expr::LogicalAnd(element) => element.syntax(),
+            Expr::Equality(element) => element.syntax(),
+            Expr::Inequality(element) => element.syntax(),
+            Expr::Less(element) => element.syntax(),
+            Expr::LessEqual(element) => element.syntax(),
+            Expr::Greater(element) => element.syntax(),
+            Expr::GreaterEqual(element) => element.syntax(),
+            Expr::Addition(element) => element.syntax(),
+            Expr::Subtraction(element) => element.syntax(),
+            Expr::Multiplication(element) => element.syntax(),
+            Expr::Division(element) => element.syntax(),
+            Expr::Modulo(element) => element.syntax(),
+            Expr::Exponentiation(element) => element.syntax(),
+            Expr::Call(element) => element.syntax(),
+            Expr::Index(element) => element.syntax(),
+            Expr::Access(element) => element.syntax(),
+        }
+    }
+
+    /// Attempts to get a reference to the inner [`LiteralExpr`].
+    ///
+    /// * If `self` is an [`Expr::Literal`], then a reference to the inner
+    ///   [`LiteralExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_literal(&self) -> Option<&LiteralExpr> {
         match self {
-            Self::Literal(expr) => Some(expr),
+            Self::Literal(literal) => Some(literal),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a literal expression.
+    /// Consumes `self` and attempts to return the inner [`LiteralExpr`].
     ///
-    /// - If the value is a literal expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Literal`], then the inner [`LiteralExpr`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_literal(self) -> Option<LiteralExpr> {
         match self {
-            Self::Literal(expr) => Some(expr),
+            Self::Literal(literal) => Some(literal),
             _ => None,
         }
     }
@@ -100,24 +249,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a name reference.
+    /// Attempts to get a reference to the inner [`NameRef`].
     ///
-    /// - If the value is a name reference, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Name`], then a reference to the inner
+    ///   [`NameRef`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_name_ref(&self) -> Option<&NameRef> {
         match self {
-            Self::Name(expr) => Some(expr),
+            Self::Name(name_ref) => Some(name_ref),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a name reference.
+    /// Consumes `self` and attempts to return the inner [`NameRef`].
     ///
-    /// - If the value is a name reference, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Name`], then the inner [`NameRef`] is returned
+    ///   wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_name_ref(self) -> Option<NameRef> {
         match self {
-            Self::Name(expr) => Some(expr),
+            Self::Name(name_ref) => Some(name_ref),
             _ => None,
         }
     }
@@ -134,24 +285,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a parenthesized expression.
+    /// Attempts to get a reference to the inner [`ParenthesizedExpr`].
     ///
-    /// - If the value is a parenthesized expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Parenthesized`], then a reference to the inner
+    ///   [`ParenthesizedExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_parenthesized(&self) -> Option<&ParenthesizedExpr> {
         match self {
-            Self::Parenthesized(expr) => Some(expr),
+            Self::Parenthesized(parenthesized) => Some(parenthesized),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a parenthesized expression.
+    /// Consumes `self` and attempts to return the inner [`ParenthesizedExpr`].
     ///
-    /// - If the value is a parenthesized expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Parenthesized`], then the inner
+    ///   [`ParenthesizedExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_parenthesized(self) -> Option<ParenthesizedExpr> {
         match self {
-            Self::Parenthesized(expr) => Some(expr),
+            Self::Parenthesized(parenthesized) => Some(parenthesized),
             _ => None,
         }
     }
@@ -168,24 +321,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference an `if` expression.
+    /// Attempts to get a reference to the inner [`IfExpr`].
    ///
-    /// - If the value is an `if` expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::If`], then a reference to the inner [`IfExpr`]
+    ///   is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_if(&self) -> Option<&IfExpr> {
         match self {
-            Self::If(expr) => Some(expr),
+            Self::If(r#if) => Some(r#if),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return an `if` expression.
+    /// Consumes `self` and attempts to return the inner [`IfExpr`].
    ///
-    /// - If the value is an `if` expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::If`], then the inner [`IfExpr`] is returned
+    ///   wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
    pub fn into_if(self) -> Option<IfExpr> {
        match self {
-            Self::If(expr) => Some(expr),
+            Self::If(r#if) => Some(r#if),
            _ => None,
        }
    }
@@ -202,24 +357,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a logical `not` expression.
+    /// Attempts to get a reference to the inner [`LogicalNotExpr`].
     ///
-    /// - If the value is a logical `not` expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::LogicalNot`], then a reference to the inner
+    ///   [`LogicalNotExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_logical_not(&self) -> Option<&LogicalNotExpr> {
         match self {
-            Self::LogicalNot(expr) => Some(expr),
+            Self::LogicalNot(logical_not) => Some(logical_not),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a logical `not` expression.
+    /// Consumes `self` and attempts to return the inner [`LogicalNotExpr`].
     ///
-    /// - If the value is a logical `not` expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::LogicalNot`], then the inner [`LogicalNotExpr`]
+    ///   is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
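+    ///
+    /// # Examples
+    ///
+    /// A sketch of peeling off a negation (`expr` is assumed to be an
+    /// [`Expr`]):
+    ///
+    /// ```ignore
+    /// if let Some(logical_not) = expr.into_logical_not() {
+    ///     // operate on the inner `LogicalNotExpr`...
+    /// }
+    /// ```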
 pub fn into_logical_not(self) -> Option<LogicalNotExpr> {
         match self {
-            Self::LogicalNot(expr) => Some(expr),
+            Self::LogicalNot(logical_not) => Some(logical_not),
             _ => None,
         }
     }
@@ -236,24 +393,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a negation expression.
+    /// Attempts to get a reference to the inner [`NegationExpr`].
     ///
-    /// - If the value is a negation expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Negation`], then a reference to the inner
+    ///   [`NegationExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_negation(&self) -> Option<&NegationExpr> {
         match self {
-            Self::Negation(expr) => Some(expr),
+            Self::Negation(negation) => Some(negation),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a negation expression.
+    /// Consumes `self` and attempts to return the inner [`NegationExpr`].
     ///
-    /// - If the value is a negation expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Negation`], then the inner [`NegationExpr`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_negation(self) -> Option<NegationExpr> {
         match self {
-            Self::Negation(expr) => Some(expr),
+            Self::Negation(negation) => Some(negation),
             _ => None,
         }
     }
@@ -270,24 +429,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a logical `or` expression.
+    /// Attempts to get a reference to the inner [`LogicalOrExpr`].
     ///
-    /// - If the value is a logical `or` expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::LogicalOr`], then a reference to the inner
+    ///   [`LogicalOrExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_logical_or(&self) -> Option<&LogicalOrExpr> {
         match self {
-            Self::LogicalOr(expr) => Some(expr),
+            Self::LogicalOr(logical_or) => Some(logical_or),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a logical `or` expression.
+    /// Consumes `self` and attempts to return the inner [`LogicalOrExpr`].
     ///
-    /// - If the value is a logical `or` expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::LogicalOr`], then the inner [`LogicalOrExpr`]
+    ///   is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_logical_or(self) -> Option<LogicalOrExpr> {
         match self {
-            Self::LogicalOr(expr) => Some(expr),
+            Self::LogicalOr(logical_or) => Some(logical_or),
             _ => None,
         }
     }
@@ -304,24 +465,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a logical `and` expression.
+    /// Attempts to get a reference to the inner [`LogicalAndExpr`].
     ///
-    /// - If the value is a logical `and` expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::LogicalAnd`], then a reference to the inner
+    ///   [`LogicalAndExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_logical_and(&self) -> Option<&LogicalAndExpr> {
         match self {
-            Self::LogicalAnd(expr) => Some(expr),
+            Self::LogicalAnd(logical_and) => Some(logical_and),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a logical `and` expression.
+    /// Consumes `self` and attempts to return the inner [`LogicalAndExpr`].
     ///
-    /// - If the value is a logical `and` expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::LogicalAnd`], then the inner [`LogicalAndExpr`]
+    ///   is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
 pub fn into_logical_and(self) -> Option<LogicalAndExpr> {
         match self {
-            Self::LogicalAnd(expr) => Some(expr),
+            Self::LogicalAnd(logical_and) => Some(logical_and),
             _ => None,
         }
     }
@@ -338,24 +501,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference an equality expression.
+    /// Attempts to get a reference to the inner [`EqualityExpr`].
     ///
-    /// - If the value is an equality expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Equality`], then a reference to the inner
+    ///   [`EqualityExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_equality(&self) -> Option<&EqualityExpr> {
         match self {
-            Self::Equality(expr) => Some(expr),
+            Self::Equality(equality) => Some(equality),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return an equality expression.
+    /// Consumes `self` and attempts to return the inner [`EqualityExpr`].
     ///
-    /// - If the value is an equality expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Equality`], then the inner [`EqualityExpr`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_equality(self) -> Option<EqualityExpr> {
         match self {
-            Self::Equality(expr) => Some(expr),
+            Self::Equality(equality) => Some(equality),
             _ => None,
         }
     }
@@ -372,24 +537,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference an inequality expression.
+    /// Attempts to get a reference to the inner [`InequalityExpr`].
     ///
-    /// - If the value is an inequality expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Inequality`], then a reference to the inner
+    ///   [`InequalityExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_inequality(&self) -> Option<&InequalityExpr> {
         match self {
-            Self::Inequality(expr) => Some(expr),
+            Self::Inequality(inequality) => Some(inequality),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return an inequality expression.
+    /// Consumes `self` and attempts to return the inner [`InequalityExpr`].
     ///
-    /// - If the value is an inequality expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Inequality`], then the inner [`InequalityExpr`]
+    ///   is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_inequality(self) -> Option<InequalityExpr> {
         match self {
-            Self::Inequality(expr) => Some(expr),
+            Self::Inequality(inequality) => Some(inequality),
             _ => None,
         }
     }
@@ -406,24 +573,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a "less than" expression.
+    /// Attempts to get a reference to the inner [`LessExpr`].
     ///
-    /// - If the value is a "less than" expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Less`], then a reference to the inner
+    ///   [`LessExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_less(&self) -> Option<&LessExpr> {
         match self {
-            Self::Less(expr) => Some(expr),
+            Self::Less(less) => Some(less),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a "less than" expression.
+    /// Consumes `self` and attempts to return the inner [`LessExpr`].
     ///
-    /// - If the value is a "less than" expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Less`], then the inner [`LessExpr`] is returned
+    ///   wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
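+    ///
+    /// # Examples
+    ///
+    /// Comparisons can also be tested without a full `match`; a sketch,
+    /// assuming `expr` is an [`Expr`]:
+    ///
+    /// ```ignore
+    /// if expr.as_less().is_some() || expr.as_less_equal().is_some() {
+    ///     // `expr` is a `<` or `<=` comparison
+    /// }
+    /// ```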
 pub fn into_less(self) -> Option<LessExpr> {
         match self {
-            Self::Less(expr) => Some(expr),
+            Self::Less(less) => Some(less),
             _ => None,
         }
     }
@@ -440,27 +609,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a "less than or equal to" expression.
+    /// Attempts to get a reference to the inner [`LessEqualExpr`].
     ///
-    /// - If the value is a "less than or equal to" expression, `Some()` is
-    ///   returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::LessEqual`], then a reference to the inner
+    ///   [`LessEqualExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_less_equal(&self) -> Option<&LessEqualExpr> {
         match self {
-            Self::LessEqual(expr) => Some(expr),
+            Self::LessEqual(less_equal) => Some(less_equal),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a "less than or equal to"
-    /// expression.
+    /// Consumes `self` and attempts to return the inner [`LessEqualExpr`].
     ///
-    /// - If the value is a "less than or equal to" expression, `Some()` is
-    ///   returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::LessEqual`], then the inner [`LessEqualExpr`]
+    ///   is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_less_equal(self) -> Option<LessEqualExpr> {
         match self {
-            Self::LessEqual(expr) => Some(expr),
+            Self::LessEqual(less_equal) => Some(less_equal),
             _ => None,
         }
     }
@@ -477,24 +645,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a "greater than" expression.
+    /// Attempts to get a reference to the inner [`GreaterExpr`].
     ///
-    /// - If the value is a "greater than" expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Greater`], then a reference to the inner
+    ///   [`GreaterExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_greater(&self) -> Option<&GreaterExpr> {
         match self {
-            Self::Greater(expr) => Some(expr),
+            Self::Greater(greater) => Some(greater),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a "greater than" expression.
+    /// Consumes `self` and attempts to return the inner [`GreaterExpr`].
     ///
-    /// - If the value is a "greater than" expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Greater`], then the inner [`GreaterExpr`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_greater(self) -> Option<GreaterExpr> {
         match self {
-            Self::Greater(expr) => Some(expr),
+            Self::Greater(greater) => Some(greater),
             _ => None,
         }
     }
@@ -511,27 +681,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a "greater than or equal to" expression.
+    /// Attempts to get a reference to the inner [`GreaterEqualExpr`].
     ///
-    /// - If the value is a "greater than or equal to" expression, `Some()` is
-    ///   returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::GreaterEqual`], then a reference to the inner
+    ///   [`GreaterEqualExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_greater_equal(&self) -> Option<&GreaterEqualExpr> {
         match self {
-            Self::GreaterEqual(expr) => Some(expr),
+            Self::GreaterEqual(greater_equal) => Some(greater_equal),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a "greater than or equal to"
-    /// expression.
+    /// Consumes `self` and attempts to return the inner [`GreaterEqualExpr`].
     ///
-    /// - If the value is a "greater than or equal to" expression, `Some()` is
-    ///   returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::GreaterEqual`], then the inner
+    ///   [`GreaterEqualExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_greater_equal(self) -> Option<GreaterEqualExpr> {
         match self {
-            Self::GreaterEqual(expr) => Some(expr),
+            Self::GreaterEqual(greater_equal) => Some(greater_equal),
             _ => None,
         }
     }
@@ -548,24 +717,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference an addition expression.
+    /// Attempts to get a reference to the inner [`AdditionExpr`].
     ///
-    /// - If the value is an addition expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Addition`], then a reference to the inner
+    ///   [`AdditionExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_addition(&self) -> Option<&AdditionExpr> {
         match self {
-            Self::Addition(expr) => Some(expr),
+            Self::Addition(addition) => Some(addition),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return an addition expression.
+    /// Consumes `self` and attempts to return the inner [`AdditionExpr`].
     ///
-    /// - If the value is an addition expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Addition`], then the inner [`AdditionExpr`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_addition(self) -> Option<AdditionExpr> {
         match self {
-            Self::Addition(expr) => Some(expr),
+            Self::Addition(addition) => Some(addition),
             _ => None,
         }
     }
@@ -582,24 +753,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a subtraction expression.
+    /// Attempts to get a reference to the inner [`SubtractionExpr`].
     ///
-    /// - If the value is a subtraction expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Subtraction`], then a reference to the inner
+    ///   [`SubtractionExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_subtraction(&self) -> Option<&SubtractionExpr> {
         match self {
-            Self::Subtraction(expr) => Some(expr),
+            Self::Subtraction(subtraction) => Some(subtraction),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a subtraction expression.
+    /// Consumes `self` and attempts to return the inner [`SubtractionExpr`].
     ///
-    /// - If the value is a subtraction expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Subtraction`], then the inner
+    ///   [`SubtractionExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
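+    ///
+    /// # Examples
+    ///
+    /// A sketch of narrowing a binary arithmetic node to its variant type,
+    /// assuming `expr` is an [`Expr`]:
+    ///
+    /// ```ignore
+    /// match expr {
+    ///     Expr::Addition(add) => { /* use `add` */ }
+    ///     Expr::Subtraction(sub) => { /* use `sub` */ }
+    ///     _ => {}
+    /// }
+    /// ```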
     pub fn into_subtraction(self) -> Option<SubtractionExpr> {
         match self {
-            Self::Subtraction(expr) => Some(expr),
+            Self::Subtraction(subtraction) => Some(subtraction),
             _ => None,
         }
     }
@@ -616,24 +789,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a multiplication expression.
+    /// Attempts to get a reference to the inner [`MultiplicationExpr`].
     ///
-    /// - If the value is a multiplication expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Multiplication`], then a reference to the inner
+    ///   [`MultiplicationExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_multiplication(&self) -> Option<&MultiplicationExpr> {
         match self {
-            Self::Multiplication(expr) => Some(expr),
+            Self::Multiplication(multiplication) => Some(multiplication),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a multiplication expression.
+    /// Consumes `self` and attempts to return the inner [`MultiplicationExpr`].
     ///
-    /// - If the value is a multiplication expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Multiplication`], then the inner
+    ///   [`MultiplicationExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_multiplication(self) -> Option<MultiplicationExpr> {
         match self {
-            Self::Multiplication(expr) => Some(expr),
+            Self::Multiplication(multiplication) => Some(multiplication),
             _ => None,
         }
     }
@@ -650,24 +825,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a division expression.
+    /// Attempts to get a reference to the inner [`DivisionExpr`].
     ///
-    /// - If the value is a division expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Division`], then a reference to the inner
+    ///   [`DivisionExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_division(&self) -> Option<&DivisionExpr> {
         match self {
-            Self::Division(expr) => Some(expr),
+            Self::Division(division) => Some(division),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a division expression.
+    /// Consumes `self` and attempts to return the inner [`DivisionExpr`].
     ///
-    /// - If the value is a division expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Division`], then the inner [`DivisionExpr`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_division(self) -> Option<DivisionExpr> {
         match self {
-            Self::Division(expr) => Some(expr),
+            Self::Division(division) => Some(division),
             _ => None,
         }
     }
@@ -684,6 +861,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a modulo expression.
+    /// Attempts to get a reference to the inner [`ModuloExpr`].
     ///
-    /// - If the value is a modulo expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Modulo`], then a reference to the inner
+    ///   [`ModuloExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_modulo(&self) -> Option<&ModuloExpr> {
         match self {
-            Self::Modulo(expr) => Some(expr),
+            Self::Modulo(modulo) => Some(modulo),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a modulo expression.
+    /// Consumes `self` and attempts to return the inner [`ModuloExpr`].
     ///
-    /// - If the value is a modulo expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Modulo`], then the inner [`ModuloExpr`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_modulo(self) -> Option<ModuloExpr> {
         match self {
-            Self::Modulo(expr) => Some(expr),
+            Self::Modulo(modulo) => Some(modulo),
             _ => None,
         }
     }
@@ -718,24 +897,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference an exponentiation expression.
+    /// Attempts to get a reference to the inner [`ExponentiationExpr`].
     ///
-    /// - If the value is an exponentiation expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Exponentiation`], then a reference to the inner
+    ///   [`ExponentiationExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_exponentiation(&self) -> Option<&ExponentiationExpr> {
         match self {
-            Self::Exponentiation(expr) => Some(expr),
+            Self::Exponentiation(exponentiation) => Some(exponentiation),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return an exponentiation expression.
+    /// Consumes `self` and attempts to return the inner [`ExponentiationExpr`].
     ///
-    /// - If the value is an exponentiation expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Exponentiation`], then the inner
+    ///   [`ExponentiationExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_exponentiation(self) -> Option<ExponentiationExpr> {
         match self {
-            Self::Exponentiation(expr) => Some(expr),
+            Self::Exponentiation(exponentiation) => Some(exponentiation),
             _ => None,
         }
     }
@@ -752,24 +933,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference a call expression.
+    /// Attempts to get a reference to the inner [`CallExpr`].
     ///
-    /// - If the value is a call expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Call`], then a reference to the inner
+    ///   [`CallExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_call(&self) -> Option<&CallExpr> {
         match self {
-            Self::Call(expr) => Some(expr),
+            Self::Call(call) => Some(call),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return a call expression.
+    /// Consumes `self` and attempts to return the inner [`CallExpr`].
    ///
-    /// - If the value is a call expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Call`], then the inner [`CallExpr`] is returned
+    ///   wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_call(self) -> Option<CallExpr> {
         match self {
-            Self::Call(expr) => Some(expr),
+            Self::Call(call) => Some(call),
             _ => None,
         }
     }
@@ -786,24 +969,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference an index expression.
+    /// Attempts to get a reference to the inner [`IndexExpr`].
     ///
-    /// - If the value is an index expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Index`], then a reference to the inner
+    ///   [`IndexExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_index(&self) -> Option<&IndexExpr> {
         match self {
-            Self::Index(expr) => Some(expr),
+            Self::Index(index) => Some(index),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return an index expression.
+    /// Consumes `self` and attempts to return the inner [`IndexExpr`].
     ///
-    /// - If the value is an index expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Index`], then the inner [`IndexExpr`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_index(self) -> Option<IndexExpr> {
         match self {
-            Self::Index(expr) => Some(expr),
+            Self::Index(index) => Some(index),
             _ => None,
         }
     }
@@ -820,24 +1005,26 @@ impl Expr {
         }
     }
 
-    /// Attempts to reference an access expression.
+    /// Attempts to get a reference to the inner [`AccessExpr`].
     ///
-    /// - If the value is an access expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Access`], then a reference to the inner
+    ///   [`AccessExpr`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_access(&self) -> Option<&AccessExpr> {
         match self {
-            Self::Access(expr) => Some(expr),
+            Self::Access(access) => Some(access),
            _ => None,
        }
    }
 
-    /// Consumes `self` and attempts to return an access expression.
+    /// Consumes `self` and attempts to return the inner [`AccessExpr`].
     ///
-    /// - If the value is an access expression, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is an [`Expr::Access`], then the inner [`AccessExpr`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_access(self) -> Option<AccessExpr> {
         match self {
-            Self::Access(expr) => Some(expr),
+            Self::Access(access) => Some(access),
             _ => None,
         }
     }
@@ -854,6 +1041,24 @@ impl Expr {
         }
     }
 
+    /// Finds the first child that can be cast to an [`Expr`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::child`] without requiring [`Expr`] to implement
+    /// the `AstNode` trait.
+    pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+        syntax.children().find_map(Self::cast)
+    }
+
+    /// Finds all children that can be cast to an [`Expr`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::children`] without requiring [`Expr`] to
+    /// implement the `AstNode` trait.
+    pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = Expr> {
+        syntax.children().filter_map(Self::cast)
+    }
+
     /// Determines if the expression is an empty array literal or any number of
     /// parenthesized expressions that terminate with an empty array literal.
     pub fn is_empty_array_literal(&self) -> bool {
@@ -1005,25 +1210,116 @@ pub enum LiteralExpr {
 }
 
 impl LiteralExpr {
-    /// Attempts to reference the expression as a literal boolean.
-    ///
-    /// - If the value is a literal boolean, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`LiteralExpr`].
+    pub fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(
+            kind,
+            SyntaxKind::LiteralBooleanNode
+                | SyntaxKind::LiteralIntegerNode
+                | SyntaxKind::LiteralFloatNode
+                | SyntaxKind::LiteralStringNode
+                | SyntaxKind::LiteralArrayNode
+                | SyntaxKind::LiteralPairNode
+                | SyntaxKind::LiteralMapNode
+                | SyntaxKind::LiteralObjectNode
+                | SyntaxKind::LiteralStructNode
+                | SyntaxKind::LiteralNoneNode
+                | SyntaxKind::LiteralHintsNode
+                | SyntaxKind::LiteralInputNode
+                | SyntaxKind::LiteralOutputNode
        )
+    }
+
+    /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+    /// within the [`LiteralExpr`].
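+    ///
+    /// # Examples
+    ///
+    /// A sketch of recognizing literal nodes while walking a tree (`node` is
+    /// assumed to be a [`SyntaxNode`]):
+    ///
+    /// ```ignore
+    /// if let Some(literal) = LiteralExpr::cast(node) {
+    ///     if let Some(boolean) = literal.as_boolean() {
+    ///         // `boolean` is a `LiteralBoolean`...
+    ///     }
+    /// }
+    /// ```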
+    pub fn cast(syntax: SyntaxNode) -> Option<Self> {
+        match syntax.kind() {
+            SyntaxKind::LiteralBooleanNode => Some(Self::Boolean(
+                LiteralBoolean::cast(syntax).expect("literal boolean to cast"),
+            )),
+            SyntaxKind::LiteralIntegerNode => Some(Self::Integer(
+                LiteralInteger::cast(syntax).expect("literal integer to cast"),
+            )),
+            SyntaxKind::LiteralFloatNode => Some(Self::Float(
+                LiteralFloat::cast(syntax).expect("literal float to cast"),
+            )),
+            SyntaxKind::LiteralStringNode => Some(Self::String(
+                LiteralString::cast(syntax).expect("literal string to cast"),
+            )),
+            SyntaxKind::LiteralArrayNode => Some(Self::Array(
+                LiteralArray::cast(syntax).expect("literal array to cast"),
+            )),
+            SyntaxKind::LiteralPairNode => Some(Self::Pair(
+                LiteralPair::cast(syntax).expect("literal pair to cast"),
+            )),
+            SyntaxKind::LiteralMapNode => Some(Self::Map(
+                LiteralMap::cast(syntax).expect("literal map to cast"),
+            )),
+            SyntaxKind::LiteralObjectNode => Some(Self::Object(
+                LiteralObject::cast(syntax).expect("literal object to cast"),
+            )),
+            SyntaxKind::LiteralStructNode => Some(Self::Struct(
+                LiteralStruct::cast(syntax).expect("literal struct to cast"),
+            )),
+            SyntaxKind::LiteralNoneNode => Some(Self::None(
+                LiteralNone::cast(syntax).expect("literal none to cast"),
+            )),
+            SyntaxKind::LiteralHintsNode => Some(Self::Hints(
+                LiteralHints::cast(syntax).expect("literal hints to cast"),
+            )),
+            SyntaxKind::LiteralInputNode => Some(Self::Input(
+                LiteralInput::cast(syntax).expect("literal input to cast"),
+            )),
+            SyntaxKind::LiteralOutputNode => Some(Self::Output(
+                LiteralOutput::cast(syntax).expect("literal output to cast"),
+            )),
+            _ => None,
+        }
+    }
+
+    /// Gets a reference to the underlying [`SyntaxNode`].
+    pub fn syntax(&self) -> &SyntaxNode {
+        match self {
+            Self::Boolean(element) => element.syntax(),
+            Self::Integer(element) => element.syntax(),
+            Self::Float(element) => element.syntax(),
+            Self::String(element) => element.syntax(),
+            Self::Array(element) => element.syntax(),
+            Self::Pair(element) => element.syntax(),
+            Self::Map(element) => element.syntax(),
+            Self::Object(element) => element.syntax(),
+            Self::Struct(element) => element.syntax(),
+            Self::None(element) => element.syntax(),
+            Self::Hints(element) => element.syntax(),
+            Self::Input(element) => element.syntax(),
+            Self::Output(element) => element.syntax(),
        }
+    }
+
+    /// Attempts to get a reference to the inner [`LiteralBoolean`].
+    ///
+    /// * If `self` is a [`LiteralExpr::Boolean`], then a reference to the inner
+    ///   [`LiteralBoolean`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_boolean(&self) -> Option<&LiteralBoolean> {
         match self {
-            Self::Boolean(literal) => Some(literal),
+            Self::Boolean(boolean) => Some(boolean),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return the expression as a literal
-    /// boolean.
+    /// Consumes `self` and attempts to return the inner [`LiteralBoolean`].
     ///
-    /// - If the value is a literal boolean, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Boolean`], then the inner
+    ///   [`LiteralBoolean`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_boolean(self) -> Option<LiteralBoolean> {
         match self {
-            Self::Boolean(literal) => Some(literal),
+            Self::Boolean(boolean) => Some(boolean),
             _ => None,
         }
     }
@@ -1040,25 +1336,26 @@ impl LiteralExpr {
         }
     }
 
-    /// Attempts to reference the expression as a literal integer.
+    /// Attempts to get a reference to the inner [`LiteralInteger`].
     ///
-    /// - If the value is a literal integer, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Integer`], then a reference to the inner
+    ///   [`LiteralInteger`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_integer(&self) -> Option<&LiteralInteger> {
         match self {
-            Self::Integer(literal) => Some(literal),
+            Self::Integer(integer) => Some(integer),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return the expression as a literal
-    /// integer.
+    /// Consumes `self` and attempts to return the inner [`LiteralInteger`].
     ///
-    /// - If the value is a literal integer, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Integer`], then the inner
+    ///   [`LiteralInteger`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_integer(self) -> Option<LiteralInteger> {
         match self {
-            Self::Integer(literal) => Some(literal),
+            Self::Integer(integer) => Some(integer),
             _ => None,
         }
     }
@@ -1075,25 +1372,26 @@ impl LiteralExpr {
         }
     }
 
-    /// Attempts to reference the expression as a literal float.
+    /// Attempts to get a reference to the inner [`LiteralFloat`].
     ///
-    /// - If the value is a literal float, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Float`], then a reference to the inner
+    ///   [`LiteralFloat`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_float(&self) -> Option<&LiteralFloat> {
         match self {
-            Self::Float(literal) => Some(literal),
+            Self::Float(float) => Some(float),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return the expression as a literal
-    /// float.
+    /// Consumes `self` and attempts to return the inner [`LiteralFloat`].
     ///
-    /// - If the value is a literal float, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Float`], then the inner [`LiteralFloat`]
+    ///   is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_float(self) -> Option<LiteralFloat> {
         match self {
-            Self::Float(literal) => Some(literal),
+            Self::Float(float) => Some(float),
             _ => None,
         }
     }
@@ -1110,25 +1408,26 @@ impl LiteralExpr {
         }
     }
 
-    /// Attempts to reference the expression as a literal string.
+    /// Attempts to get a reference to the inner [`LiteralString`].
     ///
-    /// - If the value is a literal string, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::String`], then a reference to the inner
+    ///   [`LiteralString`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_string(&self) -> Option<&LiteralString> {
         match self {
-            Self::String(literal) => Some(literal),
+            Self::String(string) => Some(string),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return the expression as a literal
-    /// string.
+    /// Consumes `self` and attempts to return the inner [`LiteralString`].
     ///
-    /// - If the value is a literal string, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::String`], then the inner
+    ///   [`LiteralString`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_string(self) -> Option<LiteralString> {
         match self {
-            Self::String(literal) => Some(literal),
+            Self::String(string) => Some(string),
             _ => None,
         }
     }
@@ -1145,25 +1444,26 @@ impl LiteralExpr {
         }
     }
 
-    /// Attempts to reference the expression as a literal array.
+    /// Attempts to get a reference to the inner [`LiteralArray`].
     ///
-    /// - If the value is a literal array, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Array`], then a reference to the inner
+    ///   [`LiteralArray`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_array(&self) -> Option<&LiteralArray> {
         match self {
-            Self::Array(literal) => Some(literal),
+            Self::Array(array) => Some(array),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return the expression as a literal
-    /// array.
+    /// Consumes `self` and attempts to return the inner [`LiteralArray`].
     ///
-    /// - If the value is a literal array, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Array`], then the inner [`LiteralArray`]
+    ///   is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_array(self) -> Option<LiteralArray> {
         match self {
-            Self::Array(literal) => Some(literal),
+            Self::Array(array) => Some(array),
             _ => None,
         }
     }
@@ -1180,24 +1480,26 @@ impl LiteralExpr {
         }
     }
 
-    /// Attempts to reference the expression as a literal pair.
+    /// Attempts to get a reference to the inner [`LiteralPair`].
     ///
-    /// - If the value is a literal pair, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Pair`], then a reference to the inner
+    ///   [`LiteralPair`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_pair(&self) -> Option<&LiteralPair> {
         match self {
-            Self::Pair(literal) => Some(literal),
+            Self::Pair(pair) => Some(pair),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return the expression as a literal pair.
+    /// Consumes `self` and attempts to return the inner [`LiteralPair`].
     ///
-    /// - If the value is a literal pair, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Pair`], then the inner [`LiteralPair`]
+    ///   is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_pair(self) -> Option<LiteralPair> {
         match self {
-            Self::Pair(literal) => Some(literal),
+            Self::Pair(pair) => Some(pair),
             _ => None,
         }
     }
@@ -1214,24 +1516,26 @@ impl LiteralExpr {
         }
     }
 
-    /// Attempts to reference the expression as a literal map.
+    /// Attempts to get a reference to the inner [`LiteralMap`].
     ///
-    /// - If the value is a literal map, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Map`], then a reference to the inner
+    ///   [`LiteralMap`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_map(&self) -> Option<&LiteralMap> {
         match self {
-            Self::Map(literal) => Some(literal),
+            Self::Map(map) => Some(map),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return the expression as a literal map.
+    /// Consumes `self` and attempts to return the inner [`LiteralMap`].
     ///
-    /// - If the value is a literal map, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Map`], then the inner [`LiteralMap`] is
+    ///   returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_map(self) -> Option<LiteralMap> {
         match self {
-            Self::Map(literal) => Some(literal),
+            Self::Map(map) => Some(map),
             _ => None,
         }
     }
@@ -1248,25 +1552,26 @@ impl LiteralExpr {
         }
     }
 
-    /// Attempts to reference the expression as a literal object.
+    /// Attempts to get a reference to the inner [`LiteralObject`].
     ///
-    /// - If the value is a literal object, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Object`], then a reference to the inner
+    ///   [`LiteralObject`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_object(&self) -> Option<&LiteralObject> {
         match self {
-            Self::Object(literal) => Some(literal),
+            Self::Object(object) => Some(object),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return the expression as a literal
-    /// object.
+    /// Consumes `self` and attempts to return the inner [`LiteralObject`].
     ///
-    /// - If the value is a literal object, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Object`], then the inner
+    ///   [`LiteralObject`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_object(self) -> Option<LiteralObject> {
         match self {
-            Self::Object(literal) => Some(literal),
+            Self::Object(object) => Some(object),
             _ => None,
         }
     }
@@ -1283,25 +1588,26 @@ impl LiteralExpr {
         }
     }
 
-    /// Attempts to reference the expression as a literal struct.
+    /// Attempts to get a reference to the inner [`LiteralStruct`].
     ///
-    /// - If the value is a literal struct, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Struct`], then a reference to the inner
+    ///   [`LiteralStruct`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_struct(&self) -> Option<&LiteralStruct> {
         match self {
-            Self::Struct(literal) => Some(literal),
+            Self::Struct(r#struct) => Some(r#struct),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return the expression as a literal
-    /// struct.
+    /// Consumes `self` and attempts to return the inner [`LiteralStruct`].
     ///
-    /// - If the value is a literal struct, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::Struct`], then the inner
+    ///   [`LiteralStruct`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_struct(self) -> Option<LiteralStruct> {
         match self {
-            Self::Struct(literal) => Some(literal),
+            Self::Struct(r#struct) => Some(r#struct),
             _ => None,
         }
     }
@@ -1318,25 +1624,26 @@ impl LiteralExpr {
         }
     }
 
-    /// Attempts to reference the expression as a literal `None`.
+    /// Attempts to get a reference to the inner [`LiteralNone`].
     ///
-    /// - If the value is a literal `None`, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::None`], then a reference to the inner
+    ///   [`LiteralNone`] is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn as_none(&self) -> Option<&LiteralNone> {
         match self {
-            Self::None(literal) => Some(literal),
+            Self::None(none) => Some(none),
             _ => None,
         }
     }
 
-    /// Consumes `self` and attempts to return the expression as a literal
-    /// `None`.
+    /// Consumes `self` and attempts to return the inner [`LiteralNone`].
     ///
-    /// - If the value is a literal `None`, `Some()` is returned.
-    /// - Else, `None` is returned.
+    /// * If `self` is a [`LiteralExpr::None`], then the inner [`LiteralNone`]
+    ///   is returned wrapped in [`Some`].
+    /// * Else, [`None`] is returned.
     pub fn into_none(self) -> Option<LiteralNone> {
         match self {
-            Self::None(literal) => Some(literal),
+            Self::None(none) => Some(none),
             _ => None,
         }
     }
@@ -1353,25 +1660,26 @@ impl LiteralExpr {
         }
     }
 
-    /// Attempts to reference the expression as a literal `hints`.
+    /// Attempts to get a reference to the inner [`LiteralHints`].
     ///
-    /// - If the value is a literal `hints`, `Some()` is returned.
-    /// - Else, `None` is returned.
+ /// * If `self` is a [`LiteralExpr::Hints`], then a reference to the inner + /// [`LiteralHints`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. pub fn as_hints(&self) -> Option<&LiteralHints> { match self { - Self::Hints(literal) => Some(literal), + Self::Hints(hints) => Some(hints), _ => None, } } - /// Consumes `self` and attempts to return the expression as a literal - /// `hints`. + /// Consumes `self` and attempts to return the inner [`LiteralHints`]. /// - /// - If the value is a literal `hints`, `Some()` is returned. - /// - Else, `None` is returned. + /// * If `self` is a [`LiteralExpr::Hints`], then the inner [`LiteralHints`] + /// is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. pub fn into_hints(self) -> Option { match self { - Self::Hints(literal) => Some(literal), + Self::Hints(hints) => Some(hints), _ => None, } } @@ -1388,25 +1696,26 @@ impl LiteralExpr { } } - /// Attempts to reference the expression as a literal `input`. - - /// - If the value is a literal `input`, `Some()` is returned. - /// - Else, `None` is returned. + /// Attempts to get a reference to the inner [`LiteralInput`]. + /// + /// * If `self` is a [`LiteralExpr::Input`], then a reference to the inner + /// [`LiteralInput`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. pub fn as_input(&self) -> Option<&LiteralInput> { match self { - Self::Input(literal) => Some(literal), + Self::Input(input) => Some(input), _ => None, } } - /// Consumes `self` and attempts to return the expression as a literal - /// `input`. - - /// - If the value is a literal `input`, `Some()` is returned. - /// - Else, `None` is returned. + /// Consumes `self` and attempts to return the inner [`LiteralInput`]. + /// + /// * If `self` is a [`LiteralExpr::Input`], then the inner [`LiteralInput`] + /// is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. pub fn into_input(self) -> Option { match self { - Self::Input(literal) => Some(literal), + Self::Input(input) => Some(input), _ => None, } } @@ -1423,25 +1732,26 @@ impl LiteralExpr { } } - /// Attempts to reference the expression as a literal `output`. - - /// - If the value is a literal `output`, `Some()` is returned. - /// - Else, `None` is returned. + /// Attempts to get a reference to the inner [`LiteralOutput`]. + /// + /// * If `self` is a [`LiteralExpr::Output`], then a reference to the inner + /// [`LiteralOutput`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. pub fn as_output(&self) -> Option<&LiteralOutput> { match self { - Self::Output(literal) => Some(literal), + Self::Output(output) => Some(output), _ => None, } } - /// Consumes `self` and attempts to return the expression as a literal - /// `output`. - - /// - If the value is a literal `output`, `Some()` is returned. - /// - Else, `None` is returned. + /// Consumes `self` and attempts to return the inner [`LiteralOutput`]. + /// + /// * If `self` is a [`LiteralExpr::Output`], then the inner + /// [`LiteralOutput`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. 
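+ ///
+ /// A hedged usage sketch (the `expr` binding is hypothetical and not part
+ /// of this change):
+ ///
+ /// ```ignore
+ /// // `expr` is assumed to be a `LiteralExpr` obtained elsewhere.
+ /// if let Some(output) = expr.into_output() {
+ ///     // `output` is the inner `LiteralOutput`; `expr` has been consumed.
+ /// }
+ /// ```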
pub fn into_output(self) -> Option { match self { - Self::Output(literal) => Some(literal), + Self::Output(output) => Some(output), _ => None, } } @@ -1457,71 +1767,23 @@ impl LiteralExpr { _ => panic!("not a literal `output`"), } } -} - -impl AstNode for LiteralExpr { - type Language = WorkflowDescriptionLanguage; - - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!( - kind, - SyntaxKind::LiteralBooleanNode - | SyntaxKind::LiteralIntegerNode - | SyntaxKind::LiteralFloatNode - | SyntaxKind::LiteralStringNode - | SyntaxKind::LiteralArrayNode - | SyntaxKind::LiteralPairNode - | SyntaxKind::LiteralMapNode - | SyntaxKind::LiteralObjectNode - | SyntaxKind::LiteralStructNode - | SyntaxKind::LiteralNoneNode - | SyntaxKind::LiteralHintsNode - | SyntaxKind::LiteralInputNode - | SyntaxKind::LiteralOutputNode - ) - } - fn cast(syntax: SyntaxNode) -> Option - where - Self: Sized, - { - match syntax.kind() { - SyntaxKind::LiteralBooleanNode => Some(Self::Boolean(LiteralBoolean(syntax))), - SyntaxKind::LiteralIntegerNode => Some(Self::Integer(LiteralInteger(syntax))), - SyntaxKind::LiteralFloatNode => Some(Self::Float(LiteralFloat(syntax))), - SyntaxKind::LiteralStringNode => Some(Self::String(LiteralString(syntax))), - SyntaxKind::LiteralArrayNode => Some(Self::Array(LiteralArray(syntax))), - SyntaxKind::LiteralPairNode => Some(Self::Pair(LiteralPair(syntax))), - SyntaxKind::LiteralMapNode => Some(Self::Map(LiteralMap(syntax))), - SyntaxKind::LiteralObjectNode => Some(Self::Object(LiteralObject(syntax))), - SyntaxKind::LiteralStructNode => Some(Self::Struct(LiteralStruct(syntax))), - SyntaxKind::LiteralNoneNode => Some(Self::None(LiteralNone(syntax))), - SyntaxKind::LiteralHintsNode => Some(Self::Hints(LiteralHints(syntax))), - SyntaxKind::LiteralInputNode => Some(Self::Input(LiteralInput(syntax))), - SyntaxKind::LiteralOutputNode => Some(Self::Output(LiteralOutput(syntax))), - _ => None, - } + /// Finds the first child that can be cast to an [`Expr`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::child`] without requiring [`LiteralExpr`] to + /// implement the `AstNode` trait. + pub fn child(syntax: &SyntaxNode) -> Option { + syntax.children().find_map(Self::cast) } - fn syntax(&self) -> &SyntaxNode { - match self { - Self::Boolean(b) => &b.0, - Self::Integer(i) => &i.0, - Self::Float(f) => &f.0, - Self::String(s) => &s.0, - Self::Array(a) => &a.0, - Self::Pair(p) => &p.0, - Self::Map(m) => &m.0, - Self::Object(o) => &o.0, - Self::Struct(s) => &s.0, - Self::None(n) => &n.0, - Self::Hints(h) => &h.0, - Self::Input(i) => &i.0, - Self::Output(o) => &o.0, - } + /// Finds all children that can be cast to an [`Expr`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::children`] without requiring [`LiteralExpr`] to + /// implement the `AstNode` trait. + pub fn children(syntax: &SyntaxNode) -> impl Iterator { + syntax.children().filter_map(Self::cast) } } @@ -1990,7 +2252,7 @@ impl Placeholder { /// Gets the placeholder expression. pub fn expr(&self) -> Expr { - child(&self.0).expect("placeholder should have an expression") + Expr::child(&self.0).expect("placeholder should have an expression") } } @@ -2033,6 +2295,73 @@ pub enum PlaceholderOption { } impl PlaceholderOption { + /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the + /// underlying members within the [`PlaceholderOption`]. 
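+ ///
+ /// A minimal sketch of the expected behavior (the kinds are taken from the
+ /// implementation below; the assertions themselves are illustrative):
+ ///
+ /// ```ignore
+ /// assert!(PlaceholderOption::can_cast(SyntaxKind::PlaceholderSepOptionNode));
+ /// assert!(!PlaceholderOption::can_cast(SyntaxKind::LiteralStringNode));
+ /// ```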
+ pub fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!( + kind, + SyntaxKind::PlaceholderSepOptionNode + | SyntaxKind::PlaceholderDefaultOptionNode + | SyntaxKind::PlaceholderTrueFalseOptionNode + ) + } + + /// Attempts to cast the [`SyntaxNode`] to any of the underlying members + /// within the [`PlaceholderOption`]. + pub fn cast(syntax: SyntaxNode) -> Option + where + Self: Sized, + { + match syntax.kind() { + SyntaxKind::PlaceholderSepOptionNode => Some(Self::Sep( + SepOption::cast(syntax).expect("separator option to cast"), + )), + SyntaxKind::PlaceholderDefaultOptionNode => Some(Self::Default( + DefaultOption::cast(syntax).expect("default option to cast"), + )), + SyntaxKind::PlaceholderTrueFalseOptionNode => Some(Self::TrueFalse( + TrueFalseOption::cast(syntax).expect("true false option to cast"), + )), + _ => None, + } + } + + /// Gets a reference to the underlying [`SyntaxNode`]. + pub fn syntax(&self) -> &SyntaxNode { + match self { + Self::Sep(element) => element.syntax(), + Self::Default(element) => element.syntax(), + Self::TrueFalse(element) => element.syntax(), + } + } + + /// Attempts to get a reference to the inner [`SepOption`]. + /// + /// * If `self` is a [`PlaceholderOption::Sep`], then a reference to the + /// inner [`SepOption`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_sep(&self) -> Option<&SepOption> { + match self { + Self::Sep(sep) => Some(sep), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`SepOption`]. + /// + /// * If `self` is a [`PlaceholderOption::Sep`], then the inner + /// [`SepOption`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_sep(self) -> Option { + match self { + Self::Sep(sep) => Some(sep), + _ => None, + } + } + /// Unwraps the option into a separator option. /// /// # Panics @@ -2045,6 +2374,30 @@ impl PlaceholderOption { } } + /// Attempts to get a reference to the inner [`DefaultOption`]. + /// + /// * If `self` is a [`PlaceholderOption::Default`], then a reference to the + /// inner [`DefaultOption`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_default(&self) -> Option<&DefaultOption> { + match self { + Self::Default(default) => Some(default), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`DefaultOption`]. + /// + /// * If `self` is a [`PlaceholderOption::Default`], then the inner + /// [`DefaultOption`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_default(self) -> Option { + match self { + Self::Default(default) => Some(default), + _ => None, + } + } + /// Unwraps the option into a default option. /// /// # Panics @@ -2057,6 +2410,30 @@ impl PlaceholderOption { } } + /// Attempts to get a reference to the inner [`TrueFalseOption`]. + /// + /// * If `self` is a [`PlaceholderOption::TrueFalse`], then a reference to + /// the inner [`TrueFalseOption`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_true_false(&self) -> Option<&TrueFalseOption> { + match self { + Self::TrueFalse(true_false) => Some(true_false), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`TrueFalseOption`]. + /// + /// * If `self` is a [`PlaceholderOption::TrueFalse`], then the inner + /// [`TrueFalseOption`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. 
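+ ///
+ /// An illustrative sketch (`option` is a hypothetical `PlaceholderOption`):
+ ///
+ /// ```ignore
+ /// match option.into_true_false() {
+ ///     Some(true_false) => { /* the inner `TrueFalseOption` */ }
+ ///     None => { /* a `sep` or `default` option instead */ }
+ /// }
+ /// ```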
+ pub fn into_true_false(self) -> Option<TrueFalseOption> {
+ match self {
+ Self::TrueFalse(true_false) => Some(true_false),
+ _ => None,
+ }
+ }
+
 /// Unwraps the option into a true/false option.
 ///
 /// # Panics
@@ -2068,6 +2445,24 @@ impl PlaceholderOption {
 _ => panic!("not a true/false option"),
 }
 }
+
+ /// Finds the first child that can be cast to a [`PlaceholderOption`].
+ ///
+ /// This is meant to emulate the functionality of
+ /// [`rowan::ast::support::child`] without requiring [`PlaceholderOption`]
+ /// to implement the `AstNode` trait.
+ pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+ syntax.children().find_map(Self::cast)
+ }
+
+ /// Finds all children that can be cast to a [`PlaceholderOption`].
+ ///
+ /// This is meant to emulate the functionality of
+ /// [`rowan::ast::support::children`] without requiring
+ /// [`PlaceholderOption`] to implement the `AstNode` trait.
+ pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = PlaceholderOption> {
+ syntax.children().filter_map(Self::cast)
+ }
 }

 impl AstNode for PlaceholderOption {
@@ -2259,8 +2654,8 @@ pub struct LiteralArray(SyntaxNode);

 impl LiteralArray {
 /// Gets the elements of the literal array.
- pub fn elements(&self) -> AstChildren<Expr> {
- children(&self.0)
+ pub fn elements(&self) -> impl Iterator<Item = Expr> {
+ Expr::children(&self.0)
 }
 }
@@ -2296,7 +2691,7 @@ pub struct LiteralPair(SyntaxNode);

 impl LiteralPair {
 /// Gets the first and second expressions in the literal pair.
 pub fn exprs(&self) -> (Expr, Expr) {
- let mut children = self.0.children().filter_map(Expr::cast);
+ let mut children = Expr::children(&self.0);
 let first = children
 .next()
 .expect("pair should have a first expression");
@@ -2375,7 +2770,7 @@ pub struct LiteralMapItem(SyntaxNode);

 impl LiteralMapItem {
 /// Gets the key and the value of the item.
 pub fn key_value(&self) -> (Expr, Expr) {
- let mut children = self.0.children().filter_map(Expr::cast);
+ let mut children = Expr::children(&self.0);
 let key = children.next().expect("expected a key expression");
 let value = children.next().expect("expected a value expression");
 (key, value)
@@ -2445,25 +2840,9 @@ impl AstNode for LiteralObject {

 /// Gets the name and value of an object or struct literal item.
 fn name_value(parent: &SyntaxNode) -> (Ident, Expr) {
- let mut children = parent
- .children_with_tokens()
- .filter(|c| Ident::can_cast(c.kind()) || Expr::can_cast(c.kind()));
- let key = Ident::cast(
- children
- .next()
- .expect("expected a key token")
- .into_token()
- .expect("key should be a token"),
- )
- .expect("token should cast to ident");
- let value = Expr::cast(
- children
- .next()
- .expect("there should be a value expression")
- .into_node()
- .expect("value should be a node"),
- )
- .expect("node should cast to an expression");
+ let key = token_child::<Ident>(parent).expect("expected a key token");
+ let value = Expr::child(parent).expect("expected a value expression");
+
 (key, value)
 }
@@ -2657,7 +3036,7 @@ impl LiteralHintsItem {

 /// Gets the expression of the hints item.
 pub fn expr(&self) -> Expr {
- child(&self.0).expect("expected an item expression")
+ Expr::child(&self.0).expect("expected an item expression")
 }
 }
@@ -2739,7 +3118,7 @@ impl LiteralInputItem {

 /// Gets the expression of the input item.
 pub fn expr(&self) -> Expr {
- child(&self.0).expect("expected an item expression")
+ Expr::child(&self.0).expect("expected an item expression")
 }
 }
@@ -2821,7 +3200,7 @@ impl LiteralOutputItem {
 /// Gets the expression of the output item.
pub fn expr(&self) -> Expr { - child(&self.0).expect("expected an item expression") + Expr::child(&self.0).expect("expected an item expression") } } @@ -2893,7 +3272,7 @@ pub struct ParenthesizedExpr(SyntaxNode); impl ParenthesizedExpr { /// Gets the inner expression. pub fn inner(&self) -> Expr { - child(&self.0).expect("expected an inner expression") + Expr::child(&self.0).expect("expected an inner expression") } } @@ -2933,7 +3312,7 @@ impl IfExpr { /// The second expression is the `true` expression. /// The third expression is the `false` expression. pub fn exprs(&self) -> (Expr, Expr, Expr) { - let mut children = self.0.children().filter_map(Expr::cast); + let mut children = Expr::children(&self.0); let conditional = children .next() .expect("should have a conditional expression"); @@ -2978,7 +3357,7 @@ macro_rules! prefix_expression { impl $name { /// Gets the operand expression. pub fn operand(&self) -> Expr { - child(&self.0).expect("expected an operand expression") + Expr::child(&self.0).expect("expected an operand expression") } } @@ -3019,7 +3398,7 @@ macro_rules! infix_expression { impl $name { /// Gets the operands of the expression. pub fn operands(&self) -> (Expr, Expr) { - let mut children = self.0.children().filter_map(Expr::cast); + let mut children = Expr::children(&self.0); let lhs = children.next().expect("expected a lhs expression"); let rhs = children.next().expect("expected a rhs expression"); (lhs, rhs) @@ -3086,7 +3465,7 @@ impl CallExpr { /// Gets the call arguments. pub fn arguments(&self) -> impl Iterator { - children(&self.0) + Expr::children(&self.0) } } @@ -3125,7 +3504,7 @@ impl IndexExpr { /// The first is the operand expression. /// The second is the index expression. pub fn operands(&self) -> (Expr, Expr) { - let mut children = self.0.children().filter_map(Expr::cast); + let mut children = Expr::children(&self.0); let operand = children.next().expect("expected an operand expression"); let index = children.next().expect("expected an index expression"); (operand, index) @@ -3167,7 +3546,7 @@ impl AccessExpr { /// The first is the operand expression. /// The second is the member name. pub fn operands(&self) -> (Expr, Ident) { - let operand = child(&self.0).expect("expected an operand expression"); + let operand = Expr::child(&self.0).expect("expected an operand expression"); let name = Ident::cast(self.0.last_token().expect("expected a last token")) .expect("expected an ident token"); (operand, name) diff --git a/wdl-ast/src/v1/import.rs b/wdl-ast/src/v1/import.rs index f42d6793..98bf70cf 100644 --- a/wdl-ast/src/v1/import.rs +++ b/wdl-ast/src/v1/import.rs @@ -7,6 +7,9 @@ use url::Url; use wdl_grammar::lexer::v1::Logos; use wdl_grammar::lexer::v1::Token; +use super::AliasKeyword; +use super::AsKeyword; +use super::ImportKeyword; use super::LiteralString; use crate::AstChildren; use crate::AstNode; @@ -32,6 +35,11 @@ impl ImportStatement { child(&self.0).expect("import should have a URI") } + /// Gets the `import` keyword of the import statement. + pub fn keyword(&self) -> ImportKeyword { + token(&self.0).expect("import should have a keyword") + } + /// Gets the explicit namespace of the import statement (i.e. the `as` /// clause). pub fn explicit_namespace(&self) -> Option { @@ -129,6 +137,16 @@ impl ImportAlias { let target = children.next().expect("expected a target identifier"); (source, target) } + + /// Gets the `alias` keyword of the alias. 
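+ ///
+ /// A hedged sketch (`alias` is a hypothetical `ImportAlias`; tokens render
+ /// their text via `Display`):
+ ///
+ /// ```ignore
+ /// let keyword = alias.alias_keyword();
+ /// assert_eq!(keyword.to_string(), "alias");
+ /// ```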
+ pub fn alias_keyword(&self) -> AliasKeyword { + token(&self.0).expect("alias should have an `alias` keyword") + } + + /// Gets the `as` keyword of the alias. + pub fn as_keyword(&self) -> AsKeyword { + token(&self.0).expect("alias should have an `as` keyword") + } } impl AstNode for ImportAlias { diff --git a/wdl-ast/src/v1/struct.rs b/wdl-ast/src/v1/struct.rs index 6e53ecc4..c4eda27f 100644 --- a/wdl-ast/src/v1/struct.rs +++ b/wdl-ast/src/v1/struct.rs @@ -2,6 +2,7 @@ use super::MetadataSection; use super::ParameterMetadataSection; +use super::StructKeyword; use super::UnboundDecl; use crate::AstChildren; use crate::AstNode; @@ -22,9 +23,14 @@ impl StructDefinition { token(&self.0).expect("struct should have a name") } + /// Gets the `struct` keyword of the struct definition. + pub fn keyword(&self) -> StructKeyword { + token(&self.0).expect("struct should have a keyword") + } + /// Gets the items in the struct definition. - pub fn items(&self) -> AstChildren { - children(&self.0) + pub fn items(&self) -> impl Iterator { + StructItem::children(&self.0) } /// Gets the member declarations of the struct. @@ -79,10 +85,10 @@ pub enum StructItem { ParameterMetadata(ParameterMetadataSection), } -impl AstNode for StructItem { - type Language = WorkflowDescriptionLanguage; - - fn can_cast(kind: SyntaxKind) -> bool +impl StructItem { + /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the + /// underlying members within the [`StructItem`]. + pub fn can_cast(kind: SyntaxKind) -> bool where Self: Sized, { @@ -94,27 +100,126 @@ impl AstNode for StructItem { ) } - fn cast(syntax: SyntaxNode) -> Option + /// Attempts to cast the [`SyntaxNode`] to any of the underlying members + /// within the [`StructItem`]. + pub fn cast(syntax: SyntaxNode) -> Option where Self: Sized, { match syntax.kind() { - SyntaxKind::UnboundDeclNode => Some(Self::Member(UnboundDecl(syntax))), - SyntaxKind::MetadataSectionNode => Some(Self::Metadata(MetadataSection(syntax))), - SyntaxKind::ParameterMetadataSectionNode => { - Some(Self::ParameterMetadata(ParameterMetadataSection(syntax))) - } + SyntaxKind::UnboundDeclNode => Some(Self::Member( + UnboundDecl::cast(syntax).expect("unbound decl to cast"), + )), + SyntaxKind::MetadataSectionNode => Some(Self::Metadata( + MetadataSection::cast(syntax).expect("metadata section to cast"), + )), + SyntaxKind::ParameterMetadataSectionNode => Some(Self::ParameterMetadata( + ParameterMetadataSection::cast(syntax).expect("parameter metadata section to cast"), + )), _ => None, } } - fn syntax(&self) -> &SyntaxNode { + /// Gets a reference to the underlying [`SyntaxNode`]. + pub fn syntax(&self) -> &SyntaxNode { + match self { + Self::Member(element) => element.syntax(), + Self::Metadata(element) => element.syntax(), + Self::ParameterMetadata(element) => element.syntax(), + } + } + + /// Attempts to get a reference to the inner [`UnboundDecl`]. + /// + /// * If `self` is a [`StructItem::Member`], then a reference to the inner + /// [`UnboundDecl`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_unbound_decl(&self) -> Option<&UnboundDecl> { + match self { + Self::Member(unbound_decl) => Some(unbound_decl), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`UnboundDecl`]. + /// + /// * If `self` is a [`StructItem::Member`], then the inner [`UnboundDecl`] + /// is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. 
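+ ///
+ /// A rough sketch (`item` is a hypothetical `StructItem`):
+ ///
+ /// ```ignore
+ /// match item.into_unbound_decl() {
+ ///     Some(decl) => { /* a struct member declaration */ }
+ ///     None => { /* a metadata or parameter metadata section */ }
+ /// }
+ /// ```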
+ pub fn into_unbound_decl(self) -> Option { + match self { + Self::Member(unbound_decl) => Some(unbound_decl), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`MetadataSection`]. + /// + /// * If `self` is a [`StructItem::Metadata`], then a reference to the inner + /// [`MetadataSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_metadata_section(&self) -> Option<&MetadataSection> { + match self { + Self::Metadata(metadata_section) => Some(metadata_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`MetadataSection`]. + /// + /// * If `self` is a [`StructItem::Metadata`], then the inner + /// [`MetadataSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_metadata_section(self) -> Option { + match self { + Self::Metadata(metadata_section) => Some(metadata_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`ParameterMetadataSection`]. + /// + /// * If `self` is a [`StructItem::ParameterMetadata`], then a reference to + /// the inner [`ParameterMetadataSection`] is returned wrapped in + /// [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_parameter_metadata_section(&self) -> Option<&ParameterMetadataSection> { match self { - Self::Member(m) => &m.0, - Self::Metadata(m) => &m.0, - Self::ParameterMetadata(m) => &m.0, + Self::ParameterMetadata(parameter_metadata_section) => Some(parameter_metadata_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`ParameterMetadataSection`]. + /// + /// * If `self` is a [`StructItem::ParameterMetadata`], then the inner + /// [`ParameterMetadataSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_parameter_metadata_section(self) -> Option { + match self { + Self::ParameterMetadata(parameter_metadata_section) => Some(parameter_metadata_section), + _ => None, } } + + /// Finds the first child that can be cast to an [`StructItem`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::child`] without requiring [`StructItem`] to + /// implement the `AstNode` trait. + pub fn child(syntax: &SyntaxNode) -> Option { + syntax.children().find_map(Self::cast) + } + + /// Finds all children that can be cast to an [`StructItem`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::children`] without requiring [`StructItem`] to + /// implement the `AstNode` trait. + pub fn children(syntax: &SyntaxNode) -> impl Iterator { + syntax.children().filter_map(Self::cast) + } } #[cfg(test)] @@ -179,7 +284,7 @@ struct ComplexTypes { parameter_meta { a: "foo" } -} +} "#, ); assert!(diagnostics.is_empty()); diff --git a/wdl-ast/src/v1/task.rs b/wdl-ast/src/v1/task.rs index bccc1452..f3b0526c 100644 --- a/wdl-ast/src/v1/task.rs +++ b/wdl-ast/src/v1/task.rs @@ -39,8 +39,8 @@ impl TaskDefinition { } /// Gets the items of the task. - pub fn items(&self) -> AstChildren { - children(&self.0) + pub fn items(&self) -> impl Iterator { + TaskItem::children(&self.0) } /// Gets the input section of the task. @@ -125,7 +125,7 @@ pub enum TaskItem { Command(CommandSection), /// The item is a requirements section. Requirements(RequirementsSection), - /// The item is a hints section. + /// The item is a task hints section. Hints(TaskHintsSection), /// The item is a runtime section. 
Runtime(RuntimeSection), @@ -137,10 +137,10 @@ pub enum TaskItem { Declaration(BoundDecl), } -impl AstNode for TaskItem { - type Language = WorkflowDescriptionLanguage; - - fn can_cast(kind: SyntaxKind) -> bool +impl TaskItem { + /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the + /// underlying members within the [`TaskItem`]. + pub fn can_cast(kind: SyntaxKind) -> bool where Self: Sized, { @@ -158,41 +158,295 @@ impl AstNode for TaskItem { ) } - fn cast(syntax: SyntaxNode) -> Option + /// Attempts to cast the [`SyntaxNode`] to any of the underlying members + /// within the [`TaskItem`]. + pub fn cast(syntax: SyntaxNode) -> Option where Self: Sized, { match syntax.kind() { - SyntaxKind::InputSectionNode => Some(Self::Input(InputSection(syntax))), - SyntaxKind::OutputSectionNode => Some(Self::Output(OutputSection(syntax))), - SyntaxKind::CommandSectionNode => Some(Self::Command(CommandSection(syntax))), - SyntaxKind::RequirementsSectionNode => { - Some(Self::Requirements(RequirementsSection(syntax))) - } - SyntaxKind::TaskHintsSectionNode => Some(Self::Hints(TaskHintsSection(syntax))), - SyntaxKind::RuntimeSectionNode => Some(Self::Runtime(RuntimeSection(syntax))), - SyntaxKind::MetadataSectionNode => Some(Self::Metadata(MetadataSection(syntax))), - SyntaxKind::ParameterMetadataSectionNode => { - Some(Self::ParameterMetadata(ParameterMetadataSection(syntax))) - } - SyntaxKind::BoundDeclNode => Some(Self::Declaration(BoundDecl(syntax))), + SyntaxKind::InputSectionNode => Some(Self::Input( + InputSection::cast(syntax).expect("input section to cast"), + )), + SyntaxKind::OutputSectionNode => Some(Self::Output( + OutputSection::cast(syntax).expect("output section to cast"), + )), + SyntaxKind::CommandSectionNode => Some(Self::Command( + CommandSection::cast(syntax).expect("command section to cast"), + )), + SyntaxKind::RequirementsSectionNode => Some(Self::Requirements( + RequirementsSection::cast(syntax).expect("requirements section to cast"), + )), + SyntaxKind::RuntimeSectionNode => Some(Self::Runtime( + RuntimeSection::cast(syntax).expect("runtime section to cast"), + )), + SyntaxKind::MetadataSectionNode => Some(Self::Metadata( + MetadataSection::cast(syntax).expect("metadata section to cast"), + )), + SyntaxKind::ParameterMetadataSectionNode => Some(Self::ParameterMetadata( + ParameterMetadataSection::cast(syntax).expect("parameter metadata section to cast"), + )), + SyntaxKind::TaskHintsSectionNode => Some(Self::Hints( + TaskHintsSection::cast(syntax).expect("task hints section to cast"), + )), + SyntaxKind::BoundDeclNode => Some(Self::Declaration( + BoundDecl::cast(syntax).expect("bound decl to cast"), + )), _ => None, } } - fn syntax(&self) -> &SyntaxNode { + /// Gets a reference to the underlying [`SyntaxNode`]. + pub fn syntax(&self) -> &SyntaxNode { + match self { + Self::Input(element) => element.syntax(), + Self::Output(element) => element.syntax(), + Self::Command(element) => element.syntax(), + Self::Requirements(element) => element.syntax(), + Self::Hints(element) => element.syntax(), + Self::Runtime(element) => element.syntax(), + Self::Metadata(element) => element.syntax(), + Self::ParameterMetadata(element) => element.syntax(), + Self::Declaration(element) => element.syntax(), + } + } + + /// Attempts to get a reference to the inner [`InputSection`]. + /// + /// * If `self` is a [`TaskItem::Input`], then a reference to the inner + /// [`InputSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. 
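+ ///
+ /// A hedged sketch of the borrowing accessor (`item` is a hypothetical
+ /// `TaskItem`; unlike the `into_` variants, the `as_` methods do not
+ /// consume `self`):
+ ///
+ /// ```ignore
+ /// if let Some(input) = item.as_input_section() {
+ ///     let _count = input.declarations().count();
+ /// }
+ /// ```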
+ pub fn as_input_section(&self) -> Option<&InputSection> { + match self { + Self::Input(input_section) => Some(input_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`InputSection`]. + /// + /// * If `self` is a [`TaskItem::Input`], then the inner [`InputSection`] is + /// returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_input_section(self) -> Option { + match self { + Self::Input(input_section) => Some(input_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`OutputSection`]. + /// + /// * If `self` is a [`TaskItem::Output`], then a reference to the inner + /// [`OutputSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_output_section(&self) -> Option<&OutputSection> { + match self { + Self::Output(output_section) => Some(output_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`OutputSection`]. + /// + /// * If `self` is a [`TaskItem::Output`], then the inner [`OutputSection`] + /// is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_output_section(self) -> Option { + match self { + Self::Output(output_section) => Some(output_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`CommandSection`]. + /// + /// * If `self` is a [`TaskItem::Command`], then a reference to the inner + /// [`CommandSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_command_section(&self) -> Option<&CommandSection> { + match self { + Self::Command(command_section) => Some(command_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`CommandSection`]. + /// + /// * If `self` is a [`TaskItem::Command`], then the inner + /// [`CommandSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_command_section(self) -> Option { + match self { + Self::Command(command_section) => Some(command_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`RequirementsSection`]. + /// + /// * If `self` is a [`TaskItem::Requirements`], then a reference to the + /// inner [`RequirementsSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_requirements_section(&self) -> Option<&RequirementsSection> { + match self { + Self::Requirements(requirements_section) => Some(requirements_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`RequirementsSection`]. + /// + /// * If `self` is a [`TaskItem::Requirements`], then the inner + /// [`RequirementsSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_requirements_section(self) -> Option { + match self { + Self::Requirements(requirements_section) => Some(requirements_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`TaskHintsSection`]. + /// + /// * If `self` is a [`TaskItem::Hints`], then a reference to the inner + /// [`TaskHintsSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_hints_section(&self) -> Option<&TaskHintsSection> { + match self { + Self::Hints(hints_section) => Some(hints_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`TaskHintsSection`]. + /// + /// * If `self` is a [`TaskItem::Hints`], then the inner + /// [`TaskHintsSection`] is returned wrapped in [`Some`]. 
+ /// * Else, [`None`] is returned. + pub fn into_hints_section(self) -> Option { + match self { + Self::Hints(hints_section) => Some(hints_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`RuntimeSection`]. + /// + /// * If `self` is a [`TaskItem::Runtime`], then a reference to the inner + /// [`RuntimeSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_runtime_section(&self) -> Option<&RuntimeSection> { + match self { + Self::Runtime(runtime_section) => Some(runtime_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`RuntimeSection`]. + /// + /// * If `self` is a [`TaskItem::Runtime`], then the inner + /// [`RuntimeSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_runtime_section(self) -> Option { + match self { + Self::Runtime(runtime_section) => Some(runtime_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`MetadataSection`]. + /// + /// * If `self` is a [`TaskItem::Metadata`], then a reference to the inner + /// [`MetadataSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_metadata_section(&self) -> Option<&MetadataSection> { + match self { + Self::Metadata(metadata_section) => Some(metadata_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`MetadataSection`]. + /// + /// * If `self` is a [`TaskItem::Metadata`], then the inner + /// [`MetadataSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_metadata_section(self) -> Option { match self { - Self::Input(i) => &i.0, - Self::Output(o) => &o.0, - Self::Command(c) => &c.0, - Self::Requirements(r) => &r.0, - Self::Hints(h) => &h.0, - Self::Runtime(r) => &r.0, - Self::Metadata(m) => &m.0, - Self::ParameterMetadata(m) => &m.0, - Self::Declaration(d) => &d.0, + Self::Metadata(metadata_section) => Some(metadata_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`ParameterMetadataSection`]. + /// + /// * If `self` is a [`TaskItem::ParameterMetadata`], then a reference to + /// the inner [`ParameterMetadataSection`] is returned wrapped in + /// [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_parameter_metadata_section(&self) -> Option<&ParameterMetadataSection> { + match self { + Self::ParameterMetadata(parameter_metadata_section) => Some(parameter_metadata_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`ParameterMetadataSection`]. + /// + /// * If `self` is a [`TaskItem::ParameterMetadata`], then the inner + /// [`ParameterMetadataSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_parameter_metadata_section(self) -> Option { + match self { + Self::ParameterMetadata(parameter_metadata_section) => Some(parameter_metadata_section), + _ => None, } } + + /// Attempts to get a reference to the inner [`BoundDecl`]. + /// + /// * If `self` is a [`TaskItem::Declaration`], then a reference to the + /// inner [`BoundDecl`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_declaration(&self) -> Option<&BoundDecl> { + match self { + Self::Declaration(declaration) => Some(declaration), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`BoundDecl`]. + /// + /// * If `self` is a [`TaskItem::Declaration`], then the inner [`BoundDecl`] + /// is returned wrapped in [`Some`]. 
+ /// * Else, [`None`] is returned. + pub fn into_declaration(self) -> Option { + match self { + Self::Declaration(declaration) => Some(declaration), + _ => None, + } + } + + /// Finds the first child that can be cast to an [`TaskItem`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::child`] without requiring [`TaskItem`] to + /// implement the `AstNode` trait. + pub fn child(syntax: &SyntaxNode) -> Option { + syntax.children().find_map(Self::cast) + } + + /// Finds all children that can be cast to an [`TaskItem`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::children`] without requiring [`TaskItem`] to + /// implement the `AstNode` trait. + pub fn children(syntax: &SyntaxNode) -> impl Iterator { + syntax.children().filter_map(Self::cast) + } } /// Represents the parent of a section. @@ -207,6 +461,49 @@ pub enum SectionParent { } impl SectionParent { + /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the + /// underlying members within the [`SectionParent`]. + pub fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!( + kind, + SyntaxKind::TaskDefinitionNode + | SyntaxKind::WorkflowDefinitionNode + | SyntaxKind::StructDefinitionNode + ) + } + + /// Attempts to cast the [`SyntaxNode`] to any of the underlying members + /// within the [`SectionParent`]. + pub fn cast(syntax: SyntaxNode) -> Option + where + Self: Sized, + { + match syntax.kind() { + SyntaxKind::TaskDefinitionNode => Some(Self::Task( + TaskDefinition::cast(syntax).expect("task definition to cast"), + )), + SyntaxKind::WorkflowDefinitionNode => Some(Self::Workflow( + WorkflowDefinition::cast(syntax).expect("workflow definition to cast"), + )), + SyntaxKind::StructDefinitionNode => Some(Self::Struct( + StructDefinition::cast(syntax).expect("struct definition to cast"), + )), + _ => None, + } + } + + /// Gets a reference to the underlying [`SyntaxNode`]. + pub fn syntax(&self) -> &SyntaxNode { + match self { + Self::Task(element) => element.syntax(), + Self::Workflow(element) => element.syntax(), + Self::Struct(element) => element.syntax(), + } + } + /// Gets the name of the section parent. pub fn name(&self) -> Ident { match self { @@ -216,6 +513,30 @@ impl SectionParent { } } + /// Attempts to get a reference to the inner [`TaskDefinition`]. + /// + /// * If `self` is a [`SectionParent::Task`], then a reference to the inner + /// [`TaskDefinition`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_task(&self) -> Option<&TaskDefinition> { + match self { + Self::Task(task) => Some(task), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`TaskDefinition`]. + /// + /// * If `self` is a [`SectionParent::Task`], then the inner + /// [`TaskDefinition`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_task(self) -> Option { + match self { + Self::Task(task) => Some(task), + _ => None, + } + } + /// Unwraps to a task definition. /// /// # Panics @@ -228,6 +549,30 @@ impl SectionParent { } } + /// Attempts to get a reference to the inner [`WorkflowDefinition`]. + /// + /// * If `self` is a [`SectionParent::Workflow`], then a reference to the + /// inner [`WorkflowDefinition`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. 
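+ ///
+ /// A hedged sketch (`parent` is a hypothetical `SectionParent`):
+ ///
+ /// ```ignore
+ /// if let Some(workflow) = parent.as_workflow() {
+ ///     // Inspect the workflow, e.g. via `workflow.name()`.
+ /// }
+ /// ```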
+ pub fn as_workflow(&self) -> Option<&WorkflowDefinition> {
+ match self {
+ Self::Workflow(workflow) => Some(workflow),
+ _ => None,
+ }
+ }
+
+ /// Consumes `self` and attempts to return the inner [`WorkflowDefinition`].
+ ///
+ /// * If `self` is a [`SectionParent::Workflow`], then the inner
+ /// [`WorkflowDefinition`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
+ pub fn into_workflow(self) -> Option<WorkflowDefinition> {
+ match self {
+ Self::Workflow(workflow) => Some(workflow),
+ _ => None,
+ }
+ }
+
 /// Unwraps to a workflow definition.
 ///
 /// # Panics
@@ -240,6 +585,30 @@ impl SectionParent {
 }
 }

+ /// Attempts to get a reference to the inner [`StructDefinition`].
+ ///
+ /// * If `self` is a [`SectionParent::Struct`], then a reference to the
+ /// inner [`StructDefinition`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
+ pub fn as_struct(&self) -> Option<&StructDefinition> {
+ match self {
+ Self::Struct(r#struct) => Some(r#struct),
+ _ => None,
+ }
+ }
+
+ /// Consumes `self` and attempts to return the inner [`StructDefinition`].
+ ///
+ /// * If `self` is a [`SectionParent::Struct`], then the inner
+ /// [`StructDefinition`] is returned wrapped in [`Some`].
+ /// * Else, [`None`] is returned.
+ pub fn into_struct(self) -> Option<StructDefinition> {
+ match self {
+ Self::Struct(r#struct) => Some(r#struct),
+ _ => None,
+ }
+ }
+
 /// Unwraps to a struct definition.
 ///
 /// # Panics
@@ -251,41 +620,23 @@ impl SectionParent {
 _ => panic!("not a struct definition"),
 }
 }
-}

-impl AstNode for SectionParent {
- type Language = WorkflowDescriptionLanguage;
-
- fn can_cast(kind: SyntaxKind) -> bool
- where
- Self: Sized,
- {
- matches!(
- kind,
- SyntaxKind::TaskDefinitionNode
- | SyntaxKind::WorkflowDefinitionNode
- | SyntaxKind::StructDefinitionNode
- )
- }
-
- fn cast(node: SyntaxNode) -> Option<Self>
- where
- Self: Sized,
- {
- match node.kind() {
- SyntaxKind::TaskDefinitionNode => Some(Self::Task(TaskDefinition(node))),
- SyntaxKind::WorkflowDefinitionNode => Some(Self::Workflow(WorkflowDefinition(node))),
- SyntaxKind::StructDefinitionNode => Some(Self::Struct(StructDefinition(node))),
- _ => None,
- }
+ /// Finds the first child that can be cast to a [`SectionParent`].
+ ///
+ /// This is meant to emulate the functionality of
+ /// [`rowan::ast::support::child`] without requiring [`SectionParent`] to
+ /// implement the `AstNode` trait.
+ pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+ syntax.children().find_map(Self::cast)
 }

- fn syntax(&self) -> &SyntaxNode {
- match self {
- Self::Task(t) => &t.0,
- Self::Workflow(w) => &w.0,
- Self::Struct(s) => &s.0,
- }
+ /// Finds all children that can be cast to a [`SectionParent`].
+ ///
+ /// This is meant to emulate the functionality of
+ /// [`rowan::ast::support::children`] without requiring [`SectionParent`] to
+ /// implement the `AstNode` trait.
+ pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = SectionParent> {
+ syntax.children().filter_map(Self::cast)
 }
 }

@@ -295,8 +646,8 @@ pub struct InputSection(pub(crate) SyntaxNode);

 impl InputSection {
 /// Gets the declarations of the input section.
- pub fn declarations(&self) -> AstChildren<Decl> {
- children(&self.0)
+ pub fn declarations(&self) -> impl Iterator<Item = Decl> {
+ Decl::children(&self.0)
 }

 /// Gets the parent of the input section.
@@ -388,12 +739,11 @@ impl CommandSection {
 self.0.children_with_tokens().filter_map(CommandPart::cast)
 }

- /// Gets the command text if the command is not interpolated (i.e.
- /// has no placeholders).
+ /// Gets the command text if the command is not interpolated (i.e. has no + /// placeholders). /// - /// Returns `None` if the command is interpolated, as - /// interpolated commands cannot be represented as a single - /// span of text. + /// Returns `None` if the command is interpolated, as interpolated commands + /// cannot be represented as a single span of text. pub fn text(&self) -> Option { let mut parts = self.parts(); if let Some(CommandPart::Text(text)) = parts.next() { @@ -569,7 +919,7 @@ impl RequirementsItem { /// Gets the expression of the requirements item. pub fn expr(&self) -> Expr { - child(&self.0).expect("expected an item expression") + Expr::child(&self.0).expect("expected an item expression") } /// Consumes `self` and attempts to cast the requirements item to a @@ -658,7 +1008,7 @@ impl TaskHintsItem { /// Gets the expression of the hints item. pub fn expr(&self) -> Expr { - child(&self.0).expect("expected an item expression") + Expr::child(&self.0).expect("expected an item expression") } } @@ -749,7 +1099,7 @@ impl RuntimeItem { /// Gets the expression of the runtime item. pub fn expr(&self) -> Expr { - child(&self.0).expect("expected an item expression") + Expr::child(&self.0).expect("expected an item expression") } /// Consumes `self` and attempts to cast the runtime item to a diff --git a/wdl-ast/src/v1/task/common/container/value.rs b/wdl-ast/src/v1/task/common/container/value.rs index 5e6a703b..d4035138 100644 --- a/wdl-ast/src/v1/task/common/container/value.rs +++ b/wdl-ast/src/v1/task/common/container/value.rs @@ -3,8 +3,6 @@ use std::ops::Deref; -use rowan::ast::AstNode; - use crate::v1::Expr; use crate::v1::LiteralExpr; diff --git a/wdl-ast/src/v1/tokens.rs b/wdl-ast/src/v1/tokens.rs new file mode 100644 index 00000000..798b603a --- /dev/null +++ b/wdl-ast/src/v1/tokens.rs @@ -0,0 +1,2089 @@ +//! V1 AST tokens. + +use crate::AstToken; +use crate::SyntaxKind; +use crate::SyntaxToken; + +/// A token representing the `after` keyword. +#[derive(Clone, Debug)] +pub struct AfterKeyword(SyntaxToken); + +impl AstToken for AfterKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::AfterKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option + where + Self: Sized, + { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for AfterKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "after") + } +} + +/// A token representing the `alias` keyword. +#[derive(Clone, Debug)] +pub struct AliasKeyword(SyntaxToken); + +impl AstToken for AliasKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::AliasKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for AliasKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "alias") + } +} + +/// A token representing the `Array` type keyword. 
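+///
+/// A hedged sketch of casting (the `token` binding is a hypothetical
+/// [`SyntaxToken`] of kind [`SyntaxKind::ArrayTypeKeyword`]):
+///
+/// ```ignore
+/// if let Some(keyword) = ArrayTypeKeyword::cast(token) {
+///     assert_eq!(keyword.to_string(), "Array");
+/// }
+/// ```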
+#[derive(Clone, Debug)] +pub struct ArrayTypeKeyword(SyntaxToken); + +impl AstToken for ArrayTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ArrayTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ArrayTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Array") + } +} + +/// A token representing the `as` keyword. +#[derive(Clone, Debug)] +pub struct AsKeyword(SyntaxToken); + +impl AstToken for AsKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::AsKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for AsKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "as") + } +} + +/// A token representing the `=` symbol. +#[derive(Clone, Debug)] +pub struct Assignment(SyntaxToken); + +impl AstToken for Assignment { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Assignment) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Assignment { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "=") + } +} + +/// A token representing the `*` symbol. +#[derive(Clone, Debug)] +pub struct Asterisk(SyntaxToken); + +impl AstToken for Asterisk { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Asterisk) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Asterisk { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "*") + } +} + +/// A token representing the `Boolean` keyword. +#[derive(Clone, Debug)] +pub struct BooleanTypeKeyword(SyntaxToken); + +impl AstToken for BooleanTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::BooleanTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for BooleanTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Boolean") + } +} + +/// A token representing the `call` keyword. +#[derive(Clone, Debug)] +pub struct CallKeyword(SyntaxToken); + +impl AstToken for CallKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CallKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CallKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "call") + } +} + +/// A token representing the `}` symbol. 
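+///
+/// Note that `Display` renders the literal brace; the doubled `}}` in the
+/// implementation below is only `write!` escaping. A hedged sketch
+/// (`close_brace` is a hypothetical `CloseBrace`):
+///
+/// ```ignore
+/// assert_eq!(close_brace.to_string(), "}");
+/// ```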
+#[derive(Clone, Debug)] +pub struct CloseBrace(SyntaxToken); + +impl AstToken for CloseBrace { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CloseBrace) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CloseBrace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "}}") + } +} + +/// A token representing the `]` symbol. +#[derive(Clone, Debug)] +pub struct CloseBracket(SyntaxToken); + +impl AstToken for CloseBracket { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CloseBracket) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CloseBracket { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "]") + } +} + +/// A token representing the `>>>` token. +#[derive(Clone, Debug)] +pub struct CloseHeredoc(SyntaxToken); + +impl AstToken for CloseHeredoc { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CloseHeredoc) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CloseHeredoc { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ">>>") + } +} + +/// A token representing the `)` symbol. +#[derive(Clone, Debug)] +pub struct CloseParen(SyntaxToken); + +impl AstToken for CloseParen { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::CloseParen) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for CloseParen { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ")") + } +} + +/// A token representing the `:` symbol. +#[derive(Clone, Debug)] +pub struct Colon(SyntaxToken); + +impl AstToken for Colon { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Colon) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Colon { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ":") + } +} + +/// A token representing the `,` symbol. +#[derive(Clone, Debug)] +pub struct Comma(SyntaxToken); + +impl AstToken for Comma { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Comma) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Comma { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ",") + } +} + +/// A token representing the `command` keyword. 
+#[derive(Clone, Debug)]
+pub struct CommandKeyword(SyntaxToken);
+
+impl AstToken for CommandKeyword {
+ fn can_cast(kind: SyntaxKind) -> bool
+ where
+ Self: Sized,
+ {
+ matches!(kind, SyntaxKind::CommandKeyword)
+ }
+
+ fn cast(syntax: SyntaxToken) -> Option<Self> {
+ if Self::can_cast(syntax.kind()) {
+ return Some(Self(syntax));
+ }
+ None
+ }
+
+ fn syntax(&self) -> &SyntaxToken {
+ &self.0
+ }
+}
+
+impl std::fmt::Display for CommandKeyword {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "command")
+ }
+}
+
+/// A token representing the `Directory` type keyword.
+#[derive(Clone, Debug)]
+pub struct DirectoryTypeKeyword(SyntaxToken);
+
+impl AstToken for DirectoryTypeKeyword {
+ fn can_cast(kind: SyntaxKind) -> bool
+ where
+ Self: Sized,
+ {
+ matches!(kind, SyntaxKind::DirectoryTypeKeyword)
+ }
+
+ fn cast(syntax: SyntaxToken) -> Option<Self> {
+ if Self::can_cast(syntax.kind()) {
+ return Some(Self(syntax));
+ }
+ None
+ }
+
+ fn syntax(&self) -> &SyntaxToken {
+ &self.0
+ }
+}
+
+impl std::fmt::Display for DirectoryTypeKeyword {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "Directory")
+ }
+}
+
+/// A token representing the `.` symbol.
+#[derive(Clone, Debug)]
+pub struct Dot(SyntaxToken);
+
+impl AstToken for Dot {
+ fn can_cast(kind: SyntaxKind) -> bool
+ where
+ Self: Sized,
+ {
+ matches!(kind, SyntaxKind::Dot)
+ }
+
+ fn cast(syntax: SyntaxToken) -> Option<Self> {
+ if Self::can_cast(syntax.kind()) {
+ return Some(Self(syntax));
+ }
+ None
+ }
+
+ fn syntax(&self) -> &SyntaxToken {
+ &self.0
+ }
+}
+
+impl std::fmt::Display for Dot {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, ".")
+ }
+}
+
+/// A token representing the `"` symbol.
+#[derive(Clone, Debug)]
+pub struct DoubleQuote(SyntaxToken);
+
+impl AstToken for DoubleQuote {
+ fn can_cast(kind: SyntaxKind) -> bool
+ where
+ Self: Sized,
+ {
+ matches!(kind, SyntaxKind::DoubleQuote)
+ }
+
+ fn cast(syntax: SyntaxToken) -> Option<Self> {
+ if Self::can_cast(syntax.kind()) {
+ return Some(Self(syntax));
+ }
+ None
+ }
+
+ fn syntax(&self) -> &SyntaxToken {
+ &self.0
+ }
+}
+
+impl std::fmt::Display for DoubleQuote {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, r#"""#)
+ }
+}
+
+/// A token representing the `else` keyword.
+#[derive(Clone, Debug)]
+pub struct ElseKeyword(SyntaxToken);
+
+impl AstToken for ElseKeyword {
+ fn can_cast(kind: SyntaxKind) -> bool
+ where
+ Self: Sized,
+ {
+ matches!(kind, SyntaxKind::ElseKeyword)
+ }
+
+ fn cast(syntax: SyntaxToken) -> Option<Self> {
+ if Self::can_cast(syntax.kind()) {
+ return Some(Self(syntax));
+ }
+
+ None
+ }
+
+ fn syntax(&self) -> &SyntaxToken {
+ &self.0
+ }
+}
+
+impl std::fmt::Display for ElseKeyword {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "else")
+ }
+}
+
+/// A token representing the `==` symbol.
+#[derive(Clone, Debug)]
+pub struct Equal(SyntaxToken);
+
+impl AstToken for Equal {
+ fn can_cast(kind: SyntaxKind) -> bool
+ where
+ Self: Sized,
+ {
+ matches!(kind, SyntaxKind::Equal)
+ }
+
+ fn cast(syntax: SyntaxToken) -> Option<Self> {
+ if Self::can_cast(syntax.kind()) {
+ return Some(Self(syntax));
+ }
+ None
+ }
+
+ fn syntax(&self) -> &SyntaxToken {
+ &self.0
+ }
+}
+
+impl std::fmt::Display for Equal {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "==")
+ }
+}
+
+/// A token representing the `!` symbol.
+#[derive(Clone, Debug)] +pub struct Exclamation(SyntaxToken); + +impl AstToken for Exclamation { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Exclamation) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Exclamation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "!") + } +} + +/// A token representing the `**` keyword. +#[derive(Clone, Debug)] +pub struct Exponentiation(SyntaxToken); + +impl AstToken for Exponentiation { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Exponentiation) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Exponentiation { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "**") + } +} + +/// A token representing the `false` keyword. +#[derive(Clone, Debug)] +pub struct FalseKeyword(SyntaxToken); + +impl AstToken for FalseKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::FalseKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for FalseKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "false") + } +} + +/// A token representing the `File` type keyword. +#[derive(Clone, Debug)] +pub struct FileTypeKeyword(SyntaxToken); + +impl AstToken for FileTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::FileTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for FileTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "File") + } +} + +/// A token representing the `Float` type keyword. +#[derive(Clone, Debug)] +pub struct FloatTypeKeyword(SyntaxToken); + +impl AstToken for FloatTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::FloatTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for FloatTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Float") + } +} + +/// A token representing the `>` symbol. +#[derive(Clone, Debug)] +pub struct Greater(SyntaxToken); + +impl AstToken for Greater { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Greater) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Greater { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ">") + } +} + +/// A token representing the `>=` symbol. 
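+///
+/// A hedged sketch of the `can_cast`/`cast` pairing (`token` is a
+/// hypothetical [`SyntaxToken`]):
+///
+/// ```ignore
+/// if GreaterEqual::can_cast(token.kind()) {
+///     let ge = GreaterEqual::cast(token).expect("kind was checked above");
+///     assert_eq!(ge.to_string(), ">=");
+/// }
+/// ```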
+#[derive(Clone, Debug)] +pub struct GreaterEqual(SyntaxToken); + +impl AstToken for GreaterEqual { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::GreaterEqual) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for GreaterEqual { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, ">=") + } +} + +/// A token representing the `hints` keyword. +#[derive(Clone, Debug)] +pub struct HintsKeyword(SyntaxToken); + +impl AstToken for HintsKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::HintsKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for HintsKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "hints") + } +} + +/// A token representing the `if` keyword. +#[derive(Clone, Debug)] +pub struct IfKeyword(SyntaxToken); + +impl AstToken for IfKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::IfKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for IfKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "if") + } +} + +/// A token representing the `import` keyword. +#[derive(Clone, Debug)] +pub struct ImportKeyword(SyntaxToken); + +impl AstToken for ImportKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ImportKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ImportKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "import") + } +} + +/// A token representing the `in` keyword. +#[derive(Clone, Debug)] +pub struct InKeyword(SyntaxToken); + +impl AstToken for InKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::InKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for InKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "in") + } +} + +/// A token representing the `input` keyword. +#[derive(Clone, Debug)] +pub struct InputKeyword(SyntaxToken); + +impl AstToken for InputKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::InputKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for InputKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "input") + } +} + +/// A token representing the `Int` type keyword. 
+#[derive(Clone, Debug)] +pub struct IntTypeKeyword(SyntaxToken); + +impl AstToken for IntTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::IntTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for IntTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Int") + } +} + +/// A token representing the `<` symbol. +#[derive(Clone, Debug)] +pub struct Less(SyntaxToken); + +impl AstToken for Less { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Less) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Less { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "<") + } +} + +/// A token representing the `<=` symbol. +#[derive(Clone, Debug)] +pub struct LessEqual(SyntaxToken); + +impl AstToken for LessEqual { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::LessEqual) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for LessEqual { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "<=") + } +} + +/// A token representing the `&&` symbol. +#[derive(Clone, Debug)] +pub struct LogicalAnd(SyntaxToken); + +impl AstToken for LogicalAnd { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::LogicalAnd) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for LogicalAnd { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "&&") + } +} + +/// A token representing the `||` symbol. +#[derive(Clone, Debug)] +pub struct LogicalOr(SyntaxToken); + +impl AstToken for LogicalOr { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::LogicalOr) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for LogicalOr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "||") + } +} + +/// A token representing the `Map` type keyword. +#[derive(Clone, Debug)] +pub struct MapTypeKeyword(SyntaxToken); + +impl AstToken for MapTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::MapTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for MapTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Map") + } +} + +/// A token representing the `meta` keyword. 
+#[derive(Clone, Debug)] +pub struct MetaKeyword(SyntaxToken); + +impl AstToken for MetaKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::MetaKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for MetaKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "meta") + } +} + +/// A token representing the `-` symbol. +#[derive(Clone, Debug)] +pub struct Minus(SyntaxToken); + +impl AstToken for Minus { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Minus) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Minus { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "-") + } +} + +/// A token representing the `None` keyword. +#[derive(Clone, Debug)] +pub struct NoneKeyword(SyntaxToken); + +impl AstToken for NoneKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::NoneKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for NoneKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "None") + } +} + +/// A token representing the `!=` symbol. +#[derive(Clone, Debug)] +pub struct NotEqual(SyntaxToken); + +impl AstToken for NotEqual { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::NotEqual) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for NotEqual { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "!=") + } +} + +/// A token representing the `null` keyword. +#[derive(Clone, Debug)] +pub struct NullKeyword(SyntaxToken); + +impl AstToken for NullKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::NullKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for NullKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "null") + } +} + +/// A token representing the `object` keyword. +#[derive(Clone, Debug)] +pub struct ObjectKeyword(SyntaxToken); + +impl AstToken for ObjectKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ObjectKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ObjectKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "object") + } +} + +/// A token representing the `Object` type keyword. 
+#[derive(Clone, Debug)]
+pub struct ObjectTypeKeyword(SyntaxToken);
+
+impl AstToken for ObjectTypeKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::ObjectTypeKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for ObjectTypeKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Object")
+    }
+}
+
+/// A token representing the `{` symbol.
+#[derive(Clone, Debug)]
+pub struct OpenBrace(SyntaxToken);
+
+impl AstToken for OpenBrace {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::OpenBrace)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for OpenBrace {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{{")
+    }
+}
+
+/// A token representing the `[` symbol.
+#[derive(Clone, Debug)]
+pub struct OpenBracket(SyntaxToken);
+
+impl AstToken for OpenBracket {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::OpenBracket)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for OpenBracket {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "[")
+    }
+}
+
+/// A token representing the `<<<` symbol.
+#[derive(Clone, Debug)]
+pub struct OpenHeredoc(SyntaxToken);
+
+impl AstToken for OpenHeredoc {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::OpenHeredoc)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for OpenHeredoc {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "<<<")
+    }
+}
+
+/// A token representing the `(` symbol.
+#[derive(Clone, Debug)]
+pub struct OpenParen(SyntaxToken);
+
+impl AstToken for OpenParen {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::OpenParen)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for OpenParen {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "(")
+    }
+}
+
+/// A token representing the `output` keyword.
+#[derive(Clone, Debug)]
+pub struct OutputKeyword(SyntaxToken);
+
+impl AstToken for OutputKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::OutputKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for OutputKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "output")
+    }
+}
+
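In practice a typed token is retrieved from its parent node with a helper in
the shape of `rowan::ast::support::token`; a minimal sketch of that pattern
(the `token` helper here is illustrative, not an API added by this diff):

    use wdl_ast::AstToken;
    use wdl_ast::SyntaxNode;

    /// Finds the first direct child token that casts to `T`.
    fn token<T: AstToken>(parent: &SyntaxNode) -> Option<T> {
        parent
            .children_with_tokens()
            .filter_map(|element| element.into_token())
            .find_map(T::cast)
    }

+/// A token representing the `Pair` type keyword.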
+#[derive(Clone, Debug)]
+pub struct PairTypeKeyword(SyntaxToken);
+
+impl AstToken for PairTypeKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::PairTypeKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for PairTypeKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Pair")
+    }
+}
+
+/// A token representing the `parameter_meta` keyword.
+#[derive(Clone, Debug)]
+pub struct ParameterMetaKeyword(SyntaxToken);
+
+impl AstToken for ParameterMetaKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::ParameterMetaKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for ParameterMetaKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "parameter_meta")
+    }
+}
+
+/// A token representing the `%` symbol.
+#[derive(Clone, Debug)]
+pub struct Percent(SyntaxToken);
+
+impl AstToken for Percent {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::Percent)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for Percent {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "%")
+    }
+}
+
+/// Represents one of the placeholder open symbols.
+#[derive(Clone, Debug)]
+pub struct PlaceholderOpen(SyntaxToken);
+
+impl AstToken for PlaceholderOpen {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::PlaceholderOpen)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for PlaceholderOpen {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // NOTE: this defers to the entire underlying string simply because
+        // we cannot know a priori what the captured text is.
+        write!(f, "{}", self.0)
+    }
+}
+
+/// A token representing the `+` symbol.
+#[derive(Clone, Debug)]
+pub struct Plus(SyntaxToken);
+
+impl AstToken for Plus {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::Plus)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for Plus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "+")
+    }
+}
+
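A WDL v1 placeholder can open with either `~{` or `${`, and both lex to the
same `PlaceholderOpen` kind; that is why its `Display` implementation above
defers to the underlying text rather than a fixed literal. A sketch (assuming
`token` is the opening token of a placeholder from a parsed string):

    use wdl_ast::AstToken;
    use wdl_ast::v1::PlaceholderOpen;

    let open = PlaceholderOpen::cast(token).expect("placeholder open token");
    // The concrete opener is only knowable from the source text.
    assert!(matches!(open.syntax().text(), "~{" | "${"));

+/// A token representing the `?` symbol.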
+#[derive(Clone, Debug)] +pub struct QuestionMark(SyntaxToken); + +impl AstToken for QuestionMark { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::QuestionMark) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for QuestionMark { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "?") + } +} + +/// A token representing the `requirements` keyword. +#[derive(Clone, Debug)] +pub struct RequirementsKeyword(SyntaxToken); + +impl AstToken for RequirementsKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::RequirementsKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for RequirementsKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "requirements") + } +} + +/// A token representing the `runtime` keyword. +#[derive(Clone, Debug)] +pub struct RuntimeKeyword(SyntaxToken); + +impl AstToken for RuntimeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::RuntimeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for RuntimeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "runtime") + } +} + +/// A token representing the `scatter` keyword. +#[derive(Clone, Debug)] +pub struct ScatterKeyword(SyntaxToken); + +impl AstToken for ScatterKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ScatterKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ScatterKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "scatter") + } +} + +/// A token representing the `'` symbol. +#[derive(Clone, Debug)] +pub struct SingleQuote(SyntaxToken); + +impl AstToken for SingleQuote { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::SingleQuote) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for SingleQuote { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "'") + } +} + +/// A token representing the `/` symbol. +#[derive(Clone, Debug)] +pub struct Slash(SyntaxToken); + +impl AstToken for Slash { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::Slash) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for Slash { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "/") + } +} + +/// A token representing the `String` type keyword. 
+#[derive(Clone, Debug)] +pub struct StringTypeKeyword(SyntaxToken); + +impl AstToken for StringTypeKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::StringTypeKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for StringTypeKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "String") + } +} + +/// A token representing the `struct` keyword. +#[derive(Clone, Debug)] +pub struct StructKeyword(SyntaxToken); + +impl AstToken for StructKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::StructKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for StructKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "struct") + } +} + +/// A token representing the `task` keyword. +#[derive(Clone, Debug)] +pub struct TaskKeyword(SyntaxToken); + +impl AstToken for TaskKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::TaskKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for TaskKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "task") + } +} + +/// A token representing the `then` keyword. +#[derive(Clone, Debug)] +pub struct ThenKeyword(SyntaxToken); + +impl AstToken for ThenKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::ThenKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for ThenKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "then") + } +} + +/// A token representing the `true` keyword. +#[derive(Clone, Debug)] +pub struct TrueKeyword(SyntaxToken); + +impl AstToken for TrueKeyword { + fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!(kind, SyntaxKind::TrueKeyword) + } + + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + return Some(Self(syntax)); + } + None + } + + fn syntax(&self) -> &SyntaxToken { + &self.0 + } +} + +impl std::fmt::Display for TrueKeyword { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "true") + } +} + +/// A token representing unknown contents within a WDL document. 
+#[derive(Clone, Debug)]
+pub struct Unknown(SyntaxToken);
+
+impl AstToken for Unknown {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::Unknown)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for Unknown {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // NOTE: this defers to the entire underlying string simply because
+        // we cannot know a priori what the captured text is.
+        write!(f, "{}", self.0)
+    }
+}
+
+/// A token representing the `version` keyword.
+#[derive(Clone, Debug)]
+pub struct VersionKeyword(SyntaxToken);
+
+impl AstToken for VersionKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::VersionKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for VersionKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "version")
+    }
+}
+
+/// A token representing the `workflow` keyword.
+#[derive(Clone, Debug)]
+pub struct WorkflowKeyword(SyntaxToken);
+
+impl AstToken for WorkflowKeyword {
+    fn can_cast(kind: SyntaxKind) -> bool
+    where
+        Self: Sized,
+    {
+        matches!(kind, SyntaxKind::WorkflowKeyword)
+    }
+
+    fn cast(syntax: SyntaxToken) -> Option<Self> {
+        if Self::can_cast(syntax.kind()) {
+            return Some(Self(syntax));
+        }
+        None
+    }
+
+    fn syntax(&self) -> &SyntaxToken {
+        &self.0
+    }
+}
+
+impl std::fmt::Display for WorkflowKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "workflow")
+    }
+}
diff --git a/wdl-ast/src/v1/workflow.rs b/wdl-ast/src/v1/workflow.rs
index 0096c9c2..78dcb472 100644
--- a/wdl-ast/src/v1/workflow.rs
+++ b/wdl-ast/src/v1/workflow.rs
@@ -33,8 +33,8 @@ impl WorkflowDefinition {
     }
 
     /// Gets the items of the workflow.
-    pub fn items(&self) -> AstChildren<WorkflowItem> {
-        children(&self.0)
+    pub fn items(&self) -> impl Iterator<Item = WorkflowItem> {
+        WorkflowItem::children(&self.0)
     }
 
     /// Gets the input section of the workflow.
@@ -48,8 +48,8 @@ impl WorkflowDefinition {
     }
 
     /// Gets the statements of the workflow.
-    pub fn statements(&self) -> AstChildren<WorkflowStatement> {
-        children(&self.0)
+    pub fn statements(&self) -> impl Iterator<Item = WorkflowStatement> {
+        WorkflowStatement::children(&self.0)
     }
 
     /// Gets the metadata section of the workflow.
@@ -115,16 +115,16 @@ pub enum WorkflowItem {
     Metadata(MetadataSection),
     /// The item is a parameter meta section.
     ParameterMetadata(ParameterMetadataSection),
-    /// The item is a hints section.
+    /// The item is a workflow hints section.
     Hints(WorkflowHintsSection),
     /// The item is a private bound declaration.
     Declaration(BoundDecl),
 }
 
-impl AstNode for WorkflowItem {
-    type Language = WorkflowDescriptionLanguage;
-
-    fn can_cast(kind: SyntaxKind) -> bool
+impl WorkflowItem {
+    /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the
+    /// underlying members within the [`WorkflowItem`].
+    pub fn can_cast(kind: SyntaxKind) -> bool
     where
         Self: Sized,
     {
@@ -142,41 +142,297 @@
         )
     }
 
-    fn cast(syntax: SyntaxNode) -> Option<Self>
+    /// Attempts to cast the [`SyntaxNode`] to any of the underlying members
+    /// within the [`WorkflowItem`].
+ pub fn cast(syntax: SyntaxNode) -> Option where Self: Sized, { match syntax.kind() { - SyntaxKind::InputSectionNode => Some(Self::Input(InputSection(syntax))), - SyntaxKind::OutputSectionNode => Some(Self::Output(OutputSection(syntax))), - SyntaxKind::ConditionalStatementNode => { - Some(Self::Conditional(ConditionalStatement(syntax))) - } - SyntaxKind::ScatterStatementNode => Some(Self::Scatter(ScatterStatement(syntax))), - SyntaxKind::CallStatementNode => Some(Self::Call(CallStatement(syntax))), - SyntaxKind::MetadataSectionNode => Some(Self::Metadata(MetadataSection(syntax))), - SyntaxKind::ParameterMetadataSectionNode => { - Some(Self::ParameterMetadata(ParameterMetadataSection(syntax))) - } - SyntaxKind::WorkflowHintsSectionNode => Some(Self::Hints(WorkflowHintsSection(syntax))), - SyntaxKind::BoundDeclNode => Some(Self::Declaration(BoundDecl(syntax))), + SyntaxKind::InputSectionNode => Some(Self::Input( + InputSection::cast(syntax).expect("input section to cast"), + )), + SyntaxKind::OutputSectionNode => Some(Self::Output( + OutputSection::cast(syntax).expect("output section to cast"), + )), + SyntaxKind::ConditionalStatementNode => Some(Self::Conditional( + ConditionalStatement::cast(syntax).expect("conditional statement to cast"), + )), + SyntaxKind::ScatterStatementNode => Some(Self::Scatter( + ScatterStatement::cast(syntax).expect("scatter statement to cast"), + )), + SyntaxKind::CallStatementNode => Some(Self::Call( + CallStatement::cast(syntax).expect("call statement to cast"), + )), + SyntaxKind::MetadataSectionNode => Some(Self::Metadata( + MetadataSection::cast(syntax).expect("metadata section to cast"), + )), + SyntaxKind::ParameterMetadataSectionNode => Some(Self::ParameterMetadata( + ParameterMetadataSection::cast(syntax).expect("parameter metadata section to cast"), + )), + SyntaxKind::WorkflowHintsSectionNode => Some(Self::Hints( + WorkflowHintsSection::cast(syntax).expect("workflow hints section to cast"), + )), + SyntaxKind::BoundDeclNode => Some(Self::Declaration( + BoundDecl::cast(syntax).expect("bound decl to cast"), + )), _ => None, } } - fn syntax(&self) -> &SyntaxNode { + /// Gets a reference to the underlying [`SyntaxNode`]. + pub fn syntax(&self) -> &SyntaxNode { + match self { + Self::Input(element) => element.syntax(), + Self::Output(element) => element.syntax(), + Self::Conditional(element) => element.syntax(), + Self::Scatter(element) => element.syntax(), + Self::Call(element) => element.syntax(), + Self::Metadata(element) => element.syntax(), + Self::ParameterMetadata(element) => element.syntax(), + Self::Hints(element) => element.syntax(), + Self::Declaration(element) => element.syntax(), + } + } + + /// Attempts to get a reference to the inner [`InputSection`]. + /// + /// * If `self` is a [`WorkflowItem::Input`], then a reference to the inner + /// [`InputSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_input_section(&self) -> Option<&InputSection> { + match self { + Self::Input(input_section) => Some(input_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`InputSection`]. + /// + /// * If `self` is a [`WorkflowItem::Input`], then the inner + /// [`InputSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_input_section(self) -> Option { + match self { + Self::Input(input_section) => Some(input_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`OutputSection`]. 
+ /// + /// * If `self` is a [`WorkflowItem::Output`], then a reference to the inner + /// [`OutputSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_output_section(&self) -> Option<&OutputSection> { + match self { + Self::Output(output_section) => Some(output_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`OutputSection`]. + /// + /// * If `self` is a [`WorkflowItem::Output`], then the inner + /// [`OutputSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_output_section(self) -> Option { + match self { + Self::Output(output_section) => Some(output_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`ConditionalStatement`]. + /// + /// * If `self` is a [`WorkflowItem::Conditional`], then a reference to the + /// inner [`ConditionalStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_conditional(&self) -> Option<&ConditionalStatement> { + match self { + Self::Conditional(conditional) => Some(conditional), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`ConditionalStatement`]. + /// + /// * If `self` is a [`WorkflowItem::Conditional`], then the inner + /// [`ConditionalStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_conditional(self) -> Option { + match self { + Self::Conditional(conditional) => Some(conditional), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`ScatterStatement`]. + /// + /// * If `self` is a [`WorkflowItem::Scatter`], then a reference to the + /// inner [`ScatterStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_scatter(&self) -> Option<&ScatterStatement> { + match self { + Self::Scatter(scatter) => Some(scatter), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`ScatterStatement`]. + /// + /// * If `self` is a [`WorkflowItem::Scatter`], then the inner + /// [`ScatterStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_scatter(self) -> Option { + match self { + Self::Scatter(scatter) => Some(scatter), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`CallStatement`]. + /// + /// * If `self` is a [`WorkflowItem::Call`], then a reference to the inner + /// [`CallStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_call(&self) -> Option<&CallStatement> { match self { - Self::Input(i) => &i.0, - Self::Output(o) => &o.0, - Self::Conditional(s) => &s.0, - Self::Scatter(s) => &s.0, - Self::Call(s) => &s.0, - Self::Metadata(m) => &m.0, - Self::ParameterMetadata(m) => &m.0, - Self::Hints(h) => &h.0, - Self::Declaration(d) => &d.0, + Self::Call(call) => Some(call), + _ => None, } } + + /// Consumes `self` and attempts to return the inner [`CallStatement`]. + /// + /// * If `self` is a [`WorkflowItem::Call`], then the inner + /// [`CallStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_call(self) -> Option { + match self { + Self::Call(call) => Some(call), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`MetadataSection`]. + /// + /// * If `self` is a [`WorkflowItem::Metadata`], then a reference to the + /// inner [`MetadataSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. 
+ pub fn as_metadata_section(&self) -> Option<&MetadataSection> { + match self { + Self::Metadata(metadata_section) => Some(metadata_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`MetadataSection`]. + /// + /// * If `self` is a [`WorkflowItem::Metadata`], then the inner + /// [`MetadataSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_metadata_section(self) -> Option { + match self { + Self::Metadata(metadata_section) => Some(metadata_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`ParameterMetadataSection`]. + /// + /// * If `self` is a [`WorkflowItem::ParameterMetadata`], then a reference + /// to the inner [`ParameterMetadataSection`] is returned wrapped in + /// [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_parameter_metadata_section(&self) -> Option<&ParameterMetadataSection> { + match self { + Self::ParameterMetadata(parameter_metadata_section) => Some(parameter_metadata_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`ParameterMetadataSection`]. + /// + /// * If `self` is a [`WorkflowItem::ParameterMetadata`], then the inner + /// [`ParameterMetadataSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_parameter_metadata_section(self) -> Option { + match self { + Self::ParameterMetadata(parameter_metadata_section) => Some(parameter_metadata_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`WorkflowHintsSection`]. + /// + /// * If `self` is a [`WorkflowItem::Hints`], then a reference to the inner + /// [`WorkflowHintsSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_hints_section(&self) -> Option<&WorkflowHintsSection> { + match self { + Self::Hints(hints_section) => Some(hints_section), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`WorkflowHintsSection`]. + /// + /// * If `self` is a [`WorkflowItem::Hints`], then the inner + /// [`WorkflowHintsSection`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_hints_section(self) -> Option { + match self { + Self::Hints(hints_section) => Some(hints_section), + _ => None, + } + } + + /// Attempts to get a reference to the inner [`BoundDecl`]. + /// + /// * If `self` is a [`WorkflowItem::Declaration`], then a reference to the + /// inner [`BoundDecl`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_declaration(&self) -> Option<&BoundDecl> { + match self { + Self::Declaration(declaration) => Some(declaration), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner [`BoundDecl`]. + /// + /// * If `self` is a [`WorkflowItem::Declaration`], then the inner + /// [`BoundDecl`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_declaration(self) -> Option { + match self { + Self::Declaration(declaration) => Some(declaration), + _ => None, + } + } + + /// Finds the first child that can be cast to an [`WorkflowItem`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::child`] without requiring [`WorkflowItem`] to + /// implement the `AstNode` trait. + pub fn child(syntax: &SyntaxNode) -> Option { + syntax.children().find_map(Self::cast) + } + + /// Finds all children that can be cast to an [`WorkflowItem`]. 
+ /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::children`] without requiring [`WorkflowItem`] to + /// implement the `AstNode` trait. + pub fn children(syntax: &SyntaxNode) -> impl Iterator { + syntax.children().filter_map(Self::cast) + } } /// Represents a statement in a workflow definition. @@ -193,6 +449,79 @@ pub enum WorkflowStatement { } impl WorkflowStatement { + /// Returns whether or not a [`SyntaxKind`] is able to be cast to any of the + /// underlying members within the [`WorkflowStatement`]. + pub fn can_cast(kind: SyntaxKind) -> bool + where + Self: Sized, + { + matches!( + kind, + SyntaxKind::ConditionalStatementNode + | SyntaxKind::ScatterStatementNode + | SyntaxKind::CallStatementNode + | SyntaxKind::BoundDeclNode + ) + } + + /// Attempts to cast the [`SyntaxNode`] to any of the underlying members + /// within the [`WorkflowStatement`]. + pub fn cast(syntax: SyntaxNode) -> Option + where + Self: Sized, + { + match syntax.kind() { + SyntaxKind::ConditionalStatementNode => Some(Self::Conditional( + ConditionalStatement::cast(syntax).expect("conditional statement to cast"), + )), + SyntaxKind::ScatterStatementNode => Some(Self::Scatter( + ScatterStatement::cast(syntax).expect("scatter statement to cast"), + )), + SyntaxKind::CallStatementNode => Some(Self::Call( + CallStatement::cast(syntax).expect("call statement to cast"), + )), + SyntaxKind::BoundDeclNode => Some(Self::Declaration( + BoundDecl::cast(syntax).expect("bound decl to cast"), + )), + _ => None, + } + } + + /// Gets a reference to the underlying [`SyntaxNode`]. + pub fn syntax(&self) -> &SyntaxNode { + match self { + Self::Conditional(element) => element.syntax(), + Self::Scatter(element) => element.syntax(), + Self::Call(element) => element.syntax(), + Self::Declaration(element) => element.syntax(), + } + } + + /// Attempts to get a reference to the inner [`ConditionalStatement`]. + /// + /// * If `self` is a [`WorkflowStatement::Conditional`], then a reference to + /// the inner [`ConditionalStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_conditional(&self) -> Option<&ConditionalStatement> { + match self { + Self::Conditional(conditional) => Some(conditional), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`ConditionalStatement`]. + /// + /// * If `self` is a [`WorkflowStatement::Conditional`], then the inner + /// [`ConditionalStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_conditional(self) -> Option { + match self { + Self::Conditional(conditional) => Some(conditional), + _ => None, + } + } + /// Unwraps the statement into a conditional statement. /// /// # Panics @@ -205,6 +534,31 @@ impl WorkflowStatement { } } + /// Attempts to get a reference to the inner [`ScatterStatement`]. + /// + /// * If `self` is a [`WorkflowStatement::Scatter`], then a reference to the + /// inner [`ScatterStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_scatter(&self) -> Option<&ScatterStatement> { + match self { + Self::Scatter(scatter) => Some(scatter), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`ScatterStatement`]. + /// + /// * If `self` is a [`WorkflowStatement::Scatter`], then the inner + /// [`ScatterStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. 
+ pub fn into_scatter(self) -> Option { + match self { + Self::Scatter(scatter) => Some(scatter), + _ => None, + } + } + /// Unwraps the statement into a scatter statement. /// /// # Panics @@ -217,6 +571,31 @@ impl WorkflowStatement { } } + /// Attempts to get a reference to the inner [`CallStatement`]. + /// + /// * If `self` is a [`WorkflowStatement::Call`], then a reference to the + /// inner [`CallStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_call(&self) -> Option<&CallStatement> { + match self { + Self::Call(call) => Some(call), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`CallStatement`]. + /// + /// * If `self` is a [`WorkflowStatement::Call`], then the inner + /// [`CallStatement`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_call(self) -> Option { + match self { + Self::Call(call) => Some(call), + _ => None, + } + } + /// Unwraps the statement into a call statement. /// /// # Panics @@ -229,57 +608,59 @@ impl WorkflowStatement { } } + /// Attempts to get a reference to the inner [`BoundDecl`]. + /// + /// * If `self` is a [`WorkflowStatement::Declaration`], then a reference to + /// the inner [`BoundDecl`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn as_declaration(&self) -> Option<&BoundDecl> { + match self { + Self::Declaration(declaration) => Some(declaration), + _ => None, + } + } + + /// Consumes `self` and attempts to return the inner + /// [`BoundDecl`]. + /// + /// * If `self` is a [`WorkflowStatement::Declaration`], then the inner + /// [`BoundDecl`] is returned wrapped in [`Some`]. + /// * Else, [`None`] is returned. + pub fn into_declaration(self) -> Option { + match self { + Self::Declaration(declaration) => Some(declaration), + _ => None, + } + } + /// Unwraps the statement into a bound declaration. /// /// # Panics /// /// Panics if the statement is not a bound declaration. - pub fn unwrap_bound_decl(self) -> BoundDecl { + pub fn unwrap_declaration(self) -> BoundDecl { match self { - Self::Declaration(stmt) => stmt, + Self::Declaration(declaration) => declaration, _ => panic!("not a bound declaration"), } } -} - -impl AstNode for WorkflowStatement { - type Language = WorkflowDescriptionLanguage; - - fn can_cast(kind: SyntaxKind) -> bool - where - Self: Sized, - { - matches!( - kind, - SyntaxKind::ConditionalStatementNode - | SyntaxKind::ScatterStatementNode - | SyntaxKind::CallStatementNode - | SyntaxKind::BoundDeclNode - ) - } - fn cast(syntax: SyntaxNode) -> Option - where - Self: Sized, - { - match syntax.kind() { - SyntaxKind::ConditionalStatementNode => { - Some(Self::Conditional(ConditionalStatement(syntax))) - } - SyntaxKind::ScatterStatementNode => Some(Self::Scatter(ScatterStatement(syntax))), - SyntaxKind::CallStatementNode => Some(Self::Call(CallStatement(syntax))), - SyntaxKind::BoundDeclNode => Some(Self::Declaration(BoundDecl(syntax))), - _ => None, - } + /// Finds the first child that can be cast to an [`WorkflowStatement`]. + /// + /// This is meant to emulate the functionality of + /// [`rowan::ast::support::child`] without requiring [`WorkflowStatement`] + /// to implement the `AstNode` trait. 
+    pub fn child(syntax: &SyntaxNode) -> Option<Self> {
+        syntax.children().find_map(Self::cast)
     }
 
-    fn syntax(&self) -> &SyntaxNode {
-        match self {
-            Self::Conditional(s) => &s.0,
-            Self::Scatter(s) => &s.0,
-            Self::Call(s) => &s.0,
-            Self::Declaration(d) => &d.0,
-        }
+    /// Finds all children that can be cast to a [`WorkflowStatement`].
+    ///
+    /// This is meant to emulate the functionality of
+    /// [`rowan::ast::support::children`] without requiring
+    /// [`WorkflowStatement`] to implement the `AstNode` trait.
+    pub fn children(syntax: &SyntaxNode) -> impl Iterator<Item = WorkflowStatement> {
+        syntax.children().filter_map(Self::cast)
     }
 }
@@ -290,12 +671,12 @@ pub struct ConditionalStatement(pub(crate) SyntaxNode);
 
 impl ConditionalStatement {
     /// Gets the expression of the conditional statement
     pub fn expr(&self) -> Expr {
-        child(&self.0).expect("expected a conditional expression")
+        Expr::child(&self.0).expect("expected a conditional expression")
     }
 
     /// Gets the statements of the conditional body.
-    pub fn statements(&self) -> AstChildren<WorkflowStatement> {
-        children(&self.0)
+    pub fn statements(&self) -> impl Iterator<Item = WorkflowStatement> {
+        WorkflowStatement::children(&self.0)
     }
 }
@@ -336,12 +717,12 @@ impl ScatterStatement {
     /// Gets the scatter expression.
     pub fn expr(&self) -> Expr {
-        child(&self.0).expect("expected a scatter expression")
+        Expr::child(&self.0).expect("expected a scatter expression")
     }
 
     /// Gets the statements of the scatter body.
-    pub fn statements(&self) -> AstChildren<WorkflowStatement> {
-        children(&self.0)
+    pub fn statements(&self) -> impl Iterator<Item = WorkflowStatement> {
+        WorkflowStatement::children(&self.0)
     }
 }
@@ -547,7 +928,7 @@ impl CallInputItem {
     /// The optional expression for the input.
     pub fn expr(&self) -> Option<Expr> {
-        child(&self.0)
+        Expr::child(&self.0)
     }
 
     /// Gets the call statement for the call input item.
diff --git a/wdl-ast/src/visitor.rs b/wdl-ast/src/visitor.rs
index 0c8bd9de..48653aa0 100644
--- a/wdl-ast/src/visitor.rs
+++ b/wdl-ast/src/visitor.rs
@@ -23,7 +23,6 @@
 use rowan::WalkEvent;
 
-use crate::AstNode;
 use crate::AstToken as _;
 use crate::Comment;
 use crate::Document;
@@ -426,11 +425,16 @@ pub(crate) fn visit<V: Visitor>(root: &SyntaxNode, state: &mut V::State, visitor
             SyntaxKind::LiteralNullNode => {
                 // Skip these nodes as they're part of a metadata section
             }
-            k if Expr::can_cast(k) => visitor.expr(
-                state,
-                reason,
-                &Expr::cast(element.into_node().unwrap()).expect("node should cast"),
-            ),
+            k if Expr::can_cast(k) => {
+                visitor.expr(
+                    state,
+                    reason,
+                    &Expr::cast(element.into_node().expect(
+                        "any element that is able to be turned into an expr should be a node",
+                    ))
+                    .expect("expr should be built"),
+                )
+            }
             SyntaxKind::LiteralMapItemNode
             | SyntaxKind::LiteralObjectItemNode
             | SyntaxKind::LiteralStructItemNode
diff --git a/wdl-ast/tests/registry.rs b/wdl-ast/tests/registry.rs
new file mode 100644
index 00000000..49df23f8
--- /dev/null
+++ b/wdl-ast/tests/registry.rs
@@ -0,0 +1,375 @@
+//! The AST node registry.
+//!
+//! The AST node registry was introduced only to ensure that all nodes in the
+//! concrete syntax tree have one and _only_ one analogous AST entity.
+//!
+//! The reason this is important to ensure statically is that this assumption
+//! of one-to-one mapping between elements within the two types of tree is
+//! relied upon in downstream crates. For example, formatting works by
+//! traversing the CST of a WDL document and attempting to cast a node to any
+//! AST type that can then be recursively formatted.
+//!
+//! Furthermore, this is just a good invariant to uphold to ensure, in general,
+//! that the code remains straightforward to reason about (a CST element that
+//! can map to multiple AST elements in different contexts is inherently
+//! confusing).
+
+use std::any::type_name;
+use std::collections::HashMap;
+use std::sync::LazyLock;
+
+use wdl_ast::AstNode;
+use wdl_ast::AstToken;
+use wdl_ast::Comment;
+use wdl_ast::Ident;
+use wdl_ast::SyntaxKind;
+use wdl_ast::Version;
+use wdl_ast::VersionStatement;
+use wdl_ast::Whitespace;
+use wdl_ast::v1;
+use wdl_grammar::ALL_SYNTAX_KIND;
+use wdl_grammar::WorkflowDescriptionLanguage;
+
+/// A private module for sealed traits.
+///
+/// The traits are sealed because we want to reserve the right to implement
+/// them in the future without introducing breaking changes.
+mod private {
+    /// The sealed trait for [`AstNodeRegistrant`](super::AstNodeRegistrant).
+    pub trait SealedNode {}
+
+    /// The sealed trait for [`AstTokenRegistrant`](super::AstTokenRegistrant).
+    pub trait SealedToken {}
+}
+
+/// A registry of all known mappings between AST elements (individual Rust types
+/// that implement the [`AstNode`] trait or [`AstToken`] trait) and the CST
+/// elements they can be cast from (via [`SyntaxKind`]\(s)).
+///
+/// This is useful for ensuring that AST elements have a one-to-one mapping with
+/// CST element kinds.
+static REGISTRY: LazyLock<HashMap<&'static str, Box<[SyntaxKind]>>> = LazyLock::new(|| {
+    let types = vec![
+        Comment::register(),
+        Ident::register(),
+        v1::AccessExpr::register(),
+        v1::AdditionExpr::register(),
+        v1::AfterKeyword::register(),
+        v1::AliasKeyword::register(),
+        v1::ArrayType::register(),
+        v1::ArrayTypeKeyword::register(),
+        v1::AsKeyword::register(),
+        v1::Assignment::register(),
+        v1::Ast::register(),
+        v1::Asterisk::register(),
+        v1::BooleanTypeKeyword::register(),
+        v1::BoundDecl::register(),
+        v1::CallAfter::register(),
+        v1::CallAlias::register(),
+        v1::CallExpr::register(),
+        v1::CallInputItem::register(),
+        v1::CallKeyword::register(),
+        v1::CallStatement::register(),
+        v1::CallTarget::register(),
+        v1::CloseBrace::register(),
+        v1::CloseBracket::register(),
+        v1::CloseHeredoc::register(),
+        v1::CloseParen::register(),
+        v1::Colon::register(),
+        v1::Comma::register(),
+        v1::CommandKeyword::register(),
+        v1::CommandSection::register(),
+        v1::CommandText::register(),
+        v1::ConditionalStatement::register(),
+        v1::DefaultOption::register(),
+        v1::DirectoryTypeKeyword::register(),
+        v1::DivisionExpr::register(),
+        v1::Dot::register(),
+        v1::DoubleQuote::register(),
+        v1::ElseKeyword::register(),
+        v1::Equal::register(),
+        v1::EqualityExpr::register(),
+        v1::Exclamation::register(),
+        v1::Exponentiation::register(),
+        v1::ExponentiationExpr::register(),
+        v1::FalseKeyword::register(),
+        v1::FileTypeKeyword::register(),
+        v1::Float::register(),
+        v1::FloatTypeKeyword::register(),
+        v1::Greater::register(),
+        v1::GreaterEqual::register(),
+        v1::GreaterEqualExpr::register(),
+        v1::GreaterExpr::register(),
+        v1::HintsKeyword::register(),
+        v1::IfExpr::register(),
+        v1::IfKeyword::register(),
+        v1::ImportAlias::register(),
+        v1::ImportKeyword::register(),
+        v1::ImportStatement::register(),
+        v1::IndexExpr::register(),
+        v1::InequalityExpr::register(),
+        v1::InKeyword::register(),
+        v1::InputKeyword::register(),
+        v1::InputSection::register(),
+        v1::Integer::register(),
+        v1::IntTypeKeyword::register(),
+        v1::Less::register(),
+        v1::LessEqual::register(),
+        v1::LessEqualExpr::register(),
+        v1::LessExpr::register(),
+        v1::LiteralArray::register(),
+        v1::LiteralBoolean::register(),
v1::LiteralFloat::register(), + v1::LiteralHints::register(), + v1::LiteralHintsItem::register(), + v1::LiteralInput::register(), + v1::LiteralInputItem::register(), + v1::LiteralInteger::register(), + v1::LiteralMap::register(), + v1::LiteralMapItem::register(), + v1::LiteralNone::register(), + v1::LiteralNull::register(), + v1::LiteralObject::register(), + v1::LiteralObjectItem::register(), + v1::LiteralOutput::register(), + v1::LiteralOutputItem::register(), + v1::LiteralPair::register(), + v1::LiteralString::register(), + v1::LiteralStruct::register(), + v1::LiteralStructItem::register(), + v1::LogicalAnd::register(), + v1::LogicalAndExpr::register(), + v1::LogicalNotExpr::register(), + v1::LogicalOr::register(), + v1::LogicalOrExpr::register(), + v1::MapType::register(), + v1::MapTypeKeyword::register(), + v1::MetadataArray::register(), + v1::MetadataObject::register(), + v1::MetadataObjectItem::register(), + v1::MetadataSection::register(), + v1::MetaKeyword::register(), + v1::Minus::register(), + v1::ModuloExpr::register(), + v1::MultiplicationExpr::register(), + v1::NameRef::register(), + v1::NegationExpr::register(), + v1::NoneKeyword::register(), + v1::NotEqual::register(), + v1::NullKeyword::register(), + v1::ObjectKeyword::register(), + v1::ObjectType::register(), + v1::ObjectTypeKeyword::register(), + v1::OpenBrace::register(), + v1::OpenBracket::register(), + v1::OpenHeredoc::register(), + v1::OpenParen::register(), + v1::OutputKeyword::register(), + v1::OutputSection::register(), + v1::PairType::register(), + v1::PairTypeKeyword::register(), + v1::ParameterMetadataSection::register(), + v1::ParameterMetaKeyword::register(), + v1::ParenthesizedExpr::register(), + v1::Percent::register(), + v1::Placeholder::register(), + v1::PlaceholderOpen::register(), + v1::Plus::register(), + v1::PrimitiveType::register(), + v1::QuestionMark::register(), + v1::RequirementsItem::register(), + v1::RequirementsKeyword::register(), + v1::RequirementsSection::register(), + v1::RuntimeItem::register(), + v1::RuntimeKeyword::register(), + v1::RuntimeSection::register(), + v1::ScatterKeyword::register(), + v1::ScatterStatement::register(), + v1::SepOption::register(), + v1::SingleQuote::register(), + v1::Slash::register(), + v1::StringText::register(), + v1::StringTypeKeyword::register(), + v1::StructDefinition::register(), + v1::StructKeyword::register(), + v1::SubtractionExpr::register(), + v1::TaskDefinition::register(), + v1::TaskHintsItem::register(), + v1::TaskHintsSection::register(), + v1::TaskKeyword::register(), + v1::ThenKeyword::register(), + v1::TrueFalseOption::register(), + v1::TrueKeyword::register(), + v1::TypeRef::register(), + v1::UnboundDecl::register(), + v1::Unknown::register(), + v1::VersionKeyword::register(), + v1::WorkflowDefinition::register(), + v1::WorkflowHintsItem::register(), + v1::WorkflowHintsSection::register(), + v1::WorkflowHintsArray::register(), + v1::WorkflowHintsObject::register(), + v1::WorkflowHintsObjectItem::register(), + v1::WorkflowKeyword::register(), + Version::register(), + VersionStatement::register(), + Whitespace::register(), + ]; + + let mut result = HashMap::new(); + + // NOTE: this is done this way instead of simply collecting into a + // [`HashMap`] to ensure on the fly that no keys are duplicated. + for (r#type, kinds) in types { + if result.contains_key(&r#type) { + panic!("the `{:?}` key is duplicated", r#type); + } + + result.insert(r#type, kinds); + } + + result +}); + +/// Computes the inverse of the registry. 
+///
+/// In other words, maps CST elements—dynamically typed as [`SyntaxKind`]s—to
+/// the corresponding AST element(s) that can cast from them.
+///
+/// This is useful for ensuring that AST elements have a one-to-one mapping with
+/// CST element kinds.
+fn inverse() -> HashMap<SyntaxKind, Box<[&'static str]>> {
+    let mut result = HashMap::<SyntaxKind, Vec<&'static str>>::new();
+
+    for (key, values) in REGISTRY.iter() {
+        for value in values.into_iter() {
+            result.entry(value.to_owned()).or_default().push(*key);
+        }
+    }
+
+    result
+        .into_iter()
+        .map(|(key, values)| (key, values.into_boxed_slice()))
+        .collect()
+}
+
+trait AstNodeRegistrant: private::SealedNode {
+    /// Returns the [`SyntaxKind`]\(s) that can be cast into this AST node type.
+    fn register() -> (&'static str, Box<[SyntaxKind]>);
+}
+
+impl<T: AstNode<Language = WorkflowDescriptionLanguage> + 'static> private::SealedNode for T {}
+
+impl<T: AstNode<Language = WorkflowDescriptionLanguage> + 'static> AstNodeRegistrant for T {
+    fn register() -> (&'static str, Box<[SyntaxKind]>) {
+        (
+            type_name::<T>(),
+            ALL_SYNTAX_KIND
+                .iter()
+                .filter(|kind| T::can_cast(**kind))
+                .cloned()
+                .collect::<Vec<_>>()
+                .into_boxed_slice(),
+        )
+    }
+}
+
+trait AstTokenRegistrant: private::SealedToken {
+    /// Returns the [`SyntaxKind`]\(s) that can be cast into this AST token
+    /// type.
+    fn register() -> (&'static str, Box<[SyntaxKind]>);
+}
+
+impl<T: AstToken + 'static> private::SealedToken for T {}
+
+impl<T: AstToken + 'static> AstTokenRegistrant for T {
+    fn register() -> (&'static str, Box<[SyntaxKind]>) {
+        (
+            type_name::<T>(),
+            ALL_SYNTAX_KIND
+                .iter()
+                .filter(|kind| T::can_cast(**kind))
+                .cloned()
+                .collect::<Vec<_>>()
+                .into_boxed_slice(),
+        )
+    }
+}
+
+/// This test ensures there is a one-to-one mapping between CST elements
+/// ([`SyntaxKind`]\(s)) and AST elements (Rust types that implement
+/// the [`AstNode`] trait or the [`AstToken`] trait).
+///
+/// The importance of this is described at the top of the module.
+#[test]
+fn ensures_one_to_one() {
+    let mut missing = Vec::new();
+    let mut multiple = Vec::new();
+
+    let inverse_registry = inverse();
+
+    for kind in ALL_SYNTAX_KIND {
+        // NOTE: these are symbolic elements and should not be included in
+        // the analysis here.
+        if kind.is_symbolic() {
+            continue;
+        }
+
+        match inverse_registry.get(kind) {
+            // SAFETY: because this is an inverse registry, only
+            // [`SyntaxKind`]s with at least one registered implementing
+            // type would be registered here. Thus, by design of the
+            // `inverse()` method, this will never occur.
+            Some(values) if values.is_empty() => {
+                unreachable!("the inverse registry should never contain an empty array")
+            }
+            Some(values) if values.len() > 1 => multiple.push((kind, values)),
+            None => missing.push(kind),
+            // NOTE: this is essentially only if the values exist and the
+            // length is 1—in that case, there is a one-to-one mapping,
+            // which is what we would like the case to be.
+            _ => {}
+        }
+    }
+
+    if !missing.is_empty() {
+        let mut missing = missing
+            .into_iter()
+            .map(|kind| format!("{:?}", kind))
+            .collect::<Vec<_>>();
+        missing.sort();
+
+        panic!(
+            "detected `SyntaxKind`s without an associated `AstNode`/`AstToken` (n={}): {}",
+            missing.len(),
+            missing.join(", ")
+        )
+    }
+
+    if !multiple.is_empty() {
+        multiple.sort();
+        let mut multiple = multiple
+            .into_iter()
+            .map(|(kind, types)| {
+                let mut types = types.clone();
+                types.sort();
+
+                let mut result = format!("== {:?} ==", kind);
+                for r#type in types {
+                    result.push_str("\n* ");
+                    result.push_str(r#type);
+                }
+
+                result
+            })
+            .collect::<Vec<_>>();
+        multiple.sort();
+
+        panic!(
+            "detected `SyntaxKind`s associated with multiple `AstNode`s/`AstToken`s (n={}):\n\n{}",
+            multiple.len(),
+            multiple.join("\n\n")
+        )
+    }
+}
diff --git a/wdl-ast/tests/validation.rs b/wdl-ast/tests/validation.rs
index 071a707e..eb460ee5 100644
--- a/wdl-ast/tests/validation.rs
+++ b/wdl-ast/tests/validation.rs
@@ -31,6 +31,7 @@ use wdl_ast::Diagnostic;
 use wdl_ast::Document;
 use wdl_ast::Validator;
 
+/// Finds tests for grammar validation.
 fn find_tests() -> Vec<PathBuf> {
     // Check for filter arguments consisting of test names
     let mut filter = HashSet::new();
@@ -58,6 +59,7 @@ fn find_tests() -> Vec<PathBuf> {
     tests
 }
 
+/// Normalizes a result.
 fn normalize(s: &str, is_error: bool) -> String {
     if is_error {
         // Normalize paths in any error messages
@@ -68,6 +70,7 @@ fn normalize(s: &str, is_error: bool) -> String {
     s.replace("\r\n", "\n")
 }
 
+/// Formats diagnostics.
 fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String {
     let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source);
     let mut buffer = Buffer::no_color();
@@ -84,6 +87,7 @@ fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) ->
     String::from_utf8(buffer.into_inner()).expect("should be UTF-8")
 }
 
+/// Compares a single result.
 fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<(), String> {
     let result = normalize(result, is_error);
     if env::var_os("BLESS").is_some() {
@@ -115,6 +119,7 @@ fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<(), String> {
     Ok(())
 }
 
+/// Runs a test.
 fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> {
     let path = test.join("source.wdl");
     let source = std::fs::read_to_string(&path)
diff --git a/wdl-format/CHANGELOG.md b/wdl-format/CHANGELOG.md
new file mode 100644
index 00000000..5ff4dfed
--- /dev/null
+++ b/wdl-format/CHANGELOG.md
@@ -0,0 +1,12 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## Unreleased
+
+### Added
+
+* Adds the initial version of the crate.
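Reviewer note (not part of the diff): the registry above is built with an explicit duplicate-key guard rather than collecting straight into a `HashMap`, which would silently keep only the last entry for a repeated key. A self-contained sketch of the same guard, with `u32` standing in for `SyntaxKind` and hypothetical names:

```rust
use std::collections::HashMap;

/// Builds a registry, panicking if any key appears twice.
fn build_registry(entries: Vec<(&'static str, Box<[u32]>)>) -> HashMap<&'static str, Box<[u32]>> {
    let mut result = HashMap::new();

    for (name, kinds) in entries {
        if result.contains_key(name) {
            panic!("the `{name}` key is duplicated");
        }

        result.insert(name, kinds);
    }

    result
}

fn main() {
    let registry = build_registry(vec![
        ("LiteralFloat", Box::from([1_u32])),
        ("LiteralInteger", Box::from([2_u32])),
    ]);
    assert_eq!(registry.len(), 2);
}
```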
diff --git a/wdl-format/Cargo.toml b/wdl-format/Cargo.toml
new file mode 100644
index 00000000..532b7b66
--- /dev/null
+++ b/wdl-format/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+name = "wdl-format"
+version = "0.1.0"
+license.workspace = true
+edition.workspace = true
+authors.workspace = true
+homepage.workspace = true
+repository.workspace = true
+
+[dependencies]
+wdl-ast = { path = "../wdl-ast", version = "0.7.1", features = ["codespan"] }
+nonempty.workspace = true
+
+[dev-dependencies]
+pretty_assertions.workspace = true
+approx = { workspace = true }
+rayon = { workspace = true }
+colored = { workspace = true }
+codespan-reporting = { workspace = true }
+
+[lints]
+workspace = true
+
+[[test]]
+name = "format"
+harness = false
diff --git a/wdl-format/src/config.rs b/wdl-format/src/config.rs
new file mode 100644
index 00000000..942715f9
--- /dev/null
+++ b/wdl-format/src/config.rs
@@ -0,0 +1,21 @@
+//! Formatting configuration.
+
+mod builder;
+mod indent;
+
+pub use builder::Builder;
+pub use indent::Indent;
+
+/// Configuration for formatting.
+#[derive(Debug, Default)]
+pub struct Config {
+    /// The number of characters to indent.
+    indent: Indent,
+}
+
+impl Config {
+    /// Gets the indent level of the configuration.
+    pub fn indent(&self) -> Indent {
+        self.indent
+    }
+}
diff --git a/wdl-format/src/config/builder.rs b/wdl-format/src/config/builder.rs
new file mode 100644
index 00000000..d9fe0195
--- /dev/null
+++ b/wdl-format/src/config/builder.rs
@@ -0,0 +1,61 @@
+//! Builders for formatting configuration.
+
+use crate::Config;
+use crate::config::Indent;
+
+/// An error related to a [`Builder`].
+#[derive(Debug)]
+pub enum Error {
+    /// A required value was missing for a builder field.
+    Missing(&'static str),
+}
+
+impl std::fmt::Display for Error {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Error::Missing(field) => write!(
+                f,
+                "missing required value for '{field}' in a formatter configuration builder"
+            ),
+        }
+    }
+}
+
+impl std::error::Error for Error {}
+
+/// A [`Result`](std::result::Result) with an [`Error`].
+pub type Result<T> = std::result::Result<T, Error>;
+
+/// A builder for a [`Config`].
+pub struct Builder {
+    /// The number of characters to indent.
+    indent: Option<Indent>,
+}
+
+impl Builder {
+    /// Sets the indentation level.
+    ///
+    /// # Notes
+    ///
+    /// This silently overwrites any previously provided value for the
+    /// indentation level.
+    pub fn indent(mut self, indent: Indent) -> Self {
+        self.indent = Some(indent);
+        self
+    }
+
+    /// Consumes `self` and attempts to build a [`Config`].
+    pub fn try_build(self) -> Result<Config> {
+        let indent = self.indent.ok_or(Error::Missing("indent"))?;
+
+        Ok(Config { indent })
+    }
+}
+
+impl Default for Builder {
+    fn default() -> Self {
+        Self {
+            indent: Some(Default::default()),
+        }
+    }
+}
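Reviewer note (not part of the diff): pulling the new configuration pieces together, a minimal usage sketch that relies only on the `Config`, `Builder`, and `Indent` APIs introduced above:

```rust
use std::num::NonZeroUsize;

use wdl_format::Config;
use wdl_format::config::Builder;
use wdl_format::config::Indent;

fn main() {
    // Configure two-space indentation instead of the default four.
    let config: Config = Builder::default()
        .indent(Indent::Spaces(NonZeroUsize::new(2).expect("2 is non-zero")))
        .try_build()
        .expect("all required builder fields are set");

    println!("indent = {:?}", config.indent());
}
```

As written, `Builder::default()` pre-fills `indent`, so `try_build` cannot actually fail yet; the `Error::Missing` plumbing exists for future required fields.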
diff --git a/wdl-format/src/config/indent.rs b/wdl-format/src/config/indent.rs
new file mode 100644
index 00000000..549f96f7
--- /dev/null
+++ b/wdl-format/src/config/indent.rs
@@ -0,0 +1,22 @@
+//! Indentation within formatting configuration.
+
+use std::num::NonZeroUsize;
+
+/// The default indentation.
+pub const DEFAULT_INDENT: Indent = Indent::Spaces(unsafe { NonZeroUsize::new_unchecked(4) });
+
+/// An indentation level.
+#[derive(Clone, Copy, Debug)]
+pub enum Indent {
+    /// Tabs.
+    Tabs(NonZeroUsize),
+
+    /// Spaces.
+    Spaces(NonZeroUsize),
+}
+
+impl Default for Indent {
+    fn default() -> Self {
+        DEFAULT_INDENT
+    }
+}
diff --git a/wdl-format/src/element.rs b/wdl-format/src/element.rs
new file mode 100644
index 00000000..011adf85
--- /dev/null
+++ b/wdl-format/src/element.rs
@@ -0,0 +1,274 @@
+//! Elements used during formatting.
+
+use std::iter::Peekable;
+
+use nonempty::NonEmpty;
+use wdl_ast::Element;
+use wdl_ast::Node;
+
+pub mod node;
+
+/// An iterator that asserts that all items have been consumed when dropped.
+pub struct AssertConsumedIter<I: Iterator>(Peekable<I>);
+
+impl<I> AssertConsumedIter<I>
+where
+    I: Iterator,
+{
+    /// Creates a new [`AssertConsumedIter`].
+    pub fn new(iter: I) -> Self {
+        Self(iter.peekable())
+    }
+}
+
+impl<I> Iterator for AssertConsumedIter<I>
+where
+    I: Iterator,
+{
+    type Item = I::Item;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.0.next()
+    }
+}
+
+impl<I> Drop for AssertConsumedIter<I>
+where
+    I: Iterator,
+{
+    fn drop(&mut self) {
+        assert!(
+            self.0.peek().is_none(),
+            "not all iterator items were consumed!"
+        );
+    }
+}
+
+/// A formattable element.
+#[derive(Clone, Debug)]
+pub struct FormatElement {
+    /// The inner element.
+    element: Element,
+
+    /// Children as format elements.
+    children: Option<NonEmpty<Box<FormatElement>>>,
+}
+
+impl FormatElement {
+    /// Creates a new [`FormatElement`].
+    pub fn new(element: Element, children: Option<NonEmpty<Box<FormatElement>>>) -> Self {
+        Self { element, children }
+    }
+
+    /// Gets the inner element.
+    pub fn element(&self) -> &Element {
+        &self.element
+    }
+
+    /// Gets the children for this node.
+    pub fn children(&self) -> Option<AssertConsumedIter<impl Iterator<Item = &FormatElement>>> {
+        self.children
+            .as_ref()
+            // NOTE: we wrap the iterator in an [`AssertConsumedIter`] to ensure
+            // that no children are ever left unformatted (they must be
+            // explicitly consumed and dropped).
+            .map(|children| AssertConsumedIter::new(children.iter().map(|c| c.as_ref())))
+    }
+}
+
+/// An extension trait for formatting [`Element`]s.
+pub trait AstElementFormatExt {
+    /// Consumes `self` and returns the [`Element`] as a [`FormatElement`].
+    fn into_format_element(self) -> FormatElement;
+}
+
+impl AstElementFormatExt for Element {
+    fn into_format_element(self) -> FormatElement
+    where
+        Self: Sized,
+    {
+        let children = match &self {
+            Element::Node(node) => collate(node),
+            Element::Token(_) => None,
+        };
+
+        FormatElement::new(self, children)
+    }
+}
+
+/// Collates the children of a particular node.
+///
+/// This function ignores trivia.
+fn collate(node: &Node) -> Option<NonEmpty<Box<FormatElement>>> {
+    let mut results = Vec::new();
+    let stream = node.syntax().children_with_tokens().filter_map(|syntax| {
+        if syntax.kind().is_trivia() {
+            None
+        } else {
+            Some(Element::cast(syntax))
+        }
+    });
+
+    for element in stream {
+        let children = match element {
+            Element::Node(ref node) => collate(node),
+            Element::Token(_) => None,
+        };
+
+        results.push(Box::new(FormatElement { element, children }));
+    }
+
+    if !results.is_empty() {
+        let mut results = results.into_iter();
+        // SAFETY: we just checked to ensure that `results` wasn't empty, so
+        // this will always unwrap.
+        let mut children = NonEmpty::new(results.next().unwrap());
+        children.extend(results);
+        Some(children)
+    } else {
+        None
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use wdl_ast::Document;
+    use wdl_ast::Node;
+    use wdl_ast::SyntaxKind;
+
+    use crate::element::node::AstNodeFormatExt;
+
+    #[test]
+    fn smoke() {
+        let (document, diagnostics) = Document::parse(
+            "## WDL
+version 1.2 # This is a comment attached to the version.
+
+# This is a comment attached to the task keyword.
+task foo # This is an inline comment on the task ident. +{ + +} # This is an inline comment on the task close brace. + +# This is a comment attached to the workflow keyword. +workflow bar # This is an inline comment on the workflow ident. +{ + # This is attached to the call keyword. + call foo {} +} # This is an inline comment on the workflow close brace.", + ); + + assert!(diagnostics.is_empty()); + let document = document.ast().into_v1().unwrap(); + + let format_element = Node::Ast(document).into_format_element(); + let mut children = format_element.children().unwrap(); + + // Version statement. + + let version = children.next().expect("version statement element"); + assert_eq!( + version.element().syntax().kind(), + SyntaxKind::VersionStatementNode + ); + + let mut version_children = version.children().unwrap(); + assert_eq!( + version_children.next().unwrap().element().kind(), + SyntaxKind::VersionKeyword + ); + assert_eq!( + version_children.next().unwrap().element().kind(), + SyntaxKind::Version + ); + + // Task Definition. + + let task = children.next().expect("task element"); + assert_eq!( + task.element().syntax().kind(), + SyntaxKind::TaskDefinitionNode + ); + + // Children. + + let mut task_children = task.children().unwrap(); + assert_eq!( + task_children.next().unwrap().element().kind(), + SyntaxKind::TaskKeyword + ); + + let ident = task_children.next().unwrap(); + assert_eq!(ident.element().kind(), SyntaxKind::Ident); + + assert_eq!( + task_children.next().unwrap().element().kind(), + SyntaxKind::OpenBrace + ); + assert_eq!( + task_children.next().unwrap().element().kind(), + SyntaxKind::CloseBrace + ); + + assert!(task_children.next().is_none()); + + // Workflow Definition. + + let workflow = children.next().expect("workflow element"); + assert_eq!( + workflow.element().syntax().kind(), + SyntaxKind::WorkflowDefinitionNode + ); + + // Children. + + let mut workflow_children = workflow.children().unwrap(); + + assert_eq!( + workflow_children.next().unwrap().element().kind(), + SyntaxKind::WorkflowKeyword + ); + + let ident = workflow_children.next().unwrap(); + assert_eq!(ident.element().kind(), SyntaxKind::Ident); + + assert_eq!( + workflow_children.next().unwrap().element().kind(), + SyntaxKind::OpenBrace + ); + + let call = workflow_children.next().unwrap(); + assert_eq!(call.element().kind(), SyntaxKind::CallStatementNode); + + assert_eq!( + workflow_children.next().unwrap().element().kind(), + SyntaxKind::CloseBrace + ); + + assert!(workflow_children.next().is_none()); + } + + #[test] + #[should_panic] + fn unconsumed_children_nodes_panic() { + let (document, diagnostics) = Document::parse( + "## WDL +version 1.2 # This is a comment attached to the version. + +# This is a comment attached to the task keyword. +task foo # This is an inline comment on the task ident. +{ + +} # This is an inline comment on the task close brace.", + ); + + assert!(diagnostics.is_empty()); + let document = document.ast().into_v1().unwrap(); + + let format_element = Node::Ast(document).into_format_element(); + fn inner(format_element: &crate::element::FormatElement) { + let mut _children = format_element.children().unwrap(); + } + inner(&format_element); + } +} diff --git a/wdl-format/src/element/node.rs b/wdl-format/src/element/node.rs new file mode 100644 index 00000000..9ef74309 --- /dev/null +++ b/wdl-format/src/element/node.rs @@ -0,0 +1,23 @@ +//! A wrapper for formatting [`AstNode`]s. 
+ +use wdl_ast::Element; +use wdl_ast::Node; + +use crate::element::FormatElement; +use crate::element::collate; + +/// An extension trait for formatting [`Node`]s. +pub trait AstNodeFormatExt { + /// Consumes `self` and returns the [`Node`] as a [`FormatElement`]. + fn into_format_element(self) -> FormatElement; +} + +impl AstNodeFormatExt for Node { + fn into_format_element(self) -> FormatElement + where + Self: Sized, + { + let children = collate(&self); + FormatElement::new(Element::Node(self), children) + } +} diff --git a/wdl-format/src/lib.rs b/wdl-format/src/lib.rs new file mode 100644 index 00000000..545d5b01 --- /dev/null +++ b/wdl-format/src/lib.rs @@ -0,0 +1,265 @@ +//! Formatting facilities for WDL. + +pub mod config; +pub mod element; +mod token; +pub mod v1; + +use std::fmt::Write; + +pub use config::Config; +pub use token::*; +use wdl_ast::Element; +use wdl_ast::Node as AstNode; + +use crate::element::FormatElement; + +/// Newline constant used for formatting on windows platforms. +#[cfg(windows)] +pub const NEWLINE: &str = "\r\n"; +/// Newline constant used for formatting on non-windows platforms. +#[cfg(not(windows))] +pub const NEWLINE: &str = "\n"; + +/// A space. +pub const SPACE: &str = " "; + +/// Returns exactly one entity from an enumerable list of entities (usually a +/// [`Vec`]). +#[macro_export] +macro_rules! exactly_one { + ($entities:expr, $name:expr) => { + match $entities.len() { + 0 => unreachable!("we should never have zero {}", $name), + // SAFETY: we just checked to ensure that exactly + // one element exists, so this will always unwrap. + 1 => $entities.pop().unwrap(), + _ => unreachable!("we should not have two or more {}", $name), + } + }; +} + +/// An element that can be written to a token stream. +pub trait Writable { + /// Writes the element to the token stream. 
+    fn write(&self, stream: &mut TokenStream<PreToken>);
+}
+
+impl Writable for &FormatElement {
+    fn write(&self, stream: &mut TokenStream<PreToken>) {
+        match self.element() {
+            Element::Node(node) => match node {
+                AstNode::AccessExpr(_) => v1::expr::format_access_expr(self, stream),
+                AstNode::AdditionExpr(_) => v1::expr::format_addition_expr(self, stream),
+                AstNode::ArrayType(_) => v1::decl::format_array_type(self, stream),
+                AstNode::Ast(_) => v1::format_ast(self, stream),
+                AstNode::BoundDecl(_) => v1::decl::format_bound_decl(self, stream),
+                AstNode::CallAfter(_) => v1::workflow::call::format_call_after(self, stream),
+                AstNode::CallAlias(_) => v1::workflow::call::format_call_alias(self, stream),
+                AstNode::CallExpr(_) => v1::expr::format_call_expr(self, stream),
+                AstNode::CallInputItem(_) => {
+                    v1::workflow::call::format_call_input_item(self, stream)
+                }
+                AstNode::CallStatement(_) => {
+                    v1::workflow::call::format_call_statement(self, stream)
+                }
+                AstNode::CallTarget(_) => v1::workflow::call::format_call_target(self, stream),
+                AstNode::CommandSection(_) => v1::task::format_command_section(self, stream),
+                AstNode::ConditionalStatement(_) => {
+                    v1::workflow::format_conditional_statement(self, stream)
+                }
+                AstNode::DefaultOption(_) => v1::expr::format_default_option(self, stream),
+                AstNode::DivisionExpr(_) => v1::expr::format_division_expr(self, stream),
+                AstNode::EqualityExpr(_) => v1::expr::format_equality_expr(self, stream),
+                AstNode::ExponentiationExpr(_) => {
+                    v1::expr::format_exponentiation_expr(self, stream)
+                }
+                AstNode::GreaterEqualExpr(_) => v1::expr::format_greater_equal_expr(self, stream),
+                AstNode::GreaterExpr(_) => v1::expr::format_greater_expr(self, stream),
+                AstNode::IfExpr(_) => v1::expr::format_if_expr(self, stream),
+                AstNode::ImportAlias(_) => v1::import::format_import_alias(self, stream),
+                AstNode::ImportStatement(_) => v1::import::format_import_statement(self, stream),
+                AstNode::IndexExpr(_) => v1::expr::format_index_expr(self, stream),
+                AstNode::InequalityExpr(_) => v1::expr::format_inequality_expr(self, stream),
+                AstNode::InputSection(_) => v1::format_input_section(self, stream),
+                AstNode::LessEqualExpr(_) => v1::expr::format_less_equal_expr(self, stream),
+                AstNode::LessExpr(_) => v1::expr::format_less_expr(self, stream),
+                AstNode::LiteralArray(_) => v1::expr::format_literal_array(self, stream),
+                AstNode::LiteralBoolean(_) => v1::expr::format_literal_boolean(self, stream),
+                AstNode::LiteralFloat(_) => v1::expr::format_literal_float(self, stream),
+                AstNode::LiteralHints(_) => v1::format_literal_hints(self, stream),
+                AstNode::LiteralHintsItem(_) => v1::format_literal_hints_item(self, stream),
+                AstNode::LiteralInput(_) => v1::format_literal_input(self, stream),
+                AstNode::LiteralInputItem(_) => v1::format_literal_input_item(self, stream),
+                AstNode::LiteralInteger(_) => v1::expr::format_literal_integer(self, stream),
+                AstNode::LiteralMap(_) => v1::expr::format_literal_map(self, stream),
+                AstNode::LiteralMapItem(_) => v1::expr::format_literal_map_item(self, stream),
+                AstNode::LiteralNone(_) => v1::expr::format_literal_none(self, stream),
+                AstNode::LiteralNull(_) => v1::meta::format_literal_null(self, stream),
+                AstNode::LiteralObject(_) => v1::expr::format_literal_object(self, stream),
+                AstNode::LiteralObjectItem(_) => v1::expr::format_literal_object_item(self, stream),
+                AstNode::LiteralOutput(_) => v1::format_literal_output(self, stream),
+                AstNode::LiteralOutputItem(_) => v1::format_literal_output_item(self, stream),
+                AstNode::LiteralPair(_) =>
v1::expr::format_literal_pair(self, stream), + AstNode::LiteralString(_) => v1::expr::format_literal_string(self, stream), + AstNode::LiteralStruct(_) => v1::r#struct::format_literal_struct(self, stream), + AstNode::LiteralStructItem(_) => { + v1::r#struct::format_literal_struct_item(self, stream) + } + AstNode::LogicalAndExpr(_) => v1::expr::format_logical_and_expr(self, stream), + AstNode::LogicalNotExpr(_) => v1::expr::format_logical_not_expr(self, stream), + AstNode::LogicalOrExpr(_) => v1::expr::format_logical_or_expr(self, stream), + AstNode::MapType(_) => v1::decl::format_map_type(self, stream), + AstNode::MetadataArray(_) => v1::meta::format_metadata_array(self, stream), + AstNode::MetadataObject(_) => v1::meta::format_metadata_object(self, stream), + AstNode::MetadataObjectItem(_) => { + v1::meta::format_metadata_object_item(self, stream) + } + AstNode::MetadataSection(_) => v1::meta::format_metadata_section(self, stream), + AstNode::ModuloExpr(_) => v1::expr::format_modulo_expr(self, stream), + AstNode::MultiplicationExpr(_) => { + v1::expr::format_multiplication_expr(self, stream) + } + AstNode::NameRef(_) => v1::expr::format_name_ref(self, stream), + AstNode::NegationExpr(_) => v1::expr::format_negation_expr(self, stream), + AstNode::OutputSection(_) => v1::format_output_section(self, stream), + AstNode::PairType(_) => v1::decl::format_pair_type(self, stream), + AstNode::ObjectType(_) => v1::decl::format_object_type(self, stream), + AstNode::ParameterMetadataSection(_) => { + v1::meta::format_parameter_metadata_section(self, stream) + } + AstNode::ParenthesizedExpr(_) => v1::expr::format_parenthesized_expr(self, stream), + AstNode::Placeholder(_) => v1::expr::format_placeholder(self, stream), + AstNode::PrimitiveType(_) => v1::decl::format_primitive_type(self, stream), + AstNode::RequirementsItem(_) => v1::task::format_requirements_item(self, stream), + AstNode::RequirementsSection(_) => { + v1::task::format_requirements_section(self, stream) + } + AstNode::RuntimeItem(_) => v1::task::format_runtime_item(self, stream), + AstNode::RuntimeSection(_) => v1::task::format_runtime_section(self, stream), + AstNode::ScatterStatement(_) => { + v1::workflow::format_scatter_statement(self, stream) + } + AstNode::SepOption(_) => v1::expr::format_sep_option(self, stream), + AstNode::StructDefinition(_) => { + v1::r#struct::format_struct_definition(self, stream) + } + AstNode::SubtractionExpr(_) => v1::expr::format_subtraction_expr(self, stream), + AstNode::TaskDefinition(_) => v1::task::format_task_definition(self, stream), + AstNode::TaskHintsItem(_) => v1::task::format_task_hints_item(self, stream), + AstNode::TaskHintsSection(_) => v1::task::format_task_hints_section(self, stream), + AstNode::TrueFalseOption(_) => v1::expr::format_true_false_option(self, stream), + AstNode::TypeRef(_) => v1::decl::format_type_ref(self, stream), + AstNode::UnboundDecl(_) => v1::decl::format_unbound_decl(self, stream), + AstNode::VersionStatement(_) => v1::format_version_statement(self, stream), + AstNode::WorkflowDefinition(_) => { + v1::workflow::format_workflow_definition(self, stream) + } + AstNode::WorkflowHintsArray(_) => { + v1::workflow::format_workflow_hints_array(self, stream) + } + AstNode::WorkflowHintsItem(_) => { + v1::workflow::format_workflow_hints_item(self, stream) + } + AstNode::WorkflowHintsObject(_) => { + v1::workflow::format_workflow_hints_object(self, stream) + } + AstNode::WorkflowHintsObjectItem(_) => { + v1::workflow::format_workflow_hints_object_item(self, stream) + } + 
AstNode::WorkflowHintsSection(_) => {
+                    v1::workflow::format_workflow_hints_section(self, stream)
+                }
+            },
+            Element::Token(token) => {
+                stream.push_ast_token(token);
+            }
+        }
+    }
+}
+
+/// A formatter.
+#[derive(Debug, Default)]
+pub struct Formatter {
+    /// The configuration.
+    config: Config,
+}
+
+impl Formatter {
+    /// Creates a new formatter.
+    pub fn new(config: Config) -> Self {
+        Self { config }
+    }
+
+    /// Gets the configuration for this formatter.
+    pub fn config(&self) -> &Config {
+        &self.config
+    }
+
+    /// Formats an element.
+    pub fn format<W: Writable>(&self, element: W) -> std::result::Result<String, std::fmt::Error> {
+        let mut result = String::new();
+
+        for token in self.to_stream(element) {
+            write!(result, "{token}", token = token.display(self.config()))?;
+        }
+
+        Ok(result)
+    }
+
+    /// Gets the [`PostToken`] stream.
+    ///
+    /// # Notes
+    ///
+    /// * This shouldn't be exposed publicly.
+    fn to_stream<W: Writable>(&self, element: W) -> TokenStream<PostToken> {
+        let mut stream = TokenStream::default();
+        element.write(&mut stream);
+
+        let mut postprocessor = Postprocessor::default();
+        postprocessor.run(stream)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use wdl_ast::Document;
+    use wdl_ast::Node;
+
+    use crate::Formatter;
+    use crate::element::node::AstNodeFormatExt as _;
+
+    #[test]
+    fn smoke() {
+        let (document, diagnostics) = Document::parse(
+            "## WDL
+version 1.2 # This is a comment attached to the version.
+
+# This is a comment attached to the task keyword.
+task foo # This is an inline comment on the task ident.
+{
+
+} # This is an inline comment on the task close brace.
+
+# This is a comment attached to the workflow keyword.
+workflow bar # This is an inline comment on the workflow ident.
+{
+    # This is attached to the call keyword.
+    call foo {}
+} # This is an inline comment on the workflow close brace.",
+        );
+
+        assert!(diagnostics.is_empty());
+        let document = Node::Ast(document.ast().into_v1().unwrap()).into_format_element();
+        let formatter = Formatter::default();
+        let result = formatter.format(&document);
+        match result {
+            Ok(s) => {
+                print!("{}", s);
+            }
+            Err(err) => {
+                panic!("failed to format document: {}", err);
+            }
+        }
+    }
+}
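Reviewer note (not part of the diff): `Formatter::to_stream` drives a two-phase pipeline in which elements emit layout-free `PreToken`s and `Postprocessor::run` lowers them into concrete `PostToken`s. A minimal sketch of that flow using only the public APIs from this diff; the tokens pushed here are hypothetical:

```rust
use wdl_ast::SyntaxKind;
use wdl_format::{PostToken, Postprocessor, PreToken, TokenStream};

fn main() {
    // Phase one: elements write `PreToken`s.
    let mut stream = TokenStream::<PreToken>::default();
    stream.push_literal("version".to_string(), SyntaxKind::VersionKeyword);
    stream.end_word();
    stream.push_literal("1.2".to_string(), SyntaxKind::Version);
    stream.end_line();

    // Phase two: the postprocessor resolves spacing, newlines, and indents.
    let mut postprocessor = Postprocessor::default();
    let post: TokenStream<PostToken> = postprocessor.run(stream);
    assert!(post.into_iter().count() > 0);
}
```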
diff --git a/wdl-format/src/token.rs b/wdl-format/src/token.rs
new file mode 100644
index 00000000..21e3d8df
--- /dev/null
+++ b/wdl-format/src/token.rs
@@ -0,0 +1,92 @@
+//! Tokens used during formatting.
+
+mod post;
+mod pre;
+
+use std::fmt::Display;
+
+pub use post::*;
+pub use pre::*;
+
+use crate::Config;
+
+/// Tokens that are streamable.
+pub trait Token: Eq + PartialEq {
+    /// Returns a displayable version of the token.
+    fn display<'a>(&'a self, config: &'a Config) -> impl Display + 'a;
+}
+
+/// A stream of tokens. Tokens in this case are either [`PreToken`]s or
+/// [`PostToken`]s. Note that, unless you are working on formatting
+/// specifically, you should never need to work with [`PostToken`]s.
+#[derive(Debug)]
+pub struct TokenStream<T: Token>(Vec<T>);
+
+impl<T: Token> Default for TokenStream<T> {
+    fn default() -> Self {
+        Self(Default::default())
+    }
+}
+
+impl<T: Token> TokenStream<T> {
+    /// Pushes a token into the stream.
+    pub fn push(&mut self, token: T) {
+        self.0.push(token);
+    }
+
+    /// Removes any number of `token`s at the end of the stream.
+    pub fn trim_end(&mut self, token: &T) {
+        while Some(token) == self.0.last() {
+            let _ = self.0.pop();
+        }
+    }
+
+    /// Removes all tokens at the end of the stream that match a predicate.
+    pub fn trim_while<F: Fn(&T) -> bool>(&mut self, predicate: F) {
+        while let Some(token) = self.0.last() {
+            if !predicate(token) {
+                break;
+            }
+
+            let _ = self.0.pop();
+        }
+    }
+}
+
+impl<T: Token> IntoIterator for TokenStream<T> {
+    type IntoIter = std::vec::IntoIter<T>;
+    type Item = T;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.0.into_iter()
+    }
+}
+
+/// The kind of comment.
+#[derive(Debug, Eq, PartialEq)]
+pub enum Comment {
+    /// A comment on its own line.
+    Preceding(String),
+    /// A comment on the same line as the code preceding it.
+    Inline(String),
+}
+
+/// Trivia.
+#[derive(Debug, Eq, PartialEq)]
+pub enum Trivia {
+    /// A blank line. This may be ignored by the postprocessor.
+    BlankLine,
+    /// A comment.
+    Comment(Comment),
+}
+
+/// Whether optional blank lines are allowed in the current context.
+#[derive(Eq, PartialEq, Default, Debug, Clone, Copy)]
+pub enum LineSpacingPolicy {
+    /// Blank lines are allowed before comments.
+    BeforeComments,
+    /// Blank lines are always allowed.
+    #[default]
+    Always,
+}
diff --git a/wdl-format/src/token/post.rs b/wdl-format/src/token/post.rs
new file mode 100644
index 00000000..1fe44316
--- /dev/null
+++ b/wdl-format/src/token/post.rs
@@ -0,0 +1,281 @@
+//! Postprocessed tokens.
+//!
+//! Generally speaking, unless you are working with the internals of code
+//! formatting, you're not going to be working with these.
+
+use std::fmt::Display;
+
+use wdl_ast::SyntaxKind;
+
+use crate::Comment;
+use crate::LineSpacingPolicy;
+use crate::NEWLINE;
+use crate::PreToken;
+use crate::SPACE;
+use crate::Token;
+use crate::TokenStream;
+use crate::Trivia;
+use crate::config::Indent;
+
+/// A postprocessed token.
+#[derive(Eq, PartialEq)]
+pub enum PostToken {
+    /// A space.
+    Space,
+
+    /// A newline.
+    Newline,
+
+    /// One indentation.
+    Indent,
+
+    /// A string literal.
+    Literal(String),
+}
+
+impl std::fmt::Debug for PostToken {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Space => write!(f, "<SPACE>"),
+            Self::Newline => write!(f, "<NEWLINE>"),
+            Self::Indent => write!(f, "<INDENT>"),
+            Self::Literal(value) => write!(f, "<LITERAL> {value}"),
+        }
+    }
+}
+
+impl Token for PostToken {
+    /// Returns a displayable version of the token.
+    fn display<'a>(&'a self, config: &'a crate::Config) -> impl Display + 'a {
+        /// A displayable version of a [`PostToken`].
+        struct Display<'a> {
+            /// The token to display.
+            token: &'a PostToken,
+            /// The configuration to use.
+            config: &'a crate::Config,
+        }
+
+        impl std::fmt::Display for Display<'_> {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                match self.token {
+                    PostToken::Space => write!(f, "{SPACE}"),
+                    PostToken::Newline => write!(f, "{NEWLINE}"),
+                    PostToken::Indent => {
+                        let (c, n) = match self.config.indent() {
+                            Indent::Spaces(n) => (' ', n),
+                            Indent::Tabs(n) => ('\t', n),
+                        };
+
+                        for _ in 0..n.get() {
+                            write!(f, "{c}")?;
+                        }
+
+                        Ok(())
+                    }
+                    PostToken::Literal(value) => write!(f, "{value}"),
+                }
+            }
+        }
+
+        Display {
+            token: self,
+            config,
+        }
+    }
+}
+
+/// Current position in a line.
+#[derive(Default, Eq, PartialEq)]
+enum LinePosition {
+    /// The start of a line.
+    #[default]
+    StartOfLine,
+
+    /// The middle of a line.
+    MiddleOfLine,
+}
+
+/// A postprocessor of [tokens](PreToken).
+#[derive(Default)]
+pub struct Postprocessor {
+    /// The current position in the line.
+    position: LinePosition,
+
+    /// The current indentation level.
+    indent_level: usize,
+
+    /// Whether the current line has been interrupted by trivia.
+    interrupted: bool,
+
+    /// Whether blank lines are allowed in the current context.
+    line_spacing_policy: LineSpacingPolicy,
+}
+
+impl Postprocessor {
+    /// Runs the postprocessor.
+    pub fn run(&mut self, input: TokenStream<PreToken>) -> TokenStream<PostToken> {
+        let mut output = TokenStream::<PostToken>::default();
+
+        let mut stream = input.into_iter().peekable();
+        while let Some(token) = stream.next() {
+            self.step(token, stream.peek(), &mut output);
+        }
+
+        self.trim_whitespace(&mut output);
+        output.push(PostToken::Newline);
+
+        output
+    }
+
+    /// Takes a step of a [`PreToken`] stream and processes the appropriate
+    /// [`PostToken`]s.
+    pub fn step(
+        &mut self,
+        token: PreToken,
+        next: Option<&PreToken>,
+        stream: &mut TokenStream<PostToken>,
+    ) {
+        match token {
+            PreToken::BlankLine => {
+                self.blank_line(stream);
+            }
+            PreToken::LineEnd => {
+                self.interrupted = false;
+                self.end_line(stream);
+            }
+            PreToken::WordEnd => {
+                stream.trim_end(&PostToken::Space);
+
+                if self.position == LinePosition::MiddleOfLine {
+                    stream.push(PostToken::Space);
+                } else {
+                    // We're at the start of a line, so we don't need to add a
+                    // space.
+                }
+            }
+            PreToken::IndentStart => {
+                self.indent_level += 1;
+                self.end_line(stream);
+            }
+            PreToken::IndentEnd => {
+                self.indent_level = self.indent_level.saturating_sub(1);
+                self.end_line(stream);
+            }
+            PreToken::LineSpacingPolicy(policy) => {
+                self.line_spacing_policy = policy;
+            }
+            PreToken::Literal(value, kind) => {
+                assert!(kind != SyntaxKind::Comment && kind != SyntaxKind::Whitespace);
+                if self.interrupted
+                    && matches!(
+                        kind,
+                        SyntaxKind::OpenBrace
+                            | SyntaxKind::OpenBracket
+                            | SyntaxKind::OpenParen
+                            | SyntaxKind::OpenHeredoc
+                    )
+                    && stream.0.last() == Some(&PostToken::Indent)
+                {
+                    stream.0.pop();
+                }
+                stream.push(PostToken::Literal(value));
+                self.position = LinePosition::MiddleOfLine;
+            }
+            PreToken::Trivia(trivia) => match trivia {
+                Trivia::BlankLine => match self.line_spacing_policy {
+                    LineSpacingPolicy::Always => {
+                        self.blank_line(stream);
+                    }
+                    LineSpacingPolicy::BeforeComments => {
+                        if matches!(next, Some(&PreToken::Trivia(Trivia::Comment(_)))) {
+                            self.blank_line(stream);
+                        }
+                    }
+                },
+                Trivia::Comment(comment) => {
+                    match comment {
+                        Comment::Preceding(value) => {
+                            if !matches!(
+                                stream.0.last(),
+                                Some(&PostToken::Newline) | Some(&PostToken::Indent) | None
+                            ) {
+                                self.interrupted = true;
+                            }
+                            self.end_line(stream);
+                            stream.push(PostToken::Literal(value));
+                            self.position = LinePosition::MiddleOfLine;
+                        }
+                        Comment::Inline(value) => {
+                            assert!(self.position == LinePosition::MiddleOfLine);
+                            if let Some(next) = next {
+                                if next != &PreToken::LineEnd {
+                                    self.interrupted = true;
+                                }
+                            }
+                            self.trim_last_line(stream);
+                            stream.push(PostToken::Space);
+                            stream.push(PostToken::Space);
+                            stream.push(PostToken::Literal(value));
+                        }
+                    }
+                    self.end_line(stream);
+                }
+            },
+        }
+    }
+
+    /// Trims any and all whitespace from the end of the stream.
+    fn trim_whitespace(&mut self, stream: &mut TokenStream<PostToken>) {
+        stream.trim_while(|token| {
+            matches!(
+                token,
+                PostToken::Space | PostToken::Newline | PostToken::Indent
+            )
+        });
+    }
+
+    /// Trims spaces and indents (and not newlines) from the end of the stream.
+    fn trim_last_line(&mut self, stream: &mut TokenStream<PostToken>) {
+        stream.trim_while(|token| matches!(token, PostToken::Space | PostToken::Indent));
+    }
+
+    /// Ends the current line without resetting the interrupted flag.
+    ///
+    /// Removes any trailing spaces or indents and adds a newline only if state
+    /// is not [`LinePosition::StartOfLine`].
State is then set to
+    /// [`LinePosition::StartOfLine`]. Safe to call multiple times in a row.
+    fn end_line(&mut self, stream: &mut TokenStream<PostToken>) {
+        self.trim_last_line(stream);
+        if self.position != LinePosition::StartOfLine {
+            stream.push(PostToken::Newline);
+        }
+        self.position = LinePosition::StartOfLine;
+        self.indent(stream);
+    }
+
+    /// Pushes the current indentation level to the stream.
+    /// This should only be called when the state is
+    /// [`LinePosition::StartOfLine`].
+    fn indent(&self, stream: &mut TokenStream<PostToken>) {
+        assert!(self.position == LinePosition::StartOfLine);
+
+        let level = if self.interrupted {
+            self.indent_level + 1
+        } else {
+            self.indent_level
+        };
+
+        for _ in 0..level {
+            stream.push(PostToken::Indent);
+        }
+    }
+
+    /// Creates a blank line and then indents.
+    fn blank_line(&mut self, stream: &mut TokenStream<PostToken>) {
+        self.trim_whitespace(stream);
+        stream.push(PostToken::Newline);
+        stream.push(PostToken::Newline);
+        self.position = LinePosition::StartOfLine;
+        self.indent(stream);
+    }
+}
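Reviewer note (not part of the diff): a consequence of the design above is that indentation is resolved only at display time, from the `Config`. A small sketch checking that one `PostToken::Indent` renders as the default four spaces:

```rust
use wdl_format::{Config, PostToken, Token};

fn main() {
    let config = Config::default();
    let rendered = format!("{}", PostToken::Indent.display(&config));
    assert_eq!(rendered, "    ");
}
```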
diff --git a/wdl-format/src/token/pre.rs b/wdl-format/src/token/pre.rs
new file mode 100644
index 00000000..6ec41ed7
--- /dev/null
+++ b/wdl-format/src/token/pre.rs
@@ -0,0 +1,230 @@
+//! Tokens emitted during the formatting of particular elements.
+
+use wdl_ast::SyntaxKind;
+use wdl_ast::SyntaxTokenExt;
+
+use crate::Comment;
+use crate::LineSpacingPolicy;
+use crate::Token;
+use crate::TokenStream;
+use crate::Trivia;
+
+/// A token that can be written by elements.
+///
+/// These are tokens that are intended to be written directly by elements to a
+/// [`TokenStream`](super::TokenStream) consisting of [`PreToken`]s. Note that
+/// this will be transformed into a [`TokenStream`](super::TokenStream) of
+/// [`PostToken`](super::PostToken)s by a
+/// [`Postprocessor`](super::Postprocessor) (authors of elements are never
+/// expected to write [`PostToken`](super::PostToken)s directly).
+#[derive(Debug, Eq, PartialEq)]
+pub enum PreToken {
+    /// A blank line.
+    BlankLine,
+
+    /// The end of a line.
+    LineEnd,
+
+    /// The end of a word.
+    WordEnd,
+
+    /// The start of an indented block.
+    IndentStart,
+
+    /// The end of an indented block.
+    IndentEnd,
+
+    /// How to handle blank lines from this point onwards.
+    LineSpacingPolicy(LineSpacingPolicy),
+
+    /// Literal text.
+    Literal(String, SyntaxKind),
+
+    /// Trivia.
+    Trivia(Trivia),
+}
+
+/// The line length to use when displaying pretokens.
+const DISPLAY_LINE_LENGTH: usize = 90;
+
+impl std::fmt::Display for PreToken {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            PreToken::BlankLine => write!(f, "<BLANK>{}", " ".repeat(DISPLAY_LINE_LENGTH)),
+            PreToken::LineEnd => write!(f, "<LINE END>"),
+            PreToken::WordEnd => write!(f, "<WORD END>"),
+            PreToken::IndentStart => write!(f, "<INDENT START>"),
+            PreToken::IndentEnd => write!(f, "<INDENT END>"),
+            PreToken::LineSpacingPolicy(policy) => {
+                write!(f, "<LINE SPACING POLICY: {policy:?}>")
+            }
+            PreToken::Literal(value, kind) => {
+                write!(
+                    f,
+                    "{:width$}<LITERAL: {:?}>",
+                    value,
+                    kind,
+                    width = DISPLAY_LINE_LENGTH
+                )
+            }
+            PreToken::Trivia(trivia) => match trivia {
+                Trivia::BlankLine => {
+                    write!(f, "<TRIVIA: BLANK>{}", " ".repeat(DISPLAY_LINE_LENGTH))
+                }
+                Trivia::Comment(comment) => match comment {
+                    Comment::Preceding(value) => {
+                        write!(
+                            f,
+                            "{:width$}<PRECEDING COMMENT>",
+                            value,
+                            width = DISPLAY_LINE_LENGTH
+                        )
+                    }
+                    Comment::Inline(value) => {
+                        write!(
+                            f,
+                            "{:width$}<INLINE COMMENT>",
+                            value,
+                            width = DISPLAY_LINE_LENGTH
+                        )
+                    }
+                },
+            },
+        }
+    }
+}
+
+impl Token for PreToken {
+    /// Returns a displayable version of the token.
+    fn display<'a>(&'a self, _config: &'a crate::Config) -> impl std::fmt::Display {
+        self
+    }
+}
+
+impl TokenStream<PreToken> {
+    /// Inserts a blank line token to the stream if the stream does not already
+    /// end with a blank line. This will replace any [`Trivia::BlankLine`]
+    /// tokens with [`PreToken::BlankLine`].
+    pub fn blank_line(&mut self) {
+        self.trim_while(|t| matches!(t, PreToken::BlankLine | PreToken::Trivia(Trivia::BlankLine)));
+        self.0.push(PreToken::BlankLine);
+    }
+
+    /// Inserts an end of line token to the stream if the stream does not
+    /// already end with an end of line token.
+    ///
+    /// This will also trim any trailing [`PreToken::WordEnd`] tokens.
+    pub fn end_line(&mut self) {
+        self.trim_while(|t| matches!(t, PreToken::WordEnd | PreToken::LineEnd));
+        self.0.push(PreToken::LineEnd);
+    }
+
+    /// Inserts a word end token to the stream if the stream does not already
+    /// end with a word end token.
+    pub fn end_word(&mut self) {
+        self.trim_end(&PreToken::WordEnd);
+        self.0.push(PreToken::WordEnd);
+    }
+
+    /// Inserts an indent start token to the stream. This will also end the
+    /// current line.
+    pub fn increment_indent(&mut self) {
+        self.end_line();
+        self.0.push(PreToken::IndentStart);
+    }
+
+    /// Inserts an indent end token to the stream. This will also end the
+    /// current line.
+    pub fn decrement_indent(&mut self) {
+        self.end_line();
+        self.0.push(PreToken::IndentEnd);
+    }
+
+    /// Inserts a blank lines allowed context change.
+    pub fn blank_lines_allowed(&mut self) {
+        self.0
+            .push(PreToken::LineSpacingPolicy(LineSpacingPolicy::Always));
+    }
+
+    /// Inserts a blank lines allowed between comments context change.
+    pub fn blank_lines_allowed_between_comments(&mut self) {
+        self.0.push(PreToken::LineSpacingPolicy(
+            LineSpacingPolicy::BeforeComments,
+        ));
+    }
+
+    /// Inserts any preceding trivia into the stream.
+    fn push_preceding_trivia(&mut self, token: &wdl_ast::Token) {
+        assert!(!token.syntax().kind().is_trivia());
+        let preceding_trivia = token.syntax().preceding_trivia();
+        for token in preceding_trivia {
+            match token.kind() {
+                SyntaxKind::Whitespace => {
+                    if !self.0.last().map_or(false, |t| {
+                        matches!(t, PreToken::BlankLine | PreToken::Trivia(Trivia::BlankLine))
+                    }) {
+                        self.0.push(PreToken::Trivia(Trivia::BlankLine));
+                    }
+                }
+                SyntaxKind::Comment => {
+                    let comment = PreToken::Trivia(Trivia::Comment(Comment::Preceding(
+                        token.text().trim_end().to_owned(),
+                    )));
+                    self.0.push(comment);
+                }
+                _ => unreachable!("unexpected trivia: {:?}", token),
+            };
+        }
+    }
+
+    /// Inserts any inline trivia into the stream.
+    fn push_inline_trivia(&mut self, token: &wdl_ast::Token) {
+        assert!(!token.syntax().kind().is_trivia());
+        if let Some(token) = token.syntax().inline_comment() {
+            let inline_comment = PreToken::Trivia(Trivia::Comment(Comment::Inline(
+                token.text().trim_end().to_owned(),
+            )));
+            self.0.push(inline_comment);
+        }
+    }
+
+    /// Pushes an AST token into the stream.
+    ///
+    /// This will also push any preceding or inline trivia into the stream.
+    /// Any token may have preceding or inline trivia, unless that token is
+    /// itself trivia (i.e. trivia cannot have trivia).
+    pub fn push_ast_token(&mut self, token: &wdl_ast::Token) {
+        self.push_preceding_trivia(token);
+        self.0.push(PreToken::Literal(
+            token.syntax().text().to_owned(),
+            token.syntax().kind(),
+        ));
+        self.push_inline_trivia(token);
+    }
+
+    /// Pushes a literal string into the stream in place of an AST token.
+    /// This will insert any trivia that would have been inserted with the AST
+    /// token.
+    pub fn push_literal_in_place_of_token(&mut self, token: &wdl_ast::Token, replacement: String) {
+        self.push_preceding_trivia(token);
+        self.0
+            .push(PreToken::Literal(replacement, token.syntax().kind()));
+        self.push_inline_trivia(token);
+    }
+
+    /// Pushes a literal string into the stream.
+    /// This will not insert any trivia.
+    pub fn push_literal(&mut self, value: String, kind: SyntaxKind) {
+        self.0.push(PreToken::Literal(value, kind));
+    }
+
+    /// Returns the kind of the last literal token in the stream.
+    pub fn last_literal_kind(&self) -> Option<SyntaxKind> {
+        match self.0.last_chunk::<3>() {
+            Some([_, _, PreToken::Literal(_, kind)]) => Some(*kind),
+            Some([_, PreToken::Literal(_, kind), _]) => Some(*kind),
+            Some([PreToken::Literal(_, kind), _, _]) => Some(*kind),
+            _ => None,
+        }
+    }
+}
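Reviewer note (not part of the diff): because `push_ast_token` re-attaches preceding and inline trivia, comments should survive a full parse/format round trip. A sketch mirroring the crate's own smoke tests, with a hypothetical WDL input:

```rust
use wdl_ast::{Document, Node};
use wdl_format::Formatter;
use wdl_format::element::node::AstNodeFormatExt as _;

fn main() {
    let (document, diagnostics) = Document::parse(
        "version 1.2 # an inline comment

# a preceding comment
task foo {
}
",
    );
    assert!(diagnostics.is_empty());

    let element = Node::Ast(document.ast().into_v1().unwrap()).into_format_element();
    let formatted = Formatter::default()
        .format(&element)
        .expect("formatting should succeed");
    assert!(formatted.contains("# a preceding comment"));
}
```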
diff --git a/wdl-format/src/v1.rs b/wdl-format/src/v1.rs
new file mode 100644
index 00000000..a211a620
--- /dev/null
+++ b/wdl-format/src/v1.rs
@@ -0,0 +1,309 @@
+//! Formatting of WDL v1.x elements.
+
+use wdl_ast::AstToken;
+use wdl_ast::SyntaxKind;
+
+pub mod decl;
+pub mod expr;
+pub mod import;
+pub mod meta;
+pub mod r#struct;
+pub mod task;
+pub mod workflow;
+
+use crate::PreToken;
+use crate::TokenStream;
+use crate::Writable as _;
+use crate::element::FormatElement;
+
+/// Formats an [`Ast`](wdl_ast::Ast).
+pub fn format_ast(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("AST children");
+
+    let version_statement = children.next().expect("version statement");
+    assert!(version_statement.element().kind() == SyntaxKind::VersionStatementNode);
+    (&version_statement).write(stream);
+
+    stream.blank_line();
+
+    let mut imports = Vec::new();
+    let mut remainder = Vec::new();
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::ImportStatementNode => imports.push(child),
+            _ => remainder.push(child),
+        }
+    }
+
+    imports.sort_by(|a, b| {
+        let a = a
+            .element()
+            .as_node()
+            .expect("import statement node")
+            .as_import_statement()
+            .expect("import statement");
+        let b = b
+            .element()
+            .as_node()
+            .expect("import statement node")
+            .as_import_statement()
+            .expect("import statement");
+        let a_uri = a.uri().text().expect("import uri");
+        let b_uri = b.uri().text().expect("import uri");
+        a_uri.as_str().cmp(b_uri.as_str())
+    });
+
+    stream.blank_lines_allowed_between_comments();
+    for import in imports {
+        (&import).write(stream);
+    }
+
+    stream.blank_line();
+
+    for child in remainder {
+        (&child).write(stream);
+        stream.blank_line();
+    }
+}
+
+/// Formats a [`VersionStatement`](wdl_ast::VersionStatement).
+pub fn format_version_statement(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("version statement children") {
+        (&child).write(stream);
+        stream.end_word();
+    }
+    stream.end_line();
+}
+
+/// Formats an [`InputSection`](wdl_ast::v1::InputSection).
+pub fn format_input_section(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("input section children");
+
+    let input_keyword = children.next().expect("input section input keyword");
+    assert!(input_keyword.element().kind() == SyntaxKind::InputKeyword);
+    (&input_keyword).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("input section open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    let mut inputs = Vec::new();
+    let mut close_brace = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::BoundDeclNode | SyntaxKind::UnboundDeclNode => inputs.push(child),
+            SyntaxKind::CloseBrace => close_brace = Some(child),
+            _ => panic!("unexpected input section child"),
+        }
+    }
+
+    // TODO: sort inputs
+    for input in inputs {
+        (&input).write(stream);
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("input section close brace")).write(stream);
+    stream.end_line();
+}
+
+/// Formats an [`OutputSection`](wdl_ast::v1::OutputSection).
+pub fn format_output_section(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("output section children");
+
+    let output_keyword = children.next().expect("output keyword");
+    assert!(output_keyword.element().kind() == SyntaxKind::OutputKeyword);
+    (&output_keyword).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("output section open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    for child in children {
+        if child.element().kind() == SyntaxKind::CloseBrace {
+            stream.decrement_indent();
+        } else {
+            assert!(child.element().kind() == SyntaxKind::BoundDeclNode);
+        }
+        (&child).write(stream);
+        stream.end_line();
+    }
+}
+
+/// Formats a [`LiteralInputItem`](wdl_ast::v1::LiteralInputItem).
+pub fn format_literal_input_item(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("literal input item children");
+
+    let key = children.next().expect("literal input item key");
+    assert!(key.element().kind() == SyntaxKind::Ident);
+    (&key).write(stream);
+
+    let colon = children.next().expect("literal input item colon");
+    assert!(colon.element().kind() == SyntaxKind::Colon);
+    (&colon).write(stream);
+    stream.end_word();
+
+    let hints_node = children.next().expect("literal input item hints node");
+    assert!(hints_node.element().kind() == SyntaxKind::LiteralHintsNode);
+    (&hints_node).write(stream);
+
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`LiteralInput`](wdl_ast::v1::LiteralInput).
+pub fn format_literal_input(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("literal input children");
+
+    let input_keyword = children.next().expect("literal input keyword");
+    assert!(input_keyword.element().kind() == SyntaxKind::InputKeyword);
+    (&input_keyword).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("literal input open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    for child in children {
+        if child.element().kind() == SyntaxKind::CloseBrace {
+            stream.decrement_indent();
+        } else {
+            assert!(child.element().kind() == SyntaxKind::LiteralInputItemNode);
+        }
+        (&child).write(stream);
+    }
+    stream.end_line();
+}
+
+/// Formats a [`LiteralHintsItem`](wdl_ast::v1::LiteralHintsItem).
+pub fn format_literal_hints_item(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("literal hints item children");
+
+    let key = children.next().expect("literal hints item key");
+    assert!(key.element().kind() == SyntaxKind::Ident);
+    (&key).write(stream);
+
+    let colon = children.next().expect("literal hints item colon");
+    assert!(colon.element().kind() == SyntaxKind::Colon);
+    (&colon).write(stream);
+    stream.end_word();
+
+    let value = children.next().expect("literal hints item value");
+    (&value).write(stream);
+
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`LiteralHints`](wdl_ast::v1::LiteralHints).
+pub fn format_literal_hints(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("literal hints children");
+
+    let hints_keyword = children.next().expect("literal hints keyword");
+    assert!(hints_keyword.element().kind() == SyntaxKind::HintsKeyword);
+    (&hints_keyword).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("literal hints open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    let mut items = Vec::new();
+    let mut commas = Vec::new();
+    let mut close_brace = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::LiteralHintsItemNode => items.push(child),
+            SyntaxKind::Comma => commas.push(child),
+            SyntaxKind::CloseBrace => close_brace = Some(child),
+            _ => panic!("unexpected literal hints child"),
+        }
+    }
+
+    let mut commas = commas.iter();
+    for item in items {
+        (&item).write(stream);
+        if let Some(comma) = commas.next() {
+            (comma).write(stream);
+        } else {
+            stream.push_literal(",".to_string(), SyntaxKind::Comma);
+        }
+        stream.end_line();
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("literal hints close brace")).write(stream);
+}
+
+/// Formats a [`LiteralOutputItem`](wdl_ast::v1::LiteralOutputItem).
+pub fn format_literal_output_item(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element
+        .children()
+        .expect("literal output item children")
+        .peekable();
+
+    for child in children.by_ref() {
+        if matches!(child.element().kind(), SyntaxKind::Ident | SyntaxKind::Dot) {
+            (&child).write(stream);
+        } else {
+            assert!(child.element().kind() == SyntaxKind::Colon);
+            (&child).write(stream);
+            stream.end_word();
+            break;
+        }
+    }
+
+    let value = children.next().expect("literal output item value");
+    (&value).write(stream);
+
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`LiteralOutput`](wdl_ast::v1::LiteralOutput).
+pub fn format_literal_output(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("literal output children");
+
+    let output_keyword = children.next().expect("literal output keyword");
+    assert!(output_keyword.element().kind() == SyntaxKind::OutputKeyword);
+    (&output_keyword).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("literal output open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    let mut items = Vec::new();
+    let mut commas = Vec::new();
+    let mut close_brace = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::LiteralOutputItemNode => items.push(child),
+            SyntaxKind::Comma => commas.push(child),
+            SyntaxKind::CloseBrace => close_brace = Some(child),
+            _ => panic!("unexpected literal output child"),
+        }
+    }
+
+    let mut commas = commas.iter();
+    for item in items {
+        (&item).write(stream);
+        if let Some(comma) = commas.next() {
+            (comma).write(stream);
+        } else {
+            stream.push_literal(",".to_string(), SyntaxKind::Comma);
+        }
+        stream.end_line();
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("literal output close brace")).write(stream);
+}
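Reviewer note (not part of the diff): `format_ast` above orders import statements by a lexicographic comparison of their URI text before writing them out. The comparison reduces to a plain string sort, sketched here with hypothetical URIs:

```rust
fn sort_import_uris(mut uris: Vec<String>) -> Vec<String> {
    // Mirrors the `sort_by` in `format_ast`: compare the raw URI strings.
    uris.sort_by(|a, b| a.as_str().cmp(b.as_str()));
    uris
}

fn main() {
    let sorted = sort_import_uris(vec!["lib/b.wdl".into(), "lib/a.wdl".into()]);
    assert_eq!(sorted, vec!["lib/a.wdl".to_string(), "lib/b.wdl".to_string()]);
}
```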
diff --git a/wdl-format/src/v1/decl.rs b/wdl-format/src/v1/decl.rs
new file mode 100644
index 00000000..6b149942
--- /dev/null
+++ b/wdl-format/src/v1/decl.rs
@@ -0,0 +1,77 @@
+//! Formatting functions for declarations.
+
+use wdl_ast::SyntaxKind;
+
+use crate::PreToken;
+use crate::TokenStream;
+use crate::Writable as _;
+use crate::element::FormatElement;
+
+/// Formats a [`PrimitiveType`](wdl_ast::v1::PrimitiveType).
+pub fn format_primitive_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("primitive type children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats an [`ArrayType`](wdl_ast::v1::ArrayType).
+pub fn format_array_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("array type children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats a [`MapType`](wdl_ast::v1::MapType).
+pub fn format_map_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("map type children") {
+        (&child).write(stream);
+        if child.element().kind() == SyntaxKind::Comma {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats an [`ObjectType`](wdl_ast::v1::ObjectType).
+pub fn format_object_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("object type children");
+    let object_keyword = children.next().expect("object type object keyword");
+    assert!(object_keyword.element().kind() == SyntaxKind::ObjectTypeKeyword);
+    (&object_keyword).write(stream);
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`PairType`](wdl_ast::v1::PairType).
+pub fn format_pair_type(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("pair type children") {
+        (&child).write(stream);
+        if child.element().kind() == SyntaxKind::Comma {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`TypeRef`](wdl_ast::v1::TypeRef).
+pub fn format_type_ref(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("type ref children");
+    let t = children.next().expect("type ref type");
+    (&t).write(stream);
+    assert!(children.next().is_none());
+}
+
+/// Formats an [`UnboundDecl`](wdl_ast::v1::UnboundDecl).
+pub fn format_unbound_decl(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("unbound decl children") {
+        (&child).write(stream);
+        stream.end_word();
+    }
+    stream.end_line();
+}
+
+/// Formats a [`BoundDecl`](wdl_ast::v1::BoundDecl).
+pub fn format_bound_decl(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    for child in element.children().expect("bound decl children") {
+        (&child).write(stream);
+        stream.end_word();
+    }
+    stream.end_line();
+}
diff --git a/wdl-format/src/v1/expr.rs b/wdl-format/src/v1/expr.rs
new file mode 100644
index 00000000..1285c5f8
--- /dev/null
+++ b/wdl-format/src/v1/expr.rs
@@ -0,0 +1,715 @@
+//! Formatting of WDL v1.x expression elements.
+
+use wdl_ast::SyntaxKind;
+
+use crate::PreToken;
+use crate::TokenStream;
+use crate::Writable as _;
+use crate::element::FormatElement;
+
+/// Formats a [`SepOption`](wdl_ast::v1::SepOption).
+pub fn format_sep_option(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("sep option children");
+
+    let sep_keyword = children.next().expect("sep keyword");
+    assert!(sep_keyword.element().kind() == SyntaxKind::Ident);
+    (&sep_keyword).write(stream);
+
+    let equals = children.next().expect("sep equals");
+    assert!(equals.element().kind() == SyntaxKind::Assignment);
+    (&equals).write(stream);
+
+    let sep_value = children.next().expect("sep value");
+    assert!(sep_value.element().kind() == SyntaxKind::LiteralStringNode);
+    (&sep_value).write(stream);
+    stream.end_word();
+
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`DefaultOption`](wdl_ast::v1::DefaultOption).
+pub fn format_default_option(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("default option children");
+
+    let default_keyword = children.next().expect("default keyword");
+    assert!(default_keyword.element().kind() == SyntaxKind::Ident);
+    (&default_keyword).write(stream);
+
+    let equals = children.next().expect("default equals");
+    assert!(equals.element().kind() == SyntaxKind::Assignment);
+    (&equals).write(stream);
+
+    let default_value = children.next().expect("default value");
+    (&default_value).write(stream);
+    stream.end_word();
+
+    assert!(children.next().is_none());
+}
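Reviewer note (not part of the diff): in both option formatters above, `end_word` is emitted only after the value, so the option name, the `=`, and the value stay glued into a single word with one space following. A toy illustration of the resulting text (hypothetical helper, not crate API):

```rust
/// Renders a placeholder option the way the streams above lay it out:
/// `name`, `=`, and `value` back to back, then a single word break.
fn render_option(name: &str, value: &str) -> String {
    format!("{name}={value} ")
}

fn main() {
    assert_eq!(render_option("sep", "\", \""), "sep=\", \" ");
    assert_eq!(render_option("default", "\"foo\""), "default=\"foo\" ");
}
```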
+/// Formats a [`TrueFalseOption`](wdl_ast::v1::TrueFalseOption).
+pub fn format_true_false_option(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("true false option children");
+
+    let first_keyword = children.next().expect("true false option first keyword");
+    let first_keyword_kind = first_keyword.element().kind();
+    assert!(
+        first_keyword_kind == SyntaxKind::TrueKeyword
+            || first_keyword_kind == SyntaxKind::FalseKeyword
+    );
+
+    let first_equals = children.next().expect("true false option first equals");
+    assert!(first_equals.element().kind() == SyntaxKind::Assignment);
+
+    let first_value = children.next().expect("true false option first value");
+
+    let second_keyword = children.next().expect("true false option second keyword");
+    let second_keyword_kind = second_keyword.element().kind();
+    assert!(
+        second_keyword_kind == SyntaxKind::TrueKeyword
+            || second_keyword_kind == SyntaxKind::FalseKeyword
+    );
+
+    let second_equals = children.next().expect("true false option second equals");
+    assert!(second_equals.element().kind() == SyntaxKind::Assignment);
+
+    let second_value = children.next().expect("true false option second value");
+
+    if first_keyword_kind == SyntaxKind::TrueKeyword {
+        assert!(second_keyword_kind == SyntaxKind::FalseKeyword);
+        (&first_keyword).write(stream);
+        (&first_equals).write(stream);
+        (&first_value).write(stream);
+        stream.end_word();
+        (&second_keyword).write(stream);
+        (&second_equals).write(stream);
+        (&second_value).write(stream);
+    } else {
+        assert!(second_keyword_kind == SyntaxKind::TrueKeyword);
+        (&second_keyword).write(stream);
+        (&second_equals).write(stream);
+        (&second_value).write(stream);
+        stream.end_word();
+        (&first_keyword).write(stream);
+        (&first_equals).write(stream);
+        (&first_value).write(stream);
+    }
+    stream.end_word();
+
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`Placeholder`](wdl_ast::v1::Placeholder).
+pub fn format_placeholder(element: &FormatElement, stream: &mut TokenStream<PreToken>) {
+    let mut children = element.children().expect("placeholder children");
+
+    let open = children.next().expect("placeholder open");
+    assert!(open.element().kind() == SyntaxKind::PlaceholderOpen);
+    let syntax = open.element().syntax();
+    let text = syntax.as_token().expect("token").text();
+    match text {
+        "${" => {
+            stream.push_literal_in_place_of_token(
+                open.element().as_token().expect("token"),
+                "~{".to_owned(),
+            );
+        }
+        "~{" => {
+            (&open).write(stream);
+        }
+        _ => {
+            unreachable!("unexpected placeholder open: {:?}", text);
+        }
+    }
+
+    for child in children {
+        (&child).write(stream);
+    }
+}
+
+/// Formats a [`LiteralString`](wdl_ast::v1::LiteralString).
+pub fn format_literal_string(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("literal string children") { + match child.element().kind() { + SyntaxKind::SingleQuote => { + stream.push_literal_in_place_of_token( + child.element().as_token().expect("token"), + "\"".to_owned(), + ); + } + SyntaxKind::DoubleQuote => { + (&child).write(stream); + } + SyntaxKind::LiteralStringText => { + let mut replacement = String::new(); + let syntax = child.element().syntax(); + let mut chars = syntax.as_token().expect("token").text().chars().peekable(); + let mut prev_c = None; + while let Some(c) = chars.next() { + match c { + '\\' => { + if let Some(next_c) = chars.peek() { + if *next_c == '\'' { + // Do not write this backslash + prev_c = Some(c); + continue; + } + } + replacement.push(c); + } + '"' => { + if let Some(pc) = prev_c { + if pc != '\\' { + replacement.push('\\'); + } + } + replacement.push(c); + } + _ => { + replacement.push(c); + } + } + prev_c = Some(c); + } + + stream.push_literal_in_place_of_token( + child.element().as_token().expect("token"), + replacement, + ); + } + SyntaxKind::PlaceholderNode => { + (&child).write(stream); + } + _ => { + unreachable!( + "unexpected child in literal string: {:?}", + child.element().kind() + ); + } + } + } +} + +/// Formats a [`LiteralNone`](wdl_ast::v1::LiteralNone). +pub fn format_literal_none(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("literal none children"); + let none = children.next().expect("literal none token"); + assert!(none.element().kind() == SyntaxKind::NoneKeyword); + (&none).write(stream); + assert!(children.next().is_none()); +} + +/// Formats a [`LiteralPair`](wdl_ast::v1::LiteralPair). +pub fn format_literal_pair(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("literal pair children"); + + let open_paren = children.next().expect("literal pair open paren"); + assert!(open_paren.element().kind() == SyntaxKind::OpenParen); + (&open_paren).write(stream); + + let left = children.next().expect("literal pair left"); + (&left).write(stream); + + let comma = children.next().expect("literal pair comma"); + assert!(comma.element().kind() == SyntaxKind::Comma); + (&comma).write(stream); + stream.end_word(); + + let right = children.next().expect("literal pair right"); + (&right).write(stream); + + let close_paren = children.next().expect("literal pair close paren"); + assert!(close_paren.element().kind() == SyntaxKind::CloseParen); + (&close_paren).write(stream); + assert!(children.next().is_none()); +} + +/// Formats a [`LiteralBoolean`](wdl_ast::v1::LiteralBoolean). +pub fn format_literal_boolean(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("literal boolean children"); + let bool = children.next().expect("literal boolean token"); + (&bool).write(stream); + assert!(children.next().is_none()); +} + +/// Formats a [`NegationExpr`](wdl_ast::v1::NegationExpr). 
+pub fn format_negation_expr(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("negation expr children"); + let minus = children.next().expect("negation expr minus"); + assert!(minus.element().kind() == SyntaxKind::Minus); + (&minus).write(stream); + + let expr = children.next().expect("negation expr expr"); + (&expr).write(stream); + assert!(children.next().is_none()); +} + +/// Formats a [`LiteralInteger`](wdl_ast::v1::LiteralInteger). +pub fn format_literal_integer(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("literal integer children") { + (&child).write(stream); + } +} + +/// Formats a [`LiteralFloat`](wdl_ast::v1::LiteralFloat). +pub fn format_literal_float(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("literal float children") { + (&child).write(stream); + } +} + +/// Formats a [`NameRef`](wdl_ast::v1::NameRef). +pub fn format_name_ref(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("name ref children"); + let name = children.next().expect("name ref name"); + (&name).write(stream); + assert!(children.next().is_none()); +} + +/// Formats a [`LiteralArray`](wdl_ast::v1::LiteralArray). +pub fn format_literal_array(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("literal array children"); + + let open_bracket = children.next().expect("literal array open bracket"); + assert!(open_bracket.element().kind() == SyntaxKind::OpenBracket); + (&open_bracket).write(stream); + + let mut items = Vec::new(); + let mut commas = Vec::new(); + let mut close_bracket = None; + + for child in children { + match child.element().kind() { + SyntaxKind::CloseBracket => { + close_bracket = Some(child.to_owned()); + } + SyntaxKind::Comma => { + commas.push(child.to_owned()); + } + _ => { + items.push(child.to_owned()); + } + } + } + + let empty = items.is_empty(); + if !empty { + stream.increment_indent(); + } + let mut commas = commas.iter(); + for item in items { + (&item).write(stream); + if let Some(comma) = commas.next() { + (comma).write(stream); + } else { + stream.push_literal(",".to_string(), SyntaxKind::Comma); + } + stream.end_line(); + } + + if !empty { + stream.decrement_indent(); + } + (&close_bracket.expect("literal array close bracket")).write(stream); +} + +/// Formats a [`LiteralMapItem`](wdl_ast::v1::LiteralMapItem). +pub fn format_literal_map_item(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("literal map item children"); + + let key = children.next().expect("literal map item key"); + (&key).write(stream); + + let colon = children.next().expect("literal map item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("literal map item value"); + (&value).write(stream); + assert!(children.next().is_none()); +} + +/// Formats a [`LiteralMap`](wdl_ast::v1::LiteralMap). 
+pub fn format_literal_map(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("literal map children");
+
+    let open_brace = children.next().expect("literal map open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    let mut items = Vec::new();
+    let mut commas = Vec::new();
+    let mut close_brace = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::CloseBrace => {
+                close_brace = Some(child.to_owned());
+            }
+            SyntaxKind::Comma => {
+                commas.push(child.to_owned());
+            }
+            _ => {
+                items.push(child.to_owned());
+            }
+        }
+    }
+
+    let mut commas = commas.iter();
+    for item in items {
+        (&item).write(stream);
+        if let Some(comma) = commas.next() {
+            (comma).write(stream);
+        } else {
+            stream.push_literal(",".to_string(), SyntaxKind::Comma);
+        }
+        stream.end_line();
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("literal map close brace")).write(stream);
+}
+
+/// Formats a [`LiteralObjectItem`](wdl_ast::v1::LiteralObjectItem).
+pub fn format_literal_object_item(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("literal object item children");
+
+    let key = children.next().expect("literal object item key");
+    assert!(key.element().kind() == SyntaxKind::Ident);
+    (&key).write(stream);
+
+    let colon = children.next().expect("literal object item colon");
+    assert!(colon.element().kind() == SyntaxKind::Colon);
+    (&colon).write(stream);
+    stream.end_word();
+
+    let value = children.next().expect("literal object item value");
+    (&value).write(stream);
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`LiteralObject`](wdl_ast::v1::LiteralObject).
+pub fn format_literal_object(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("literal object children");
+
+    let open_brace = children.next().expect("literal object open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    let mut members = Vec::new();
+    let mut commas = Vec::new();
+    let mut close_brace = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::CloseBrace => {
+                close_brace = Some(child.to_owned());
+            }
+            SyntaxKind::Comma => {
+                commas.push(child.to_owned());
+            }
+            _ => {
+                members.push(child.to_owned());
+            }
+        }
+    }
+
+    let mut commas = commas.iter();
+    for member in members {
+        (&member).write(stream);
+        if let Some(comma) = commas.next() {
+            (comma).write(stream);
+        } else {
+            stream.push_literal(",".to_string(), SyntaxKind::Comma);
+        }
+        stream.end_line();
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("literal object close brace")).write(stream);
+}
+
+/// Formats an [`AccessExpr`](wdl_ast::v1::AccessExpr).
+pub fn format_access_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("access expr children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats a [`CallExpr`](wdl_ast::v1::CallExpr).
+pub fn format_call_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("call expr children") {
+        (&child).write(stream);
+        if child.element().kind() == SyntaxKind::Comma {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats an [`IndexExpr`](wdl_ast::v1::IndexExpr).
+pub fn format_index_expr(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("index expr children") { + (&child).write(stream); + } +} + +/// Formats an [`AdditionExpr`](wdl_ast::v1::AdditionExpr). +pub fn format_addition_expr(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("addition expr children") { + let should_end_word = child.element().kind() == SyntaxKind::Plus; + if should_end_word { + stream.end_word(); + } + (&child).write(stream); + if should_end_word { + stream.end_word(); + } + } +} + +/// Formats a [`SubtractionExpr`](wdl_ast::v1::SubtractionExpr). +pub fn format_subtraction_expr(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("subtraction expr children") { + let should_end_word = child.element().kind() == SyntaxKind::Minus; + if should_end_word { + stream.end_word(); + } + (&child).write(stream); + if should_end_word { + stream.end_word(); + } + } +} + +/// Formats a [`MultiplicationExpr`](wdl_ast::v1::MultiplicationExpr). +pub fn format_multiplication_expr(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("multiplication expr children") { + let should_end_word = child.element().kind() == SyntaxKind::Asterisk; + if should_end_word { + stream.end_word(); + } + (&child).write(stream); + if should_end_word { + stream.end_word(); + } + } +} + +/// Formats a [`DivisionExpr`](wdl_ast::v1::DivisionExpr). +pub fn format_division_expr(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("division expr children") { + let should_end_word = child.element().kind() == SyntaxKind::Slash; + if should_end_word { + stream.end_word(); + } + (&child).write(stream); + if should_end_word { + stream.end_word(); + } + } +} + +/// Formats a [`ModuloExpr`](wdl_ast::v1::ModuloExpr). +pub fn format_modulo_expr(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("modulo expr children") { + let should_end_word = child.element().kind() == SyntaxKind::Percent; + if should_end_word { + stream.end_word(); + } + (&child).write(stream); + if should_end_word { + stream.end_word(); + } + } +} + +/// Formats an [`ExponentiationExpr`](wdl_ast::v1::ExponentiationExpr). +pub fn format_exponentiation_expr(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("exponentiation expr children") { + let should_end_word = child.element().kind() == SyntaxKind::Exponentiation; + if should_end_word { + stream.end_word(); + } + (&child).write(stream); + if should_end_word { + stream.end_word(); + } + } +} + +/// Formats a [`LogicalAndExpr`](wdl_ast::v1::LogicalAndExpr). +pub fn format_logical_and_expr(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("logical and expr children") { + let should_end_word = child.element().kind() == SyntaxKind::LogicalAnd; + if should_end_word { + stream.end_word(); + } + (&child).write(stream); + if should_end_word { + stream.end_word(); + } + } +} + +/// Formats a [`LogicalNotExpr`](wdl_ast::v1::LogicalNotExpr). 
+pub fn format_logical_not_expr(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("logical not expr children");
+    let not = children.next().expect("logical not expr not");
+    assert!(not.element().kind() == SyntaxKind::Exclamation);
+    (&not).write(stream);
+
+    let expr = children.next().expect("logical not expr expr");
+    (&expr).write(stream);
+    assert!(children.next().is_none());
+}
+
+/// Formats a [`LogicalOrExpr`](wdl_ast::v1::LogicalOrExpr).
+pub fn format_logical_or_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("logical or expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::LogicalOr;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats an [`EqualityExpr`](wdl_ast::v1::EqualityExpr).
+pub fn format_equality_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("equality expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::Equal;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats an [`InequalityExpr`](wdl_ast::v1::InequalityExpr).
+pub fn format_inequality_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("inequality expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::NotEqual;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`LessExpr`](wdl_ast::v1::LessExpr).
+pub fn format_less_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("less expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::Less;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`LessEqualExpr`](wdl_ast::v1::LessEqualExpr).
+pub fn format_less_equal_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("less equal expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::LessEqual;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`GreaterExpr`](wdl_ast::v1::GreaterExpr).
+pub fn format_greater_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("greater expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::Greater;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
+
+/// Formats a [`GreaterEqualExpr`](wdl_ast::v1::GreaterEqualExpr).
+pub fn format_greater_equal_expr(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("greater equal expr children") {
+        let should_end_word = child.element().kind() == SyntaxKind::GreaterEqual;
+        if should_end_word {
+            stream.end_word();
+        }
+        (&child).write(stream);
+        if should_end_word {
+            stream.end_word();
+        }
+    }
+}
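+
+// A minimal sketch of how the expression rules above compose, assuming the
+// public entry points exercised by `wdl-format/tests/format.rs`
+// (`Document::parse`, `AstNodeFormatExt::into_format_element`, and
+// `Formatter::format`); the WDL snippet and assertions are illustrative only.
+#[cfg(test)]
+mod expr_example {
+    use wdl_ast::Document;
+    use wdl_ast::Node;
+
+    use crate::Formatter;
+    use crate::element::node::AstNodeFormatExt;
+
+    #[test]
+    fn expressions_are_normalized() {
+        // Unspaced binary operators gain surrounding spaces, `${` placeholder
+        // openers are rewritten to `~{`, and single-quoted strings become
+        // double-quoted.
+        let source = "version 1.1\n\nworkflow w {\n    Int x = 1+2\n    String s = 'a${x}b'\n}\n";
+        let (document, diagnostics) = Document::parse(source);
+        assert!(diagnostics.is_empty());
+
+        let element = Node::Ast(document.ast().into_v1().unwrap()).into_format_element();
+        let formatted = Formatter::default().format(&element).expect("should format");
+
+        assert!(formatted.contains("1 + 2"));
+        assert!(formatted.contains("~{x}"));
+        assert!(!formatted.contains('\''));
+    }
+}
+
+/// Formats a [`ParenthesizedExpr`](wdl_ast::v1::ParenthesizedExpr).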
+pub fn format_parenthesized_expr(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("parenthesized expr children") { + (&child).write(stream); + } +} + +/// Formats an [`IfExpr`](wdl_ast::v1::IfExpr). +pub fn format_if_expr(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("if expr children"); + + let last = stream.last_literal_kind(); + + // Nested `if` expressions are a special case where we don't want to add + // parentheses or increment the indent level. + // Otherwise, we need to add parentheses and increment the indent if the last + // token is not an open parenthesis. + let nested_else_if = matches!(last, Some(SyntaxKind::ElseKeyword)); + let paren_needed = !matches!(last, Some(SyntaxKind::OpenParen)) && !nested_else_if; + + if paren_needed { + stream.push_literal("(".to_string(), SyntaxKind::OpenParen); + } + if !nested_else_if { + stream.increment_indent(); + } + + let if_keyword = children.next().expect("if keyword"); + assert!(if_keyword.element().kind() == SyntaxKind::IfKeyword); + (&if_keyword).write(stream); + stream.end_word(); + + for child in children { + let kind = child.element().kind(); + if matches!(kind, SyntaxKind::ElseKeyword | SyntaxKind::ThenKeyword) { + stream.end_line(); + } + (&child).write(stream); + if matches!(kind, SyntaxKind::ElseKeyword | SyntaxKind::ThenKeyword) { + stream.end_word(); + } + } + + if !nested_else_if { + stream.decrement_indent(); + } + if paren_needed { + stream.push_literal(")".to_string(), SyntaxKind::CloseParen); + } +} diff --git a/wdl-format/src/v1/import.rs b/wdl-format/src/v1/import.rs new file mode 100644 index 00000000..88f317b5 --- /dev/null +++ b/wdl-format/src/v1/import.rs @@ -0,0 +1,24 @@ +//! Formatting for imports. + +use crate::PreToken; +use crate::TokenStream; +use crate::Writable as _; +use crate::element::FormatElement; + +/// Formats an [`ImportAlias`](wdl_ast::v1::ImportAlias). +pub fn format_import_alias(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("import alias children") { + (&child).write(stream); + stream.end_word(); + } +} + +/// Formats an [`ImportStatement`](wdl_ast::v1::ImportStatement). +pub fn format_import_statement(element: &FormatElement, stream: &mut TokenStream) { + for child in element.children().expect("import statement children") { + (&child).write(stream); + stream.end_word(); + } + + stream.end_line(); +} diff --git a/wdl-format/src/v1/meta.rs b/wdl-format/src/v1/meta.rs new file mode 100644 index 00000000..c7966bf5 --- /dev/null +++ b/wdl-format/src/v1/meta.rs @@ -0,0 +1,224 @@ +//! Formatting functions for meta and parameter_meta sections. + +use wdl_ast::SyntaxKind; + +use crate::PreToken; +use crate::TokenStream; +use crate::Writable as _; +use crate::element::FormatElement; + +/// Formats a [`LiteralNull`](wdl_ast::v1::LiteralNull). +pub fn format_literal_null(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("literal null children"); + let null = children.next().expect("literal null token"); + (&null).write(stream); + assert!(children.next().is_none()); +} + +/// Formats a [`MetadataArray`](wdl_ast::v1::MetadataArray). 
+pub fn format_metadata_array(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("metadata array children");
+
+    let open_bracket = children.next().expect("metadata array open bracket");
+    assert!(open_bracket.element().kind() == SyntaxKind::OpenBracket);
+    (&open_bracket).write(stream);
+
+    let mut items = Vec::new();
+    let mut commas = Vec::new();
+    let mut close_bracket = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::Comma => {
+                commas.push(child.clone());
+            }
+            SyntaxKind::CloseBracket => {
+                close_bracket = Some(child.clone());
+            }
+            _ => {
+                items.push(child.clone());
+            }
+        }
+    }
+
+    let empty = items.is_empty();
+    if !empty {
+        stream.increment_indent();
+    }
+
+    let mut commas = commas.iter();
+    for item in items {
+        (&item).write(stream);
+        if let Some(comma) = commas.next() {
+            (comma).write(stream);
+        } else {
+            stream.push_literal(",".to_string(), SyntaxKind::Comma);
+        }
+        stream.end_line();
+    }
+
+    if !empty {
+        stream.decrement_indent();
+    }
+    (&close_bracket.expect("metadata array close bracket")).write(stream);
+}
+
+/// Formats a [`MetadataObject`](wdl_ast::v1::MetadataObject).
+pub fn format_metadata_object(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("metadata object children");
+
+    let open_brace = children.next().expect("metadata object open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+
+    let mut items = Vec::new();
+    let mut commas = Vec::new();
+    let mut close_brace = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::MetadataObjectItemNode => {
+                items.push(child.clone());
+            }
+            SyntaxKind::Comma => {
+                commas.push(child.clone());
+            }
+            SyntaxKind::CloseBrace => {
+                close_brace = Some(child.clone());
+            }
+            _ => unreachable!(
+                "unexpected metadata object child: {:?}",
+                child.element().kind()
+            ),
+        }
+    }
+
+    let empty = items.is_empty();
+    if !empty {
+        stream.increment_indent();
+    }
+
+    let mut commas = commas.iter();
+    for item in items {
+        (&item).write(stream);
+        if let Some(comma) = commas.next() {
+            (comma).write(stream);
+        } else {
+            stream.push_literal(",".to_string(), SyntaxKind::Comma);
+        }
+        stream.end_line();
+    }
+
+    if !empty {
+        stream.decrement_indent();
+    }
+    (&close_brace.expect("metadata object close brace")).write(stream);
+}
+
+/// Formats a [`MetadataObjectItem`](wdl_ast::v1::MetadataObjectItem).
+pub fn format_metadata_object_item(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("metadata object item children");
+
+    let key = children.next().expect("metadata object item key");
+    assert!(key.element().kind() == SyntaxKind::Ident);
+    (&key).write(stream);
+
+    let colon = children.next().expect("metadata object item colon");
+    assert!(colon.element().kind() == SyntaxKind::Colon);
+    (&colon).write(stream);
+    stream.end_word();
+
+    let value = children.next().expect("metadata object item value");
+    (&value).write(stream);
+
+    assert!(children.next().is_none());
+}
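+
+// A minimal sketch of the trailing-comma behavior above, assuming the public
+// entry points exercised by `wdl-format/tests/format.rs`; the WDL snippet and
+// assertion are illustrative only. When the final item of a metadata array or
+// object lacks a comma in the source, one is synthesized via `push_literal`.
+#[cfg(test)]
+mod meta_example {
+    use wdl_ast::Document;
+    use wdl_ast::Node;
+
+    use crate::Formatter;
+    use crate::element::node::AstNodeFormatExt;
+
+    #[test]
+    fn metadata_items_gain_trailing_commas() {
+        let source =
+            "version 1.1\n\ntask t {\n    meta {\n        tags: [\"a\", \"b\"]\n    }\n    command <<<>>>\n}\n";
+        let (document, diagnostics) = Document::parse(source);
+        assert!(diagnostics.is_empty());
+
+        let element = Node::Ast(document.ast().into_v1().unwrap()).into_format_element();
+        let formatted = Formatter::default().format(&element).expect("should format");
+
+        // The last array item had no comma in the source but gets one here.
+        assert!(formatted.contains("\"b\","));
+    }
+}
+
+/// Formats a [`MetadataSection`](wdl_ast::v1::MetadataSection).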
+pub fn format_metadata_section(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("meta section children");
+
+    let meta_keyword = children.next().expect("meta keyword");
+    assert!(meta_keyword.element().kind() == SyntaxKind::MetaKeyword);
+    (&meta_keyword).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("metadata section open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    let mut items = Vec::new();
+    let mut close_brace = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::MetadataObjectItemNode => {
+                items.push(child.clone());
+            }
+            SyntaxKind::CloseBrace => {
+                close_brace = Some(child.clone());
+            }
+            _ => unreachable!(
+                "unexpected metadata section child: {:?}",
+                child.element().kind()
+            ),
+        }
+    }
+
+    for item in items {
+        (&item).write(stream);
+        stream.end_line();
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("metadata section close brace")).write(stream);
+    stream.end_line();
+}
+
+/// Formats a [`ParameterMetadataSection`](wdl_ast::v1::ParameterMetadataSection).
+pub fn format_parameter_metadata_section(
+    element: &FormatElement,
+    stream: &mut TokenStream,
+) {
+    let mut children = element.children().expect("parameter meta section children");
+
+    let parameter_meta_keyword = children.next().expect("parameter meta keyword");
+    assert!(parameter_meta_keyword.element().kind() == SyntaxKind::ParameterMetaKeyword);
+    (&parameter_meta_keyword).write(stream);
+    stream.end_word();
+
+    let open_brace = children
+        .next()
+        .expect("parameter metadata section open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    let mut items = Vec::new();
+    let mut close_brace = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::MetadataObjectItemNode => {
+                items.push(child.clone());
+            }
+            SyntaxKind::CloseBrace => {
+                close_brace = Some(child.clone());
+            }
+            _ => unreachable!(
+                "unexpected parameter metadata section child: {:?}",
+                child.element().kind()
+            ),
+        }
+    }
+
+    for item in items {
+        (&item).write(stream);
+        stream.end_line();
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("parameter metadata section close brace")).write(stream);
+    stream.end_line();
+}
diff --git a/wdl-format/src/v1/struct.rs b/wdl-format/src/v1/struct.rs
new file mode 100644
index 00000000..0b240102
--- /dev/null
+++ b/wdl-format/src/v1/struct.rs
@@ -0,0 +1,147 @@
+//! Formatting for structs.
+
+use wdl_ast::SyntaxKind;
+
+use crate::PreToken;
+use crate::TokenStream;
+use crate::Writable as _;
+use crate::element::FormatElement;
+
+/// Formats a [`StructDefinition`](wdl_ast::v1::StructDefinition).
+pub fn format_struct_definition(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("struct definition children");
+
+    let struct_keyword = children.next().expect("struct keyword");
+    assert!(struct_keyword.element().kind() == SyntaxKind::StructKeyword);
+    (&struct_keyword).write(stream);
+    stream.end_word();
+
+    let name = children.next().expect("struct name");
+    assert!(name.element().kind() == SyntaxKind::Ident);
+    (&name).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.end_line();
+    stream.increment_indent();
+
+    let mut meta = None;
+    let mut parameter_meta = None;
+    let mut members = Vec::new();
+    let mut close_brace = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::MetadataSectionNode => {
+                meta = Some(child.clone());
+            }
+            SyntaxKind::ParameterMetadataSectionNode => {
+                parameter_meta = Some(child.clone());
+            }
+            SyntaxKind::UnboundDeclNode => {
+                members.push(child.clone());
+            }
+            SyntaxKind::CloseBrace => {
+                close_brace = Some(child.clone());
+            }
+            _ => {
+                unreachable!(
+                    "unexpected child in struct definition: {:?}",
+                    child.element().kind()
+                );
+            }
+        }
+    }
+
+    if let Some(meta) = meta {
+        (&meta).write(stream);
+        stream.blank_line();
+    }
+
+    if let Some(parameter_meta) = parameter_meta {
+        (&parameter_meta).write(stream);
+        stream.blank_line();
+    }
+
+    for member in members {
+        (&member).write(stream);
+    }
+
+    stream.decrement_indent();
+    (&close_brace.expect("struct definition close brace")).write(stream);
+    stream.end_line();
+}
+
+/// Formats a [`LiteralStructItem`](wdl_ast::v1::LiteralStructItem).
+pub fn format_literal_struct_item(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("literal struct item children");
+
+    let key = children.next().expect("literal struct item key");
+    assert!(key.element().kind() == SyntaxKind::Ident);
+    (&key).write(stream);
+
+    let colon = children.next().expect("literal struct item colon");
+    assert!(colon.element().kind() == SyntaxKind::Colon);
+    (&colon).write(stream);
+    stream.end_word();
+
+    for child in children {
+        (&child).write(stream);
+    }
+}
+
+/// Formats a [`LiteralStruct`](wdl_ast::v1::LiteralStruct).
+pub fn format_literal_struct(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("literal struct children"); + + let name = children.next().expect("literal struct name"); + assert!(name.element().kind() == SyntaxKind::Ident); + (&name).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("literal struct open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + let mut members = Vec::new(); + let mut commas = Vec::new(); + let mut close_brace = None; + + for child in children { + match child.element().kind() { + SyntaxKind::LiteralStructItemNode => { + members.push(child.clone()); + } + SyntaxKind::Comma => { + commas.push(child.clone()); + } + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); + } + _ => { + unreachable!( + "unexpected child in literal struct: {:?}", + child.element().kind() + ); + } + } + } + + let mut commas = commas.iter(); + for member in members { + (&member).write(stream); + if let Some(comma) = commas.next() { + (comma).write(stream); + } else { + stream.push_literal(",".to_string(), SyntaxKind::Comma); + } + stream.end_line(); + } + + stream.decrement_indent(); + (&close_brace.expect("literal struct close brace")).write(stream); + stream.end_line(); +} diff --git a/wdl-format/src/v1/task.rs b/wdl-format/src/v1/task.rs new file mode 100644 index 00000000..d46928e6 --- /dev/null +++ b/wdl-format/src/v1/task.rs @@ -0,0 +1,386 @@ +//! Formatting for tasks. + +use wdl_ast::SyntaxKind; + +use crate::PreToken; +use crate::TokenStream; +use crate::Trivia; +use crate::Writable as _; +use crate::element::FormatElement; + +/// Formats a [`TaskDefinition`](wdl_ast::v1::TaskDefinition). 
+pub fn format_task_definition(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("task definition children");
+
+    stream.blank_lines_allowed_between_comments();
+
+    let task_keyword = children.next().expect("task keyword");
+    assert!(task_keyword.element().kind() == SyntaxKind::TaskKeyword);
+    (&task_keyword).write(stream);
+    stream.end_word();
+
+    let name = children.next().expect("task name");
+    assert!(name.element().kind() == SyntaxKind::Ident);
+    (&name).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.end_line();
+    stream.increment_indent();
+
+    let mut meta = None;
+    let mut parameter_meta = None;
+    let mut input = None;
+    let mut body = Vec::new();
+    let mut command = None;
+    let mut output = None;
+    let mut requirements = None;
+    let mut runtime = None;
+    let mut hints = None;
+    let mut close_brace = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::InputSectionNode => {
+                input = Some(child.clone());
+            }
+            SyntaxKind::MetadataSectionNode => {
+                meta = Some(child.clone());
+            }
+            SyntaxKind::ParameterMetadataSectionNode => {
+                parameter_meta = Some(child.clone());
+            }
+            SyntaxKind::BoundDeclNode => {
+                body.push(child.clone());
+            }
+            SyntaxKind::CommandSectionNode => {
+                command = Some(child.clone());
+            }
+            SyntaxKind::OutputSectionNode => {
+                output = Some(child.clone());
+            }
+            SyntaxKind::RequirementsSectionNode => {
+                requirements = Some(child.clone());
+            }
+            SyntaxKind::RuntimeSectionNode => {
+                runtime = Some(child.clone());
+            }
+            SyntaxKind::TaskHintsSectionNode => {
+                hints = Some(child.clone());
+            }
+            SyntaxKind::CloseBrace => {
+                close_brace = Some(child.clone());
+            }
+            _ => {
+                unreachable!(
+                    "unexpected child in task definition: {:?}",
+                    child.element().kind()
+                );
+            }
+        }
+    }
+
+    if let Some(meta) = meta {
+        (&meta).write(stream);
+        stream.blank_line();
+    }
+
+    if let Some(parameter_meta) = parameter_meta {
+        (&parameter_meta).write(stream);
+        stream.blank_line();
+    }
+
+    if let Some(input) = input {
+        (&input).write(stream);
+        stream.blank_line();
+    }
+
+    stream.blank_lines_allowed();
+    for child in body {
+        (&child).write(stream);
+    }
+    stream.blank_lines_allowed_between_comments();
+    stream.blank_line();
+
+    if let Some(command) = command {
+        (&command).write(stream);
+        stream.blank_line();
+    }
+
+    if let Some(output) = output {
+        (&output).write(stream);
+        stream.blank_line();
+    }
+
+    if let Some(requirements) = requirements {
+        (&requirements).write(stream);
+        stream.blank_line();
+    } else if let Some(runtime) = runtime {
+        (&runtime).write(stream);
+        stream.blank_line();
+    }
+
+    if let Some(hints) = hints {
+        (&hints).write(stream);
+        stream.blank_line();
+    }
+
+    stream.trim_while(|t| matches!(t, PreToken::BlankLine | PreToken::Trivia(Trivia::BlankLine)));
+
+    stream.decrement_indent();
+    (&close_brace.expect("task close brace")).write(stream);
+    stream.end_line();
+}
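+
+// A minimal sketch of the section ordering imposed above, assuming the public
+// entry points exercised by `wdl-format/tests/format.rs`; the WDL snippet and
+// assertions are illustrative only. Whatever the source order, a task is
+// written as meta, parameter_meta, input, private declarations, command,
+// output, requirements (or runtime), and hints.
+#[cfg(test)]
+mod task_example {
+    use wdl_ast::Document;
+    use wdl_ast::Node;
+
+    use crate::Formatter;
+    use crate::element::node::AstNodeFormatExt;
+
+    #[test]
+    fn task_sections_are_reordered() {
+        // `output` precedes `command` in the source, but not in the result.
+        let source =
+            "version 1.1\n\ntask t {\n    output {\n        Int x = 0\n    }\n    command <<<>>>\n}\n";
+        let (document, diagnostics) = Document::parse(source);
+        assert!(diagnostics.is_empty());
+
+        let element = Node::Ast(document.ast().into_v1().unwrap()).into_format_element();
+        let formatted = Formatter::default().format(&element).expect("should format");
+
+        let command = formatted.find("command").expect("has a command section");
+        let output = formatted.find("output").expect("has an output section");
+        assert!(command < output);
+    }
+}
+
+/// Formats a [`CommandSection`](wdl_ast::v1::CommandSection).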
+pub fn format_command_section(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("command section children"); + + let command_keyword = children.next().expect("command keyword"); + assert!(command_keyword.element().kind() == SyntaxKind::CommandKeyword); + (&command_keyword).write(stream); + stream.end_word(); + + let open_delimiter = children.next().expect("open delimiter"); + match open_delimiter.element().kind() { + SyntaxKind::OpenBrace => { + stream.push_literal_in_place_of_token( + open_delimiter + .element() + .as_token() + .expect("open brace should be token"), + "<<<".to_string(), + ); + } + SyntaxKind::OpenHeredoc => { + (&open_delimiter).write(stream); + } + _ => { + unreachable!( + "unexpected open delimiter in command section: {:?}", + open_delimiter.element().kind() + ); + } + } + // Technically there's no trivia inside the command section, + // so we don't want to increment indent here. + // All the indentation should be handled by the command text itself. + // TODO: multi-line placeholders need better formatting + for child in children { + match child.element().kind() { + SyntaxKind::CloseBrace => { + stream.push_literal_in_place_of_token( + child + .element() + .as_token() + .expect("close brace should be token"), + ">>>".to_string(), + ); + } + SyntaxKind::CloseHeredoc => { + (&child).write(stream); + } + SyntaxKind::LiteralCommandText | SyntaxKind::PlaceholderNode => { + (&child).write(stream); + } + _ => { + unreachable!( + "unexpected child in command section: {:?}", + child.element().kind() + ); + } + } + } + stream.end_line(); +} + +/// Formats a [`RequirementsItem`](wdl_ast::v1::RequirementsItem). +pub fn format_requirements_item(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("requirements item children"); + + let name = children.next().expect("requirements item name"); + assert!(name.element().kind() == SyntaxKind::Ident); + (&name).write(stream); + + let colon = children.next().expect("requirements item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("requirements item value"); + (&value).write(stream); + + assert!(children.next().is_none()); +} + +/// Formats a [`RequirementsSection`](wdl_ast::v1::RequirementsSection). 
+pub fn format_requirements_section(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("requirements section children"); + + let requirements_keyword = children.next().expect("requirements keyword"); + assert!(requirements_keyword.element().kind() == SyntaxKind::RequirementsKeyword); + (&requirements_keyword).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + let mut items = Vec::new(); + let mut close_brace = None; + + for child in children { + match child.element().kind() { + SyntaxKind::RequirementsItemNode => { + items.push(child.clone()); + } + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); + } + _ => { + unreachable!( + "unexpected child in requirements section: {:?}", + child.element().kind() + ); + } + } + } + + for item in items { + (&item).write(stream); + stream.end_line(); + } + + stream.decrement_indent(); + (&close_brace.expect("requirements close brace")).write(stream); + stream.end_line(); +} + +/// Formats a [`TaskHintsItem`](wdl_ast::v1::TaskHintsItem). +pub fn format_task_hints_item(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("task hints item children"); + + let name = children.next().expect("task hints item name"); + assert!(name.element().kind() == SyntaxKind::Ident); + (&name).write(stream); + + let colon = children.next().expect("task hints item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("task hints item value"); + (&value).write(stream); + + assert!(children.next().is_none()); +} + +/// Formats a [`RuntimeItem`](wdl_ast::v1::RuntimeItem). +pub fn format_runtime_item(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("runtime item children"); + + let name = children.next().expect("runtime item name"); + assert!(name.element().kind() == SyntaxKind::Ident); + (&name).write(stream); + + let colon = children.next().expect("runtime item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("runtime item value"); + (&value).write(stream); + + assert!(children.next().is_none()); +} + +/// Formats a [`RuntimeSection`](wdl_ast::v1::RuntimeSection). 
+pub fn format_runtime_section(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("runtime section children"); + + let runtime_keyword = children.next().expect("runtime keyword"); + assert!(runtime_keyword.element().kind() == SyntaxKind::RuntimeKeyword); + (&runtime_keyword).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + let mut items = Vec::new(); + let mut close_brace = None; + + for child in children { + match child.element().kind() { + SyntaxKind::RuntimeItemNode => { + items.push(child.clone()); + } + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); + } + _ => { + unreachable!( + "unexpected child in runtime section: {:?}", + child.element().kind() + ); + } + } + } + + for item in items { + (&item).write(stream); + stream.end_line(); + } + + stream.decrement_indent(); + (&close_brace.expect("runtime close brace")).write(stream); + stream.end_line(); +} + +/// Formats a [`TaskHintsSection`](wdl_ast::v1::TaskHintsSection). +pub fn format_task_hints_section(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("task hints section children"); + + let hints_keyword = children.next().expect("hints keyword"); + assert!(hints_keyword.element().kind() == SyntaxKind::HintsKeyword); + (&hints_keyword).write(stream); + stream.end_word(); + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + let mut items = Vec::new(); + let mut close_brace = None; + + for child in children { + match child.element().kind() { + SyntaxKind::TaskHintsItemNode => { + items.push(child.clone()); + } + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); + } + _ => { + unreachable!( + "unexpected child in task hints section: {:?}", + child.element().kind() + ); + } + } + } + + for item in items { + (&item).write(stream); + stream.end_line(); + } + + stream.decrement_indent(); + (&close_brace.expect("task hints close brace")).write(stream); + stream.end_line(); +} diff --git a/wdl-format/src/v1/workflow.rs b/wdl-format/src/v1/workflow.rs new file mode 100644 index 00000000..db866210 --- /dev/null +++ b/wdl-format/src/v1/workflow.rs @@ -0,0 +1,331 @@ +//! Formatting for workflows. + +pub mod call; + +use wdl_ast::SyntaxKind; + +use crate::PreToken; +use crate::TokenStream; +use crate::Trivia; +use crate::Writable as _; +use crate::element::FormatElement; + +/// Formats a [`ConditionalStatement`](wdl_ast::v1::ConditionalStatement). 
+pub fn format_conditional_statement(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("conditional statement children"); + + let if_keyword = children.next().expect("if keyword"); + assert!(if_keyword.element().kind() == SyntaxKind::IfKeyword); + (&if_keyword).write(stream); + stream.end_word(); + + let open_paren = children.next().expect("open paren"); + assert!(open_paren.element().kind() == SyntaxKind::OpenParen); + (&open_paren).write(stream); + + for child in children.by_ref() { + (&child).write(stream); + if child.element().kind() == SyntaxKind::CloseParen { + stream.end_word(); + break; + } + } + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + for child in children { + if child.element().kind() == SyntaxKind::CloseBrace { + stream.decrement_indent(); + } + (&child).write(stream); + } + stream.end_line(); +} + +/// Formats a [`ScatterStatement`](wdl_ast::v1::ScatterStatement). +pub fn format_scatter_statement(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("scatter statement children"); + + let scatter_keyword = children.next().expect("scatter keyword"); + assert!(scatter_keyword.element().kind() == SyntaxKind::ScatterKeyword); + (&scatter_keyword).write(stream); + stream.end_word(); + + let open_paren = children.next().expect("open paren"); + assert!(open_paren.element().kind() == SyntaxKind::OpenParen); + (&open_paren).write(stream); + + let variable = children.next().expect("scatter variable"); + assert!(variable.element().kind() == SyntaxKind::Ident); + (&variable).write(stream); + stream.end_word(); + + let in_keyword = children.next().expect("in keyword"); + assert!(in_keyword.element().kind() == SyntaxKind::InKeyword); + (&in_keyword).write(stream); + stream.end_word(); + + for child in children.by_ref() { + (&child).write(stream); + if child.element().kind() == SyntaxKind::CloseParen { + stream.end_word(); + break; + } + } + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.end_line(); + stream.increment_indent(); + + for child in children { + if child.element().kind() == SyntaxKind::CloseBrace { + stream.decrement_indent(); + } + (&child).write(stream); + } + stream.end_line(); +} + +/// Formats a [`WorkflowDefinition`](wdl_ast::v1::WorkflowDefinition). 
+pub fn format_workflow_definition(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("workflow definition children");
+
+    stream.blank_lines_allowed_between_comments();
+
+    let workflow_keyword = children.next().expect("workflow keyword");
+    assert!(workflow_keyword.element().kind() == SyntaxKind::WorkflowKeyword);
+    (&workflow_keyword).write(stream);
+    stream.end_word();
+
+    let name = children.next().expect("workflow name");
+    assert!(name.element().kind() == SyntaxKind::Ident);
+    (&name).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    let mut meta = None;
+    let mut parameter_meta = None;
+    let mut input = None;
+    let mut body = Vec::new();
+    let mut output = None;
+    let mut hints = None;
+    let mut close_brace = None;
+
+    for child in children {
+        match child.element().kind() {
+            SyntaxKind::MetadataSectionNode => {
+                meta = Some(child.clone());
+            }
+            SyntaxKind::ParameterMetadataSectionNode => {
+                parameter_meta = Some(child.clone());
+            }
+            SyntaxKind::InputSectionNode => {
+                input = Some(child.clone());
+            }
+            SyntaxKind::BoundDeclNode => {
+                body.push(child.clone());
+            }
+            SyntaxKind::CallStatementNode => {
+                body.push(child.clone());
+            }
+            SyntaxKind::ConditionalStatementNode => {
+                body.push(child.clone());
+            }
+            SyntaxKind::ScatterStatementNode => {
+                body.push(child.clone());
+            }
+            SyntaxKind::OutputSectionNode => {
+                output = Some(child.clone());
+            }
+            SyntaxKind::WorkflowHintsSectionNode => {
+                hints = Some(child.clone());
+            }
+            SyntaxKind::CloseBrace => {
+                close_brace = Some(child.clone());
+            }
+            _ => {
+                unreachable!(
+                    "unexpected child in workflow definition: {:?}",
+                    child.element().kind()
+                );
+            }
+        }
+    }
+
+    if let Some(meta) = meta {
+        (&meta).write(stream);
+        stream.blank_line();
+    }
+
+    if let Some(parameter_meta) = parameter_meta {
+        (&parameter_meta).write(stream);
+        stream.blank_line();
+    }
+
+    if let Some(input) = input {
+        (&input).write(stream);
+        stream.blank_line();
+    }
+
+    stream.blank_lines_allowed();
+    for child in body {
+        (&child).write(stream);
+    }
+    stream.blank_lines_allowed_between_comments();
+    stream.blank_line();
+
+    if let Some(output) = output {
+        (&output).write(stream);
+        stream.blank_line();
+    }
+
+    if let Some(hints) = hints {
+        (&hints).write(stream);
+        stream.blank_line();
+    }
+
+    stream.trim_while(|t| matches!(t, PreToken::BlankLine | PreToken::Trivia(Trivia::BlankLine)));
+
+    stream.decrement_indent();
+    (&close_brace.expect("workflow close brace")).write(stream);
+    stream.end_line();
+}
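+
+// A minimal sketch of the workflow layout above, assuming the public entry
+// points exercised by `wdl-format/tests/format.rs`; the WDL snippet and
+// assertions are illustrative only. Body statements (bound declarations,
+// calls, conditionals, and scatters) keep their relative source order, while
+// sections such as `output` and `hints` are pinned after the body.
+#[cfg(test)]
+mod workflow_example {
+    use wdl_ast::Document;
+    use wdl_ast::Node;
+
+    use crate::Formatter;
+    use crate::element::node::AstNodeFormatExt;
+
+    #[test]
+    fn workflow_output_is_written_after_the_body() {
+        // `output` precedes the scatter in the source, but not in the result.
+        let source = "version 1.1\n\nworkflow w {\n    output {\n        Int n = 0\n    }\n    scatter (i in [1, 2]) {\n        Int j = i\n    }\n}\n";
+        let (document, diagnostics) = Document::parse(source);
+        assert!(diagnostics.is_empty());
+
+        let element = Node::Ast(document.ast().into_v1().unwrap()).into_format_element();
+        let formatted = Formatter::default().format(&element).expect("should format");
+
+        let scatter = formatted.find("scatter").expect("has a scatter");
+        let output = formatted.find("output").expect("has an output section");
+        assert!(scatter < output);
+    }
+}
+
+/// Formats a [`WorkflowHintsArray`](wdl_ast::v1::WorkflowHintsArray).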
+pub fn format_workflow_hints_array(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("workflow hints array children"); + + let open_bracket = children.next().expect("open bracket"); + assert!(open_bracket.element().kind() == SyntaxKind::OpenBracket); + (&open_bracket).write(stream); + stream.increment_indent(); + + let mut items = Vec::new(); + let mut commas = Vec::new(); + let mut close_bracket = None; + + for child in children { + match child.element().kind() { + SyntaxKind::Comma => { + commas.push(child.clone()); + } + SyntaxKind::CloseBracket => { + close_bracket = Some(child.clone()); + } + _ => { + items.push(child.clone()); + } + } + } + + let mut commas = commas.into_iter(); + for item in items { + (&item).write(stream); + if let Some(comma) = commas.next() { + (&comma).write(stream); + } else { + stream.push_literal(",".to_string(), SyntaxKind::Comma); + } + stream.end_line(); + } + + stream.decrement_indent(); + (&close_bracket.expect("workflow hints array close bracket")).write(stream); +} + +/// Formats a [`WorkflowHintsItem`](wdl_ast::v1::WorkflowHintsItem). +pub fn format_workflow_hints_item(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("workflow hints item children"); + + let key = children.next().expect("workflow hints item key"); + assert!(key.element().kind() == SyntaxKind::Ident); + (&key).write(stream); + + let colon = children.next().expect("workflow hints item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("workflow hints item value"); + (&value).write(stream); + + stream.end_line(); + + assert!(children.next().is_none()); +} + +/// Formats a [`WorkflowHintsObjectItem`](wdl_ast::v1::WorkflowHintsObjectItem). +pub fn format_workflow_hints_object_item( + element: &FormatElement, + stream: &mut TokenStream, +) { + let mut children = element + .children() + .expect("workflow hints object item children"); + + let key = children.next().expect("workflow hints object item key"); + assert!(key.element().kind() == SyntaxKind::Ident); + (&key).write(stream); + + let colon = children.next().expect("workflow hints object item colon"); + assert!(colon.element().kind() == SyntaxKind::Colon); + (&colon).write(stream); + stream.end_word(); + + let value = children.next().expect("workflow hints object item value"); + (&value).write(stream); + + stream.end_line(); + + assert!(children.next().is_none()); +} + +/// Formats a [`WorkflowHintsObject`](wdl_ast::v1::WorkflowHintsObject). +pub fn format_workflow_hints_object(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("workflow hints object children"); + + let open_brace = children.next().expect("open brace"); + assert!(open_brace.element().kind() == SyntaxKind::OpenBrace); + (&open_brace).write(stream); + stream.increment_indent(); + + for child in children { + if child.element().kind() == SyntaxKind::CloseBrace { + stream.decrement_indent(); + } + (&child).write(stream); + stream.end_line(); + } +} + +/// Formats a [`WorkflowHintsSection`](wdl_ast::v1::WorkflowHintsSection). 
+pub fn format_workflow_hints_section(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("workflow hints section children");
+
+    let hints_keyword = children.next().expect("hints keyword");
+    assert!(hints_keyword.element().kind() == SyntaxKind::HintsKeyword);
+    (&hints_keyword).write(stream);
+    stream.end_word();
+
+    let open_brace = children.next().expect("open brace");
+    assert!(open_brace.element().kind() == SyntaxKind::OpenBrace);
+    (&open_brace).write(stream);
+    stream.increment_indent();
+
+    for child in children {
+        if child.element().kind() == SyntaxKind::CloseBrace {
+            stream.decrement_indent();
+        }
+        (&child).write(stream);
+        stream.end_line();
+    }
+}
diff --git a/wdl-format/src/v1/workflow/call.rs b/wdl-format/src/v1/workflow/call.rs
new file mode 100644
index 00000000..316da7c2
--- /dev/null
+++ b/wdl-format/src/v1/workflow/call.rs
@@ -0,0 +1,160 @@
+//! Formatting for workflow calls.
+
+use wdl_ast::SyntaxKind;
+
+use crate::PreToken;
+use crate::TokenStream;
+use crate::Writable as _;
+use crate::element::FormatElement;
+
+/// Formats a [`CallTarget`](wdl_ast::v1::CallTarget).
+pub fn format_call_target(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("call target children") {
+        (&child).write(stream);
+    }
+}
+
+/// Formats a [`CallAlias`](wdl_ast::v1::CallAlias).
+pub fn format_call_alias(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("call alias children") {
+        (&child).write(stream);
+        stream.end_word();
+    }
+}
+
+/// Formats a [`CallAfter`](wdl_ast::v1::CallAfter).
+pub fn format_call_after(element: &FormatElement, stream: &mut TokenStream) {
+    for child in element.children().expect("call after children") {
+        (&child).write(stream);
+        stream.end_word();
+    }
+}
+
+/// Formats a [`CallInputItem`](wdl_ast::v1::CallInputItem).
+pub fn format_call_input_item(element: &FormatElement, stream: &mut TokenStream) {
+    let mut children = element.children().expect("call input item children");
+
+    let name = children.next().expect("call input item name");
+    (&name).write(stream);
+    // Don't call end_word() here: the name may stand alone (no `= value`), in
+    // which case any comma should follow it immediately.
+
+    if let Some(equals) = children.next() {
+        stream.end_word();
+        (&equals).write(stream);
+        stream.end_word();
+
+        let value = children.next().expect("call input item value");
+        (&value).write(stream);
+        assert!(children.next().is_none());
+    }
+}
+
+/// Formats a [`CallStatement`](wdl_ast::v1::CallStatement).
+pub fn format_call_statement(element: &FormatElement, stream: &mut TokenStream) { + let mut children = element.children().expect("call statement children"); + + let call_keyword = children.next().expect("call keyword"); + assert!(call_keyword.element().kind() == SyntaxKind::CallKeyword); + (&call_keyword).write(stream); + stream.end_word(); + + let target = children.next().expect("call target"); + (&target).write(stream); + stream.end_word(); + + let mut alias = None; + let mut afters = Vec::new(); + let mut open_brace = None; + let mut input_keyword = None; + let mut colon = None; + let mut inputs = Vec::new(); + let mut commas = Vec::new(); + let mut close_brace = None; + + for child in children { + match child.element().kind() { + SyntaxKind::CallAliasNode => { + alias = Some(child.clone()); + } + SyntaxKind::CallAfterNode => { + afters.push(child.clone()); + } + SyntaxKind::OpenBrace => { + open_brace = Some(child.clone()); + } + SyntaxKind::InputKeyword => { + input_keyword = Some(child.clone()); + } + SyntaxKind::Colon => { + colon = Some(child.clone()); + } + SyntaxKind::CallInputItemNode => { + inputs.push(child.clone()); + } + SyntaxKind::Comma => { + commas.push(child.clone()); + } + SyntaxKind::CloseBrace => { + close_brace = Some(child.clone()); + } + _ => { + unreachable!( + "unexpected child in call statement: {:?}", + child.element().kind() + ); + } + } + } + + if let Some(alias) = alias { + (&alias).write(stream); + stream.end_word(); + } + + for after in afters { + (&after).write(stream); + stream.end_word(); + } + + if let Some(open_brace) = open_brace { + (&open_brace).write(stream); + stream.end_word(); + + if let Some(input_keyword) = input_keyword { + (&input_keyword).write(stream); + (&colon.expect("colon")).write(stream); + stream.end_word(); + } + + // TODO: Make this check smarter in case a single input spans multiple lines or + // is interrupted + let single_line = inputs.len() == 1; + if !single_line { + stream.increment_indent(); + } + + let mut commas = commas.iter(); + for input in inputs { + (&input).write(stream); + + if let Some(comma) = commas.next() { + (comma).write(stream); + } else if !single_line { + stream.push_literal(",".to_string(), SyntaxKind::Comma); + } + + if !single_line { + stream.end_line(); + } + } + + if !single_line { + stream.decrement_indent(); + } else { + stream.end_word(); + } + (&close_brace.expect("close brace")).write(stream); + stream.end_line(); + } +} diff --git a/wdl-format/tests/format.rs b/wdl-format/tests/format.rs new file mode 100644 index 00000000..43a466cf --- /dev/null +++ b/wdl-format/tests/format.rs @@ -0,0 +1,216 @@ +//! The format file tests. +//! +//! This test looks for directories in `tests/format`. +//! +//! Each directory is expected to contain: +//! +//! * `source.wdl` - the test input source to parse. +//! * `source.formatted.wdl` - the expected formatted output. +//! +//! The `source.formatted.wdl` file may be automatically generated or updated by +//! setting the `BLESS` environment variable when running this test. 
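+//!
+//! For example, assuming a typical workspace checkout, all of the
+//! `source.formatted.wdl` fixtures might be regenerated with
+//! `BLESS=1 cargo test -p wdl-format --test format`.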
+
+use std::collections::HashSet;
+use std::env;
+use std::ffi::OsStr;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::exit;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+
+use codespan_reporting::files::SimpleFile;
+use codespan_reporting::term;
+use codespan_reporting::term::Config;
+use codespan_reporting::term::termcolor::Buffer;
+use colored::Colorize;
+use pretty_assertions::StrComparison;
+use rayon::prelude::*;
+use wdl_ast::Diagnostic;
+use wdl_ast::Document;
+use wdl_ast::Node;
+use wdl_format::Formatter;
+use wdl_format::element::node::AstNodeFormatExt;
+
+/// Find all the tests in the `tests/format` directory.
+fn find_tests() -> Vec<PathBuf> {
+    // Check for filter arguments consisting of test names
+    let mut filter = HashSet::new();
+    for arg in std::env::args().skip_while(|a| a != "--").skip(1) {
+        if !arg.starts_with('-') {
+            filter.insert(arg);
+        }
+    }
+
+    let mut tests: Vec<PathBuf> = Vec::new();
+    for entry in Path::new("tests/format").read_dir().unwrap() {
+        let entry = entry.expect("failed to read directory");
+        let path = entry.path();
+        if !path.is_dir()
+            || (!filter.is_empty()
+                && !filter.contains(entry.file_name().to_str().expect("name should be UTF-8")))
+        {
+            continue;
+        }
+
+        tests.push(path);
+    }
+
+    tests.sort();
+    tests
+}
+
+/// Format a list of diagnostics.
+fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String {
+    let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source);
+    let mut buffer = Buffer::no_color();
+    for diagnostic in diagnostics {
+        term::emit(
+            &mut buffer,
+            &Config::default(),
+            &file,
+            &diagnostic.to_codespan(),
+        )
+        .expect("should emit");
+    }
+
+    String::from_utf8(buffer.into_inner()).expect("should be UTF-8")
+}
+
+/// Compare the result of a test to the expected result.
+fn compare_result(path: &Path, result: &str) -> Result<(), String> {
+    if env::var_os("BLESS").is_some() {
+        fs::write(path, &result).map_err(|e| {
+            format!(
+                "failed to write result file `{path}`: {e}",
+                path = path.display()
+            )
+        })?;
+        return Ok(());
+    }
+
+    let expected = fs::read_to_string(path)
+        .map_err(|e| {
+            format!(
+                "failed to read result file `{path}`: {e}",
+                path = path.display()
+            )
+        })?
+        .replace("\r\n", "\n");
+
+    if expected != result {
+        return Err(format!(
+            "result is not as expected:\n{}",
+            StrComparison::new(&expected, &result),
+        ));
+    }
+
+    Ok(())
+}
+
+/// Run a test.
+fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> {
+    let path = test.join("source.wdl");
+    let source = std::fs::read_to_string(&path).map_err(|e| {
+        format!(
+            "failed to read source file `{path}`: {e}",
+            path = path.display()
+        )
+    })?;
+
+    let (document, diagnostics) = Document::parse(&source);
+
+    if !diagnostics.is_empty() {
+        return Err(format!(
+            "failed to format `{path}`: {e}",
+            path = path.display(),
+            e = format_diagnostics(&diagnostics, path.as_path(), &source)
+        ));
+    };
+
+    let document = Node::Ast(document.ast().into_v1().unwrap()).into_format_element();
+    let formatter = Formatter::default();
+
+    let formatted = match formatter.format(&document) {
+        Ok(formatted) => formatted,
+        Err(e) => {
+            return Err(format!(
+                "failed to format `{path}`: {e}",
+                path = path.display(),
+                e = e
+            ));
+        }
+    };
+    compare_result(path.with_extension("formatted.wdl").as_path(), &formatted)?;
+
+    ntests.fetch_add(1, Ordering::SeqCst);
+    Ok(())
+}
+
+/// Run all the tests.
+fn main() {
+    let tests = find_tests();
+    println!("\nrunning {} tests\n", tests.len());
+
+    let ntests = AtomicUsize::new(0);
+    let errors = tests
+        .par_iter()
+        .filter_map(|test| {
+            let test_name = test.file_stem().and_then(OsStr::to_str).unwrap();
+            match std::panic::catch_unwind(|| {
+                match run_test(test, &ntests)
+                    .map_err(|e| format!("failed to run test `{path}`: {e}", path = test.display()))
+                    .err()
+                {
+                    Some(e) => {
+                        println!("test {test_name} ... {failed}", failed = "failed".red());
+                        Some((test_name, e))
+                    }
+                    None => {
+                        println!("test {test_name} ... {ok}", ok = "ok".green());
+                        None
+                    }
+                }
+            }) {
+                Ok(result) => result,
+                Err(e) => {
+                    println!(
+                        "test {test_name} ... {panicked}",
+                        panicked = "panicked".red()
+                    );
+                    Some((
+                        test_name,
+                        format!(
+                            "test panicked: {e:?}",
+                            e = e
+                                .downcast_ref::<String>()
+                                .map(|s| s.as_str())
+                                .or_else(|| e.downcast_ref::<&str>().copied())
+                                .unwrap_or("no panic message")
+                        ),
+                    ))
+                }
+            }
+        })
+        .collect::<Vec<_>>();
+
+    if !errors.is_empty() {
+        eprintln!(
+            "\n{count} test(s) {failed}:",
+            count = errors.len(),
+            failed = "failed".red()
+        );
+
+        for (name, msg) in errors.iter() {
+            eprintln!("{name}: {msg}", msg = msg.red());
+        }
+
+        exit(1);
+    }
+
+    println!(
+        "\ntest result: ok. {} passed\n",
+        ntests.load(Ordering::SeqCst)
+    );
+}
diff --git a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt
new file mode 100644
index 00000000..d9a98e06
--- /dev/null
+++ b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/LICENSE.txt
@@ -0,0 +1,25 @@
+'source.wdl' obtained from: https://github.com/ENCODE-DCC/chip-seq-pipeline2/blob/26eeda81a0540dc793fc69b0c390d232ca7ca50a/chip.wdl
+on the date 08-05-2024.
+It was accompanied by the following license:
+
+MIT License
+
+Copyright (c) 2017 ENCODE DCC
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl
new file mode 100644
index 00000000..2c8bbbac
--- /dev/null
+++ b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.formatted.wdl
@@ -0,0 +1,3945 @@
+version 1.0
+
+struct RuntimeEnvironment {
+    String docker
+    String singularity
+    String conda
+}
+
+workflow chip {
+    meta {
+        version: "v2.2.2"
+        author: "Jin wook Lee"
+        email: "leepc12@gmail.com"
+        description: "ENCODE TF/Histone ChIP-Seq pipeline.
See https://github.com/ENCODE-DCC/chip-seq-pipeline2 for more details. e.g. example input JSON for Terra/Anvil." + organization: "ENCODE DCC" + specification_document: "https://docs.google.com/document/d/1lG_Rd7fnYgRpSIqrIfuVlAz2dW1VaSQThzk836Db99c/edit?usp=sharing" + default_docker: "encodedcc/chip-seq-pipeline:v2.2.2" + default_singularity: "https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif" + croo_out_def: "https://storage.googleapis.com/encode-pipeline-output-definition/chip.croo.v5.json" + parameter_group: { + runtime_environment: { + title: "Runtime environment", + description: "Runtime environment such as container URIs (Docker, Singularity) and Conda environment name.", + }, + pipeline_metadata: { + title: "Pipeline metadata", + description: "Metadata for a pipeline (e.g. title and description).", + }, + reference_genome: { + title: "Reference genome", + description: "Genome specific files. e.g. reference FASTA, bowtie2 index, chromosome sizes file.", + help: "Choose one chip.genome_tsv file that defines all genome specific parameters in it or define each genome specific parameter in input JSON to override those defined in genome TSV file. If you use Caper then use https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/[GENOME]_caper.tsv. Caper will automatically download/install all files defined in such TSV. Otherwise download genome TSV file by using a shell script (scripts/download_genome_data.sh [GENOME] [DEST_DIR]). Supported genomes are hg38, hg19, mm10 and mm9. See pipeline documentation if you want to build genome database from your own FASTA file. If some genome data are missing then analyses using such data will be skipped.", + }, + input_genomic_data: { + title: "Input genomic data", + description: "Genomic input files for experiment.", + help: "Pipeline can start with any types of experiment data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN, PEAK). Choose one type and leave others empty. FASTQs have a variable for each biological replicate. e.g. chip.fastqs_rep1_R1 and chip.fastqs_rep2_R1. You can define up to 10 experiment replicates. For other types, there is an array to define file for each biological replicate. e.g. chip.bams: [\"rep1.bam\", \"rep1.bam\"]. Define sequential endedness with chip.paired_end, if you have mixed SE and PE replicates then define chip.paired_ends instead for each replicate. e.g. chip.paired_ends: [false, true].", + }, + input_genomic_data_control: { + title: "Input genomic data (control)", + description: "Genomic input files for control. TF ChIP-seq requires control for peak calling but histone ChIP-seq does not.", + help: "Pipeline can start with any types of control data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN). Choose one type and leave others empty. FASTQs have a variable for each control replicate. e.g. chip.ctl_fastqs_rep1_R1 and chip.ctl_fastqs_rep2_R1. You can define up to 10 control replicates. For other types, there is an array to define file for each control replicate. e.g. chip.ctl_bams: [\"ctl1.bam\", \"ctl1.bam\"]. Define sequential endedness with chip.ctl_paired_end, if you have mixed SE and PE control replicates then define chip.ctl_paired_ends instead for each replicate. e.g. chip.ctl_paired_ends: [false, true]. 
If none of these are defined, pipeline will use chip.paired_end for controls.", + }, + pipeline_parameter: { + title: "Pipeline parameter", + description: "Pipeline type and flags to turn on/off analyses.", + help: "Use chip.align_only to align FASTQs without peak calling.", + }, + alignment: { + title: "Alignment", + description: "Parameters for alignment.", + help: "Pipeline can crop FASTQs (chip.crop_length > 0) with tolerance (chip.crop_length_tol) before mapping.", + }, + peak_calling: { + title: "Peak calling", + description: "Parameters for peak calling.", + help: "This group includes statistical thresholds for peak-calling or post-peak-calling analyses: p-val, FDR, IDR. It also include parameters for control choosing/subsampling. All control replicates are pooled and pooled control is used for peak calling against each experiment replicate by default (see chip.always_use_pooled_ctl). Pipeline compares read depth of experiment replicate and a chosen control. It also compare read depth of controls. If control is too deep then it is subsampled.", + }, + resource_parameter: { + title: "Resource parameter", + description: "Number of CPUs (threads), max. memory and walltime for tasks.", + help: "Resource settings are used for determining an instance type on cloud backends (e.g. GCP, AWS) and used for submitting tasks to a cluster engine (e.g. SLURM, SGE, ...). Walltime (chip.*_time_hr) is only used for cluster engines. Other tasks default to use 1 CPU and 4GB of memory.", + }, + } + } + + parameter_meta { + docker: { + description: "Default Docker image URI to run WDL tasks.", + group: "runtime_environment", + example: "ubuntu:20.04", + } + singularity: { + description: "Default Singularity image URI to run WDL tasks. For Singularity users only.", + group: "runtime_environment", + example: "docker://ubuntu:20.04", + } + conda: { + description: "Default Conda environment name to run WDL tasks. For Conda users only.", + group: "runtime_environment", + example: "encd-chip", + } + conda_macs2: { + description: "Conda environment name for task macs2. For Conda users only.", + group: "runtime_environment", + example: "encd-chip-macs2", + } + conda_spp: { + description: "Conda environment name for tasks spp/xcor. For Conda users only.", + group: "runtime_environment", + example: "encd-chip-spp", + } + title: { + description: "Experiment title.", + group: "pipeline_metadata", + example: "ENCSR936XTK (subsampled 1/50)", + } + description: { + description: "Experiment description.", + group: "pipeline_metadata", + example: "ZNF143 ChIP-seq on human GM12878 (subsampled 1/50)", + } + genome_tsv: { + description: "Reference genome database TSV.", + group: "reference_genome", + help: "This TSV files includes all genome specific parameters (e.g. reference FASTA, bowtie2 index). You can still invidiaully define any parameters in it. Parameters defined in input JSON will override those defined in genome TSV.", + example: "https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg38_caper.tsv", + } + genome_name: { + description: "Genome name.", + group: "reference_genome", + } + ref_fa: { + description: "Reference FASTA file.", + group: "reference_genome", + } + bowtie2_idx_tar: { + description: "BWA index TAR file.", + group: "reference_genome", + } + custom_aligner_idx_tar: { + description: "Index TAR file for a custom aligner. 
To use a custom aligner, define \"chip.custom_align_py\" too.", + group: "reference_genome", + } + chrsz: { + description: "2-col chromosome sizes file.", + group: "reference_genome", + } + blacklist: { + description: "Blacklist file in BED format.", + group: "reference_genome", + help: "Peaks will be filtered with this file.", + } + blacklist2: { + description: "Secondary blacklist file in BED format.", + group: "reference_genome", + help: "If it is defined, it will be merged with chip.blacklist. Peaks will be filtered with merged blacklist.", + } + mito_chr_name: { + description: "Mitochondrial chromosome name.", + group: "reference_genome", + help: "e.g. chrM, MT. Mitochondrial reads defined here will be filtered out during filtering BAMs in \"filter\" task.", + } + regex_bfilt_peak_chr_name: { + description: "Reg-ex for chromosomes to keep while filtering peaks.", + group: "reference_genome", + help: "Chromosomes defined here will be kept. All other chromosomes will be filtered out in .bfilt. peak file. This is done along with blacklist filtering peak file.", + } + gensz: { + description: "Genome sizes. \"hs\" for human, \"mm\" for mouse or sum of 2nd columnin chromosome sizes file.", + group: "reference_genome", + } + paired_end: { + description: "Sequencing endedness.", + group: "input_genomic_data", + help: "Setting this on means that all replicates are paired ended. For mixed samples, use chip.paired_ends array instead.", + example: true, + } + paired_ends: { + description: "Sequencing endedness array (for mixed SE/PE datasets).", + group: "input_genomic_data", + help: "Whether each biological replicate is paired ended or not.", + } + fastqs_rep1_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 1.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from FASTQs files. Pipeline can start from any type of inputs (e.g. FASTQs, BAMs, ...). Choose one type and fill paramters for that type and leave other undefined. Especially for FASTQs, we have individual variable for each biological replicate to allow FASTQs of technical replicates can be merged. Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep1_R2). These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz", + ], + } + fastqs_rep1_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 1.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz", + ], + } + fastqs_rep2_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 2.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz", + ], + } + fastqs_rep2_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 2.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep2_R1). 
These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz", + ], + } + fastqs_rep3_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 3.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep3_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 3.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep4_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 4.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep4_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 4.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep5_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 5.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep5_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 5.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep6_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 6.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep6_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 6.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep7_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 7.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep7_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 7.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep8_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 8.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep8_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 8.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep8_R1). 
These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep9_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 9.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep9_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 9.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep10_R1: { + description: "Read1 FASTQs to be merged for a biological replicate 10.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.", + } + fastqs_rep10_R2: { + description: "Read2 FASTQs to be merged for a biological replicate 10.", + group: "input_genomic_data", + help: "Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.", + } + bams: { + description: "List of unfiltered/raw BAM files for each biological replicate.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each biological replicate. e.g. [rep1.bam, rep2.bam, rep3.bam, ...].", + } + nodup_bams: { + description: "List of filtered/deduped BAM files for each biological replicate", + group: "input_genomic_data", + help: "Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each biological replicate. e.g. [rep1.nodup.bam, rep2.nodup.bam, rep3.nodup.bam, ...].", + } + tas: { + description: "List of TAG-ALIGN files for each biological replicate.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each biological replicate. e.g. [rep1.tagAlign.gz, rep2.tagAlign.gz, ...].", + } + peaks: { + description: "List of NARROWPEAK files (not blacklist filtered) for each biological replicate.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from PEAK files. Each entry for each biological replicate. e.g. [rep1.narrowPeak.gz, rep2.narrowPeak.gz, ...]. Define other PEAK parameters (e.g. chip.peaks_pr1, chip.peak_pooled) according to your flag settings (e.g. chip.true_rep_only) and number of replicates. If you have more than one replicate then define chip.peak_pooled, chip.peak_ppr1 and chip.peak_ppr2. If chip.true_rep_only flag is on then do not define any parameters (chip.peaks_pr1, chip.peaks_pr2, chip.peak_ppr1 and chip.peak_ppr2) related to pseudo replicates.", + } + peaks_pr1: { + description: "List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 1 of each biological replicate.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from PEAK files. Define if chip.true_rep_only flag is off.", + } + peaks_pr2: { + description: "List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 2 of each biological replicate.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from PEAK files. 
Define if chip.true_rep_only flag is off.", + } + peak_pooled: { + description: "NARROWPEAK file for pooled true replicate.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates. Pooled true replicate means analysis on pooled biological replicates.", + } + peak_ppr1: { + description: "NARROWPEAK file for pooled pseudo replicate 1.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 1st pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate's 1st pseudos.", + } + peak_ppr2: { + description: "NARROWPEAK file for pooled pseudo replicate 2.", + group: "input_genomic_data", + help: "Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 2nd pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate's 2nd pseudos.", + } + ctl_paired_end: { + description: "Sequencing endedness for all controls.", + group: "input_genomic_data_control", + help: "Setting this on means that all control replicates are paired ended. For mixed controls, use chip.ctl_paired_ends array instead.", + } + ctl_paired_ends: { + description: "Sequencing endedness array for mixed SE/PE controls.", + group: "input_genomic_data_control", + help: "Whether each control replicate is paired ended or not.", + } + ctl_fastqs_rep1_R1: { + description: "Read1 FASTQs to be merged for a control replicate 1.", + group: "input_genomic_data_control", + help: "Define if you want to start pipeline from FASTQs files. Pipeline can start from any type of controls (e.g. FASTQs, BAMs, ...). Choose one type and fill paramters for that type and leave other undefined. Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep1_R2).", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz", + ], + } + ctl_fastqs_rep1_R2: { + description: "Read2 FASTQs to be merged for a control replicate 1.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz", + ], + } + ctl_fastqs_rep2_R1: { + description: "Read1 FASTQs to be merged for a control replicate 2.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz", + ], + } + ctl_fastqs_rep2_R2: { + description: "Read2 FASTQs to be merged for a control replicate 2.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep2_R1). 
These FASTQs are usually technical replicates to be merged.", + example: [ + "https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz", + ], + } + ctl_fastqs_rep3_R1: { + description: "Read1 FASTQs to be merged for a control replicate 3.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep3_R2: { + description: "Read2 FASTQs to be merged for a control replicate 3.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep4_R1: { + description: "Read1 FASTQs to be merged for a control replicate 4.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep4_R2: { + description: "Read2 FASTQs to be merged for a control replicate 4.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep5_R1: { + description: "Read1 FASTQs to be merged for a control replicate 5.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep5_R2: { + description: "Read2 FASTQs to be merged for a control replicate 5.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep6_R1: { + description: "Read1 FASTQs to be merged for a control replicate 6.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep6_R2: { + description: "Read2 FASTQs to be merged for a control replicate 6.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep7_R1: { + description: "Read1 FASTQs to be merged for a control replicate 7.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep7_R2: { + description: "Read2 FASTQs to be merged for a control replicate 7.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep8_R1: { + description: "Read1 FASTQs to be merged for a control replicate 8.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep8_R2). 
These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep8_R2: { + description: "Read2 FASTQs to be merged for a control replicate 8.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep9_R1: { + description: "Read1 FASTQs to be merged for a control replicate 9.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep9_R2: { + description: "Read2 FASTQs to be merged for a control replicate 9.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep10_R1: { + description: "Read1 FASTQs to be merged for a control replicate 10.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.", + } + ctl_fastqs_rep10_R2: { + description: "Read2 FASTQs to be merged for a control replicate 10.", + group: "input_genomic_data_control", + help: "Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.", + } + ctl_bams: { + description: "List of unfiltered/raw BAM files for each control replicate.", + group: "input_genomic_data_control", + help: "Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each control replicate. e.g. [ctl1.bam, ctl2.bam, ctl3.bam, ...].", + } + ctl_nodup_bams: { + description: "List of filtered/deduped BAM files for each control replicate", + group: "input_genomic_data_control", + help: "Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each control replicate. e.g. [ctl1.nodup.bam, ctl2.nodup.bam, ctl3.nodup.bam, ...].", + } + ctl_tas: { + description: "List of TAG-ALIGN files for each biological replicate.", + group: "input_genomic_data_control", + help: "Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each control replicate. e.g. [ctl1.tagAlign.gz, ctl2.tagAlign.gz, ...].", + } + pipeline_type: { + description: "Pipeline type. tf for TF ChIP-Seq, histone for Histone ChIP-Seq or control for mapping controls only.", + group: "pipeline_parameter", + help: "Default peak caller is different for each type. spp For TF ChIP-Seq and macs2 for histone ChIP-Seq. Regardless of pipeline type, spp always requires controls but macs2 doesn't. For control mode, chip.align_only is automatically turned on and cross-correlation analysis is disabled. Do not define ctl_* for control mode. Define fastqs_repX_RY instead.", + choices: [ + "tf", + "histone", + "control", + ], + example: "tf", + } + redact_nodup_bam: { + description: "Redact filtered/nodup BAM.", + group: "pipeline_parameter", + help: "Redact filtered/nodup BAM at the end of the filtering step (task filter). Raw BAM from the aligner (task align) will still remain unredacted. Quality metrics on filtered BAM will be calculated before being redacted. However, all downstream analyses (e.g. 
peak-calling) will be done on the redacted BAM. If you start from nodup BAM then this flag will not be active.", + } + align_only: { + description: "Align only mode.", + group: "pipeline_parameter", + help: "Reads will be aligned but there will be no peak-calling on them. It is turned on automatically if chip.pipeline_type is control.", + } + true_rep_only: { + description: "Disables all analyses related to pseudo-replicates.", + group: "pipeline_parameter", + help: "Pipeline generates 2 pseudo-replicate from one biological replicate. This flag turns off all analyses related to pseudos (with prefix/suffix pr, ppr).", + } + enable_count_signal_track: { + description: "Enables generation of count signal tracks.", + group: "pipeline_parameter", + } + enable_jsd: { + description: "Enables Jensen-Shannon Distance (JSD) plot generation.", + group: "pipeline_parameter", + } + enable_gc_bias: { + description: "Enables GC bias calculation.", + group: "pipeline_parameter", + } + aligner: { + description: "Aligner. bowtie2, bwa or custom", + group: "alignment", + help: "It is bowtie2 by default. To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.", + choices: [ + "bowtie2", + "bwa", + "custom", + ], + example: "bowtie2", + } + custom_align_py: { + description: "Python script for a custom aligner.", + group: "alignment", + help: "There is a template included in the documentation for inputs. Defining this parameter will automatically change \"chip.aligner\" to \"custom\". You should also define \"chip.custom_aligner_idx_tar\".", + } + use_bwa_mem_for_pe: { + description: "For paired end dataset with read length >= chip.bwa_mem_read_len_limit (default 70) bp, use bwa mem instead of bwa aln.", + group: "alignment", + help: "Use it only for paired end reads >= chip.bwa_mem_read_len_limit (default 70) bp. Otherwise keep using bwa aln.", + } + bwa_mem_read_len_limit: { + description: "Read length limit for bwa mem (for PE FASTQs only).", + group: "alignment", + help: "If chip.use_bwa_mem_for_pe is activated and reads are shorter than this limit, then bwa aln will be used instead of bwa mem.", + } + use_bowtie2_local_mode: { + description: "Use bowtie2's local mode (soft-clipping).", + group: "alignment", + help: "This will add --local to bowtie2 command line so that it will replace the default end-to-end mode.", + } + crop_length: { + description: "Crop FASTQs' reads longer than this length.", + group: "alignment", + help: "Also drop all reads shorter than chip.crop_length - chip.crop_length_tol.", + } + crop_length_tol: { + description: "Tolerance for cropping reads in FASTQs.", + group: "alignment", + help: "Drop all reads shorter than chip.crop_length - chip.crop_length_tol. Activated only when chip.crop_length is defined.", + } + trimmomatic_phred_score_format: { + description: "Base encoding (format) for Phred score in FASTQs.", + group: "alignment", + choices: [ + "auto", + "phred33", + "phred64", + ], + help: "This is used for Trimmomatic only. It is auto by default, which means that Trimmomatic automatically detect it from FASTQs. Otherwise -phred33 or -phred64 will be passed to the Trimmomatic command line. Use this if you see an error like \"Error: Unable to detect quality encoding\".", + } + xcor_trim_bp: { + description: "Trim experiment read1 FASTQ (for both SE and PE) for cross-correlation analysis.", + group: "alignment", + help: "This does not affect alignment of experimental/control replicates. 
Pipeline additionaly aligns R1 FASTQ only for cross-correlation analysis only. This parameter is used for it.", + } + use_filt_pe_ta_for_xcor: { + description: "Use filtered PE BAM for cross-correlation analysis.", + group: "alignment", + help: "If not defined, pipeline uses SE BAM generated from trimmed read1 FASTQ for cross-correlation analysis.", + } + dup_marker: { + description: "Marker for duplicate reads. picard or sambamba.", + group: "alignment", + help: "picard for Picard MarkDuplicates or sambamba for sambamba markdup.", + choices: [ + "picard", + "sambamba", + ], + example: "picard", + } + no_dup_removal: { + description: "Disable removal of duplicate reads during filtering BAM.", + group: "alignment", + help: "Duplicate reads are filtererd out during filtering BAMs to gerenate NODUP_BAM. This flag will keep all duplicate reads in NODUP_BAM. This flag does not affect naming of NODUP_BAM. NODUP_BAM will still have .nodup. suffix in its filename.", + } + mapq_thresh: { + description: "Threshold for low MAPQ reads removal.", + group: "alignment", + help: "Low MAPQ reads are filtered out while filtering BAM.", + } + filter_chrs: { + description: "List of chromosomes to be filtered out while filtering BAM.", + group: "alignment", + help: "It is empty by default, hence no filtering out of specfic chromosomes. It is case-sensitive. Use exact word for chromosome names.", + } + subsample_reads: { + description: "Subsample reads. Shuffle and subsample reads.", + group: "alignment", + help: "This affects all downstream analyses after filtering experiment BAM. (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if actual number of reads in BAM exceeds this number. 0 means disabled.", + } + ctl_subsample_reads: { + description: "Subsample control reads. Shuffle and subsample control reads.", + group: "alignment", + help: "This affects all downstream analyses after filtering control BAM. (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if actual number of reads in BAM exceeds this number. 0 means disabled.", + } + xcor_subsample_reads: { + description: "Subsample reads for cross-corrlelation analysis only.", + group: "alignment", + help: "This does not affect downstream analyses after filtering BAM. It is for cross-correlation analysis only. 0 means disabled.", + } + xcor_exclusion_range_min: { + description: "Exclusion minimum for cross-correlation analysis.", + group: "alignment", + help: "For run_spp.R -s. Make sure that it is consistent with default strand shift -s=-500:5:1500 in run_spp.R.", + } + xcor_exclusion_range_max: { + description: "Exclusion maximum for cross-coorrelation analysis.", + group: "alignment", + help: "For run_spp.R -s. If not defined default value of `max(read length + 10, 50)` for TF and `max(read_len + 10, 100)` for histone are used", + } + pseudoreplication_random_seed: { + description: "Random seed (positive integer) used for pseudo-replication (shuffling reads in TAG-ALIGN and then split it into two).", + group: "alignment", + help: "Pseudo-replication (task spr) is done by using GNU \"shuf --random-source=sha256(random_seed)\". 
If this parameter == 0, then pipeline uses input TAG-ALIGN file's size (in bytes) for the random_seed.", + } + ctl_depth_limit: { + description: "Hard limit for chosen control's depth.", + group: "peak_calling", + help: "If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than this hard limit, then such control is subsampled.", + } + exp_ctl_depth_ratio_limit: { + description: "Second limit for chosen control's depth.", + group: "peak_calling", + help: "If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than experiment replicate's read depth multiplied by this factor then such control is subsampled down to maximum of multiplied value and hard limit chip.ctl_depth_limit.", + } + fraglen: { + description: "Fragment length for each biological replicate.", + group: "peak_calling", + help: "Fragment length is estimated by cross-correlation analysis, which is valid only when pipeline started from FASTQs. If defined, fragment length estimated by cross-correlation analysis is ignored.", + } + peak_caller: { + description: "Peak caller.", + group: "peak_calling", + help: "It is spp and macs2 by default for TF ChIP-seq and histone ChIP-seq, respectively. e.g. you can use macs2 for TF ChIP-Seq even though spp is by default for TF ChIP-Seq (chip.pipeline_type == tf).", + example: "spp", + } + always_use_pooled_ctl: { + description: "Always choose a pooled control for each experiment replicate.", + group: "peak_calling", + help: "If turned on, ignores chip.ctl_depth_ratio.", + } + ctl_depth_ratio: { + description: "Maximum depth ratio between control replicates.", + group: "peak_calling", + help: "If ratio of depth between any two controls is higher than this, then always use a pooled control for all experiment replicates.", + } + cap_num_peak: { + description: "Upper limit on the number of peaks.", + group: "peak_calling", + help: "It is 30000000 and 50000000 by default for spp and macs2, respectively.", + } + pval_thresh: { + description: "p-value Threshold for MACS2 peak caller.", + group: "peak_calling", + help: "macs2 callpeak -p", + } + fdr_thresh: { + description: "FDR threshold for spp peak caller (phantompeakqualtools).", + group: "peak_calling", + help: "run_spp.R -fdr=", + } + idr_thresh: { + description: "IDR threshold.", + group: "peak_calling", + } + align_cpu: { + description: "Number of cores for task align.", + group: "resource_parameter", + help: "Task align merges/crops/maps FASTQs.", + } + align_bowtie2_mem_factor: { + description: "Multiplication factor to determine memory required for task align with bowtie2 (default) as aligner.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + align_bwa_mem_factor: { + description: "Multiplication factor to determine memory required for task align with bwa as aligner.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + align_time_hr: { + description: "Walltime (h) required for task align.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. 
SLURM, SGE, ...", + } + align_bowtie2_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task align with bowtie2 (default) as aligner.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of FASTQs to determine required disk size of instance on GCP/AWS.", + } + align_bwa_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task align with bwa as aligner.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of FASTQs to determine required disk size of instance on GCP/AWS.", + } + filter_cpu: { + description: "Number of cores for task filter.", + group: "resource_parameter", + help: "Task filter filters raw/unfiltered BAM to get filtered/deduped BAM.", + } + filter_mem_factor: { + description: "Multiplication factor to determine memory required for task filter.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + filter_time_hr: { + description: "Walltime (h) required for task filter.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. SLURM, SGE, ...", + } + filter_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task filter.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of BAMs to determine required disk size of instance on GCP/AWS.", + } + bam2ta_cpu: { + description: "Number of cores for task bam2ta.", + group: "resource_parameter", + help: "Task bam2ta converts filtered/deduped BAM in to TAG-ALIGN (6-col BED) format.", + } + bam2ta_mem_factor: { + description: "Multiplication factor to determine memory required for task bam2ta.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + bam2ta_time_hr: { + description: "Walltime (h) required for task bam2ta.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. 
SLURM, SGE, ...", + } + bam2ta_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task bam2ta.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of filtered BAMs to determine required disk size of instance on GCP/AWS.", + } + spr_mem_factor: { + description: "Multiplication factor to determine memory required for task spr.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + spr_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task spr.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of filtered BAMs to determine required disk size of instance on GCP/AWS.", + } + jsd_cpu: { + description: "Number of cores for task jsd.", + group: "resource_parameter", + help: "Task jsd plots Jensen-Shannon distance and metrics related to it.", + } + jsd_mem_factor: { + description: "Multiplication factor to determine memory required for task jsd.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + jsd_time_hr: { + description: "Walltime (h) required for task jsd.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. SLURM, SGE, ...", + } + jsd_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task jsd.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of filtered BAMs to determine required disk size of instance on GCP/AWS.", + } + xcor_cpu: { + description: "Number of cores for task xcor.", + group: "resource_parameter", + help: "Task xcor does cross-correlation analysis (including a plot) on subsampled TAG-ALIGNs.", + } + xcor_mem_factor: { + description: "Multiplication factor to determine memory required for task xcor.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + xcor_time_hr: { + description: "Walltime (h) required for task xcor.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. SLURM, SGE, ...", + } + xcor_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task xcor.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.", + } + subsample_ctl_mem_factor: { + description: "Multiplication factor to determine memory required for task subsample_ctl.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + subsample_ctl_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task subsample_ctl.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.", + } + call_peak_cpu: { + description: "Number of cores for task call_peak. IF MACS2 is chosen as peak_caller (or chip.pipeline_type is histone), then cpu will be fixed at 2.", + group: "resource_parameter", + help: "Task call_peak call peaks on TAG-ALIGNs by using SPP/MACS2 peak caller. 
MACS2 is single-threaded so cpu will be fixed at 2 for MACS2.", + } + call_peak_spp_mem_factor: { + description: "Multiplication factor to determine memory required for task call_peak with spp as peak_caller.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + call_peak_macs2_mem_factor: { + description: "Multiplication factor to determine memory required for task call_peak with macs2 as peak_caller.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + call_peak_time_hr: { + description: "Walltime (h) required for task call_peak.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. SLURM, SGE, ...", + } + call_peak_spp_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task call_peak with spp as peak_caller.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.", + } + call_peak_macs2_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task call_peak with macs2 as peak_caller.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.", + } + macs2_signal_track_mem_factor: { + description: "Multiplication factor to determine memory required for task macs2_signal_track.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).", + } + macs2_signal_track_time_hr: { + description: "Walltime (h) required for task macs2_signal_track.", + group: "resource_parameter", + help: "This is for HPCs only. e.g. SLURM, SGE, ...", + } + macs2_signal_track_disk_factor: { + description: "Multiplication factor to determine persistent disk size for task macs2_signal_track.", + group: "resource_parameter", + help: "This factor will be multiplied to the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.", + } + align_trimmomatic_java_heap: { + description: "Maximum Java heap (java -Xmx) in task align.", + group: "resource_parameter", + help: "Maximum memory for Trimmomatic. If not defined, 90% of align task's memory will be used.", + } + filter_picard_java_heap: { + description: "Maximum Java heap (java -Xmx) in task filter.", + group: "resource_parameter", + help: "Maximum memory for Picard tools MarkDuplicates. If not defined, 90% of filter task's memory will be used.", + } + gc_bias_picard_java_heap: { + description: "Maximum Java heap (java -Xmx) in task gc_bias.", + group: "resource_parameter", + help: "Maximum memory for Picard tools CollectGcBiasMetrics. If not defined, 90% of gc_bias task's memory will be used.", + } + } + + input { + # group: runtime_environment + String docker = "encodedcc/chip-seq-pipeline:v2.2.2" + String singularity = "https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif" + String conda = "encd-chip" + String conda_macs2 = "encd-chip-macs2" + String conda_spp = "encd-chip-spp" + + # group: pipeline_metadata + String title = "Untitled" + String description = "No description" + + # group: reference_genome + File? genome_tsv + String? 
genome_name + File? ref_fa + File? bwa_idx_tar + File? bowtie2_idx_tar + File? chrsz + File? blacklist + File? blacklist2 + String? mito_chr_name + String? regex_bfilt_peak_chr_name + String? gensz + File? custom_aligner_idx_tar + + # group: input_genomic_data + Boolean? paired_end + Array[Boolean] paired_ends = [] + Array[File] fastqs_rep1_R1 = [] + Array[File] fastqs_rep1_R2 = [] + Array[File] fastqs_rep2_R1 = [] + Array[File] fastqs_rep2_R2 = [] + Array[File] fastqs_rep3_R1 = [] + Array[File] fastqs_rep3_R2 = [] + Array[File] fastqs_rep4_R1 = [] + Array[File] fastqs_rep4_R2 = [] + Array[File] fastqs_rep5_R1 = [] + Array[File] fastqs_rep5_R2 = [] + Array[File] fastqs_rep6_R1 = [] + Array[File] fastqs_rep6_R2 = [] + Array[File] fastqs_rep7_R1 = [] + Array[File] fastqs_rep7_R2 = [] + Array[File] fastqs_rep8_R1 = [] + Array[File] fastqs_rep8_R2 = [] + Array[File] fastqs_rep9_R1 = [] + Array[File] fastqs_rep9_R2 = [] + Array[File] fastqs_rep10_R1 = [] + Array[File] fastqs_rep10_R2 = [] + Array[File] bams = [] + Array[File] nodup_bams = [] + Array[File] tas = [] + Array[File] peaks = [] + Array[File] peaks_pr1 = [] + Array[File] peaks_pr2 = [] + File? peak_ppr1 + File? peak_ppr2 + File? peak_pooled + Boolean? ctl_paired_end + Array[Boolean] ctl_paired_ends = [] + Array[File] ctl_fastqs_rep1_R1 = [] + Array[File] ctl_fastqs_rep1_R2 = [] + Array[File] ctl_fastqs_rep2_R1 = [] + Array[File] ctl_fastqs_rep2_R2 = [] + Array[File] ctl_fastqs_rep3_R1 = [] + Array[File] ctl_fastqs_rep3_R2 = [] + Array[File] ctl_fastqs_rep4_R1 = [] + Array[File] ctl_fastqs_rep4_R2 = [] + Array[File] ctl_fastqs_rep5_R1 = [] + Array[File] ctl_fastqs_rep5_R2 = [] + Array[File] ctl_fastqs_rep6_R1 = [] + Array[File] ctl_fastqs_rep6_R2 = [] + Array[File] ctl_fastqs_rep7_R1 = [] + Array[File] ctl_fastqs_rep7_R2 = [] + Array[File] ctl_fastqs_rep8_R1 = [] + Array[File] ctl_fastqs_rep8_R2 = [] + Array[File] ctl_fastqs_rep9_R1 = [] + Array[File] ctl_fastqs_rep9_R2 = [] + Array[File] ctl_fastqs_rep10_R1 = [] + Array[File] ctl_fastqs_rep10_R2 = [] + Array[File] ctl_bams = [] + Array[File] ctl_nodup_bams = [] + Array[File] ctl_tas = [] + + # group: pipeline_parameter + String pipeline_type + Boolean align_only = false + Boolean redact_nodup_bam = false + Boolean true_rep_only = false + Boolean enable_count_signal_track = false + Boolean enable_jsd = true + Boolean enable_gc_bias = true + + # group: alignment + String aligner = "bowtie2" + File? custom_align_py + Boolean use_bwa_mem_for_pe = false + Int bwa_mem_read_len_limit = 70 + Boolean use_bowtie2_local_mode = false + Int crop_length = 0 + Int crop_length_tol = 2 + String trimmomatic_phred_score_format = "auto" + Int xcor_trim_bp = 50 + Boolean use_filt_pe_ta_for_xcor = false + String dup_marker = "picard" + Boolean no_dup_removal = false + Int mapq_thresh = 30 + Array[String] filter_chrs = [] + Int subsample_reads = 0 + Int ctl_subsample_reads = 0 + Int xcor_subsample_reads = 15000000 + Int xcor_exclusion_range_min = -500 + Int? xcor_exclusion_range_max + Int pseudoreplication_random_seed = 0 + + # group: peak_calling + Int ctl_depth_limit = 200000000 + Float exp_ctl_depth_ratio_limit = 5.0 + Array[Int?] fraglen = [] + String? peak_caller + Boolean always_use_pooled_ctl = true + Float ctl_depth_ratio = 1.2 + Int? 
cap_num_peak + Float pval_thresh = 0.01 + Float fdr_thresh = 0.01 + Float idr_thresh = 0.05 + + # group: resource_parameter + Int align_cpu = 6 + Float align_bowtie2_mem_factor = 0.15 + Float align_bwa_mem_factor = 1.0 + Int align_time_hr = 48 + Float align_bowtie2_disk_factor = 8.0 + Float align_bwa_disk_factor = 8.0 + Int filter_cpu = 4 + Float filter_mem_factor = 0.4 + Int filter_time_hr = 24 + Float filter_disk_factor = 8.0 + Int bam2ta_cpu = 2 + Float bam2ta_mem_factor = 0.35 + Int bam2ta_time_hr = 6 + Float bam2ta_disk_factor = 4.0 + Float spr_mem_factor = 20.0 + Float spr_disk_factor = 30.0 + Int jsd_cpu = 4 + Float jsd_mem_factor = 0.1 + Int jsd_time_hr = 6 + Float jsd_disk_factor = 2.0 + Int xcor_cpu = 2 + Float xcor_mem_factor = 1.0 + Int xcor_time_hr = 24 + Float xcor_disk_factor = 4.5 + Float subsample_ctl_mem_factor = 22.0 + Float subsample_ctl_disk_factor = 15.0 + Float macs2_signal_track_mem_factor = 12.0 + Int macs2_signal_track_time_hr = 24 + Float macs2_signal_track_disk_factor = 80.0 + Int call_peak_cpu = 6 + Float call_peak_spp_mem_factor = 5.0 + Float call_peak_macs2_mem_factor = 5.0 + Int call_peak_time_hr = 72 + Float call_peak_spp_disk_factor = 5.0 + Float call_peak_macs2_disk_factor = 30.0 + String? align_trimmomatic_java_heap + String? filter_picard_java_heap + String? gc_bias_picard_java_heap + } + + String pipeline_ver = "v2.2.2" + RuntimeEnvironment runtime_environment = { + "docker": docker, + "singularity": singularity, + "conda": conda, + } + RuntimeEnvironment runtime_environment_spp = { + "docker": docker, + "singularity": singularity, + "conda": conda_spp, + } + RuntimeEnvironment runtime_environment_macs2 = { + "docker": docker, + "singularity": singularity, + "conda": conda_macs2, + } + + # read genome data and paths + if (defined(genome_tsv)) { + call read_genome_tsv { input: + genome_tsv = genome_tsv, + runtime_environment = runtime_environment, + } + } + File ref_fa_ = select_first([ + ref_fa, + read_genome_tsv.ref_fa, + ]) + File? bwa_idx_tar_ = ( + if defined(bwa_idx_tar) + then bwa_idx_tar + else read_genome_tsv.bwa_idx_tar + ) + File bowtie2_idx_tar_ = select_first([ + bowtie2_idx_tar, + read_genome_tsv.bowtie2_idx_tar, + ]) + File chrsz_ = select_first([ + chrsz, + read_genome_tsv.chrsz, + ]) + String gensz_ = select_first([ + gensz, + read_genome_tsv.gensz, + ]) + File? blacklist1_ = ( + if defined(blacklist) + then blacklist + else read_genome_tsv.blacklist + ) + File? blacklist2_ = ( + if defined(blacklist2) + then blacklist2 + else read_genome_tsv.blacklist2 + ) + # merge multiple blacklists + # two blacklists can have different number of columns (3 vs 6) + # so we limit merged blacklist's columns to 3 + Array[File] blacklists = select_all([ + blacklist1_, + blacklist2_, + ]) + if (length(blacklists) > 1) { + call pool_ta as pool_blacklist { input: + tas = blacklists, + col = 3, + runtime_environment = runtime_environment, + } + } + File? 
blacklist_ = ( + if length(blacklists) > 1 + then pool_blacklist.ta_pooled + else if length(blacklists) > 0 + then blacklists[0] + else blacklist2_ + ) + String mito_chr_name_ = select_first([ + mito_chr_name, + read_genome_tsv.mito_chr_name, + ]) + String regex_bfilt_peak_chr_name_ = select_first([ + regex_bfilt_peak_chr_name, + read_genome_tsv.regex_bfilt_peak_chr_name, + ]) + String genome_name_ = select_first([ + genome_name, + read_genome_tsv.genome_name, + basename(chrsz_), + ]) + + ### temp vars (do not define these) + String aligner_ = ( + if defined(custom_align_py) + then "custom" + else aligner + ) + String peak_caller_ = ( + if pipeline_type == "tf" + then select_first([ + peak_caller, + "spp", + ]) + else select_first([ + peak_caller, + "macs2", + ]) + ) + String peak_type_ = ( + if peak_caller_ == "spp" + then "regionPeak" + else "narrowPeak" + ) + Boolean enable_idr = pipeline_type == "tf" # enable_idr for TF chipseq only + String idr_rank_ = ( + if peak_caller_ == "spp" + then "signal.value" + else if peak_caller_ == "macs2" + then "p.value" + else "p.value" + ) + Int cap_num_peak_spp = 300000 + Int cap_num_peak_macs2 = 500000 + Int cap_num_peak_ = ( + if peak_caller_ == "spp" + then select_first([ + cap_num_peak, + cap_num_peak_spp, + ]) + else select_first([ + cap_num_peak, + cap_num_peak_macs2, + ]) + ) + Int mapq_thresh_ = mapq_thresh + Boolean enable_xcor_ = ( + if pipeline_type == "control" + then false + else true + ) + Boolean enable_count_signal_track_ = ( + if pipeline_type == "control" + then false + else enable_count_signal_track + ) + Boolean enable_jsd_ = ( + if pipeline_type == "control" + then false + else enable_jsd + ) + Boolean enable_gc_bias_ = ( + if pipeline_type == "control" + then false + else enable_gc_bias + ) + Boolean align_only_ = ( + if pipeline_type == "control" + then true + else align_only + ) + + Float align_mem_factor_ = ( + if aligner_ == "bowtie2" + then align_bowtie2_mem_factor + else align_bwa_mem_factor + ) + Float align_disk_factor_ = ( + if aligner_ == "bowtie2" + then align_bowtie2_disk_factor + else align_bwa_disk_factor + ) + Float call_peak_mem_factor_ = ( + if peak_caller_ == "spp" + then call_peak_spp_mem_factor + else call_peak_macs2_mem_factor + ) + Float call_peak_disk_factor_ = ( + if peak_caller_ == "spp" + then call_peak_spp_disk_factor + else call_peak_macs2_disk_factor + ) + + # temporary 2-dim fastqs array [rep_id][merge_id] + Array[Array[File]] fastqs_R1 = ( + if length(fastqs_rep10_R1) > 0 + then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + fastqs_rep8_R1, + fastqs_rep9_R1, + fastqs_rep10_R1, + ] + else if length(fastqs_rep9_R1) > 0 + then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + fastqs_rep8_R1, + fastqs_rep9_R1, + ] + else if length(fastqs_rep8_R1) > 0 + then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + fastqs_rep8_R1, + ] + else if length(fastqs_rep7_R1) > 0 + then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + fastqs_rep7_R1, + ] + else if length(fastqs_rep6_R1) > 0 + then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + fastqs_rep4_R1, + fastqs_rep5_R1, + fastqs_rep6_R1, + ] + else if length(fastqs_rep5_R1) > 0 + then [ + fastqs_rep1_R1, + fastqs_rep2_R1, + fastqs_rep3_R1, + 
            fastqs_rep4_R1,
+            fastqs_rep5_R1,
+        ]
+        else if length(fastqs_rep4_R1) > 0
+        then [
+            fastqs_rep1_R1,
+            fastqs_rep2_R1,
+            fastqs_rep3_R1,
+            fastqs_rep4_R1,
+        ]
+        else if length(fastqs_rep3_R1) > 0
+        then [
+            fastqs_rep1_R1,
+            fastqs_rep2_R1,
+            fastqs_rep3_R1,
+        ]
+        else if length(fastqs_rep2_R1) > 0
+        then [
+            fastqs_rep1_R1,
+            fastqs_rep2_R1,
+        ]
+        else if length(fastqs_rep1_R1) > 0
+        then [
+            fastqs_rep1_R1,
+        ]
+        else []
+    )
+    # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep)
+    Array[Array[File]] fastqs_R2 = [
+        fastqs_rep1_R2,
+        fastqs_rep2_R2,
+        fastqs_rep3_R2,
+        fastqs_rep4_R2,
+        fastqs_rep5_R2,
+        fastqs_rep6_R2,
+        fastqs_rep7_R2,
+        fastqs_rep8_R2,
+        fastqs_rep9_R2,
+        fastqs_rep10_R2,
+    ]
+
+    # temporary 2-dim ctl fastqs array [rep_id][merge_id]
+    Array[Array[File]] ctl_fastqs_R1 = (
+        if length(ctl_fastqs_rep10_R1) > 0
+        then [
+            ctl_fastqs_rep1_R1,
+            ctl_fastqs_rep2_R1,
+            ctl_fastqs_rep3_R1,
+            ctl_fastqs_rep4_R1,
+            ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1,
+            ctl_fastqs_rep7_R1,
+            ctl_fastqs_rep8_R1,
+            ctl_fastqs_rep9_R1,
+            ctl_fastqs_rep10_R1,
+        ]
+        else if length(ctl_fastqs_rep9_R1) > 0
+        then [
+            ctl_fastqs_rep1_R1,
+            ctl_fastqs_rep2_R1,
+            ctl_fastqs_rep3_R1,
+            ctl_fastqs_rep4_R1,
+            ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1,
+            ctl_fastqs_rep7_R1,
+            ctl_fastqs_rep8_R1,
+            ctl_fastqs_rep9_R1,
+        ]
+        else if length(ctl_fastqs_rep8_R1) > 0
+        then [
+            ctl_fastqs_rep1_R1,
+            ctl_fastqs_rep2_R1,
+            ctl_fastqs_rep3_R1,
+            ctl_fastqs_rep4_R1,
+            ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1,
+            ctl_fastqs_rep7_R1,
+            ctl_fastqs_rep8_R1,
+        ]
+        else if length(ctl_fastqs_rep7_R1) > 0
+        then [
+            ctl_fastqs_rep1_R1,
+            ctl_fastqs_rep2_R1,
+            ctl_fastqs_rep3_R1,
+            ctl_fastqs_rep4_R1,
+            ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1,
+            ctl_fastqs_rep7_R1,
+        ]
+        else if length(ctl_fastqs_rep6_R1) > 0
+        then [
+            ctl_fastqs_rep1_R1,
+            ctl_fastqs_rep2_R1,
+            ctl_fastqs_rep3_R1,
+            ctl_fastqs_rep4_R1,
+            ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1,
+        ]
+        else if length(ctl_fastqs_rep5_R1) > 0
+        then [
+            ctl_fastqs_rep1_R1,
+            ctl_fastqs_rep2_R1,
+            ctl_fastqs_rep3_R1,
+            ctl_fastqs_rep4_R1,
+            ctl_fastqs_rep5_R1,
+        ]
+        else if length(ctl_fastqs_rep4_R1) > 0
+        then [
+            ctl_fastqs_rep1_R1,
+            ctl_fastqs_rep2_R1,
+            ctl_fastqs_rep3_R1,
+            ctl_fastqs_rep4_R1,
+        ]
+        else if length(ctl_fastqs_rep3_R1) > 0
+        then [
+            ctl_fastqs_rep1_R1,
+            ctl_fastqs_rep2_R1,
+            ctl_fastqs_rep3_R1,
+        ]
+        else if length(ctl_fastqs_rep2_R1) > 0
+        then [
+            ctl_fastqs_rep1_R1,
+            ctl_fastqs_rep2_R1,
+        ]
+        else if length(ctl_fastqs_rep1_R1) > 0
+        then [
+            ctl_fastqs_rep1_R1,
+        ]
+        else []
+    )
+    # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep)
+    Array[Array[File]] ctl_fastqs_R2 = [
+        ctl_fastqs_rep1_R2,
+        ctl_fastqs_rep2_R2,
+        ctl_fastqs_rep3_R2,
+        ctl_fastqs_rep4_R2,
+        ctl_fastqs_rep5_R2,
+        ctl_fastqs_rep6_R2,
+        ctl_fastqs_rep7_R2,
+        ctl_fastqs_rep8_R2,
+        ctl_fastqs_rep9_R2,
+        ctl_fastqs_rep10_R2,
+    ]
+
+    # temporary variables to get number of replicates
+    # WDL-style implementation of max(A,B,C,...)
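+    # (Older WDL standard libraries have no max() builtin, so each num_rep_*
+    # declaration below takes a running maximum pairwise with
+    # "if x < y then y else x". A minimal sketch of the idiom, using
+    # hypothetical names that are not part of this pipeline:
+    #
+    #     Int max_ab = if a < b then b else a
+    #     Int max_abc = if c < max_ab then max_ab else c
+    #
+    # Folding that step over A, B, C, ... yields max(A,B,C,...) as used here.)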
+    Int num_rep_fastq = length(fastqs_R1)
+    Int num_rep_bam = (
+        if length(bams) < num_rep_fastq
+        then num_rep_fastq
+        else length(bams)
+    )
+    Int num_rep_nodup_bam = (
+        if length(nodup_bams) < num_rep_bam
+        then num_rep_bam
+        else length(nodup_bams)
+    )
+    Int num_rep_ta = (
+        if length(tas) < num_rep_nodup_bam
+        then num_rep_nodup_bam
+        else length(tas)
+    )
+    Int num_rep_peak = (
+        if length(peaks) < num_rep_ta
+        then num_rep_ta
+        else length(peaks)
+    )
+    Int num_rep = num_rep_peak
+
+    # temporary variables to get number of controls
+    Int num_ctl_fastq = length(ctl_fastqs_R1)
+    Int num_ctl_bam = (
+        if length(ctl_bams) < num_ctl_fastq
+        then num_ctl_fastq
+        else length(ctl_bams)
+    )
+    Int num_ctl_nodup_bam = (
+        if length(ctl_nodup_bams) < num_ctl_bam
+        then num_ctl_bam
+        else length(ctl_nodup_bams)
+    )
+    Int num_ctl_ta = (
+        if length(ctl_tas) < num_ctl_nodup_bam
+        then num_ctl_nodup_bam
+        else length(ctl_tas)
+    )
+    Int num_ctl = num_ctl_ta
+
+    # sanity check for inputs
+    if (num_rep == 0 && num_ctl == 0) {
+        call raise_exception as error_input_data { input:
+            msg = "No FASTQ/BAM/TAG-ALIGN/PEAK defined in your input JSON. Check if your FASTQs are defined as \"chip.fastqs_repX_RY\". DO NOT MISS suffix _R1 even for single-ended FASTQ.",
+            runtime_environment = runtime_environment,
+        }
+    }
+    if (!align_only_ && peak_caller_ == "spp" && num_ctl == 0) {
+        call raise_exception as error_control_required { input:
+            msg = "SPP requires control inputs. Define control input files (\"chip.ctl_*\") in an input JSON file.",
+            runtime_environment = runtime_environment,
+        }
+    }
+    if ((num_rep_fastq > 0 || num_ctl_fastq > 0) && aligner_ != "bwa" && aligner_ != "bowtie2" && aligner_ != "custom") {
+        call raise_exception as error_wrong_aligner { input:
+            msg = "Choose chip.aligner to align your fastqs. Choices: bwa, bowtie2, custom.",
+            runtime_environment = runtime_environment,
+        }
+    }
+    if (aligner_ != "bwa" && use_bwa_mem_for_pe) {
+        call raise_exception as error_use_bwa_mem_for_non_bwa { input:
+            msg = "To use chip.use_bwa_mem_for_pe, choose bwa for chip.aligner.",
+            runtime_environment = runtime_environment,
+        }
+    }
+    if (aligner_ != "bowtie2" && use_bowtie2_local_mode) {
+        call raise_exception as error_use_bowtie2_local_mode_for_non_bowtie2 { input:
+            msg = "To use chip.use_bowtie2_local_mode, choose bowtie2 for chip.aligner.",
+            runtime_environment = runtime_environment,
+        }
+    }
+    if (aligner_ == "custom" && (!defined(custom_align_py) || !defined(custom_aligner_idx_tar))) {
+        call raise_exception as error_custom_aligner { input:
+            msg = "To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.",
+            runtime_environment = runtime_environment,
+        }
+    }
+
+    if ((ctl_depth_limit > 0 || exp_ctl_depth_ratio_limit > 0) && num_ctl > 1 && length(ctl_paired_ends) > 1) {
+        call raise_exception as error_subsample_pooled_control_with_mixed_endedness { input:
+            msg = "Cannot use automatic control subsampling (\"chip.ctl_depth_limit\">0 and \"chip.exp_ctl_depth_ratio_limit\">0) for "
+                + "multiple controls with mixed endedness (e.g. SE ctl-rep1 and PE ctl-rep2). "
+                + "Automatic control subsampling is enabled by default. "
+                + "Disable automatic control subsampling by explicitly defining the above two parameters as 0 in your input JSON file. "
+                + "You can still use manual control subsampling (\"chip.ctl_subsample_reads\">0) since it is done "
+                + "for individual control's TAG-ALIGN output according to each control's endedness. 
", + runtime_environment = runtime_environment, + } + } + if (pipeline_type == "control" && num_ctl > 0) { + call raise_exception as error_ctl_input_defined_in_control_mode { input: + msg = "In control mode (chip.pipeline_type: control), do not define ctl_* input variables. Define fastqs_repX_RY instead.", + runtime_environment = runtime_environment, + } + } + if (pipeline_type == "control" && num_rep_fastq == 0) { + call raise_exception as error_ctl_fastq_input_required_for_control_mode { input: + msg = "Control mode (chip.pipeline_type: control) is for FASTQs only. Define FASTQs in fastqs_repX_RY. Pipeline will recognize them as control FASTQs.", + runtime_environment = runtime_environment, + } + } + + # align each replicate + scatter (i in range(num_rep)) { + # to override endedness definition for individual replicate + # paired_end will override paired_ends[i] + Boolean paired_end_ = ( + if !defined(paired_end) && i < length(paired_ends) + then paired_ends[i] + else select_first([ + paired_end, + ]) + ) + + Boolean has_input_of_align = i < length(fastqs_R1) && length(fastqs_R1[i]) > 0 + Boolean has_output_of_align = i < length(bams) + if (has_input_of_align && !has_output_of_align) { + call align { input: + fastqs_R1 = fastqs_R1[i], + fastqs_R2 = ( + if paired_end_ + then fastqs_R2[i] + else [] + ), + crop_length = crop_length, + crop_length_tol = crop_length_tol, + trimmomatic_phred_score_format = trimmomatic_phred_score_format, + + aligner = aligner_, + mito_chr_name = mito_chr_name_, + custom_align_py = custom_align_py, + idx_tar = ( + if aligner == "bwa" + then bwa_idx_tar_ + else if aligner == "bowtie2" + then bowtie2_idx_tar_ + else custom_aligner_idx_tar + ), + paired_end = paired_end_, + use_bwa_mem_for_pe = use_bwa_mem_for_pe, + bwa_mem_read_len_limit = bwa_mem_read_len_limit, + use_bowtie2_local_mode = use_bowtie2_local_mode, + ref_fa = ref_fa_, + + trimmomatic_java_heap = align_trimmomatic_java_heap, + cpu = align_cpu, + mem_factor = align_mem_factor_, + time_hr = align_time_hr, + disk_factor = align_disk_factor_, + runtime_environment = runtime_environment, + } + } + File? bam_ = ( + if has_output_of_align + then bams[i] + else align.bam + ) + + Boolean has_input_of_filter = has_output_of_align || defined(align.bam) + Boolean has_output_of_filter = i < length(nodup_bams) + # skip if we already have output of this step + if (has_input_of_filter && !has_output_of_filter) { + call filter { input: + bam = bam_, + paired_end = paired_end_, + ref_fa = ref_fa_, + redact_nodup_bam = redact_nodup_bam, + dup_marker = dup_marker, + mapq_thresh = mapq_thresh_, + filter_chrs = filter_chrs, + chrsz = chrsz_, + no_dup_removal = no_dup_removal, + mito_chr_name = mito_chr_name_, + + cpu = filter_cpu, + mem_factor = filter_mem_factor, + picard_java_heap = filter_picard_java_heap, + time_hr = filter_time_hr, + disk_factor = filter_disk_factor, + runtime_environment = runtime_environment, + } + } + File? nodup_bam_ = ( + if has_output_of_filter + then nodup_bams[i] + else filter.nodup_bam + ) + + Boolean has_input_of_bam2ta = has_output_of_filter || defined(filter.nodup_bam) + Boolean has_output_of_bam2ta = i < length(tas) + if (has_input_of_bam2ta && !has_output_of_bam2ta) { + call bam2ta { input: + bam = nodup_bam_, + subsample = subsample_reads, + paired_end = paired_end_, + mito_chr_name = mito_chr_name_, + + cpu = bam2ta_cpu, + mem_factor = bam2ta_mem_factor, + time_hr = bam2ta_time_hr, + disk_factor = bam2ta_disk_factor, + runtime_environment = runtime_environment, + } + } + File? 
ta_ = ( + if has_output_of_bam2ta + then tas[i] + else bam2ta.ta + ) + + Boolean has_input_of_spr = has_output_of_bam2ta || defined(bam2ta.ta) + if (has_input_of_spr && !align_only_ && !true_rep_only) { + call spr { input: + ta = ta_, + paired_end = paired_end_, + pseudoreplication_random_seed = pseudoreplication_random_seed, + mem_factor = spr_mem_factor, + disk_factor = spr_disk_factor, + runtime_environment = runtime_environment, + } + } + + Boolean has_input_of_count_signal_track = has_output_of_bam2ta || defined(bam2ta.ta) + if (has_input_of_count_signal_track && enable_count_signal_track_) { + # generate count signal track + call count_signal_track { input: + ta = ta_, + chrsz = chrsz_, + runtime_environment = runtime_environment, + } + } + + if (enable_gc_bias_ && defined(nodup_bam_) && defined(ref_fa_)) { + call gc_bias { input: + nodup_bam = nodup_bam_, + ref_fa = ref_fa_, + picard_java_heap = gc_bias_picard_java_heap, + runtime_environment = runtime_environment, + } + } + + # special trimming/mapping for xcor (when starting from FASTQs) + if (has_input_of_align) { + call align as align_R1 { input: + fastqs_R1 = fastqs_R1[i], + fastqs_R2 = [], + trim_bp = xcor_trim_bp, + crop_length = 0, + crop_length_tol = 0, + trimmomatic_phred_score_format = trimmomatic_phred_score_format, + + aligner = aligner_, + mito_chr_name = mito_chr_name_, + custom_align_py = custom_align_py, + idx_tar = ( + if aligner == "bwa" + then bwa_idx_tar_ + else if aligner == "bowtie2" + then bowtie2_idx_tar_ + else custom_aligner_idx_tar + ), + paired_end = false, + use_bwa_mem_for_pe = false, + bwa_mem_read_len_limit = 0, + use_bowtie2_local_mode = use_bowtie2_local_mode, + ref_fa = ref_fa_, + + cpu = align_cpu, + mem_factor = align_mem_factor_, + time_hr = align_time_hr, + disk_factor = align_disk_factor_, + runtime_environment = runtime_environment, + } + # no bam deduping for xcor + call filter as filter_R1 { input: + bam = align_R1.bam, + paired_end = false, + redact_nodup_bam = false, + dup_marker = dup_marker, + mapq_thresh = mapq_thresh_, + filter_chrs = filter_chrs, + chrsz = chrsz_, + no_dup_removal = true, + mito_chr_name = mito_chr_name_, + + cpu = filter_cpu, + mem_factor = filter_mem_factor, + picard_java_heap = filter_picard_java_heap, + time_hr = filter_time_hr, + disk_factor = filter_disk_factor, + runtime_environment = runtime_environment, + } + call bam2ta as bam2ta_no_dedup_R1 { input: + bam = filter_R1.nodup_bam, # it's named as nodup bam but it's not deduped but just filtered + paired_end = false, + subsample = 0, + mito_chr_name = mito_chr_name_, + + cpu = bam2ta_cpu, + mem_factor = bam2ta_mem_factor, + time_hr = bam2ta_time_hr, + disk_factor = bam2ta_disk_factor, + runtime_environment = runtime_environment, + } + } + + # special trimming/mapping for xcor (when starting from BAMs) + Boolean has_input_of_bam2ta_no_dedup = (has_output_of_align || defined(align.bam)) && !defined(bam2ta_no_dedup_R1.ta) + if (has_input_of_bam2ta_no_dedup) { + call filter as filter_no_dedup { input: + bam = bam_, + paired_end = paired_end_, + redact_nodup_bam = false, + dup_marker = dup_marker, + mapq_thresh = mapq_thresh_, + filter_chrs = filter_chrs, + chrsz = chrsz_, + no_dup_removal = true, + mito_chr_name = mito_chr_name_, + + cpu = filter_cpu, + mem_factor = filter_mem_factor, + picard_java_heap = filter_picard_java_heap, + time_hr = filter_time_hr, + disk_factor = filter_disk_factor, + runtime_environment = runtime_environment, + } + call bam2ta as bam2ta_no_dedup { input: + bam = 
filter_no_dedup.nodup_bam, # named nodup_bam for consistency, but this BAM is only filtered, not deduplicated
+                paired_end = paired_end_,
+                subsample = 0,
+                mito_chr_name = mito_chr_name_,
+
+                cpu = bam2ta_cpu,
+                mem_factor = bam2ta_mem_factor,
+                time_hr = bam2ta_time_hr,
+                disk_factor = bam2ta_disk_factor,
+                runtime_environment = runtime_environment,
+            }
+        }
+
+        # use trimmed/unfiltered R1 tagAlign for paired-end datasets
+        # if not starting from fastqs, keep using the old method
+        # (mapping with both ends for tag-aligns to be used for xcor)
+        # subsample tagalign (non-mito) and cross-correlation analysis
+        File? ta_xcor = (
+            if defined(bam2ta_no_dedup_R1.ta)
+            then bam2ta_no_dedup_R1.ta
+            else if defined(bam2ta_no_dedup.ta)
+            then bam2ta_no_dedup.ta
+            else ta_
+        )
+        Boolean paired_end_xcor = (
+            if defined(bam2ta_no_dedup_R1.ta)
+            then false
+            else paired_end_
+        )
+
+        Boolean has_input_of_xcor = defined(ta_xcor)
+        if (has_input_of_xcor && enable_xcor_) {
+            call xcor { input:
+                ta = ta_xcor,
+                paired_end = paired_end_xcor,
+                subsample = xcor_subsample_reads,
+                mito_chr_name = mito_chr_name_,
+                chip_seq_type = pipeline_type,
+                exclusion_range_min = xcor_exclusion_range_min,
+                exclusion_range_max = xcor_exclusion_range_max,
+                cpu = xcor_cpu,
+                mem_factor = xcor_mem_factor,
+                time_hr = xcor_time_hr,
+                disk_factor = xcor_disk_factor,
+                runtime_environment = runtime_environment_spp,
+            }
+        }
+
+        # before peak calling, get fragment length from xcor analysis or given input
+        # if the fraglen array is defined in the input JSON, fraglen from xcor will be ignored
+        Int? fraglen_ = (
+            if i < length(fraglen)
+            then fraglen[i]
+            else xcor.fraglen
+        )
+    }
+
+    # align each control
+    scatter (i in range(num_ctl)) {
+        # to override endedness definition for individual control
+        # ctl_paired_end will override ctl_paired_ends[i]
+        Boolean ctl_paired_end_ = (
+            if !defined(ctl_paired_end) && i < length(ctl_paired_ends)
+            then ctl_paired_ends[i]
+            else select_first([
+                ctl_paired_end,
+                paired_end,
+            ])
+        )
+
+        Boolean has_input_of_align_ctl = i < length(ctl_fastqs_R1) && length(ctl_fastqs_R1[i]) > 0
+        Boolean has_output_of_align_ctl = i < length(ctl_bams)
+        if (has_input_of_align_ctl && !has_output_of_align_ctl) {
+            call align as align_ctl { input:
+                fastqs_R1 = ctl_fastqs_R1[i],
+                fastqs_R2 = (
+                    if ctl_paired_end_
+                    then ctl_fastqs_R2[i]
+                    else []
+                ),
+                crop_length = crop_length,
+                crop_length_tol = crop_length_tol,
+                trimmomatic_phred_score_format = trimmomatic_phred_score_format,
+
+                aligner = aligner_,
+                mito_chr_name = mito_chr_name_,
+                custom_align_py = custom_align_py,
+                idx_tar = (
+                    if aligner == "bwa"
+                    then bwa_idx_tar_
+                    else if aligner == "bowtie2"
+                    then bowtie2_idx_tar_
+                    else custom_aligner_idx_tar
+                ),
+                paired_end = ctl_paired_end_,
+                use_bwa_mem_for_pe = use_bwa_mem_for_pe,
+                bwa_mem_read_len_limit = bwa_mem_read_len_limit,
+                use_bowtie2_local_mode = use_bowtie2_local_mode,
+                ref_fa = ref_fa_,
+
+                trimmomatic_java_heap = align_trimmomatic_java_heap,
+                cpu = align_cpu,
+                mem_factor = align_mem_factor_,
+                time_hr = align_time_hr,
+                disk_factor = align_disk_factor_,
+                runtime_environment = runtime_environment,
+            }
+        }
+        File? ctl_bam_ = (
+            if has_output_of_align_ctl
+            then ctl_bams[i]
+            else align_ctl.bam
+        )
+
+        Boolean has_input_of_filter_ctl = has_output_of_align_ctl || defined(align_ctl.bam)
+        Boolean has_output_of_filter_ctl = i < length(ctl_nodup_bams)
+        # skip if we already have output of this step
+        if (has_input_of_filter_ctl && !has_output_of_filter_ctl) {
+            call filter as filter_ctl { input:
+                bam = ctl_bam_,
+                paired_end = ctl_paired_end_,
+                ref_fa = ref_fa_,
+                redact_nodup_bam = redact_nodup_bam,
+                dup_marker = dup_marker,
+                mapq_thresh = mapq_thresh_,
+                filter_chrs = filter_chrs,
+                chrsz = chrsz_,
+                no_dup_removal = no_dup_removal,
+                mito_chr_name = mito_chr_name_,
+
+                cpu = filter_cpu,
+                mem_factor = filter_mem_factor,
+                picard_java_heap = filter_picard_java_heap,
+                time_hr = filter_time_hr,
+                disk_factor = filter_disk_factor,
+                runtime_environment = runtime_environment,
+            }
+        }
+        File? ctl_nodup_bam_ = (
+            if has_output_of_filter_ctl
+            then ctl_nodup_bams[i]
+            else filter_ctl.nodup_bam
+        )
+
+        Boolean has_input_of_bam2ta_ctl = has_output_of_filter_ctl || defined(filter_ctl.nodup_bam)
+        Boolean has_output_of_bam2ta_ctl = i < length(ctl_tas)
+        if (has_input_of_bam2ta_ctl && !has_output_of_bam2ta_ctl) {
+            call bam2ta as bam2ta_ctl { input:
+                bam = ctl_nodup_bam_,
+                subsample = ctl_subsample_reads,
+                paired_end = ctl_paired_end_,
+                mito_chr_name = mito_chr_name_,
+
+                cpu = bam2ta_cpu,
+                mem_factor = bam2ta_mem_factor,
+                time_hr = bam2ta_time_hr,
+                disk_factor = bam2ta_disk_factor,
+                runtime_environment = runtime_environment,
+            }
+        }
+        File? ctl_ta_ = (
+            if has_output_of_bam2ta_ctl
+            then ctl_tas[i]
+            else bam2ta_ctl.ta
+        )
+    }
+
+    # if there are TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta = length(select_all(ta_)) == num_rep
+    if (has_all_inputs_of_pool_ta && num_rep > 1) {
+        # pool tagaligns from true replicates
+        call pool_ta { input:
+            tas = ta_,
+            prefix = "rep",
+            runtime_environment = runtime_environment,
+        }
+    }
+
+    # if there are pr1 TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta_pr1 = length(select_all(spr.ta_pr1)) == num_rep
+    if (has_all_inputs_of_pool_ta_pr1 && num_rep > 1 && !align_only_ && !true_rep_only) {
+        # pool tagaligns from pseudo replicate 1
+        call pool_ta as pool_ta_pr1 { input:
+            tas = spr.ta_pr1,
+            prefix = "rep-pr1",
+            runtime_environment = runtime_environment,
+        }
+    }
+
+    # if there are pr2 TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta_pr2 = length(select_all(spr.ta_pr2)) == num_rep
+    if (has_all_inputs_of_pool_ta_pr2 && num_rep > 1 && !align_only_ && !true_rep_only) {
+        # pool tagaligns from pseudo replicate 2
+        call pool_ta as pool_ta_pr2 { input:
+            tas = spr.ta_pr2,
+            prefix = "rep-pr2",
+            runtime_environment = runtime_environment,
+        }
+    }
+
+    # if there are CTL TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta_ctl = length(select_all(ctl_ta_)) == num_ctl
+    if (has_all_inputs_of_pool_ta_ctl && num_ctl > 1) {
+        # pool tagaligns from control replicates
+        call pool_ta as pool_ta_ctl { input:
+            tas = ctl_ta_,
+            prefix = "ctl",
+            runtime_environment = runtime_environment,
+        }
+    }
+
+    Boolean has_input_of_count_signal_track_pooled = defined(pool_ta.ta_pooled)
+    if (has_input_of_count_signal_track_pooled && enable_count_signal_track_ && num_rep > 1) {
+        call count_signal_track as count_signal_track_pooled { input:
+            ta = pool_ta.ta_pooled,
+            chrsz = chrsz_,
+            runtime_environment = runtime_environment,
+        }
+    }
+
+    Boolean has_input_of_jsd = defined(blacklist_) && 
length(select_all(nodup_bam_)) == num_rep + if (has_input_of_jsd && num_rep > 0 && enable_jsd_) { + # fingerprint and JS-distance plot + call jsd { input: + nodup_bams = nodup_bam_, + ctl_bams = ctl_nodup_bam_, # use first control only + blacklist = blacklist_, + mapq_thresh = mapq_thresh_, + + cpu = jsd_cpu, + mem_factor = jsd_mem_factor, + time_hr = jsd_time_hr, + disk_factor = jsd_disk_factor, + runtime_environment = runtime_environment, + } + } + + Boolean has_all_input_of_choose_ctl = length(select_all(ta_)) == num_rep && length(select_all(ctl_ta_)) == num_ctl && num_ctl > 0 + if (has_all_input_of_choose_ctl && !align_only_) { + # choose appropriate control for each exp IP replicate + # outputs: + # choose_ctl.idx : control replicate index for each exp replicate + # -1 means pooled ctl replicate + call choose_ctl { input: + tas = ta_, + ctl_tas = ctl_ta_, + ta_pooled = pool_ta.ta_pooled, + ctl_ta_pooled = pool_ta_ctl.ta_pooled, + always_use_pooled_ctl = always_use_pooled_ctl, + ctl_depth_ratio = ctl_depth_ratio, + ctl_depth_limit = ctl_depth_limit, + exp_ctl_depth_ratio_limit = exp_ctl_depth_ratio_limit, + runtime_environment = runtime_environment, + } + } + + scatter (i in range(num_rep)) { + # make control ta array [[1,2,3,4]] -> [[1],[2],[3],[4]] + # chosen_ctl_ta_id + # >=0: control TA index (this means that control TA with this index exists) + # -1: use pooled control + # -2: there is no control + Int chosen_ctl_ta_id = ( + if has_all_input_of_choose_ctl && !align_only_ + then select_first([ + choose_ctl.chosen_ctl_ta_ids, + ])[i] + else -2 + ) + Int chosen_ctl_ta_subsample = ( + if has_all_input_of_choose_ctl && !align_only_ + then select_first([ + choose_ctl.chosen_ctl_ta_subsample, + ])[i] + else 0 + ) + Boolean chosen_ctl_paired_end = ( + if chosen_ctl_ta_id == -2 + then false + else if chosen_ctl_ta_id == -1 + then ctl_paired_end_[0] + else ctl_paired_end_[chosen_ctl_ta_id] + ) + + if (chosen_ctl_ta_id > -2 && chosen_ctl_ta_subsample > 0) { + call subsample_ctl { input: + ta = ( + if chosen_ctl_ta_id == -1 + then pool_ta_ctl.ta_pooled + else ctl_ta_[chosen_ctl_ta_id] + ), + subsample = chosen_ctl_ta_subsample, + paired_end = chosen_ctl_paired_end, + mem_factor = subsample_ctl_mem_factor, + disk_factor = subsample_ctl_disk_factor, + runtime_environment = runtime_environment, + } + } + Array[File] chosen_ctl_tas = ( + if chosen_ctl_ta_id <= -2 + then [] + else if chosen_ctl_ta_subsample > 0 + then [ + select_first([ + subsample_ctl.ta_subsampled, + ]), + ] + else if chosen_ctl_ta_id == -1 + then [ + select_first([ + pool_ta_ctl.ta_pooled, + ]), + ] + else [ + select_first([ + ctl_ta_[chosen_ctl_ta_id], + ]), + ] + ) + } + Int chosen_ctl_ta_pooled_subsample = ( + if has_all_input_of_choose_ctl && !align_only_ + then select_first([ + choose_ctl.chosen_ctl_ta_subsample_pooled, + ]) + else 0 + ) + + # workaround for dx error (Unsupported combination: womType: Int womValue: ([225], Array[Int])) + Array[Int] fraglen_tmp = select_all(fraglen_) + + # we have all tas and ctl_tas (optional for histone chipseq) ready, let's call peaks + scatter (i in range(num_rep)) { + Boolean has_input_of_call_peak = defined(ta_[i]) + Boolean has_output_of_call_peak = i < length(peaks) + if (has_input_of_call_peak && !has_output_of_call_peak && !align_only_) { + call call_peak { input: + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([ + [ + ta_[i], + ], + chosen_ctl_tas[i], + ]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + 
fdr_thresh = fdr_thresh, + fraglen = fraglen_tmp[i], + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = ( + if peak_caller_ == "spp" + then runtime_environment_spp + else if peak_caller_ == "macs2" + then runtime_environment_macs2 + else runtime_environment + ), + } + } + File? peak_ = ( + if has_output_of_call_peak + then peaks[i] + else call_peak.peak + ) + + # signal track + if (has_input_of_call_peak && !align_only_) { + call macs2_signal_track { input: + tas = flatten([ + [ + ta_[i], + ], + chosen_ctl_tas[i], + ]), + gensz = gensz_, + chrsz = chrsz_, + pval_thresh = pval_thresh, + fraglen = fraglen_tmp[i], + + mem_factor = macs2_signal_track_mem_factor, + disk_factor = macs2_signal_track_disk_factor, + time_hr = macs2_signal_track_time_hr, + runtime_environment = runtime_environment_macs2, + } + } + + # call peaks on 1st pseudo replicated tagalign + Boolean has_input_of_call_peak_pr1 = defined(spr.ta_pr1[i]) + Boolean has_output_of_call_peak_pr1 = i < length(peaks_pr1) + if (has_input_of_call_peak_pr1 && !has_output_of_call_peak_pr1 && !true_rep_only) { + call call_peak as call_peak_pr1 { input: + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([ + [ + spr.ta_pr1[i], + ], + chosen_ctl_tas[i], + ]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_tmp[i], + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = ( + if peak_caller_ == "spp" + then runtime_environment_spp + else if peak_caller_ == "macs2" + then runtime_environment_macs2 + else runtime_environment + ), + } + } + File? peak_pr1_ = ( + if has_output_of_call_peak_pr1 + then peaks_pr1[i] + else call_peak_pr1.peak + ) + + # call peaks on 2nd pseudo replicated tagalign + Boolean has_input_of_call_peak_pr2 = defined(spr.ta_pr2[i]) + Boolean has_output_of_call_peak_pr2 = i < length(peaks_pr2) + if (has_input_of_call_peak_pr2 && !has_output_of_call_peak_pr2 && !true_rep_only) { + call call_peak as call_peak_pr2 { input: + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([ + [ + spr.ta_pr2[i], + ], + chosen_ctl_tas[i], + ]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_tmp[i], + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = ( + if peak_caller_ == "spp" + then runtime_environment_spp + else if peak_caller_ == "macs2" + then runtime_environment_macs2 + else runtime_environment + ), + } + } + File? 
peak_pr2_ = ( + if has_output_of_call_peak_pr2 + then peaks_pr2[i] + else call_peak_pr2.peak + ) + } + + # if ( !align_only_ && num_rep > 1 ) { + # rounded mean of fragment length, which will be used for + # 1) calling peaks for pooled true/pseudo replicates + # 2) calculating FRiP + call rounded_mean as fraglen_mean { input: + ints = fraglen_tmp, + runtime_environment = runtime_environment, + } + # } + + if (has_all_input_of_choose_ctl && !align_only_ && chosen_ctl_ta_pooled_subsample > 0) { + call subsample_ctl as subsample_ctl_pooled { input: + ta = ( + if num_ctl < 2 + then ctl_ta_[0] + else pool_ta_ctl.ta_pooled + ), + subsample = chosen_ctl_ta_pooled_subsample, + paired_end = ctl_paired_end_[0], + mem_factor = subsample_ctl_mem_factor, + disk_factor = subsample_ctl_disk_factor, + runtime_environment = runtime_environment, + } + } + # actually not an array + Array[File?] chosen_ctl_ta_pooled = ( + if !has_all_input_of_choose_ctl || align_only_ + then [] + else if chosen_ctl_ta_pooled_subsample > 0 + then [ + subsample_ctl_pooled.ta_subsampled, + ] + else if num_ctl < 2 + then [ + ctl_ta_[0], + ] + else [ + pool_ta_ctl.ta_pooled, + ] + ) + + Boolean has_input_of_call_peak_pooled = defined(pool_ta.ta_pooled) + Boolean has_output_of_call_peak_pooled = defined(peak_pooled) + if (has_input_of_call_peak_pooled && !has_output_of_call_peak_pooled && !align_only_ && num_rep > 1) { + # call peaks on pooled replicate + # always call peaks for pooled replicate to get signal tracks + call call_peak as call_peak_pooled { input: + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([ + select_all([ + pool_ta.ta_pooled, + ]), + chosen_ctl_ta_pooled, + ]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = ( + if peak_caller_ == "spp" + then runtime_environment_spp + else if peak_caller_ == "macs2" + then runtime_environment_macs2 + else runtime_environment + ), + } + } + File? 
peak_pooled_ = ( + if has_output_of_call_peak_pooled + then peak_pooled + else call_peak_pooled.peak + ) + + # macs2 signal track for pooled rep + if (has_input_of_call_peak_pooled && !align_only_ && num_rep > 1) { + call macs2_signal_track as macs2_signal_track_pooled { input: + tas = flatten([ + select_all([ + pool_ta.ta_pooled, + ]), + chosen_ctl_ta_pooled, + ]), + gensz = gensz_, + chrsz = chrsz_, + pval_thresh = pval_thresh, + fraglen = fraglen_mean.rounded_mean, + + mem_factor = macs2_signal_track_mem_factor, + disk_factor = macs2_signal_track_disk_factor, + time_hr = macs2_signal_track_time_hr, + runtime_environment = runtime_environment_macs2, + } + } + + Boolean has_input_of_call_peak_ppr1 = defined(pool_ta_pr1.ta_pooled) + Boolean has_output_of_call_peak_ppr1 = defined(peak_ppr1) + if (has_input_of_call_peak_ppr1 && !has_output_of_call_peak_ppr1 && !align_only_ && !true_rep_only && num_rep > 1) { + # call peaks on 1st pooled pseudo replicates + call call_peak as call_peak_ppr1 { input: + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([ + select_all([ + pool_ta_pr1.ta_pooled, + ]), + chosen_ctl_ta_pooled, + ]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = ( + if peak_caller_ == "spp" + then runtime_environment_spp + else if peak_caller_ == "macs2" + then runtime_environment_macs2 + else runtime_environment + ), + } + } + File? peak_ppr1_ = ( + if has_output_of_call_peak_ppr1 + then peak_ppr1 + else call_peak_ppr1.peak + ) + + Boolean has_input_of_call_peak_ppr2 = defined(pool_ta_pr2.ta_pooled) + Boolean has_output_of_call_peak_ppr2 = defined(peak_ppr2) + if (has_input_of_call_peak_ppr2 && !has_output_of_call_peak_ppr2 && !align_only_ && !true_rep_only && num_rep > 1) { + # call peaks on 2nd pooled pseudo replicates + call call_peak as call_peak_ppr2 { input: + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([ + select_all([ + pool_ta_pr2.ta_pooled, + ]), + chosen_ctl_ta_pooled, + ]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = ( + if peak_caller_ == "spp" + then runtime_environment_spp + else if peak_caller_ == "macs2" + then runtime_environment_macs2 + else runtime_environment + ), + } + } + File? peak_ppr2_ = ( + if has_output_of_call_peak_ppr2 + then peak_ppr2 + else call_peak_ppr2.peak + ) + + # do IDR/overlap on all pairs of two replicates (i,j) + # where i and j are zero-based indices and 0 <= i < j < num_rep + scatter (pair in cross(range(num_rep), range(num_rep))) { + # pair.left = 0-based index of 1st replicate + # pair.right = 0-based index of 2nd replicate + File? peak1_ = peak_[pair.left] + File? 
peak2_ = peak_[pair.right]
+        if (!align_only_ && pair.left < pair.right) {
+            # Naive overlap on every pair of true replicates
+            call overlap { input:
+                prefix = "rep" + (pair.left + 1) + "_vs_rep" + (pair.right + 1),
+                peak1 = peak1_,
+                peak2 = peak2_,
+                peak_pooled = peak_pooled_,
+                fraglen = fraglen_mean.rounded_mean,
+                peak_type = peak_type_,
+                blacklist = blacklist_,
+                chrsz = chrsz_,
+                regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+                ta = pool_ta.ta_pooled,
+                runtime_environment = runtime_environment,
+            }
+        }
+        if (enable_idr && !align_only_ && pair.left < pair.right) {
+            # IDR on every pair of true replicates
+            call idr { input:
+                prefix = "rep" + (pair.left + 1) + "_vs_rep" + (pair.right + 1),
+                peak1 = peak1_,
+                peak2 = peak2_,
+                peak_pooled = peak_pooled_,
+                fraglen = fraglen_mean.rounded_mean,
+                idr_thresh = idr_thresh,
+                peak_type = peak_type_,
+                rank = idr_rank_,
+                blacklist = blacklist_,
+                chrsz = chrsz_,
+                regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+                ta = pool_ta.ta_pooled,
+                runtime_environment = runtime_environment,
+            }
+        }
+    }
+
+    # overlap on pseudo-replicates (pr1, pr2) for each true replicate
+    if (!align_only_ && !true_rep_only) {
+        scatter (i in range(num_rep)) {
+            call overlap as overlap_pr { input:
+                prefix = "rep" + (i + 1) + "-pr1_vs_rep" + (i + 1) + "-pr2",
+                peak1 = peak_pr1_[i],
+                peak2 = peak_pr2_[i],
+                peak_pooled = peak_[i],
+                fraglen = fraglen_[i],
+                peak_type = peak_type_,
+                blacklist = blacklist_,
+                chrsz = chrsz_,
+                regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+                ta = ta_[i],
+                runtime_environment = runtime_environment,
+            }
+        }
+    }
+
+    if (!align_only_ && !true_rep_only && enable_idr) {
+        scatter (i in range(num_rep)) {
+            # IDR on pseudo replicates
+            call idr as idr_pr { input:
+                prefix = "rep" + (i + 1) + "-pr1_vs_rep" + (i + 1) + "-pr2",
+                peak1 = peak_pr1_[i],
+                peak2 = peak_pr2_[i],
+                peak_pooled = peak_[i],
+                fraglen = fraglen_[i],
+                idr_thresh = idr_thresh,
+                peak_type = peak_type_,
+                rank = idr_rank_,
+                blacklist = blacklist_,
+                chrsz = chrsz_,
+                regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+                ta = ta_[i],
+                runtime_environment = runtime_environment,
+            }
+        }
+    }
+
+    if (!align_only_ && !true_rep_only && num_rep > 1) {
+        # Naive overlap on pooled pseudo replicates
+        call overlap as overlap_ppr { input:
+            prefix = "pooled-pr1_vs_pooled-pr2",
+            peak1 = peak_ppr1_,
+            peak2 = peak_ppr2_,
+            peak_pooled = peak_pooled_,
+            peak_type = peak_type_,
+            fraglen = fraglen_mean.rounded_mean,
+            blacklist = blacklist_,
+            chrsz = chrsz_,
+            regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+            ta = pool_ta.ta_pooled,
+            runtime_environment = runtime_environment,
+        }
+    }
+
+    if (!align_only_ && !true_rep_only && num_rep > 1 && enable_idr) {
+        # IDR on pooled pseudo replicates
+        call idr as idr_ppr { input:
+            prefix = "pooled-pr1_vs_pooled-pr2",
+            peak1 = peak_ppr1_,
+            peak2 = peak_ppr2_,
+            peak_pooled = peak_pooled_,
+            idr_thresh = idr_thresh,
+            peak_type = peak_type_,
+            fraglen = fraglen_mean.rounded_mean,
+            rank = idr_rank_,
+            blacklist = blacklist_,
+            chrsz = chrsz_,
+            regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+            ta = pool_ta.ta_pooled,
+            runtime_environment = runtime_environment,
+        }
+    }
+
+    # reproducibility QC for overlap/IDR peaks
+    if (!align_only_ && !true_rep_only && num_rep > 0) {
+        # reproducibility QC for overlapping peaks
+        call reproducibility as reproducibility_overlap { input:
+            prefix = "overlap",
+            peaks = select_all(overlap.bfilt_overlap_peak),
+            peaks_pr = (
+                if defined(overlap_pr.bfilt_overlap_peak)
+                then select_first([
+                    overlap_pr.bfilt_overlap_peak,
+                ])
+                else []
+            ),
+            peak_ppr = overlap_ppr.bfilt_overlap_peak,
+            peak_type = peak_type_,
+            chrsz = chrsz_,
+            runtime_environment = runtime_environment,
+        }
+    }
+
+    if (!align_only_ && !true_rep_only && num_rep > 0 && enable_idr) {
+        # reproducibility QC for IDR peaks
+        call reproducibility as reproducibility_idr { input:
+            prefix = "idr",
+            peaks = select_all(idr.bfilt_idr_peak),
+            peaks_pr = (
+                if defined(idr_pr.bfilt_idr_peak)
+                then select_first([
+                    idr_pr.bfilt_idr_peak,
+                ])
+                else []
+            ),
+            peak_ppr = idr_ppr.bfilt_idr_peak,
+            peak_type = peak_type_,
+            chrsz = chrsz_,
+            runtime_environment = runtime_environment,
+        }
+    }
+
+    # Generate final QC report and JSON
+    call qc_report { input:
+        pipeline_ver = pipeline_ver,
+        title = title,
+        description = description,
+        genome = genome_name_,
+        paired_ends = paired_end_,
+        ctl_paired_ends = ctl_paired_end_,
+        pipeline_type = pipeline_type,
+        aligner = aligner_,
+        no_dup_removal = no_dup_removal,
+        peak_caller = peak_caller_,
+        cap_num_peak = cap_num_peak_,
+        idr_thresh = idr_thresh,
+        pval_thresh = pval_thresh,
+        xcor_trim_bp = xcor_trim_bp,
+        xcor_subsample_reads = xcor_subsample_reads,
+
+        samstat_qcs = select_all(align.samstat_qc),
+        nodup_samstat_qcs = select_all(filter.samstat_qc),
+        dup_qcs = select_all(filter.dup_qc),
+        lib_complexity_qcs = select_all(filter.lib_complexity_qc),
+        xcor_plots = select_all(xcor.plot_png),
+        xcor_scores = select_all(xcor.score),
+
+        ctl_samstat_qcs = select_all(align_ctl.samstat_qc),
+        ctl_nodup_samstat_qcs = select_all(filter_ctl.samstat_qc),
+        ctl_dup_qcs = select_all(filter_ctl.dup_qc),
+        ctl_lib_complexity_qcs = select_all(filter_ctl.lib_complexity_qc),
+
+        jsd_plot = jsd.plot,
+        jsd_qcs = (
+            if defined(jsd.jsd_qcs)
+            then select_first([
+                jsd.jsd_qcs,
+            ])
+            else []
+        ),
+
+        frip_qcs = select_all(call_peak.frip_qc),
+        frip_qcs_pr1 = select_all(call_peak_pr1.frip_qc),
+        frip_qcs_pr2 = select_all(call_peak_pr2.frip_qc),
+        frip_qc_pooled = call_peak_pooled.frip_qc,
+        frip_qc_ppr1 = call_peak_ppr1.frip_qc,
+        frip_qc_ppr2 = call_peak_ppr2.frip_qc,
+
+        idr_plots = select_all(idr.idr_plot),
+        idr_plots_pr = (
+            if defined(idr_pr.idr_plot)
+            then select_first([
+                idr_pr.idr_plot,
+            ])
+            else []
+        ),
+        idr_plot_ppr = idr_ppr.idr_plot,
+        frip_idr_qcs = select_all(idr.frip_qc),
+        frip_idr_qcs_pr = (
+            if defined(idr_pr.frip_qc)
+            then select_first([
+                idr_pr.frip_qc,
+            ])
+            else []
+        ),
+        frip_idr_qc_ppr = idr_ppr.frip_qc,
+        frip_overlap_qcs = select_all(overlap.frip_qc),
+        frip_overlap_qcs_pr = (
+            if defined(overlap_pr.frip_qc)
+            then select_first([
+                overlap_pr.frip_qc,
+            ])
+            else []
+        ),
+        frip_overlap_qc_ppr = overlap_ppr.frip_qc,
+        idr_reproducibility_qc = reproducibility_idr.reproducibility_qc,
+        overlap_reproducibility_qc = reproducibility_overlap.reproducibility_qc,
+
+        gc_plots = select_all(gc_bias.gc_plot),
+
+        peak_region_size_qcs = select_all(call_peak.peak_region_size_qc),
+        peak_region_size_plots = select_all(call_peak.peak_region_size_plot),
+        num_peak_qcs = select_all(call_peak.num_peak_qc),
+
+        idr_opt_peak_region_size_qc = reproducibility_idr.peak_region_size_qc,
+        idr_opt_peak_region_size_plot = reproducibility_idr.peak_region_size_plot,
+        idr_opt_num_peak_qc = reproducibility_idr.num_peak_qc,
+
+        overlap_opt_peak_region_size_qc = reproducibility_overlap.peak_region_size_qc,
+        overlap_opt_peak_region_size_plot = reproducibility_overlap.peak_region_size_plot,
+        overlap_opt_num_peak_qc = 
reproducibility_overlap.num_peak_qc, + + runtime_environment = runtime_environment, + } + + output { + File report = qc_report.report + File qc_json = qc_report.qc_json + Boolean qc_json_ref_match = qc_report.qc_json_ref_match + } +} + +task align { + input { + Array[File] fastqs_R1 # [merge_id] + Array[File] fastqs_R2 + File? ref_fa + Int? trim_bp # this is for R1 only + Int crop_length + Int crop_length_tol + String? trimmomatic_phred_score_format + String aligner + String mito_chr_name + Int? multimapping + File? custom_align_py + File? idx_tar # reference index tar + Boolean paired_end + Boolean use_bwa_mem_for_pe + Int bwa_mem_read_len_limit + Boolean use_bowtie2_local_mode + String? trimmomatic_java_heap + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(fastqs_R1, "G") + size(fastqs_R2, "G") + Float mem_gb = 5.0 + size(idx_tar, "G") + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(40.0 + disk_factor * input_file_size_gb) + + Float trimmomatic_java_heap_factor = 0.9 + Array[Array[File]] tmp_fastqs = ( + if paired_end + then transpose([ + fastqs_R1, + fastqs_R2, + ]) + else transpose([ + fastqs_R1, + ]) + ) + + command <<< + set -e + + # check if pipeline dependencies can be found + if [[ -z "$(which encode_task_merge_fastq.py 2> /dev/null || true)" ]] + then + echo -e "\n* Error: pipeline environment (docker, singularity or conda) not found." 1>&2 + exit 3 + fi + python3 $(which encode_task_merge_fastq.py) \ + ~{write_tsv(tmp_fastqs)} \ + ~{( + if paired_end + then "--paired-end" + else "" + )} \ + ~{"--nth " + cpu} + + if [ -z '~{trim_bp}' ]; then + SUFFIX= + else + SUFFIX=_trimmed + python3 $(which encode_task_trim_fastq.py) \ + R1/*.fastq.gz \ + --trim-bp ~{trim_bp} \ + --out-dir R1$SUFFIX + if [ '~{paired_end}' == 'true' ]; then + python3 $(which encode_task_trim_fastq.py) \ + R2/*.fastq.gz \ + --trim-bp ~{trim_bp} \ + --out-dir R2$SUFFIX + fi + fi + if [ '~{crop_length}' == '0' ]; then + SUFFIX=$SUFFIX + else + NEW_SUFFIX="$SUFFIX"_cropped + python3 $(which encode_task_trimmomatic.py) \ + --fastq1 R1$SUFFIX/*.fastq.gz \ + ~{( + if paired_end + then "--fastq2 R2$SUFFIX/*.fastq.gz" + else "" + )} \ + ~{( + if paired_end + then "--paired-end" + else "" + )} \ + --crop-length ~{crop_length} \ + --crop-length-tol "~{crop_length_tol}" \ + ~{"--phred-score-format " + trimmomatic_phred_score_format} \ + --out-dir-R1 R1$NEW_SUFFIX \ + ~{( + if paired_end + then "--out-dir-R2 R2$NEW_SUFFIX" + else "" + )} \ + ~{"--trimmomatic-java-heap " + ( + if defined(trimmomatic_java_heap) + then trimmomatic_java_heap + else (round(mem_gb * trimmomatic_java_heap_factor) + "G") + )} \ + ~{"--nth " + cpu} + SUFFIX=$NEW_SUFFIX + fi + + if [ '~{aligner}' == 'bwa' ]; then + python3 $(which encode_task_bwa.py) \ + ~{idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ~{( + if paired_end + then "R2$SUFFIX/*.fastq.gz" + else "" + )} \ + ~{( + if paired_end + then "--paired-end" + else "" + )} \ + ~{( + if use_bwa_mem_for_pe + then "--use-bwa-mem-for-pe" + else "" + )} \ + ~{"--bwa-mem-read-len-limit " + bwa_mem_read_len_limit} \ + ~{"--mem-gb " + samtools_mem_gb} \ + ~{"--nth " + cpu} + + elif [ '~{aligner}' == 'bowtie2' ]; then + python3 $(which encode_task_bowtie2.py) \ + ~{idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ~{( + if paired_end + then "R2$SUFFIX/*.fastq.gz" + else "" + )} \ + ~{"--multimapping " + multimapping} \ + ~{( + if paired_end + then "--paired-end" + else "" + )} \ + ~{( + 
if use_bowtie2_local_mode + then "--local" + else "" + )} \ + ~{"--mem-gb " + samtools_mem_gb} \ + ~{"--nth " + cpu} + else + python3 ~{custom_align_py} \ + ~{idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ~{( + if paired_end + then "R2$SUFFIX/*.fastq.gz" + else "" + )} \ + ~{( + if paired_end + then "--paired-end" + else "" + )} \ + ~{"--mem-gb " + samtools_mem_gb} \ + ~{"--nth " + cpu} + fi + + python3 $(which encode_task_post_align.py) \ + R1$SUFFIX/*.fastq.gz $(ls *.bam) \ + ~{"--mito-chr-name " + mito_chr_name} \ + ~{"--mem-gb " + samtools_mem_gb} \ + ~{"--nth " + cpu} + rm -rf R1 R2 R1$SUFFIX R2$SUFFIX + >>> + + output { + File bam = glob("*.bam")[0] + File bai = glob("*.bai")[0] + File samstat_qc = glob("*.samstats.qc")[0] + File read_len_log = glob("*.read_length.txt")[0] + } + + runtime { + cpu: cpu + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + preemptible: 0 + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task filter { + input { + File? bam + Boolean paired_end + File? ref_fa + Boolean redact_nodup_bam + String dup_marker # picard.jar MarkDuplicates (picard) or + # sambamba markdup (sambamba) + Int mapq_thresh # threshold for low MAPQ reads removal + Array[String] filter_chrs # chrs to be removed from final (nodup/filt) BAM + File chrsz # 2-col chromosome sizes file + Boolean no_dup_removal # no dupe reads removal when filtering BAM + String mito_chr_name + Int cpu + Float mem_factor + String? picard_java_heap + Int time_hr + Float disk_factor + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(bam, "G") + Float picard_java_heap_factor = 0.9 + Float mem_gb = 6.0 + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + set -e + python3 $(which encode_task_filter.py) \ + ~{bam} \ + ~{( + if paired_end + then "--paired-end" + else "" + )} \ + --multimapping 0 \ + ~{"--dup-marker " + dup_marker} \ + ~{"--mapq-thresh " + mapq_thresh} \ + --filter-chrs ~{sep=" " filter_chrs} \ + ~{"--chrsz " + chrsz} \ + ~{( + if no_dup_removal + then "--no-dup-removal" + else "" + )} \ + ~{"--mito-chr-name " + mito_chr_name} \ + ~{"--mem-gb " + samtools_mem_gb} \ + ~{"--nth " + cpu} \ + ~{"--picard-java-heap " + ( + if defined(picard_java_heap) + then picard_java_heap + else (round(mem_gb * picard_java_heap_factor) + "G") + )} + + if [ '~{redact_nodup_bam}' == 'true' ]; then + python3 $(which encode_task_bam_to_pbam.py) \ + $(ls *.bam) \ + ~{"--ref-fa " + ref_fa} \ + '--delete-original-bam' + fi + >>> + + output { + File nodup_bam = glob("*.bam")[0] + File nodup_bai = glob("*.bai")[0] + File samstat_qc = glob("*.samstats.qc")[0] + File dup_qc = glob("*.dup.qc")[0] + File lib_complexity_qc = glob("*.lib_complexity.qc")[0] + } + + runtime { + cpu: cpu + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task bam2ta { + input { + File? 
bam + Boolean paired_end + String mito_chr_name # mito chromosome name + Int subsample # number of reads to subsample TAGALIGN + # this affects all downstream analysis + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(bam, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + set -e + python3 $(which encode_task_bam2ta.py) \ + ~{bam} \ + --disable-tn5-shift \ + ~{( + if paired_end + then "--paired-end" + else "" + )} \ + ~{"--mito-chr-name " + mito_chr_name} \ + ~{"--subsample " + subsample} \ + ~{"--mem-gb " + samtools_mem_gb} \ + ~{"--nth " + cpu} + >>> + + output { + File ta = glob("*.tagAlign.gz")[0] + } + + runtime { + cpu: cpu + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task spr { + input { + File? ta + Boolean paired_end + Int pseudoreplication_random_seed + Float mem_factor + Float disk_factor + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + set -e + python3 $(which encode_task_spr.py) \ + ~{ta} \ + ~{"--pseudoreplication-random-seed " + pseudoreplication_random_seed} \ + ~{( + if paired_end + then "--paired-end" + else "" + )} + >>> + + output { + File ta_pr1 = glob("*.pr1.tagAlign.gz")[0] + File ta_pr2 = glob("*.pr2.tagAlign.gz")[0] + } + + runtime { + cpu: 1 + memory: "~{mem_gb} GB" + time: 4 + disks: "local-disk ~{disk_gb} SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task pool_ta { + input { + Array[File?] tas + Int? col # number of columns in pooled TA + String? prefix # basename prefix + RuntimeEnvironment runtime_environment + } + + command <<< + set -e + python3 $(which encode_task_pool_ta.py) \ + ~{sep=" " select_all(tas)} \ + ~{"--prefix " + prefix} \ + ~{"--col " + col} + >>> + + output { + File ta_pooled = glob("*.tagAlign.gz")[0] + } + + runtime { + cpu: 1 + memory: "8 GB" + time: 4 + disks: "local-disk 100 SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task xcor { + input { + File? ta + Boolean paired_end + String mito_chr_name + Int subsample # number of reads to subsample TAGALIGN + # this will be used for xcor only + # will not affect any downstream analysis + String? chip_seq_type + Int? exclusion_range_min + Int? 
exclusion_range_max + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 8.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + set -e + python3 $(which encode_task_xcor.py) \ + ~{ta} \ + ~{( + if paired_end + then "--paired-end" + else "" + )} \ + ~{"--mito-chr-name " + mito_chr_name} \ + ~{"--subsample " + subsample} \ + ~{"--chip-seq-type " + chip_seq_type} \ + ~{"--exclusion-range-min " + exclusion_range_min} \ + ~{"--exclusion-range-max " + exclusion_range_max} \ + ~{"--subsample " + subsample} \ + ~{"--nth " + cpu} + >>> + + output { + File plot_pdf = glob("*.cc.plot.pdf")[0] + File plot_png = glob("*.cc.plot.png")[0] + File score = glob("*.cc.qc")[0] + File fraglen_log = glob("*.cc.fraglen.txt")[0] + Int fraglen = read_int(fraglen_log) + } + + runtime { + cpu: cpu + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task jsd { + input { + Array[File?] nodup_bams + Array[File?] ctl_bams + File? blacklist + Int mapq_thresh + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(nodup_bams, "G") + size(ctl_bams, "G") + Float mem_gb = 5.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + set -e + python3 $(which encode_task_jsd.py) \ + ~{sep=" " select_all(nodup_bams)} \ + ~{( + if length(ctl_bams) > 0 + then "--ctl-bam " + select_first(ctl_bams) + else "" + )} \ + ~{"--mapq-thresh " + mapq_thresh} \ + ~{"--blacklist " + blacklist} \ + ~{"--nth " + cpu} + >>> + + output { + File plot = glob("*.png")[0] + Array[File] jsd_qcs = glob("*.jsd.qc") + } + + runtime { + cpu: cpu + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task choose_ctl { + input { + Array[File?] tas + Array[File?] ctl_tas + File? ta_pooled + File? ctl_ta_pooled + Boolean always_use_pooled_ctl # always use pooled control for all exp rep. + Float ctl_depth_ratio # if ratio between controls is higher than this + # then always use pooled control for all exp rep. 
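+        # (Illustrative example, not from the pipeline itself: with
+        # ctl_depth_ratio = 1.2, control replicates of 30M and 20M reads have
+        # a depth ratio of 1.5 > 1.2, so the pooled control would be used for
+        # every experiment replicate; the exact rule is implemented in
+        # encode_task_choose_ctl.py.)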
+ Int ctl_depth_limit + Float exp_ctl_depth_ratio_limit + RuntimeEnvironment runtime_environment + } + + command <<< + set -e + python3 $(which encode_task_choose_ctl.py) \ + --tas ~{sep=" " select_all(tas)} \ + --ctl-tas ~{sep=" " select_all(ctl_tas)} \ + ~{"--ta-pooled " + ta_pooled} \ + ~{"--ctl-ta-pooled " + ctl_ta_pooled} \ + ~{( + if always_use_pooled_ctl + then "--always-use-pooled-ctl" + else "" + )} \ + ~{"--ctl-depth-ratio " + ctl_depth_ratio} \ + ~{"--ctl-depth-limit " + ctl_depth_limit} \ + ~{"--exp-ctl-depth-ratio-limit " + exp_ctl_depth_ratio_limit} + >>> + + output { + File chosen_ctl_id_tsv = glob("chosen_ctl.tsv")[0] + File chosen_ctl_subsample_tsv = glob("chosen_ctl_subsample.tsv")[0] + File chosen_ctl_subsample_pooled_txt = glob("chosen_ctl_subsample_pooled.txt")[0] + Array[Int] chosen_ctl_ta_ids = read_lines(chosen_ctl_id_tsv) + Array[Int] chosen_ctl_ta_subsample = read_lines(chosen_ctl_subsample_tsv) + Int chosen_ctl_ta_subsample_pooled = read_int(chosen_ctl_subsample_pooled_txt) + } + + runtime { + cpu: 1 + memory: "4 GB" + time: 4 + disks: "local-disk 50 SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task count_signal_track { + input { + File? ta # tag-align + File chrsz # 2-col chromosome sizes file + RuntimeEnvironment runtime_environment + } + + Float mem_gb = 8.0 + + command <<< + set -e + python3 $(which encode_task_count_signal_track.py) \ + ~{ta} \ + ~{"--chrsz " + chrsz} \ + ~{"--mem-gb " + mem_gb} + >>> + + output { + File pos_bw = glob("*.positive.bigwig")[0] + File neg_bw = glob("*.negative.bigwig")[0] + } + + runtime { + cpu: 1 + memory: "~{mem_gb} GB" + time: 4 + disks: "local-disk 50 SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task subsample_ctl { + input { + File? ta + Boolean paired_end + Int subsample + Float mem_factor + Float disk_factor + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + python3 $(which encode_task_subsample_ctl.py) \ + ~{ta} \ + ~{"--subsample " + subsample} \ + ~{( + if paired_end + then "--paired-end" + else "" + )} \ + >>> + + output { + File ta_subsampled = glob("*.tagAlign.gz")[0] + } + + runtime { + cpu: 1 + memory: "~{mem_gb} GB" + time: 4 + disks: "local-disk ~{disk_gb} SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task call_peak { + input { + String peak_caller + String peak_type + Array[File?] tas # [ta, control_ta]. control_ta is optional + Int fraglen # fragment length from xcor + String gensz # Genome size (sum of entries in 2nd column of + # chr. sizes file, or hs for human, ms for mouse) + File chrsz # 2-col chromosome sizes file + Int cap_num_peak # cap number of raw peaks called from MACS2 + Float pval_thresh # p.value threshold for MACS2 + Float? fdr_thresh # FDR threshold for SPP + File? blacklist # blacklist BED to filter raw peaks + String? 
regex_bfilt_peak_chr_name + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(tas, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + set -e + + if [ '~{peak_caller}' == 'macs2' ]; then + python3 $(which encode_task_macs2_chip.py) \ + ~{sep=" " select_all(tas)} \ + ~{"--gensz " + gensz} \ + ~{"--chrsz " + chrsz} \ + ~{"--fraglen " + fraglen} \ + ~{"--cap-num-peak " + cap_num_peak} \ + ~{"--pval-thresh " + pval_thresh} \ + ~{"--mem-gb " + mem_gb} + + elif [ '~{peak_caller}' == 'spp' ]; then + python3 $(which encode_task_spp.py) \ + ~{sep=" " select_all(tas)} \ + ~{"--chrsz " + chrsz} \ + ~{"--fraglen " + fraglen} \ + ~{"--cap-num-peak " + cap_num_peak} \ + ~{"--fdr-thresh " + fdr_thresh} \ + ~{"--nth " + cpu} + fi + + python3 $(which encode_task_post_call_peak_chip.py) \ + $(ls *Peak.gz) \ + ~{"--ta " + tas[0]} \ + ~{"--regex-bfilt-peak-chr-name '" + regex_bfilt_peak_chr_name + "'"} \ + ~{"--chrsz " + chrsz} \ + ~{"--fraglen " + fraglen} \ + ~{"--peak-type " + peak_type} \ + ~{"--blacklist " + blacklist} + >>> + + output { + File peak = glob("*[!.][!b][!f][!i][!l][!t]." + peak_type + ".gz")[0] + # generated by post_call_peak py + File bfilt_peak = glob("*.bfilt." + peak_type + ".gz")[0] + File bfilt_peak_bb = glob("*.bfilt." + peak_type + ".bb")[0] + File bfilt_peak_starch = glob("*.bfilt." + peak_type + ".starch")[0] + File bfilt_peak_hammock = glob("*.bfilt." + peak_type + ".hammock.gz*")[0] + File bfilt_peak_hammock_tbi = glob("*.bfilt." + peak_type + ".hammock.gz*")[1] + File frip_qc = glob("*.frip.qc")[0] + File peak_region_size_qc = glob("*.peak_region_size.qc")[0] + File peak_region_size_plot = glob("*.peak_region_size.png")[0] + File num_peak_qc = glob("*.num_peak.qc")[0] + } + + runtime { + cpu: ( + if peak_caller == "macs2" + then 2 + else cpu + ) + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + preemptible: 0 + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task macs2_signal_track { + input { + Array[File?] tas # [ta, control_ta]. control_ta is optional + Int fraglen # fragment length from xcor + String gensz # Genome size (sum of entries in 2nd column of + # chr. sizes file, or hs for human, ms for mouse) + File chrsz # 2-col chromosome sizes file + Float pval_thresh # p.value threshold + Float mem_factor + Int time_hr + Float disk_factor + RuntimeEnvironment runtime_environment + } + + Float input_file_size_gb = size(tas, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command <<< + set -e + python3 $(which encode_task_macs2_signal_track_chip.py) \ + ~{sep=" " select_all(tas)} \ + ~{"--gensz " + gensz} \ + ~{"--chrsz " + chrsz} \ + ~{"--fraglen " + fraglen} \ + ~{"--pval-thresh " + pval_thresh} \ + ~{"--mem-gb " + mem_gb} + >>> + + output { + File pval_bw = glob("*.pval.signal.bigwig")[0] + File fc_bw = glob("*.fc.signal.bigwig")[0] + } + + runtime { + cpu: 1 + memory: "~{mem_gb} GB" + time: time_hr + disks: "local-disk ~{disk_gb} SSD" + preemptible: 0 + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task idr { + input { + String prefix # prefix for IDR output file + File? peak1 + File? peak2 + File? 
peak_pooled + Float idr_thresh + File? blacklist # blacklist BED to filter raw peaks + String regex_bfilt_peak_chr_name + # parameters to compute FRiP + File? ta # to calculate FRiP + Int? fraglen # fragment length from xcor + File chrsz # 2-col chromosome sizes file + String peak_type + String rank + RuntimeEnvironment runtime_environment + } + + command <<< + set -e + ~{( + if defined(ta) + then "" + else "touch null.frip.qc" + )} + touch null + python3 $(which encode_task_idr.py) \ + ~{peak1} ~{peak2} ~{peak_pooled} \ + ~{"--prefix " + prefix} \ + ~{"--idr-thresh " + idr_thresh} \ + ~{"--peak-type " + peak_type} \ + --idr-rank ~{rank} \ + ~{"--fraglen " + fraglen} \ + ~{"--chrsz " + chrsz} \ + ~{"--blacklist " + blacklist} \ + ~{"--regex-bfilt-peak-chr-name '" + regex_bfilt_peak_chr_name + "'"} \ + ~{"--ta " + ta} + >>> + + output { + File idr_peak = glob("*[!.][!b][!f][!i][!l][!t]." + peak_type + ".gz")[0] + File bfilt_idr_peak = glob("*.bfilt." + peak_type + ".gz")[0] + File bfilt_idr_peak_bb = glob("*.bfilt." + peak_type + ".bb")[0] + File bfilt_idr_peak_starch = glob("*.bfilt." + peak_type + ".starch")[0] + File bfilt_idr_peak_hammock = glob("*.bfilt." + peak_type + ".hammock.gz*")[0] + File bfilt_idr_peak_hammock_tbi = glob("*.bfilt." + peak_type + ".hammock.gz*")[1] + File idr_plot = glob("*.txt.png")[0] + File idr_unthresholded_peak = glob("*.txt.gz")[0] + File idr_log = glob("*.idr*.log")[0] + File frip_qc = ( + if defined(ta) + then glob("*.frip.qc")[0] + else glob("null")[0] + ) + } + + runtime { + cpu: 1 + memory: "4 GB" + time: 4 + disks: "local-disk 50 SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task overlap { + input { + String prefix # prefix for IDR output file + File? peak1 + File? peak2 + File? peak_pooled + File? blacklist # blacklist BED to filter raw peaks + String regex_bfilt_peak_chr_name + # parameters to compute FRiP + File? ta # to calculate FRiP + Int? fraglen # fragment length from xcor (for FRIP) + File chrsz # 2-col chromosome sizes file + String peak_type + RuntimeEnvironment runtime_environment + } + + command <<< + set -e + ~{( + if defined(ta) + then "" + else "touch null.frip.qc" + )} + touch null + python3 $(which encode_task_overlap.py) \ + ~{peak1} ~{peak2} ~{peak_pooled} \ + ~{"--prefix " + prefix} \ + ~{"--peak-type " + peak_type} \ + ~{"--fraglen " + fraglen} \ + ~{"--chrsz " + chrsz} \ + ~{"--blacklist " + blacklist} \ + --nonamecheck \ + ~{"--regex-bfilt-peak-chr-name '" + regex_bfilt_peak_chr_name + "'"} \ + ~{"--ta " + ta} + >>> + + output { + File overlap_peak = glob("*[!.][!b][!f][!i][!l][!t]." + peak_type + ".gz")[0] + File bfilt_overlap_peak = glob("*.bfilt." + peak_type + ".gz")[0] + File bfilt_overlap_peak_bb = glob("*.bfilt." + peak_type + ".bb")[0] + File bfilt_overlap_peak_starch = glob("*.bfilt." + peak_type + ".starch")[0] + File bfilt_overlap_peak_hammock = glob("*.bfilt." + peak_type + ".hammock.gz*")[0] + File bfilt_overlap_peak_hammock_tbi = glob("*.bfilt." + peak_type + ".hammock.gz*")[1] + File frip_qc = ( + if defined(ta) + then glob("*.frip.qc")[0] + else glob("null")[0] + ) + } + + runtime { + cpu: 1 + memory: "4 GB" + time: 4 + disks: "local-disk 50 SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task reproducibility { + input { + String prefix + Array[File] peaks # peak files from pair of true replicates + # in a sorted order. 
for example of 4 replicates, + # 1,2 1,3 1,4 2,3 2,4 3,4. + # x,y means peak file from rep-x vs rep-y + Array[File] peaks_pr # peak files from pseudo replicates + File? peak_ppr # Peak file from pooled pseudo replicate. + String peak_type + File chrsz # 2-col chromosome sizes file + RuntimeEnvironment runtime_environment + } + + command <<< + set -e + python3 $(which encode_task_reproducibility.py) \ + ~{sep=" " peaks} \ + --peaks-pr ~{sep=" " peaks_pr} \ + ~{"--peak-ppr " + peak_ppr} \ + --prefix ~{prefix} \ + ~{"--peak-type " + peak_type} \ + ~{"--chrsz " + chrsz} + >>> + + output { + File optimal_peak = glob("*optimal_peak.*.gz")[0] + File optimal_peak_bb = glob("*optimal_peak.*.bb")[0] + File optimal_peak_starch = glob("*optimal_peak.*.starch")[0] + File optimal_peak_hammock = glob("*optimal_peak.*.hammock.gz*")[0] + File optimal_peak_hammock_tbi = glob("*optimal_peak.*.hammock.gz*")[1] + File conservative_peak = glob("*conservative_peak.*.gz")[0] + File conservative_peak_bb = glob("*conservative_peak.*.bb")[0] + File conservative_peak_starch = glob("*conservative_peak.*.starch")[0] + File conservative_peak_hammock = glob("*conservative_peak.*.hammock.gz*")[0] + File conservative_peak_hammock_tbi = glob("*conservative_peak.*.hammock.gz*")[1] + File reproducibility_qc = glob("*reproducibility.qc")[0] + # QC metrics for optimal peak + File peak_region_size_qc = glob("*.peak_region_size.qc")[0] + File peak_region_size_plot = glob("*.peak_region_size.png")[0] + File num_peak_qc = glob("*.num_peak.qc")[0] + } + + runtime { + cpu: 1 + memory: "4 GB" + time: 4 + disks: "local-disk 50 SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task gc_bias { + input { + File? nodup_bam + File ref_fa + String? picard_java_heap + RuntimeEnvironment runtime_environment + } + + Float mem_factor = 0.3 + Float input_file_size_gb = size(nodup_bam, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Float picard_java_heap_factor = 0.9 + + command <<< + set -e + python3 $(which encode_task_gc_bias.py) \ + ~{"--nodup-bam " + nodup_bam} \ + ~{"--ref-fa " + ref_fa} \ + ~{"--picard-java-heap " + ( + if defined(picard_java_heap) + then picard_java_heap + else (round(mem_gb * picard_java_heap_factor) + "G") + )} + >>> + + output { + File gc_plot = glob("*.gc_plot.png")[0] + File gc_log = glob("*.gc.txt")[0] + } + + runtime { + cpu: 1 + memory: "~{mem_gb} GB" + time: 6 + disks: "local-disk 250 SSD" + docker: runtime_environment.docker + singularity: runtime_environment.singularity + conda: runtime_environment.conda + } +} + +task qc_report { + input { + # optional metadata + String pipeline_ver + String title # name of sample + String description # description for sample + String? genome + #String? encode_accession_id # ENCODE accession ID of sample + # workflow params + Array[Boolean] paired_ends + Array[Boolean] ctl_paired_ends + String pipeline_type + String aligner + Boolean no_dup_removal + String peak_caller + Int cap_num_peak + Float idr_thresh + Float pval_thresh + Int xcor_trim_bp + Int xcor_subsample_reads + # QCs + Array[File] samstat_qcs + Array[File] nodup_samstat_qcs + Array[File] dup_qcs + Array[File] lib_complexity_qcs + Array[File] ctl_samstat_qcs + Array[File] ctl_nodup_samstat_qcs + Array[File] ctl_dup_qcs + Array[File] ctl_lib_complexity_qcs + Array[File] xcor_plots + Array[File] xcor_scores + File? jsd_plot + Array[File] jsd_qcs + Array[File] idr_plots + Array[File] idr_plots_pr + File? 
idr_plot_ppr + Array[File] frip_qcs + Array[File] frip_qcs_pr1 + Array[File] frip_qcs_pr2 + File? frip_qc_pooled + File? frip_qc_ppr1 + File? frip_qc_ppr2 + Array[File] frip_idr_qcs + Array[File] frip_idr_qcs_pr + File? frip_idr_qc_ppr + Array[File] frip_overlap_qcs + Array[File] frip_overlap_qcs_pr + File? frip_overlap_qc_ppr + File? idr_reproducibility_qc + File? overlap_reproducibility_qc + Array[File] gc_plots + Array[File] peak_region_size_qcs + Array[File] peak_region_size_plots + Array[File] num_peak_qcs + File? idr_opt_peak_region_size_qc + File? idr_opt_peak_region_size_plot + File? idr_opt_num_peak_qc + File? overlap_opt_peak_region_size_qc + File? overlap_opt_peak_region_size_plot + File? overlap_opt_num_peak_qc + File? qc_json_ref + RuntimeEnvironment runtime_environment + } + + command <<< + set -e + python3 $(which encode_task_qc_report.py) \ + --pipeline-prefix chip \ + ~{"--pipeline-ver " + pipeline_ver} \ + ~{"--title '" + sub(title, "'", "_") + "'"} \ + ~{"--desc '" + sub(description, "'", "_") + "'"} \ + ~{"--genome " + genome} \ + ~{"--multimapping " + 0} \ + --paired-ends ~{sep=" " paired_ends} \ + --ctl-paired-ends ~{sep=" " ctl_paired_ends} \ + --pipeline-type ~{pipeline_type} \ + --aligner ~{aligner} \ + ~{( + if (no_dup_removal) + then "--no-dup-removal " + else "" + )} \ + --peak-caller ~{peak_caller} \ + ~{"--cap-num-peak " + cap_num_peak} \ + --idr-thresh ~{idr_thresh} \ + --pval-thresh ~{pval_thresh} \ + --xcor-trim-bp ~{xcor_trim_bp} \ + --xcor-subsample-reads ~{xcor_subsample_reads} \ + --samstat-qcs ~{sep="_:_" samstat_qcs} \ + --nodup-samstat-qcs ~{sep="_:_" nodup_samstat_qcs} \ + --dup-qcs ~{sep="_:_" dup_qcs} \ + --lib-complexity-qcs ~{sep="_:_" lib_complexity_qcs} \ + --xcor-plots ~{sep="_:_" xcor_plots} \ + --xcor-scores ~{sep="_:_" xcor_scores} \ + --idr-plots ~{sep="_:_" idr_plots} \ + --idr-plots-pr ~{sep="_:_" idr_plots_pr} \ + --ctl-samstat-qcs ~{sep="_:_" ctl_samstat_qcs} \ + --ctl-nodup-samstat-qcs ~{sep="_:_" ctl_nodup_samstat_qcs} \ + --ctl-dup-qcs ~{sep="_:_" ctl_dup_qcs} \ + --ctl-lib-complexity-qcs ~{sep="_:_" ctl_lib_complexity_qcs} \ + ~{"--jsd-plot " + jsd_plot} \ + --jsd-qcs ~{sep="_:_" jsd_qcs} \ + ~{"--idr-plot-ppr " + idr_plot_ppr} \ + --frip-qcs ~{sep="_:_" frip_qcs} \ + --frip-qcs-pr1 ~{sep="_:_" frip_qcs_pr1} \ + --frip-qcs-pr2 ~{sep="_:_" frip_qcs_pr2} \ + ~{"--frip-qc-pooled " + frip_qc_pooled} \ + ~{"--frip-qc-ppr1 " + frip_qc_ppr1} \ + ~{"--frip-qc-ppr2 " + frip_qc_ppr2} \ + --frip-idr-qcs ~{sep="_:_" frip_idr_qcs} \ + --frip-idr-qcs-pr ~{sep="_:_" frip_idr_qcs_pr} \ + ~{"--frip-idr-qc-ppr " + frip_idr_qc_ppr} \ + --frip-overlap-qcs ~{sep="_:_" frip_overlap_qcs} \ + --frip-overlap-qcs-pr ~{sep="_:_" frip_overlap_qcs_pr} \ + ~{"--frip-overlap-qc-ppr " + frip_overlap_qc_ppr} \ + ~{"--idr-reproducibility-qc " + idr_reproducibility_qc} \ + ~{"--overlap-reproducibility-qc " + overlap_reproducibility_qc} \ + --gc-plots ~{sep="_:_" gc_plots} \ + --peak-region-size-qcs ~{sep="_:_" peak_region_size_qcs} \ + --peak-region-size-plots ~{sep="_:_" peak_region_size_plots} \ + --num-peak-qcs ~{sep="_:_" num_peak_qcs} \ + ~{"--idr-opt-peak-region-size-qc " + idr_opt_peak_region_size_qc} \ + ~{"--idr-opt-peak-region-size-plot " + idr_opt_peak_region_size_plot} \ + ~{"--idr-opt-num-peak-qc " + idr_opt_num_peak_qc} \ + ~{"--overlap-opt-peak-region-size-qc " + overlap_opt_peak_region_size_qc} \ + ~{"--overlap-opt-peak-region-size-plot " + overlap_opt_peak_region_size_plot} \ + ~{"--overlap-opt-num-peak-qc " + overlap_opt_num_peak_qc} \ + 
--out-qc-html qc.html \
+            --out-qc-json qc.json \
+            ~{"--qc-json-ref " + qc_json_ref}
+    >>>
+
+    output {
+        File report = glob("*qc.html")[0]
+        File qc_json = glob("*qc.json")[0]
+        Boolean qc_json_ref_match = read_string("qc_json_ref_match.txt") == "True"
+    }
+
+    runtime {
+        cpu: 1
+        memory: "4 GB"
+        time: 4
+        disks: "local-disk 50 SSD"
+        docker: runtime_environment.docker
+        singularity: runtime_environment.singularity
+        conda: runtime_environment.conda
+    }
+}
+
+### workflow system tasks
+task read_genome_tsv {
+    input {
+        File? genome_tsv
+        String? null_s
+        RuntimeEnvironment runtime_environment
+    }
+
+    command <<<
+        echo "$(basename ~{genome_tsv})" > genome_name
+        # create empty files for all entries
+        touch ref_fa bowtie2_idx_tar bwa_idx_tar chrsz gensz blacklist blacklist2
+        touch mito_chr_name
+        touch regex_bfilt_peak_chr_name
+
+        # split the 2-col genome TSV into one file per key so that the
+        # outputs below can read each entry individually
+        python <<CODE
+        with open('~{genome_tsv}', 'r') as fp:
+            for line in fp:
+                arr = line.strip('\n').split('\t')
+                if len(arr) == 2:
+                    key, val = arr
+                    with open(key, 'w') as fp2:
+                        fp2.write(val)
+        CODE
+    >>>
+
+    output {
+        String? genome_name = read_string("genome_name")
+        String? ref_fa = (
+            if size("ref_fa") == 0
+            then null_s
+            else read_string("ref_fa")
+        )
+        String? bwa_idx_tar = (
+            if size("bwa_idx_tar") == 0
+            then null_s
+            else read_string("bwa_idx_tar")
+        )
+        String? bowtie2_idx_tar = (
+            if size("bowtie2_idx_tar") == 0
+            then null_s
+            else read_string("bowtie2_idx_tar")
+        )
+        String? chrsz = (
+            if size("chrsz") == 0
+            then null_s
+            else read_string("chrsz")
+        )
+        String? gensz = (
+            if size("gensz") == 0
+            then null_s
+            else read_string("gensz")
+        )
+        String? blacklist = (
+            if size("blacklist") == 0
+            then null_s
+            else read_string("blacklist")
+        )
+        String? blacklist2 = (
+            if size("blacklist2") == 0
+            then null_s
+            else read_string("blacklist2")
+        )
+        String? mito_chr_name = (
+            if size("mito_chr_name") == 0
+            then null_s
+            else read_string("mito_chr_name")
+        )
+        String? regex_bfilt_peak_chr_name = (
+            if size("regex_bfilt_peak_chr_name") == 0
+            then "chr[\\dXY]+"
+            else read_string("regex_bfilt_peak_chr_name")
+        )
+    }
+
+    runtime {
+        maxRetries: 0
+        cpu: 1
+        memory: "2 GB"
+        time: 4
+        disks: "local-disk 10 SSD"
+        docker: runtime_environment.docker
+        singularity: runtime_environment.singularity
+        conda: runtime_environment.conda
+    }
+}
+
+task rounded_mean {
+    input {
+        Array[Int] ints
+        RuntimeEnvironment runtime_environment
+    }
+
+    command <<<
+        # write the rounded mean of the input integers to tmp.txt
+        python <<CODE
+        arr = [~{sep="," ints}]
+        with open('tmp.txt', 'w') as fp:
+            if len(arr):
+                fp.write('{}'.format(int(round(sum(arr) / float(len(arr))))))
+            else:
+                fp.write('0')
+        CODE
+    >>>
+
+    output {
+        Int rounded_mean = read_int("tmp.txt")
+    }
+
+    runtime {
+        cpu: 1
+        memory: "2 GB"
+        time: 4
+        disks: "local-disk 10 SSD"
+        docker: runtime_environment.docker
+        singularity: runtime_environment.singularity
+        conda: runtime_environment.conda
+    }
+}
+
+task raise_exception {
+    input {
+        String msg
+        RuntimeEnvironment runtime_environment
+    }
+
+    command <<<
+        echo -e "\n* Error: ~{msg}\n" >&2
+        exit 2
+    >>>
+
+    output {
+        String error_msg = "~{msg}"
+    }
+
+    runtime {
+        maxRetries: 0
+        cpu: 1
+        memory: "2 GB"
+        time: 4
+        disks: "local-disk 10 SSD"
+        docker: runtime_environment.docker
+        singularity: runtime_environment.singularity
+        conda: runtime_environment.conda
+    }
+}
diff --git a/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl
new file mode 100644
index 00000000..92c09ea8
--- /dev/null
+++ b/wdl-format/tests/format/ENCODE-DCC_chip-seq-pipeline/source.wdl
@@ -0,0 +1,3296 @@
+version 1.0
+
+struct RuntimeEnvironment {
+    String docker
+    String singularity
+    String conda
+}
+
+workflow chip {
+    String pipeline_ver = 'v2.2.2'
+
+    meta {
+        version: 'v2.2.2'
+
+        author: 'Jin wook Lee'
+        email: 'leepc12@gmail.com'
+        description: 'ENCODE TF/Histone ChIP-Seq pipeline. See https://github.com/ENCODE-DCC/chip-seq-pipeline2 for more details, e.g. an example input JSON for Terra/Anvil.'
+        organization: 'ENCODE DCC'
+
+        specification_document: 'https://docs.google.com/document/d/1lG_Rd7fnYgRpSIqrIfuVlAz2dW1VaSQThzk836Db99c/edit?usp=sharing'
+
+        default_docker: 'encodedcc/chip-seq-pipeline:v2.2.2'
+        default_singularity: 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif'
+        croo_out_def: 'https://storage.googleapis.com/encode-pipeline-output-definition/chip.croo.v5.json'
+
+        parameter_group: {
+            runtime_environment: {
+                title: 'Runtime environment',
+                description: 'Runtime environment such as container URIs (Docker, Singularity) and Conda environment name.'
+            },
+            pipeline_metadata: {
+                title: 'Pipeline metadata',
+                description: 'Metadata for a pipeline (e.g. title and description).'
+            },
+            reference_genome: {
+                title: 'Reference genome',
+                description: 'Genome specific files. e.g. reference FASTA, bowtie2 index, chromosome sizes file.',
+                help: 'Choose one chip.genome_tsv file that defines all genome specific parameters in it or define each genome specific parameter in input JSON to override those defined in genome TSV file. If you use Caper then use https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/[GENOME]_caper.tsv. Caper will automatically download/install all files defined in such TSV. Otherwise download genome TSV file by using a shell script (scripts/download_genome_data.sh [GENOME] [DEST_DIR]). Supported genomes are hg38, hg19, mm10 and mm9. See pipeline documentation if you want to build genome database from your own FASTA file. If some genome data are missing then analyses using such data will be skipped.'
+            },
+            input_genomic_data: {
+                title: 'Input genomic data',
+                description: 'Genomic input files for experiment.',
+                help: 'Pipeline can start with any types of experiment data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN, PEAK). Choose one type and leave others empty. FASTQs have a variable for each biological replicate. e.g. chip.fastqs_rep1_R1 and chip.fastqs_rep2_R1. You can define up to 10 experiment replicates. For other types, there is an array to define file for each biological replicate. e.g. chip.bams: ["rep1.bam", "rep1.bam"]. Define sequencing endedness with chip.paired_end, if you have mixed SE and PE replicates then define chip.paired_ends instead for each replicate. e.g. chip.paired_ends: [false, true].'
+            },
+            input_genomic_data_control: {
+                title: 'Input genomic data (control)',
+                description: 'Genomic input files for control. TF ChIP-seq requires control for peak calling but histone ChIP-seq does not.',
+                help: 'Pipeline can start with any types of control data (e.g. FASTQ, BAM, NODUP_BAM, TAG-ALIGN). Choose one type and leave others empty. FASTQs have a variable for each control replicate. e.g. chip.ctl_fastqs_rep1_R1 and chip.ctl_fastqs_rep2_R1. You can define up to 10 control replicates. For other types, there is an array to define file for each control replicate. e.g. chip.ctl_bams: ["ctl1.bam", "ctl1.bam"]. Define sequencing endedness with chip.ctl_paired_end, if you have mixed SE and PE control replicates then define chip.ctl_paired_ends instead for each replicate. e.g. chip.ctl_paired_ends: [false, true]. If none of these are defined, pipeline will use chip.paired_end for controls.'
+ }, + pipeline_parameter: { + title: 'Pipeline parameter', + description: 'Pipeline type and flags to turn on/off analyses.', + help: 'Use chip.align_only to align FASTQs without peak calling.' + }, + alignment: { + title: 'Alignment', + description: 'Parameters for alignment.', + help: 'Pipeline can crop FASTQs (chip.crop_length > 0) with tolerance (chip.crop_length_tol) before mapping.' + }, + peak_calling: { + title: 'Peak calling', + description: 'Parameters for peak calling.', + help: 'This group includes statistical thresholds for peak-calling or post-peak-calling analyses: p-val, FDR, IDR. It also include parameters for control choosing/subsampling. All control replicates are pooled and pooled control is used for peak calling against each experiment replicate by default (see chip.always_use_pooled_ctl). Pipeline compares read depth of experiment replicate and a chosen control. It also compare read depth of controls. If control is too deep then it is subsampled.' + }, + resource_parameter: { + title: 'Resource parameter', + description: 'Number of CPUs (threads), max. memory and walltime for tasks.', + help: 'Resource settings are used for determining an instance type on cloud backends (e.g. GCP, AWS) and used for submitting tasks to a cluster engine (e.g. SLURM, SGE, ...). Walltime (chip.*_time_hr) is only used for cluster engines. Other tasks default to use 1 CPU and 4GB of memory.' + } + } + } + input { + # group: runtime_environment + String docker = 'encodedcc/chip-seq-pipeline:v2.2.2' + String singularity = 'https://encode-pipeline-singularity-image.s3.us-west-2.amazonaws.com/chip-seq-pipeline_v2.2.2.sif' + String conda = 'encd-chip' + String conda_macs2 = 'encd-chip-macs2' + String conda_spp = 'encd-chip-spp' + + # group: pipeline_metadata + String title = 'Untitled' + String description = 'No description' + + # group: reference_genome + File? genome_tsv + String? genome_name + File? ref_fa + File? bwa_idx_tar + File? bowtie2_idx_tar + File? chrsz + File? blacklist + File? blacklist2 + String? mito_chr_name + String? regex_bfilt_peak_chr_name + String? gensz + File? custom_aligner_idx_tar + + # group: input_genomic_data + Boolean? paired_end + Array[Boolean] paired_ends = [] + Array[File] fastqs_rep1_R1 = [] + Array[File] fastqs_rep1_R2 = [] + Array[File] fastqs_rep2_R1 = [] + Array[File] fastqs_rep2_R2 = [] + Array[File] fastqs_rep3_R1 = [] + Array[File] fastqs_rep3_R2 = [] + Array[File] fastqs_rep4_R1 = [] + Array[File] fastqs_rep4_R2 = [] + Array[File] fastqs_rep5_R1 = [] + Array[File] fastqs_rep5_R2 = [] + Array[File] fastqs_rep6_R1 = [] + Array[File] fastqs_rep6_R2 = [] + Array[File] fastqs_rep7_R1 = [] + Array[File] fastqs_rep7_R2 = [] + Array[File] fastqs_rep8_R1 = [] + Array[File] fastqs_rep8_R2 = [] + Array[File] fastqs_rep9_R1 = [] + Array[File] fastqs_rep9_R2 = [] + Array[File] fastqs_rep10_R1 = [] + Array[File] fastqs_rep10_R2 = [] + Array[File] bams = [] + Array[File] nodup_bams = [] + Array[File] tas = [] + Array[File] peaks = [] + Array[File] peaks_pr1 = [] + Array[File] peaks_pr2 = [] + File? peak_ppr1 + File? peak_ppr2 + File? peak_pooled + + Boolean? 
ctl_paired_end + Array[Boolean] ctl_paired_ends = [] + Array[File] ctl_fastqs_rep1_R1 = [] + Array[File] ctl_fastqs_rep1_R2 = [] + Array[File] ctl_fastqs_rep2_R1 = [] + Array[File] ctl_fastqs_rep2_R2 = [] + Array[File] ctl_fastqs_rep3_R1 = [] + Array[File] ctl_fastqs_rep3_R2 = [] + Array[File] ctl_fastqs_rep4_R1 = [] + Array[File] ctl_fastqs_rep4_R2 = [] + Array[File] ctl_fastqs_rep5_R1 = [] + Array[File] ctl_fastqs_rep5_R2 = [] + Array[File] ctl_fastqs_rep6_R1 = [] + Array[File] ctl_fastqs_rep6_R2 = [] + Array[File] ctl_fastqs_rep7_R1 = [] + Array[File] ctl_fastqs_rep7_R2 = [] + Array[File] ctl_fastqs_rep8_R1 = [] + Array[File] ctl_fastqs_rep8_R2 = [] + Array[File] ctl_fastqs_rep9_R1 = [] + Array[File] ctl_fastqs_rep9_R2 = [] + Array[File] ctl_fastqs_rep10_R1 = [] + Array[File] ctl_fastqs_rep10_R2 = [] + Array[File] ctl_bams = [] + Array[File] ctl_nodup_bams = [] + Array[File] ctl_tas = [] + + # group: pipeline_parameter + String pipeline_type + Boolean align_only = false + Boolean redact_nodup_bam = false + Boolean true_rep_only = false + Boolean enable_count_signal_track = false + Boolean enable_jsd = true + Boolean enable_gc_bias = true + + # group: alignment + String aligner = 'bowtie2' + File? custom_align_py + Boolean use_bwa_mem_for_pe = false + Int bwa_mem_read_len_limit = 70 + Boolean use_bowtie2_local_mode = false + Int crop_length = 0 + Int crop_length_tol = 2 + String trimmomatic_phred_score_format = 'auto' + Int xcor_trim_bp = 50 + Boolean use_filt_pe_ta_for_xcor = false + String dup_marker = 'picard' + Boolean no_dup_removal = false + Int mapq_thresh = 30 + Array[String] filter_chrs = [] + Int subsample_reads = 0 + Int ctl_subsample_reads = 0 + Int xcor_subsample_reads = 15000000 + Int xcor_exclusion_range_min = -500 + Int? xcor_exclusion_range_max + Int pseudoreplication_random_seed = 0 + + # group: peak_calling + Int ctl_depth_limit = 200000000 + Float exp_ctl_depth_ratio_limit = 5.0 + Array[Int?] fraglen = [] + String? peak_caller + Boolean always_use_pooled_ctl = true + Float ctl_depth_ratio = 1.2 + Int? cap_num_peak + Float pval_thresh = 0.01 + Float fdr_thresh = 0.01 + Float idr_thresh = 0.05 + + # group: resource_parameter + Int align_cpu = 6 + Float align_bowtie2_mem_factor = 0.15 + Float align_bwa_mem_factor = 1.0 + Int align_time_hr = 48 + Float align_bowtie2_disk_factor = 8.0 + Float align_bwa_disk_factor = 8.0 + + Int filter_cpu = 4 + Float filter_mem_factor = 0.4 + Int filter_time_hr = 24 + Float filter_disk_factor = 8.0 + + Int bam2ta_cpu = 2 + Float bam2ta_mem_factor = 0.35 + Int bam2ta_time_hr = 6 + Float bam2ta_disk_factor = 4.0 + + Float spr_mem_factor = 20.0 + Float spr_disk_factor = 30.0 + + Int jsd_cpu = 4 + Float jsd_mem_factor = 0.1 + Int jsd_time_hr = 6 + Float jsd_disk_factor = 2.0 + + Int xcor_cpu = 2 + Float xcor_mem_factor = 1.0 + Int xcor_time_hr = 24 + Float xcor_disk_factor = 4.5 + + Float subsample_ctl_mem_factor = 22.0 + Float subsample_ctl_disk_factor = 15.0 + + Float macs2_signal_track_mem_factor = 12.0 + Int macs2_signal_track_time_hr = 24 + Float macs2_signal_track_disk_factor = 80.0 + + Int call_peak_cpu = 6 + Float call_peak_spp_mem_factor = 5.0 + Float call_peak_macs2_mem_factor = 5.0 + Int call_peak_time_hr = 72 + Float call_peak_spp_disk_factor = 5.0 + Float call_peak_macs2_disk_factor = 30.0 + + String? align_trimmomatic_java_heap + String? filter_picard_java_heap + String? 
gc_bias_picard_java_heap
+    }
+
+    parameter_meta {
+        docker: {
+            description: 'Default Docker image URI to run WDL tasks.',
+            group: 'runtime_environment',
+            example: 'ubuntu:20.04'
+        }
+        singularity: {
+            description: 'Default Singularity image URI to run WDL tasks. For Singularity users only.',
+            group: 'runtime_environment',
+            example: 'docker://ubuntu:20.04'
+        }
+        conda: {
+            description: 'Default Conda environment name to run WDL tasks. For Conda users only.',
+            group: 'runtime_environment',
+            example: 'encd-chip'
+        }
+        conda_macs2: {
+            description: 'Conda environment name for task macs2. For Conda users only.',
+            group: 'runtime_environment',
+            example: 'encd-chip-macs2'
+        }
+        conda_spp: {
+            description: 'Conda environment name for tasks spp/xcor. For Conda users only.',
+            group: 'runtime_environment',
+            example: 'encd-chip-spp'
+        }
+        title: {
+            description: 'Experiment title.',
+            group: 'pipeline_metadata',
+            example: 'ENCSR936XTK (subsampled 1/50)'
+        }
+        description: {
+            description: 'Experiment description.',
+            group: 'pipeline_metadata',
+            example: 'ZNF143 ChIP-seq on human GM12878 (subsampled 1/50)'
+        }
+        genome_tsv: {
+            description: 'Reference genome database TSV.',
+            group: 'reference_genome',
+            help: 'This TSV file includes all genome specific parameters (e.g. reference FASTA, bowtie2 index). You can still individually define any parameters in it. Parameters defined in input JSON will override those defined in genome TSV.',
+            example: 'https://storage.googleapis.com/encode-pipeline-genome-data/genome_tsv/v1/hg38_caper.tsv'
+        }
+        genome_name: {
+            description: 'Genome name.',
+            group: 'reference_genome'
+        }
+        ref_fa: {
+            description: 'Reference FASTA file.',
+            group: 'reference_genome'
+        }
+        bowtie2_idx_tar: {
+            description: 'BOWTIE2 index TAR file.',
+            group: 'reference_genome'
+        }
+        custom_aligner_idx_tar: {
+            description: 'Index TAR file for a custom aligner. To use a custom aligner, define "chip.custom_align_py" too.',
+            group: 'reference_genome'
+        }
+        chrsz: {
+            description: '2-col chromosome sizes file.',
+            group: 'reference_genome'
+        }
+        blacklist: {
+            description: 'Blacklist file in BED format.',
+            group: 'reference_genome',
+            help: 'Peaks will be filtered with this file.'
+        }
+        blacklist2: {
+            description: 'Secondary blacklist file in BED format.',
+            group: 'reference_genome',
+            help: 'If it is defined, it will be merged with chip.blacklist. Peaks will be filtered with merged blacklist.'
+        }
+        mito_chr_name: {
+            description: 'Mitochondrial chromosome name.',
+            group: 'reference_genome',
+            help: 'e.g. chrM, MT. Mitochondrial reads defined here will be filtered out during filtering BAMs in "filter" task.'
+        }
+        regex_bfilt_peak_chr_name: {
+            description: 'Reg-ex for chromosomes to keep while filtering peaks.',
+            group: 'reference_genome',
+            help: 'Chromosomes defined here will be kept. All other chromosomes will be filtered out in .bfilt. peak file. This is done along with blacklist filtering peak file.'
+        }
+        gensz: {
+            description: 'Genome sizes. "hs" for human, "mm" for mouse or sum of 2nd column in chromosome sizes file.',
+            group: 'reference_genome'
+        }
+        paired_end: {
+            description: 'Sequencing endedness.',
+            group: 'input_genomic_data',
+            help: 'Setting this on means that all replicates are paired ended. For mixed samples, use chip.paired_ends array instead.',
+            example: true
+        }
+        paired_ends: {
+            description: 'Sequencing endedness array (for mixed SE/PE datasets).',
+            group: 'input_genomic_data',
+            help: 'Whether each biological replicate is paired ended or not.'
+        }
+        fastqs_rep1_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 1.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from FASTQ files. Pipeline can start from any type of inputs (e.g. FASTQs, BAMs, ...). Choose one type, fill parameters for that type and leave others undefined. Especially for FASTQs, we have an individual variable for each biological replicate so that FASTQs of technical replicates can be merged. Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep1_R2). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R1.subsampled.50.fastq.gz'
+            ]
+        }
+        fastqs_rep1_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 1.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep1-R2.subsampled.50.fastq.gz'
+            ]
+        }
+        fastqs_rep2_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 2.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R1.subsampled.50.fastq.gz'
+            ]
+        }
+        fastqs_rep2_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 2.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/rep2-R2.subsampled.50.fastq.gz'
+            ]
+        }
+        fastqs_rep3_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 3.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep3_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 3.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep4_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 4.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep4_R2: {
+            description: 'Read2 FASTQs to be merged for a biological replicate 4.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        fastqs_rep5_R1: {
+            description: 'Read1 FASTQs to be merged for a biological replicate 5.',
+            group: 'input_genomic_data',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep5_R2). These FASTQs are usually technical replicates to be merged.'
+ } + fastqs_rep5_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 5.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep6_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 6.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep6_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 6.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep7_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 7.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep7_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 7.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep8_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 8.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep8_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 8.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep9_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 9.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep9_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 9.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep10_R1: { + description: 'Read1 FASTQs to be merged for a biological replicate 10.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read2 FASTQs (chip.fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.' + } + fastqs_rep10_R2: { + description: 'Read2 FASTQs to be merged for a biological replicate 10.', + group: 'input_genomic_data', + help: 'Make sure that they are consistent with read1 FASTQs (chip.fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.' + } + bams: { + description: 'List of unfiltered/raw BAM files for each biological replicate.', + group: 'input_genomic_data', + help: 'Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each biological replicate. e.g. [rep1.bam, rep2.bam, rep3.bam, ...].' 
+        }
+        nodup_bams: {
+            description: 'List of filtered/deduped BAM files for each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each biological replicate. e.g. [rep1.nodup.bam, rep2.nodup.bam, rep3.nodup.bam, ...].'
+        }
+        tas: {
+            description: 'List of TAG-ALIGN files for each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each biological replicate. e.g. [rep1.tagAlign.gz, rep2.tagAlign.gz, ...].'
+        }
+        peaks: {
+            description: 'List of NARROWPEAK files (not blacklist filtered) for each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from PEAK files. Each entry for each biological replicate. e.g. [rep1.narrowPeak.gz, rep2.narrowPeak.gz, ...]. Define other PEAK parameters (e.g. chip.peaks_pr1, chip.peak_pooled) according to your flag settings (e.g. chip.true_rep_only) and number of replicates. If you have more than one replicate then define chip.peak_pooled, chip.peak_ppr1 and chip.peak_ppr2. If chip.true_rep_only flag is on then do not define any parameters (chip.peaks_pr1, chip.peaks_pr2, chip.peak_ppr1 and chip.peak_ppr2) related to pseudo replicates.'
+        }
+        peaks_pr1: {
+            description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 1 of each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from PEAK files. Define if chip.true_rep_only flag is off.'
+        }
+        peaks_pr2: {
+            description: 'List of NARROWPEAK files (not blacklist filtered) for pseudo-replicate 2 of each biological replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from PEAK files. Define if chip.true_rep_only flag is off.'
+        }
+        peak_pooled: {
+            description: 'NARROWPEAK file for pooled true replicate.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates. Pooled true replicate means analysis on pooled biological replicates.'
+        }
+        peak_ppr1: {
+            description: 'NARROWPEAK file for pooled pseudo replicate 1.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR1 means analysis on pooled 1st pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 1st pseudos.'
+        }
+        peak_ppr2: {
+            description: 'NARROWPEAK file for pooled pseudo replicate 2.',
+            group: 'input_genomic_data',
+            help: 'Define if you want to start pipeline from PEAK files. Define if you have multiple biological replicates and chip.true_rep_only flag is off. PPR2 means analysis on pooled 2nd pseudo replicates. Each biological replicate is shuf/split into two pseudos. This is a pooling of each replicate\'s 2nd pseudos.'
+        }
+
+        ctl_paired_end: {
+            description: 'Sequencing endedness for all controls.',
+            group: 'input_genomic_data_control',
+            help: 'Setting this on means that all control replicates are paired ended. For mixed controls, use chip.ctl_paired_ends array instead.'
+        }
+        ctl_paired_ends: {
+            description: 'Sequencing endedness array for mixed SE/PE controls.',
+            group: 'input_genomic_data_control',
+            help: 'Whether each control replicate is paired ended or not.'
+        }
+        ctl_fastqs_rep1_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 1.',
+            group: 'input_genomic_data_control',
+            help: 'Define if you want to start pipeline from FASTQ files. Pipeline can start from any type of controls (e.g. FASTQs, BAMs, ...). Choose one type, fill parameters for that type and leave others undefined. Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep1_R2).',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R1.subsampled.80.fastq.gz'
+            ]
+        }
+        ctl_fastqs_rep1_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 1.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep1_R1). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl1-R2.subsampled.80.fastq.gz'
+            ]
+        }
+        ctl_fastqs_rep2_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 2.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep2_R2). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R1.subsampled.80.fastq.gz'
+            ]
+        }
+        ctl_fastqs_rep2_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 2.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep2_R1). These FASTQs are usually technical replicates to be merged.',
+            example: [
+                'https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR936XTK/fastq_subsampled/ctl2-R2.subsampled.80.fastq.gz'
+            ]
+        }
+        ctl_fastqs_rep3_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 3.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep3_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep3_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 3.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep3_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep4_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 4.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep4_R2). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep4_R2: {
+            description: 'Read2 FASTQs to be merged for a control replicate 4.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep4_R1). These FASTQs are usually technical replicates to be merged.'
+        }
+        ctl_fastqs_rep5_R1: {
+            description: 'Read1 FASTQs to be merged for a control replicate 5.',
+            group: 'input_genomic_data_control',
+            help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep5_R2).
These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep5_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 5.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep5_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep6_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 6.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep6_R2). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep6_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 6.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep6_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep7_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 7.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep7_R2). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep7_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 7.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep7_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep8_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 8.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep8_R2). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep8_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 8.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep8_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep9_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 9.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep9_R2). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep9_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 9.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep9_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep10_R1: { + description: 'Read1 FASTQs to be merged for a control replicate 10.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read2 FASTQs (chip.ctl_fastqs_rep10_R2). These FASTQs are usually technical replicates to be merged.' + } + ctl_fastqs_rep10_R2: { + description: 'Read2 FASTQs to be merged for a control replicate 10.', + group: 'input_genomic_data_control', + help: 'Make sure that they are consistent with read1 FASTQs (chip.ctl_fastqs_rep10_R1). These FASTQs are usually technical replicates to be merged.' + } + ctl_bams: { + description: 'List of unfiltered/raw BAM files for each control replicate.', + group: 'input_genomic_data_control', + help: 'Define if you want to start pipeline from BAM files. Unfiltered/raw BAM file generated from aligner (e.g. bowtie2). Each entry for each control replicate. e.g. 
[ctl1.bam, ctl2.bam, ctl3.bam, ...].'
+        }
+        ctl_nodup_bams: {
+            description: 'List of filtered/deduped BAM files for each control replicate.',
+            group: 'input_genomic_data_control',
+            help: 'Define if you want to start pipeline from filtered BAM files. Filtered/deduped BAM file. Each entry for each control replicate. e.g. [ctl1.nodup.bam, ctl2.nodup.bam, ctl3.nodup.bam, ...].'
+        }
+        ctl_tas: {
+            description: 'List of TAG-ALIGN files for each control replicate.',
+            group: 'input_genomic_data_control',
+            help: 'Define if you want to start pipeline from TAG-ALIGN files. TAG-ALIGN is in a 6-col BED format. It is a simplified version of BAM. Each entry for each control replicate. e.g. [ctl1.tagAlign.gz, ctl2.tagAlign.gz, ...].'
+        }
+
+        pipeline_type: {
+            description: 'Pipeline type. tf for TF ChIP-Seq, histone for Histone ChIP-Seq or control for mapping controls only.',
+            group: 'pipeline_parameter',
+            help: 'Default peak caller is different for each type: spp for TF ChIP-Seq and macs2 for histone ChIP-Seq. Regardless of pipeline type, spp always requires controls but macs2 doesn\'t. For control mode, chip.align_only is automatically turned on and cross-correlation analysis is disabled. Do not define ctl_* for control mode. Define fastqs_repX_RY instead.',
+            choices: ['tf', 'histone', 'control'],
+            example: 'tf'
+        }
+        redact_nodup_bam: {
+            description: 'Redact filtered/nodup BAM.',
+            group: 'pipeline_parameter',
+            help: 'Redact filtered/nodup BAM at the end of the filtering step (task filter). Raw BAM from the aligner (task align) will still remain unredacted. Quality metrics on filtered BAM will be calculated before being redacted. However, all downstream analyses (e.g. peak-calling) will be done on the redacted BAM. If you start from nodup BAM then this flag will not be active.'
+        }
+        align_only: {
+            description: 'Align only mode.',
+            group: 'pipeline_parameter',
+            help: 'Reads will be aligned but there will be no peak-calling on them. It is turned on automatically if chip.pipeline_type is control.'
+        }
+        true_rep_only: {
+            description: 'Disables all analyses related to pseudo-replicates.',
+            group: 'pipeline_parameter',
+            help: 'Pipeline generates 2 pseudo-replicates from one biological replicate. This flag turns off all analyses related to pseudos (with prefix/suffix pr, ppr).'
+        }
+        enable_count_signal_track: {
+            description: 'Enables generation of count signal tracks.',
+            group: 'pipeline_parameter'
+        }
+        enable_jsd: {
+            description: 'Enables Jensen-Shannon Distance (JSD) plot generation.',
+            group: 'pipeline_parameter'
+        }
+        enable_gc_bias: {
+            description: 'Enables GC bias calculation.',
+            group: 'pipeline_parameter'
+        }
+
+        aligner: {
+            description: 'Aligner. bowtie2, bwa or custom.',
+            group: 'alignment',
+            help: 'It is bowtie2 by default. To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.',
+            choices: ['bowtie2', 'bwa', 'custom'],
+            example: 'bowtie2'
+        }
+        custom_align_py: {
+            description: 'Python script for a custom aligner.',
+            group: 'alignment',
+            help: 'There is a template included in the documentation for inputs. Defining this parameter will automatically change "chip.aligner" to "custom". You should also define "chip.custom_aligner_idx_tar".'
+        }
+        use_bwa_mem_for_pe: {
+            description: 'For paired end dataset with read length >= chip.bwa_mem_read_len_limit (default 70) bp, use bwa mem instead of bwa aln.',
+            group: 'alignment',
+            help: 'Use it only for paired end reads >= chip.bwa_mem_read_len_limit (default 70) bp. Otherwise keep using bwa aln.'
+        }
+        bwa_mem_read_len_limit: {
+            description: 'Read length limit for bwa mem (for PE FASTQs only).',
+            group: 'alignment',
+            help: 'If chip.use_bwa_mem_for_pe is activated and reads are shorter than this limit, then bwa aln will be used instead of bwa mem.'
+        }
+        use_bowtie2_local_mode: {
+            description: 'Use bowtie2\'s local mode (soft-clipping).',
+            group: 'alignment',
+            help: 'This will add --local to bowtie2 command line so that it will replace the default end-to-end mode.'
+        }
+        crop_length: {
+            description: 'Crop FASTQs\' reads longer than this length.',
+            group: 'alignment',
+            help: 'Also drop all reads shorter than chip.crop_length - chip.crop_length_tol.'
+        }
+        crop_length_tol: {
+            description: 'Tolerance for cropping reads in FASTQs.',
+            group: 'alignment',
+            help: 'Drop all reads shorter than chip.crop_length - chip.crop_length_tol. Activated only when chip.crop_length is defined.'
+        }
+        trimmomatic_phred_score_format: {
+            description: 'Base encoding (format) for Phred score in FASTQs.',
+            group: 'alignment',
+            choices: ['auto', 'phred33', 'phred64'],
+            help: 'This is used for Trimmomatic only. It is auto by default, which means that Trimmomatic automatically detects it from FASTQs. Otherwise -phred33 or -phred64 will be passed to the Trimmomatic command line. Use this if you see an error like "Error: Unable to detect quality encoding".'
+        }
+        xcor_trim_bp: {
+            description: 'Trim experiment read1 FASTQ (for both SE and PE) for cross-correlation analysis.',
+            group: 'alignment',
+            help: 'This does not affect alignment of experimental/control replicates. Pipeline additionally aligns R1 FASTQ for cross-correlation analysis only. This parameter is used for it.'
+        }
+        use_filt_pe_ta_for_xcor: {
+            description: 'Use filtered PE BAM for cross-correlation analysis.',
+            group: 'alignment',
+            help: 'If not defined, pipeline uses SE BAM generated from trimmed read1 FASTQ for cross-correlation analysis.'
+        }
+        dup_marker: {
+            description: 'Marker for duplicate reads. picard or sambamba.',
+            group: 'alignment',
+            help: 'picard for Picard MarkDuplicates or sambamba for sambamba markdup.',
+            choices: ['picard', 'sambamba'],
+            example: 'picard'
+        }
+        no_dup_removal: {
+            description: 'Disable removal of duplicate reads during filtering BAM.',
+            group: 'alignment',
+            help: 'Duplicate reads are filtered out during filtering BAMs to generate NODUP_BAM. This flag will keep all duplicate reads in NODUP_BAM. This flag does not affect naming of NODUP_BAM. NODUP_BAM will still have .nodup. suffix in its filename.'
+        }
+        mapq_thresh: {
+            description: 'Threshold for low MAPQ reads removal.',
+            group: 'alignment',
+            help: 'Low MAPQ reads are filtered out while filtering BAM.'
+        }
+        filter_chrs: {
+            description: 'List of chromosomes to be filtered out while filtering BAM.',
+            group: 'alignment',
+            help: 'It is empty by default, hence no filtering out of specific chromosomes. It is case-sensitive. Use exact word for chromosome names.'
+        }
+        subsample_reads: {
+            description: 'Subsample reads. Shuffle and subsample reads.',
+            group: 'alignment',
+            help: 'This affects all downstream analyses after filtering experiment BAM (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if actual number of reads in BAM exceeds this number. 0 means disabled.'
+        }
+        ctl_subsample_reads: {
+            description: 'Subsample control reads. Shuffle and subsample control reads.',
+            group: 'alignment',
+            help: 'This affects all downstream analyses after filtering control BAM (e.g. all TAG-ALIGN files, peak-calling). Reads will be shuffled only if actual number of reads in BAM exceeds this number. 0 means disabled.'
+        }
+        xcor_subsample_reads: {
+            description: 'Subsample reads for cross-correlation analysis only.',
+            group: 'alignment',
+            help: 'This does not affect downstream analyses after filtering BAM. It is for cross-correlation analysis only. 0 means disabled.'
+        }
+        xcor_exclusion_range_min: {
+            description: 'Exclusion minimum for cross-correlation analysis.',
+            group: 'alignment',
+            help: 'For run_spp.R -s. Make sure that it is consistent with default strand shift -s=-500:5:1500 in run_spp.R.'
+        }
+        xcor_exclusion_range_max: {
+            description: 'Exclusion maximum for cross-correlation analysis.',
+            group: 'alignment',
+            help: 'For run_spp.R -s. If not defined, default values of `max(read length + 10, 50)` for TF and `max(read_len + 10, 100)` for histone are used.'
+        }
+        pseudoreplication_random_seed: {
+            description: 'Random seed (positive integer) used for pseudo-replication (shuffling reads in TAG-ALIGN and then splitting it into two).',
+            group: 'alignment',
+            help: 'Pseudo-replication (task spr) is done by using GNU "shuf --random-source=sha256(random_seed)". If this parameter == 0, then pipeline uses input TAG-ALIGN file\'s size (in bytes) for the random_seed.'
+        }
+        ctl_depth_limit: {
+            description: 'Hard limit for chosen control\'s depth.',
+            group: 'peak_calling',
+            help: 'If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than this hard limit, then such control is subsampled.'
+        }
+        exp_ctl_depth_ratio_limit: {
+            description: 'Second limit for chosen control\'s depth.',
+            group: 'peak_calling',
+            help: 'If control chosen by chip.always_use_pooled_ctl and chip.ctl_depth_ratio is deeper than experiment replicate\'s read depth multiplied by this factor, then such control is subsampled down to the maximum of the multiplied value and the hard limit chip.ctl_depth_limit.'
+        }
+        fraglen: {
+            description: 'Fragment length for each biological replicate.',
+            group: 'peak_calling',
+            help: 'Fragment length is estimated by cross-correlation analysis, which is valid only when pipeline started from FASTQs. If defined, fragment length estimated by cross-correlation analysis is ignored.'
+        }
+        peak_caller: {
+            description: 'Peak caller.',
+            group: 'peak_calling',
+            help: 'It is spp and macs2 by default for TF ChIP-seq and histone ChIP-seq, respectively. e.g. you can use macs2 for TF ChIP-Seq even though spp is the default for TF ChIP-Seq (chip.pipeline_type == tf).',
+            example: 'spp'
+        }
+        always_use_pooled_ctl: {
+            description: 'Always choose a pooled control for each experiment replicate.',
+            group: 'peak_calling',
+            help: 'If turned on, ignores chip.ctl_depth_ratio.'
+        }
+        ctl_depth_ratio: {
+            description: 'Maximum depth ratio between control replicates.',
+            group: 'peak_calling',
+            help: 'If ratio of depth between any two controls is higher than this, then always use a pooled control for all experiment replicates.'
+        }
+
+        cap_num_peak: {
+            description: 'Upper limit on the number of peaks.',
+            group: 'peak_calling',
+            help: 'It is 30000000 and 50000000 by default for spp and macs2, respectively.'
+ } + pval_thresh: { + description: 'p-value Threshold for MACS2 peak caller.', + group: 'peak_calling', + help: 'macs2 callpeak -p' + } + fdr_thresh: { + description: 'FDR threshold for spp peak caller (phantompeakqualtools).', + group: 'peak_calling', + help: 'run_spp.R -fdr=' + } + idr_thresh: { + description: 'IDR threshold.', + group: 'peak_calling' + } + + align_cpu: { + description: 'Number of cores for task align.', + group: 'resource_parameter', + help: 'Task align merges/crops/maps FASTQs.' + } + align_bowtie2_mem_factor: { + description: 'Multiplication factor to determine memory required for task align with bowtie2 (default) as aligner.', + group: 'resource_parameter', + help: 'This factor will be multiplied to the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).' + } + align_bwa_mem_factor: { + description: 'Multiplication factor to determine memory required for task align with bwa as aligner.', + group: 'resource_parameter', + help: 'This factor will be multiplied to the size of FASTQs to determine required memory of instance (GCP/AWS) or job (HPCs).' + } + align_time_hr: { + description: 'Walltime (h) required for task align.', + group: 'resource_parameter', + help: 'This is for HPCs only. e.g. SLURM, SGE, ...' + } + align_bowtie2_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task align with bowtie2 (default) as aligner.', + group: 'resource_parameter', + help: 'This factor will be multiplied to the size of FASTQs to determine required disk size of instance on GCP/AWS.' + } + align_bwa_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task align with bwa as aligner.', + group: 'resource_parameter', + help: 'This factor will be multiplied to the size of FASTQs to determine required disk size of instance on GCP/AWS.' + } + filter_cpu: { + description: 'Number of cores for task filter.', + group: 'resource_parameter', + help: 'Task filter filters raw/unfiltered BAM to get filtered/deduped BAM.' + } + filter_mem_factor: { + description: 'Multiplication factor to determine memory required for task filter.', + group: 'resource_parameter', + help: 'This factor will be multiplied to the size of BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).' + } + filter_time_hr: { + description: 'Walltime (h) required for task filter.', + group: 'resource_parameter', + help: 'This is for HPCs only. e.g. SLURM, SGE, ...' + } + filter_disk_factor: { + description: 'Multiplication factor to determine persistent disk size for task filter.', + group: 'resource_parameter', + help: 'This factor will be multiplied to the size of BAMs to determine required disk size of instance on GCP/AWS.' + } + bam2ta_cpu: { + description: 'Number of cores for task bam2ta.', + group: 'resource_parameter', + help: 'Task bam2ta converts filtered/deduped BAM in to TAG-ALIGN (6-col BED) format.' + } + bam2ta_mem_factor: { + description: 'Multiplication factor to determine memory required for task bam2ta.', + group: 'resource_parameter', + help: 'This factor will be multiplied to the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).' + } + bam2ta_time_hr: { + description: 'Walltime (h) required for task bam2ta.', + group: 'resource_parameter', + help: 'This is for HPCs only. e.g. SLURM, SGE, ...' 
+        }
+        bam2ta_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task bam2ta.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine required disk size of instance on GCP/AWS.'
+        }
+        spr_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task spr.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        spr_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task spr.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine required disk size of instance on GCP/AWS.'
+        }
+        jsd_cpu: {
+            description: 'Number of cores for task jsd.',
+            group: 'resource_parameter',
+            help: 'Task jsd plots Jensen-Shannon distance and metrics related to it.'
+        }
+        jsd_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task jsd.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        jsd_time_hr: {
+            description: 'Walltime (h) required for task jsd.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        jsd_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task jsd.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of filtered BAMs to determine required disk size of instance on GCP/AWS.'
+        }
+        xcor_cpu: {
+            description: 'Number of cores for task xcor.',
+            group: 'resource_parameter',
+            help: 'Task xcor does cross-correlation analysis (including a plot) on subsampled TAG-ALIGNs.'
+        }
+        xcor_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task xcor.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        xcor_time_hr: {
+            description: 'Walltime (h) required for task xcor.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        xcor_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task xcor.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
+        }
+        subsample_ctl_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task subsample_ctl.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        subsample_ctl_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task subsample_ctl.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
+        }
+        call_peak_cpu: {
+            description: 'Number of cores for task call_peak. If MACS2 is chosen as peak_caller (or chip.pipeline_type is histone), then cpu will be fixed at 2.',
+            group: 'resource_parameter',
+            help: 'Task call_peak calls peaks on TAG-ALIGNs by using SPP/MACS2 peak caller. MACS2 is single-threaded so cpu will be fixed at 2 for MACS2.'
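+            # Illustrative note: the cpu pinning described above is applied in
+            # task call_peak's runtime section further below, roughly:
+            #     cpu : if peak_caller == 'macs2' then 2 else cpu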
+        }
+        call_peak_spp_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task call_peak with spp as peak_caller.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        call_peak_macs2_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task call_peak with macs2 as peak_caller.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        call_peak_time_hr: {
+            description: 'Walltime (h) required for task call_peak.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        call_peak_spp_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task call_peak with spp as peak_caller.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
+        }
+        call_peak_macs2_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task call_peak with macs2 as peak_caller.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
+        }
+        macs2_signal_track_mem_factor: {
+            description: 'Multiplication factor to determine memory required for task macs2_signal_track.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required memory of instance (GCP/AWS) or job (HPCs).'
+        }
+        macs2_signal_track_time_hr: {
+            description: 'Walltime (h) required for task macs2_signal_track.',
+            group: 'resource_parameter',
+            help: 'This is for HPCs only. e.g. SLURM, SGE, ...'
+        }
+        macs2_signal_track_disk_factor: {
+            description: 'Multiplication factor to determine persistent disk size for task macs2_signal_track.',
+            group: 'resource_parameter',
+            help: 'This factor will be multiplied by the size of TAG-ALIGNs (BEDs) to determine required disk size of instance on GCP/AWS.'
+        }
+        align_trimmomatic_java_heap: {
+            description: 'Maximum Java heap (java -Xmx) in task align.',
+            group: 'resource_parameter',
+            help: 'Maximum memory for Trimmomatic. If not defined, 90% of align task\'s memory will be used.'
+        }
+        filter_picard_java_heap: {
+            description: 'Maximum Java heap (java -Xmx) in task filter.',
+            group: 'resource_parameter',
+            help: 'Maximum memory for Picard tools MarkDuplicates. If not defined, 90% of filter task\'s memory will be used.'
+        }
+        gc_bias_picard_java_heap: {
+            description: 'Maximum Java heap (java -Xmx) in task gc_bias.',
+            group: 'resource_parameter',
+            help: 'Maximum memory for Picard tools CollectGcBiasMetrics. If not defined, 90% of gc_bias task\'s memory will be used.'
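+            # Illustrative sketch (not part of the original pipeline): the three
+            # *_java_heap inputs above share one fallback convention inside the
+            # tasks, roughly:
+            #     ${'--picard-java-heap ' + if defined(picard_java_heap)
+            #         then picard_java_heap
+            #         else (round(mem_gb * 0.9) + 'G')}
+            # i.e. 90% of the task's memory, matching the help strings.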
+ } + } + RuntimeEnvironment runtime_environment = { + 'docker': docker, 'singularity': singularity, 'conda': conda + } + RuntimeEnvironment runtime_environment_spp = { + 'docker': docker, 'singularity': singularity, 'conda': conda_spp + } + RuntimeEnvironment runtime_environment_macs2 = { + 'docker': docker, 'singularity': singularity, 'conda': conda_macs2 + } + + # read genome data and paths + if ( defined(genome_tsv) ) { + call read_genome_tsv { input: + genome_tsv = genome_tsv, + runtime_environment = runtime_environment + } + } + File ref_fa_ = select_first([ref_fa, read_genome_tsv.ref_fa]) + File? bwa_idx_tar_ = if defined(bwa_idx_tar) then bwa_idx_tar + else read_genome_tsv.bwa_idx_tar + File bowtie2_idx_tar_ = select_first([bowtie2_idx_tar, read_genome_tsv.bowtie2_idx_tar]) + File chrsz_ = select_first([chrsz, read_genome_tsv.chrsz]) + String gensz_ = select_first([gensz, read_genome_tsv.gensz]) + File? blacklist1_ = if defined(blacklist) then blacklist + else read_genome_tsv.blacklist + File? blacklist2_ = if defined(blacklist2) then blacklist2 + else read_genome_tsv.blacklist2 + # merge multiple blacklists + # two blacklists can have different number of columns (3 vs 6) + # so we limit merged blacklist's columns to 3 + Array[File] blacklists = select_all([blacklist1_, blacklist2_]) + if ( length(blacklists) > 1 ) { + call pool_ta as pool_blacklist { input: + tas = blacklists, + col = 3, + runtime_environment = runtime_environment + } + } + File? blacklist_ = if length(blacklists) > 1 then pool_blacklist.ta_pooled + else if length(blacklists) > 0 then blacklists[0] + else blacklist2_ + String mito_chr_name_ = select_first([mito_chr_name, read_genome_tsv.mito_chr_name]) + String regex_bfilt_peak_chr_name_ = select_first([regex_bfilt_peak_chr_name, read_genome_tsv.regex_bfilt_peak_chr_name]) + String genome_name_ = select_first([genome_name, read_genome_tsv.genome_name, basename(chrsz_)]) + + ### temp vars (do not define these) + String aligner_ = if defined(custom_align_py) then 'custom' else aligner + String peak_caller_ = if pipeline_type=='tf' then select_first([peak_caller, 'spp']) + else select_first([peak_caller, 'macs2']) + String peak_type_ = if peak_caller_=='spp' then 'regionPeak' + else 'narrowPeak' + Boolean enable_idr = pipeline_type=='tf' # enable_idr for TF chipseq only + String idr_rank_ = if peak_caller_=='spp' then 'signal.value' + else if peak_caller_=='macs2' then 'p.value' + else 'p.value' + Int cap_num_peak_spp = 300000 + Int cap_num_peak_macs2 = 500000 + Int cap_num_peak_ = if peak_caller_ == 'spp' then select_first([cap_num_peak, cap_num_peak_spp]) + else select_first([cap_num_peak, cap_num_peak_macs2]) + Int mapq_thresh_ = mapq_thresh + Boolean enable_xcor_ = if pipeline_type=='control' then false else true + Boolean enable_count_signal_track_ = if pipeline_type=='control' then false else enable_count_signal_track + Boolean enable_jsd_ = if pipeline_type=='control' then false else enable_jsd + Boolean enable_gc_bias_ = if pipeline_type=='control' then false else enable_gc_bias + Boolean align_only_ = if pipeline_type=='control' then true else align_only + + Float align_mem_factor_ = if aligner_ =='bowtie2' then align_bowtie2_mem_factor + else align_bwa_mem_factor + Float align_disk_factor_ = if aligner_ =='bowtie2' then align_bowtie2_disk_factor + else align_bwa_disk_factor + Float call_peak_mem_factor_ = if peak_caller_ =='spp' then call_peak_spp_mem_factor + else call_peak_macs2_mem_factor + Float call_peak_disk_factor_ = if peak_caller_ =='spp' then 
call_peak_spp_disk_factor
+        else call_peak_macs2_disk_factor
+
+    # temporary 2-dim fastqs array [rep_id][merge_id]
+    Array[Array[File]] fastqs_R1 =
+        if length(fastqs_rep10_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1, fastqs_rep9_R1, fastqs_rep10_R1]
+        else if length(fastqs_rep9_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1, fastqs_rep9_R1]
+        else if length(fastqs_rep8_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1, fastqs_rep7_R1, fastqs_rep8_R1]
+        else if length(fastqs_rep7_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1, fastqs_rep7_R1]
+        else if length(fastqs_rep6_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1,
+            fastqs_rep6_R1]
+        else if length(fastqs_rep5_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1, fastqs_rep5_R1]
+        else if length(fastqs_rep4_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1, fastqs_rep4_R1]
+        else if length(fastqs_rep3_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1]
+        else if length(fastqs_rep2_R1)>0 then
+            [fastqs_rep1_R1, fastqs_rep2_R1]
+        else if length(fastqs_rep1_R1)>0 then
+            [fastqs_rep1_R1]
+        else []
+    # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep)
+    Array[Array[File]] fastqs_R2 =
+        [fastqs_rep1_R2, fastqs_rep2_R2, fastqs_rep3_R2, fastqs_rep4_R2, fastqs_rep5_R2,
+        fastqs_rep6_R2, fastqs_rep7_R2, fastqs_rep8_R2, fastqs_rep9_R2, fastqs_rep10_R2]
+
+    # temporary 2-dim ctl fastqs array [rep_id][merge_id]
+    Array[Array[File]] ctl_fastqs_R1 =
+        if length(ctl_fastqs_rep10_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1, ctl_fastqs_rep10_R1]
+        else if length(ctl_fastqs_rep9_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1, ctl_fastqs_rep9_R1]
+        else if length(ctl_fastqs_rep8_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1, ctl_fastqs_rep8_R1]
+        else if length(ctl_fastqs_rep7_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1, ctl_fastqs_rep7_R1]
+        else if length(ctl_fastqs_rep6_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1,
+            ctl_fastqs_rep6_R1]
+        else if length(ctl_fastqs_rep5_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1, ctl_fastqs_rep5_R1]
+        else if length(ctl_fastqs_rep4_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1, ctl_fastqs_rep4_R1]
+        else if length(ctl_fastqs_rep3_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1, ctl_fastqs_rep3_R1]
+        else if length(ctl_fastqs_rep2_R1)>0 then
+            [ctl_fastqs_rep1_R1, ctl_fastqs_rep2_R1]
+        else if length(ctl_fastqs_rep1_R1)>0 then
+            [ctl_fastqs_rep1_R1]
+        else []
+    # no need to do that for R2 (R1 array will be used to determine presence of fastq for each rep)
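+    # Illustrative sketch (not part of the original pipeline): the chained
+    # `if length(...)>0` expression above is the WDL idiom for trimming trailing
+    # empty replicate slots when the language offers no variadic max(). A
+    # hypothetical three-replicate version reads:
+    #     Array[Array[File]] fastqs_R1 =
+    #         if length(fastqs_rep3_R1)>0 then [fastqs_rep1_R1, fastqs_rep2_R1, fastqs_rep3_R1]
+    #         else if length(fastqs_rep2_R1)>0 then [fastqs_rep1_R1, fastqs_rep2_R1]
+    #         else if length(fastqs_rep1_R1)>0 then [fastqs_rep1_R1]
+    #         else []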
+    Array[Array[File]] ctl_fastqs_R2 =
+        [ctl_fastqs_rep1_R2, ctl_fastqs_rep2_R2, ctl_fastqs_rep3_R2, ctl_fastqs_rep4_R2, ctl_fastqs_rep5_R2,
+        ctl_fastqs_rep6_R2, ctl_fastqs_rep7_R2, ctl_fastqs_rep8_R2, ctl_fastqs_rep9_R2, ctl_fastqs_rep10_R2]
+
+    # temporary variables to get number of replicates
+    # WDLic implementation of max(A,B,C,...)
+    Int num_rep_fastq = length(fastqs_R1)
+    Int num_rep_bam = if length(bams) 0 || num_ctl_fastq > 0) && aligner_ != 'bwa' && aligner_ != 'bowtie2' && aligner_ != 'custom' ) {
+        call raise_exception as error_wrong_aligner { input:
+            msg = 'Choose chip.aligner to align your fastqs. Choices: bwa, bowtie2, custom.',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( aligner_ != 'bwa' && use_bwa_mem_for_pe ) {
+        call raise_exception as error_use_bwa_mem_for_non_bwa { input:
+            msg = 'To use chip.use_bwa_mem_for_pe, choose bwa for chip.aligner.',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( aligner_ != 'bowtie2' && use_bowtie2_local_mode ) {
+        call raise_exception as error_use_bowtie2_local_mode_for_non_bowtie2 { input:
+            msg = 'To use chip.use_bowtie2_local_mode, choose bowtie2 for chip.aligner.',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( aligner_ == 'custom' && ( !defined(custom_align_py) || !defined(custom_aligner_idx_tar) ) ) {
+        call raise_exception as error_custom_aligner { input:
+            msg = 'To use a custom aligner, define chip.custom_align_py and chip.custom_aligner_idx_tar.',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    if ( ( ctl_depth_limit > 0 || exp_ctl_depth_ratio_limit > 0 ) && num_ctl > 1 && length(ctl_paired_ends) > 1 ) {
+        call raise_exception as error_subsample_pooled_control_with_mixed_endedness { input:
+            msg = 'Cannot use automatic control subsampling ("chip.ctl_depth_limit">0 and "chip.exp_ctl_depth_limit">0) for ' +
+                'multiple controls with mixed endedness (e.g. SE ctl-rep1 and PE ctl-rep2). ' +
+                'Automatic control subsampling is enabled by default. ' +
+                'Disable automatic control subsampling by explicitly defining the above two parameters as 0 in your input JSON file. ' +
+                'You can still use manual control subsampling ("chip.ctl_subsample_reads">0) since it is done ' +
+                'for individual control\'s TAG-ALIGN output according to each control\'s endedness. ',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( pipeline_type == 'control' && num_ctl > 0 ) {
+        call raise_exception as error_ctl_input_defined_in_control_mode { input:
+            msg = 'In control mode (chip.pipeline_type: control), do not define ctl_* input variables. Define fastqs_repX_RY instead.',
+            runtime_environment = runtime_environment
+        }
+    }
+    if ( pipeline_type == 'control' && num_rep_fastq == 0 ) {
+        call raise_exception as error_ctl_fastq_input_required_for_control_mode { input:
+            msg = 'Control mode (chip.pipeline_type: control) is for FASTQs only. Define FASTQs in fastqs_repX_RY. 
Pipeline will recognize them as control FASTQs.',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # align each replicate
+    scatter(i in range(num_rep)) {
+        # to override endedness definition for individual replicate
+        # paired_end will override paired_ends[i]
+        Boolean paired_end_ = if !defined(paired_end) && i0
+        Boolean has_output_of_align = i0
+        Boolean has_output_of_align_ctl = i1 ) {
+        # pool tagaligns from true replicates
+        call pool_ta { input :
+            tas = ta_,
+            prefix = 'rep',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # if there are pr1 TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta_pr1 = length(select_all(spr.ta_pr1))==num_rep
+    if ( has_all_inputs_of_pool_ta_pr1 && num_rep>1 && !align_only_ && !true_rep_only ) {
+        # pool tagaligns from pseudo replicate 1
+        call pool_ta as pool_ta_pr1 { input :
+            tas = spr.ta_pr1,
+            prefix = 'rep-pr1',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # if there are pr2 TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta_pr2 = length(select_all(spr.ta_pr2))==num_rep
+    if ( has_all_inputs_of_pool_ta_pr2 && num_rep>1 && !align_only_ && !true_rep_only ) {
+        # pool tagaligns from pseudo replicate 2
+        call pool_ta as pool_ta_pr2 { input :
+            tas = spr.ta_pr2,
+            prefix = 'rep-pr2',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # if there are CTL TAs for ALL replicates then pool them
+    Boolean has_all_inputs_of_pool_ta_ctl = length(select_all(ctl_ta_))==num_ctl
+    if ( has_all_inputs_of_pool_ta_ctl && num_ctl>1 ) {
+        # pool tagaligns from true replicates
+        call pool_ta as pool_ta_ctl { input :
+            tas = ctl_ta_,
+            prefix = 'ctl',
+            runtime_environment = runtime_environment
+        }
+    }
+
+    Boolean has_input_of_count_signal_track_pooled = defined(pool_ta.ta_pooled)
+    if ( has_input_of_count_signal_track_pooled && enable_count_signal_track_ && num_rep>1 ) {
+        call count_signal_track as count_signal_track_pooled { input :
+            ta = pool_ta.ta_pooled,
+            chrsz = chrsz_,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    Boolean has_input_of_jsd = defined(blacklist_) && length(select_all(nodup_bam_))==num_rep
+    if ( has_input_of_jsd && num_rep > 0 && enable_jsd_ ) {
+        # fingerprint and JS-distance plot
+        call jsd { input :
+            nodup_bams = nodup_bam_,
+            ctl_bams = ctl_nodup_bam_, # use first control only
+            blacklist = blacklist_,
+            mapq_thresh = mapq_thresh_,
+
+            cpu = jsd_cpu,
+            mem_factor = jsd_mem_factor,
+            time_hr = jsd_time_hr,
+            disk_factor = jsd_disk_factor,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    Boolean has_all_input_of_choose_ctl = length(select_all(ta_))==num_rep
+        && length(select_all(ctl_ta_))==num_ctl && num_ctl > 0
+    if ( has_all_input_of_choose_ctl && !align_only_ ) {
+        # choose appropriate control for each exp IP replicate
+        # outputs:
+        # choose_ctl.idx : control replicate index for each exp replicate
+        #                  -1 means pooled ctl replicate
+        call choose_ctl { input:
+            tas = ta_,
+            ctl_tas = ctl_ta_,
+            ta_pooled = pool_ta.ta_pooled,
+            ctl_ta_pooled = pool_ta_ctl.ta_pooled,
+            always_use_pooled_ctl = always_use_pooled_ctl,
+            ctl_depth_ratio = ctl_depth_ratio,
+            ctl_depth_limit = ctl_depth_limit,
+            exp_ctl_depth_ratio_limit = exp_ctl_depth_ratio_limit,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    scatter(i in range(num_rep)) {
+        # make control ta array [[1,2,3,4]] -> [[1],[2],[3],[4]]
+        # chosen_ctl_ta_id
+        #     >=0: control TA index (this means that control TA with this index exists)
+        #     -1: use pooled control
+        #     -2: there is no control
+        Int
chosen_ctl_ta_id = if has_all_input_of_choose_ctl && !align_only_ then + select_first([choose_ctl.chosen_ctl_ta_ids])[i] else -2 + Int chosen_ctl_ta_subsample = if has_all_input_of_choose_ctl && !align_only_ then + select_first([choose_ctl.chosen_ctl_ta_subsample])[i] else 0 + Boolean chosen_ctl_paired_end = if chosen_ctl_ta_id == -2 then false + else if chosen_ctl_ta_id == -1 then ctl_paired_end_[0] + else ctl_paired_end_[chosen_ctl_ta_id] + + if ( chosen_ctl_ta_id > -2 && chosen_ctl_ta_subsample > 0 ) { + call subsample_ctl { input: + ta = if chosen_ctl_ta_id == -1 then pool_ta_ctl.ta_pooled + else ctl_ta_[ chosen_ctl_ta_id ], + subsample = chosen_ctl_ta_subsample, + paired_end = chosen_ctl_paired_end, + mem_factor = subsample_ctl_mem_factor, + disk_factor = subsample_ctl_disk_factor, + runtime_environment = runtime_environment + } + } + Array[File] chosen_ctl_tas = if chosen_ctl_ta_id <= -2 then [] + else if chosen_ctl_ta_subsample > 0 then [ select_first([subsample_ctl.ta_subsampled]) ] + else if chosen_ctl_ta_id == -1 then [ select_first([pool_ta_ctl.ta_pooled]) ] + else [ select_first([ctl_ta_[ chosen_ctl_ta_id ]]) ] + } + Int chosen_ctl_ta_pooled_subsample = if has_all_input_of_choose_ctl && !align_only_ then + select_first([choose_ctl.chosen_ctl_ta_subsample_pooled]) else 0 + + # workaround for dx error (Unsupported combination: womType: Int womValue: ([225], Array[Int])) + Array[Int] fraglen_tmp = select_all(fraglen_) + + # we have all tas and ctl_tas (optional for histone chipseq) ready, let's call peaks + scatter(i in range(num_rep)) { + Boolean has_input_of_call_peak = defined(ta_[i]) + Boolean has_output_of_call_peak = i 1 ) { + # rounded mean of fragment length, which will be used for + # 1) calling peaks for pooled true/pseudo replicates + # 2) calculating FRiP + call rounded_mean as fraglen_mean { input : + ints = fraglen_tmp, + runtime_environment = runtime_environment + } + # } + + if ( has_all_input_of_choose_ctl && !align_only_ && chosen_ctl_ta_pooled_subsample > 0 ) { + call subsample_ctl as subsample_ctl_pooled { input: + ta = if num_ctl < 2 then ctl_ta_[0] + else pool_ta_ctl.ta_pooled, + subsample = chosen_ctl_ta_pooled_subsample, + paired_end = ctl_paired_end_[0], + mem_factor = subsample_ctl_mem_factor, + disk_factor = subsample_ctl_disk_factor, + runtime_environment = runtime_environment + } + } + # actually not an array + Array[File?] 
chosen_ctl_ta_pooled = if !has_all_input_of_choose_ctl || align_only_ then [] + else if chosen_ctl_ta_pooled_subsample > 0 then [ subsample_ctl_pooled.ta_subsampled ] + else if num_ctl < 2 then [ ctl_ta_[0] ] + else [ pool_ta_ctl.ta_pooled ] + + Boolean has_input_of_call_peak_pooled = defined(pool_ta.ta_pooled) + Boolean has_output_of_call_peak_pooled = defined(peak_pooled) + if ( has_input_of_call_peak_pooled && !has_output_of_call_peak_pooled && !align_only_ && num_rep>1 ) { + # call peaks on pooled replicate + # always call peaks for pooled replicate to get signal tracks + call call_peak as call_peak_pooled { input : + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([select_all([pool_ta.ta_pooled]), chosen_ctl_ta_pooled]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp + else if peak_caller_ == 'macs2' then runtime_environment_macs2 + else runtime_environment + } + } + File? peak_pooled_ = if has_output_of_call_peak_pooled then peak_pooled + else call_peak_pooled.peak + + # macs2 signal track for pooled rep + if ( has_input_of_call_peak_pooled && !align_only_ && num_rep>1 ) { + call macs2_signal_track as macs2_signal_track_pooled { input : + tas = flatten([select_all([pool_ta.ta_pooled]), chosen_ctl_ta_pooled]), + gensz = gensz_, + chrsz = chrsz_, + pval_thresh = pval_thresh, + fraglen = fraglen_mean.rounded_mean, + + mem_factor = macs2_signal_track_mem_factor, + disk_factor = macs2_signal_track_disk_factor, + time_hr = macs2_signal_track_time_hr, + runtime_environment = runtime_environment_macs2 + } + } + + Boolean has_input_of_call_peak_ppr1 = defined(pool_ta_pr1.ta_pooled) + Boolean has_output_of_call_peak_ppr1 = defined(peak_ppr1) + if ( has_input_of_call_peak_ppr1 && !has_output_of_call_peak_ppr1 && !align_only_ && !true_rep_only && num_rep>1 ) { + # call peaks on 1st pooled pseudo replicates + call call_peak as call_peak_ppr1 { input : + peak_caller = peak_caller_, + peak_type = peak_type_, + tas = flatten([select_all([pool_ta_pr1.ta_pooled]), chosen_ctl_ta_pooled]), + gensz = gensz_, + chrsz = chrsz_, + cap_num_peak = cap_num_peak_, + pval_thresh = pval_thresh, + fdr_thresh = fdr_thresh, + fraglen = fraglen_mean.rounded_mean, + blacklist = blacklist_, + regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_, + + cpu = call_peak_cpu, + mem_factor = call_peak_mem_factor_, + disk_factor = call_peak_disk_factor_, + time_hr = call_peak_time_hr, + runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp + else if peak_caller_ == 'macs2' then runtime_environment_macs2 + else runtime_environment + } + } + File? 
peak_ppr1_ = if has_output_of_call_peak_ppr1 then peak_ppr1
+        else call_peak_ppr1.peak
+
+    Boolean has_input_of_call_peak_ppr2 = defined(pool_ta_pr2.ta_pooled)
+    Boolean has_output_of_call_peak_ppr2 = defined(peak_ppr2)
+    if ( has_input_of_call_peak_ppr2 && !has_output_of_call_peak_ppr2 && !align_only_ && !true_rep_only && num_rep>1 ) {
+        # call peaks on 2nd pooled pseudo replicates
+        call call_peak as call_peak_ppr2 { input :
+            peak_caller = peak_caller_,
+            peak_type = peak_type_,
+            tas = flatten([select_all([pool_ta_pr2.ta_pooled]), chosen_ctl_ta_pooled]),
+            gensz = gensz_,
+            chrsz = chrsz_,
+            cap_num_peak = cap_num_peak_,
+            pval_thresh = pval_thresh,
+            fdr_thresh = fdr_thresh,
+            fraglen = fraglen_mean.rounded_mean,
+            blacklist = blacklist_,
+            regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+
+            cpu = call_peak_cpu,
+            mem_factor = call_peak_mem_factor_,
+            disk_factor = call_peak_disk_factor_,
+            time_hr = call_peak_time_hr,
+            runtime_environment = if peak_caller_ == 'spp' then runtime_environment_spp
+                else if peak_caller_ == 'macs2' then runtime_environment_macs2
+                else runtime_environment
+        }
+    }
+    File? peak_ppr2_ = if has_output_of_call_peak_ppr2 then peak_ppr2
+        else call_peak_ppr2.peak
+
+    # do IDR/overlap on all pairs of two replicates (i,j)
+    # where i and j are zero-based indices and 0 <= i < j < num_rep
+    scatter( pair in cross(range(num_rep),range(num_rep)) ) {
+        # pair.left = 0-based index of 1st replicate
+        # pair.right = 0-based index of 2nd replicate
+        File? peak1_ = peak_[pair.left]
+        File? peak2_ = peak_[pair.right]
+        if ( !align_only_ && pair.left 1 ) {
+        # Naive overlap on pooled pseudo replicates
+        call overlap as overlap_ppr { input :
+            prefix = 'pooled-pr1_vs_pooled-pr2',
+            peak1 = peak_ppr1_,
+            peak2 = peak_ppr2_,
+            peak_pooled = peak_pooled_,
+            peak_type = peak_type_,
+            fraglen = fraglen_mean.rounded_mean,
+            blacklist = blacklist_,
+            chrsz = chrsz_,
+            regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+            ta = pool_ta.ta_pooled,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    if ( !align_only_ && !true_rep_only && num_rep > 1 && enable_idr ) {
+        # IDR on pooled pseudo replicates
+        call idr as idr_ppr { input :
+            prefix = 'pooled-pr1_vs_pooled-pr2',
+            peak1 = peak_ppr1_,
+            peak2 = peak_ppr2_,
+            peak_pooled = peak_pooled_,
+            idr_thresh = idr_thresh,
+            peak_type = peak_type_,
+            fraglen = fraglen_mean.rounded_mean,
+            rank = idr_rank_,
+            blacklist = blacklist_,
+            chrsz = chrsz_,
+            regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
+            ta = pool_ta.ta_pooled,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # reproducibility QC for overlap/IDR peaks
+    if ( !align_only_ && !true_rep_only && num_rep > 0 ) {
+        # reproducibility QC for overlapping peaks
+        call reproducibility as reproducibility_overlap { input :
+            prefix = 'overlap',
+            peaks = select_all(overlap.bfilt_overlap_peak),
+            peaks_pr = if defined(overlap_pr.bfilt_overlap_peak) then select_first([overlap_pr.bfilt_overlap_peak]) else [],
+            peak_ppr = overlap_ppr.bfilt_overlap_peak,
+            peak_type = peak_type_,
+            chrsz = chrsz_,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    if ( !align_only_ && !true_rep_only && num_rep > 0 && enable_idr ) {
+        # reproducibility QC for IDR peaks
+        call reproducibility as reproducibility_idr { input :
+            prefix = 'idr',
+            peaks = select_all(idr.bfilt_idr_peak),
+            peaks_pr = if defined(idr_pr.bfilt_idr_peak) then select_first([idr_pr.bfilt_idr_peak]) else [],
+            peak_ppr = idr_ppr.bfilt_idr_peak,
+            peak_type = peak_type_,
+            chrsz = chrsz_,
+            runtime_environment = runtime_environment
+        }
+    }
+
+    # Generate final QC report and JSON
+    call qc_report { input :
+        pipeline_ver = pipeline_ver,
+        title = title,
+        description = description,
+        genome = genome_name_,
+        paired_ends = paired_end_,
+        ctl_paired_ends = ctl_paired_end_,
+        pipeline_type = pipeline_type,
+        aligner = aligner_,
+        no_dup_removal = no_dup_removal,
+        peak_caller = peak_caller_,
+        cap_num_peak = cap_num_peak_,
+        idr_thresh = idr_thresh,
+        pval_thresh = pval_thresh,
+        xcor_trim_bp = xcor_trim_bp,
+        xcor_subsample_reads = xcor_subsample_reads,
+
+        samstat_qcs = select_all(align.samstat_qc),
+        nodup_samstat_qcs = select_all(filter.samstat_qc),
+        dup_qcs = select_all(filter.dup_qc),
+        lib_complexity_qcs = select_all(filter.lib_complexity_qc),
+        xcor_plots = select_all(xcor.plot_png),
+        xcor_scores = select_all(xcor.score),
+
+        ctl_samstat_qcs = select_all(align_ctl.samstat_qc),
+        ctl_nodup_samstat_qcs = select_all(filter_ctl.samstat_qc),
+        ctl_dup_qcs = select_all(filter_ctl.dup_qc),
+        ctl_lib_complexity_qcs = select_all(filter_ctl.lib_complexity_qc),
+
+        jsd_plot = jsd.plot,
+        jsd_qcs = if defined(jsd.jsd_qcs) then select_first([jsd.jsd_qcs]) else [],
+
+        frip_qcs = select_all(call_peak.frip_qc),
+        frip_qcs_pr1 = select_all(call_peak_pr1.frip_qc),
+        frip_qcs_pr2 = select_all(call_peak_pr2.frip_qc),
+        frip_qc_pooled = call_peak_pooled.frip_qc,
+        frip_qc_ppr1 = call_peak_ppr1.frip_qc,
+        frip_qc_ppr2 = call_peak_ppr2.frip_qc,
+
+        idr_plots = select_all(idr.idr_plot),
+        idr_plots_pr = if defined(idr_pr.idr_plot) then select_first([idr_pr.idr_plot]) else [],
+        idr_plot_ppr = idr_ppr.idr_plot,
+        frip_idr_qcs = select_all(idr.frip_qc),
+        frip_idr_qcs_pr = if defined(idr_pr.frip_qc) then select_first([idr_pr.frip_qc]) else [],
+        frip_idr_qc_ppr = idr_ppr.frip_qc,
+        frip_overlap_qcs = select_all(overlap.frip_qc),
+        frip_overlap_qcs_pr = if defined(overlap_pr.frip_qc) then select_first([overlap_pr.frip_qc]) else [],
+        frip_overlap_qc_ppr = overlap_ppr.frip_qc,
+        idr_reproducibility_qc = reproducibility_idr.reproducibility_qc,
+        overlap_reproducibility_qc = reproducibility_overlap.reproducibility_qc,
+
+        gc_plots = select_all(gc_bias.gc_plot),
+
+        peak_region_size_qcs = select_all(call_peak.peak_region_size_qc),
+        peak_region_size_plots = select_all(call_peak.peak_region_size_plot),
+        num_peak_qcs = select_all(call_peak.num_peak_qc),
+
+        idr_opt_peak_region_size_qc = reproducibility_idr.peak_region_size_qc,
+        idr_opt_peak_region_size_plot = reproducibility_idr.peak_region_size_plot,
+        idr_opt_num_peak_qc = reproducibility_idr.num_peak_qc,
+
+        overlap_opt_peak_region_size_qc = reproducibility_overlap.peak_region_size_qc,
+        overlap_opt_peak_region_size_plot = reproducibility_overlap.peak_region_size_plot,
+        overlap_opt_num_peak_qc = reproducibility_overlap.num_peak_qc,
+
+        runtime_environment = runtime_environment
+    }
+
+    output {
+        File report = qc_report.report
+        File qc_json = qc_report.qc_json
+        Boolean qc_json_ref_match = qc_report.qc_json_ref_match
+    }
+}
+
+task align {
+    input {
+        Array[File] fastqs_R1   # [merge_id]
+        Array[File] fastqs_R2
+        File? ref_fa
+        Int? trim_bp            # this is for R1 only
+        Int crop_length
+        Int crop_length_tol
+        String? trimmomatic_phred_score_format
+
+        String aligner
+
+        String mito_chr_name
+        Int? multimapping
+        File? custom_align_py
+        File? idx_tar           # reference index tar
+        Boolean paired_end
+        Boolean use_bwa_mem_for_pe
+        Int bwa_mem_read_len_limit
+        Boolean use_bowtie2_local_mode
+
+        String?
trimmomatic_java_heap + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(fastqs_R1, "G") + size(fastqs_R2, "G") + Float mem_gb = 5.0 + size(idx_tar, "G") + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(40.0 + disk_factor * input_file_size_gb) + + Float trimmomatic_java_heap_factor = 0.9 + Array[Array[File]] tmp_fastqs = if paired_end then transpose([fastqs_R1, fastqs_R2]) + else transpose([fastqs_R1]) + command { + set -e + + # check if pipeline dependencies can be found + if [[ -z "$(which encode_task_merge_fastq.py 2> /dev/null || true)" ]] + then + echo -e "\n* Error: pipeline environment (docker, singularity or conda) not found." 1>&2 + exit 3 + fi + python3 $(which encode_task_merge_fastq.py) \ + ${write_tsv(tmp_fastqs)} \ + ${if paired_end then '--paired-end' else ''} \ + ${'--nth ' + cpu} + + if [ -z '${trim_bp}' ]; then + SUFFIX= + else + SUFFIX=_trimmed + python3 $(which encode_task_trim_fastq.py) \ + R1/*.fastq.gz \ + --trim-bp ${trim_bp} \ + --out-dir R1$SUFFIX + if [ '${paired_end}' == 'true' ]; then + python3 $(which encode_task_trim_fastq.py) \ + R2/*.fastq.gz \ + --trim-bp ${trim_bp} \ + --out-dir R2$SUFFIX + fi + fi + if [ '${crop_length}' == '0' ]; then + SUFFIX=$SUFFIX + else + NEW_SUFFIX="$SUFFIX"_cropped + python3 $(which encode_task_trimmomatic.py) \ + --fastq1 R1$SUFFIX/*.fastq.gz \ + ${if paired_end then '--fastq2 R2$SUFFIX/*.fastq.gz' else ''} \ + ${if paired_end then '--paired-end' else ''} \ + --crop-length ${crop_length} \ + --crop-length-tol "${crop_length_tol}" \ + ${'--phred-score-format ' + trimmomatic_phred_score_format } \ + --out-dir-R1 R1$NEW_SUFFIX \ + ${if paired_end then '--out-dir-R2 R2$NEW_SUFFIX' else ''} \ + ${'--trimmomatic-java-heap ' + if defined(trimmomatic_java_heap) then trimmomatic_java_heap else (round(mem_gb * trimmomatic_java_heap_factor) + 'G')} \ + ${'--nth ' + cpu} + SUFFIX=$NEW_SUFFIX + fi + + if [ '${aligner}' == 'bwa' ]; then + python3 $(which encode_task_bwa.py) \ + ${idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ + ${if paired_end then '--paired-end' else ''} \ + ${if use_bwa_mem_for_pe then '--use-bwa-mem-for-pe' else ''} \ + ${'--bwa-mem-read-len-limit ' + bwa_mem_read_len_limit} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + + elif [ '${aligner}' == 'bowtie2' ]; then + python3 $(which encode_task_bowtie2.py) \ + ${idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ + ${'--multimapping ' + multimapping} \ + ${if paired_end then '--paired-end' else ''} \ + ${if use_bowtie2_local_mode then '--local' else ''} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + else + python3 ${custom_align_py} \ + ${idx_tar} \ + R1$SUFFIX/*.fastq.gz \ + ${if paired_end then 'R2$SUFFIX/*.fastq.gz' else ''} \ + ${if paired_end then '--paired-end' else ''} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + fi + + python3 $(which encode_task_post_align.py) \ + R1$SUFFIX/*.fastq.gz $(ls *.bam) \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + rm -rf R1 R2 R1$SUFFIX R2$SUFFIX + } + output { + File bam = glob('*.bam')[0] + File bai = glob('*.bai')[0] + File samstat_qc = glob('*.samstats.qc')[0] + File read_len_log = glob('*.read_length.txt')[0] + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} 
SSD' + preemptible: 0 + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task filter { + input { + File? bam + Boolean paired_end + File? ref_fa + Boolean redact_nodup_bam + String dup_marker # picard.jar MarkDuplicates (picard) or + # sambamba markdup (sambamba) + Int mapq_thresh # threshold for low MAPQ reads removal + Array[String] filter_chrs # chrs to be removed from final (nodup/filt) BAM + File chrsz # 2-col chromosome sizes file + Boolean no_dup_removal # no dupe reads removal when filtering BAM + String mito_chr_name + + Int cpu + Float mem_factor + String? picard_java_heap + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(bam, "G") + Float picard_java_heap_factor = 0.9 + Float mem_gb = 6.0 + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_filter.py) \ + ${bam} \ + ${if paired_end then '--paired-end' else ''} \ + --multimapping 0 \ + ${'--dup-marker ' + dup_marker} \ + ${'--mapq-thresh ' + mapq_thresh} \ + --filter-chrs ${sep=' ' filter_chrs} \ + ${'--chrsz ' + chrsz} \ + ${if no_dup_removal then '--no-dup-removal' else ''} \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} \ + ${'--picard-java-heap ' + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + 'G')} + + if [ '${redact_nodup_bam}' == 'true' ]; then + python3 $(which encode_task_bam_to_pbam.py) \ + $(ls *.bam) \ + ${'--ref-fa ' + ref_fa} \ + '--delete-original-bam' + fi + } + output { + File nodup_bam = glob('*.bam')[0] + File nodup_bai = glob('*.bai')[0] + File samstat_qc = glob('*.samstats.qc')[0] + File dup_qc = glob('*.dup.qc')[0] + File lib_complexity_qc = glob('*.lib_complexity.qc')[0] + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task bam2ta { + input { + File? bam + Boolean paired_end + String mito_chr_name # mito chromosome name + Int subsample # number of reads to subsample TAGALIGN + # this affects all downstream analysis + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(bam, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Float samtools_mem_gb = 0.8 * mem_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_bam2ta.py) \ + ${bam} \ + --disable-tn5-shift \ + ${if paired_end then '--paired-end' else ''} \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--subsample ' + subsample} \ + ${'--mem-gb ' + samtools_mem_gb} \ + ${'--nth ' + cpu} + } + output { + File ta = glob('*.tagAlign.gz')[0] + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task spr { + input { + File? 
ta + Boolean paired_end + Int pseudoreplication_random_seed + + Float mem_factor + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_spr.py) \ + ${ta} \ + ${'--pseudoreplication-random-seed ' + pseudoreplication_random_seed} \ + ${if paired_end then '--paired-end' else ''} + } + output { + File ta_pr1 = glob('*.pr1.tagAlign.gz')[0] + File ta_pr2 = glob('*.pr2.tagAlign.gz')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 4 + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task pool_ta { + input { + Array[File?] tas + Int? col # number of columns in pooled TA + String? prefix # basename prefix + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_pool_ta.py) \ + ${sep=' ' select_all(tas)} \ + ${'--prefix ' + prefix} \ + ${'--col ' + col} + } + output { + File ta_pooled = glob('*.tagAlign.gz')[0] + } + runtime { + cpu : 1 + memory : '8 GB' + time : 4 + disks : 'local-disk 100 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task xcor { + input { + File? ta + Boolean paired_end + String mito_chr_name + Int subsample # number of reads to subsample TAGALIGN + # this will be used for xcor only + # will not affect any downstream analysis + String? chip_seq_type + Int? exclusion_range_min + Int? exclusion_range_max + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 8.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_xcor.py) \ + ${ta} \ + ${if paired_end then '--paired-end' else ''} \ + ${'--mito-chr-name ' + mito_chr_name} \ + ${'--subsample ' + subsample} \ + ${'--chip-seq-type ' + chip_seq_type} \ + ${'--exclusion-range-min ' + exclusion_range_min} \ + ${'--exclusion-range-max ' + exclusion_range_max} \ + ${'--subsample ' + subsample} \ + ${'--nth ' + cpu} + } + output { + File plot_pdf = glob('*.cc.plot.pdf')[0] + File plot_png = glob('*.cc.plot.png')[0] + File score = glob('*.cc.qc')[0] + File fraglen_log = glob('*.cc.fraglen.txt')[0] + Int fraglen = read_int(fraglen_log) + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task jsd { + input { + Array[File?] nodup_bams + Array[File?] ctl_bams + File? 
blacklist + Int mapq_thresh + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(nodup_bams, "G") + size(ctl_bams, "G") + Float mem_gb = 5.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_jsd.py) \ + ${sep=' ' select_all(nodup_bams)} \ + ${if length(ctl_bams)>0 then '--ctl-bam '+ select_first(ctl_bams) else ''} \ + ${'--mapq-thresh '+ mapq_thresh} \ + ${'--blacklist '+ blacklist} \ + ${'--nth ' + cpu} + } + output { + File plot = glob('*.png')[0] + Array[File] jsd_qcs = glob('*.jsd.qc') + } + runtime { + cpu : cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task choose_ctl { + input { + Array[File?] tas + Array[File?] ctl_tas + File? ta_pooled + File? ctl_ta_pooled + Boolean always_use_pooled_ctl # always use pooled control for all exp rep. + Float ctl_depth_ratio # if ratio between controls is higher than this + # then always use pooled control for all exp rep. + Int ctl_depth_limit + Float exp_ctl_depth_ratio_limit + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_choose_ctl.py) \ + --tas ${sep=' ' select_all(tas)} \ + --ctl-tas ${sep=' ' select_all(ctl_tas)} \ + ${'--ta-pooled ' + ta_pooled} \ + ${'--ctl-ta-pooled ' + ctl_ta_pooled} \ + ${if always_use_pooled_ctl then '--always-use-pooled-ctl' else ''} \ + ${'--ctl-depth-ratio ' + ctl_depth_ratio} \ + ${'--ctl-depth-limit ' + ctl_depth_limit} \ + ${'--exp-ctl-depth-ratio-limit ' + exp_ctl_depth_ratio_limit} + } + output { + File chosen_ctl_id_tsv = glob('chosen_ctl.tsv')[0] + File chosen_ctl_subsample_tsv = glob('chosen_ctl_subsample.tsv')[0] + File chosen_ctl_subsample_pooled_txt = glob('chosen_ctl_subsample_pooled.txt')[0] + Array[Int] chosen_ctl_ta_ids = read_lines(chosen_ctl_id_tsv) + Array[Int] chosen_ctl_ta_subsample = read_lines(chosen_ctl_subsample_tsv) + Int chosen_ctl_ta_subsample_pooled = read_int(chosen_ctl_subsample_pooled_txt) + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task count_signal_track { + input { + File? ta # tag-align + File chrsz # 2-col chromosome sizes file + + RuntimeEnvironment runtime_environment + } + Float mem_gb = 8.0 + + command { + set -e + python3 $(which encode_task_count_signal_track.py) \ + ${ta} \ + ${'--chrsz ' + chrsz} \ + ${'--mem-gb ' + mem_gb} + } + output { + File pos_bw = glob('*.positive.bigwig')[0] + File neg_bw = glob('*.negative.bigwig')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task subsample_ctl { + input { + File? 
ta + Boolean paired_end + Int subsample + + Float mem_factor + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(ta, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + python3 $(which encode_task_subsample_ctl.py) \ + ${ta} \ + ${'--subsample ' + subsample} \ + ${if paired_end then '--paired-end' else ''} \ + } + output { + File ta_subsampled = glob('*.tagAlign.gz')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 4 + disks : 'local-disk ${disk_gb} SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task call_peak { + input { + String peak_caller + String peak_type + Array[File?] tas # [ta, control_ta]. control_ta is optional + Int fraglen # fragment length from xcor + String gensz # Genome size (sum of entries in 2nd column of + # chr. sizes file, or hs for human, ms for mouse) + File chrsz # 2-col chromosome sizes file + Int cap_num_peak # cap number of raw peaks called from MACS2 + Float pval_thresh # p.value threshold for MACS2 + Float? fdr_thresh # FDR threshold for SPP + + File? blacklist # blacklist BED to filter raw peaks + String? regex_bfilt_peak_chr_name + + Int cpu + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(tas, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + + if [ '${peak_caller}' == 'macs2' ]; then + python3 $(which encode_task_macs2_chip.py) \ + ${sep=' ' select_all(tas)} \ + ${'--gensz '+ gensz} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--cap-num-peak ' + cap_num_peak} \ + ${'--pval-thresh '+ pval_thresh} \ + ${'--mem-gb ' + mem_gb} + + elif [ '${peak_caller}' == 'spp' ]; then + python3 $(which encode_task_spp.py) \ + ${sep=' ' select_all(tas)} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--cap-num-peak ' + cap_num_peak} \ + ${'--fdr-thresh '+ fdr_thresh} \ + ${'--nth ' + cpu} + fi + + python3 $(which encode_task_post_call_peak_chip.py) \ + $(ls *Peak.gz) \ + ${'--ta ' + tas[0]} \ + ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--peak-type ' + peak_type} \ + ${'--blacklist ' + blacklist} + } + output { + File peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] + # generated by post_call_peak py + File bfilt_peak = glob('*.bfilt.'+peak_type+'.gz')[0] + File bfilt_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] + File bfilt_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] + File bfilt_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] + File bfilt_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] + File frip_qc = glob('*.frip.qc')[0] + File peak_region_size_qc = glob('*.peak_region_size.qc')[0] + File peak_region_size_plot = glob('*.peak_region_size.png')[0] + File num_peak_qc = glob('*.num_peak.qc')[0] + } + runtime { + cpu : if peak_caller == 'macs2' then 2 else cpu + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + preemptible: 0 + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task macs2_signal_track { + input { + Array[File?] tas # [ta, control_ta]. 
control_ta is optional + Int fraglen # fragment length from xcor + String gensz # Genome size (sum of entries in 2nd column of + # chr. sizes file, or hs for human, ms for mouse) + File chrsz # 2-col chromosome sizes file + Float pval_thresh # p.value threshold + + Float mem_factor + Int time_hr + Float disk_factor + + RuntimeEnvironment runtime_environment + } + Float input_file_size_gb = size(tas, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Int disk_gb = round(20.0 + disk_factor * input_file_size_gb) + + command { + set -e + python3 $(which encode_task_macs2_signal_track_chip.py) \ + ${sep=' ' select_all(tas)} \ + ${'--gensz '+ gensz} \ + ${'--chrsz ' + chrsz} \ + ${'--fraglen ' + fraglen} \ + ${'--pval-thresh '+ pval_thresh} \ + ${'--mem-gb ' + mem_gb} + } + output { + File pval_bw = glob('*.pval.signal.bigwig')[0] + File fc_bw = glob('*.fc.signal.bigwig')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : time_hr + disks : 'local-disk ${disk_gb} SSD' + preemptible: 0 + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task idr { + input { + String prefix # prefix for IDR output file + File? peak1 + File? peak2 + File? peak_pooled + Float idr_thresh + File? blacklist # blacklist BED to filter raw peaks + String regex_bfilt_peak_chr_name + # parameters to compute FRiP + File? ta # to calculate FRiP + Int? fraglen # fragment length from xcor + File chrsz # 2-col chromosome sizes file + String peak_type + String rank + + RuntimeEnvironment runtime_environment + } + + command { + set -e + ${if defined(ta) then '' else 'touch null.frip.qc'} + touch null + python3 $(which encode_task_idr.py) \ + ${peak1} ${peak2} ${peak_pooled} \ + ${'--prefix ' + prefix} \ + ${'--idr-thresh ' + idr_thresh} \ + ${'--peak-type ' + peak_type} \ + --idr-rank ${rank} \ + ${'--fraglen ' + fraglen} \ + ${'--chrsz ' + chrsz} \ + ${'--blacklist '+ blacklist} \ + ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ + ${'--ta ' + ta} + } + output { + File idr_peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] + File bfilt_idr_peak = glob('*.bfilt.'+peak_type+'.gz')[0] + File bfilt_idr_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] + File bfilt_idr_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] + File bfilt_idr_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] + File bfilt_idr_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] + File idr_plot = glob('*.txt.png')[0] + File idr_unthresholded_peak = glob('*.txt.gz')[0] + File idr_log = glob('*.idr*.log')[0] + File frip_qc = if defined(ta) then glob('*.frip.qc')[0] else glob('null')[0] + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task overlap { + input { + String prefix # prefix for IDR output file + File? peak1 + File? peak2 + File? peak_pooled + File? blacklist # blacklist BED to filter raw peaks + String regex_bfilt_peak_chr_name + # parameters to compute FRiP + File? ta # to calculate FRiP + Int? 
fraglen # fragment length from xcor (for FRIP) + File chrsz # 2-col chromosome sizes file + String peak_type + + RuntimeEnvironment runtime_environment + } + + command { + set -e + ${if defined(ta) then '' else 'touch null.frip.qc'} + touch null + python3 $(which encode_task_overlap.py) \ + ${peak1} ${peak2} ${peak_pooled} \ + ${'--prefix ' + prefix} \ + ${'--peak-type ' + peak_type} \ + ${'--fraglen ' + fraglen} \ + ${'--chrsz ' + chrsz} \ + ${'--blacklist '+ blacklist} \ + --nonamecheck \ + ${'--regex-bfilt-peak-chr-name \'' + regex_bfilt_peak_chr_name + '\''} \ + ${'--ta ' + ta} + } + output { + File overlap_peak = glob('*[!.][!b][!f][!i][!l][!t].'+peak_type+'.gz')[0] + File bfilt_overlap_peak = glob('*.bfilt.'+peak_type+'.gz')[0] + File bfilt_overlap_peak_bb = glob('*.bfilt.'+peak_type+'.bb')[0] + File bfilt_overlap_peak_starch = glob('*.bfilt.'+peak_type+'.starch')[0] + File bfilt_overlap_peak_hammock = glob('*.bfilt.'+peak_type+'.hammock.gz*')[0] + File bfilt_overlap_peak_hammock_tbi = glob('*.bfilt.'+peak_type+'.hammock.gz*')[1] + File frip_qc = if defined(ta) then glob('*.frip.qc')[0] else glob('null')[0] + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task reproducibility { + input { + String prefix + Array[File] peaks # peak files from pair of true replicates + # in a sorted order. for example of 4 replicates, + # 1,2 1,3 1,4 2,3 2,4 3,4. + # x,y means peak file from rep-x vs rep-y + Array[File] peaks_pr # peak files from pseudo replicates + File? peak_ppr # Peak file from pooled pseudo replicate. + String peak_type + File chrsz # 2-col chromosome sizes file + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_reproducibility.py) \ + ${sep=' ' peaks} \ + --peaks-pr ${sep=' ' peaks_pr} \ + ${'--peak-ppr '+ peak_ppr} \ + --prefix ${prefix} \ + ${'--peak-type ' + peak_type} \ + ${'--chrsz ' + chrsz} + } + output { + File optimal_peak = glob('*optimal_peak.*.gz')[0] + File optimal_peak_bb = glob('*optimal_peak.*.bb')[0] + File optimal_peak_starch = glob('*optimal_peak.*.starch')[0] + File optimal_peak_hammock = glob('*optimal_peak.*.hammock.gz*')[0] + File optimal_peak_hammock_tbi = glob('*optimal_peak.*.hammock.gz*')[1] + File conservative_peak = glob('*conservative_peak.*.gz')[0] + File conservative_peak_bb = glob('*conservative_peak.*.bb')[0] + File conservative_peak_starch = glob('*conservative_peak.*.starch')[0] + File conservative_peak_hammock = glob('*conservative_peak.*.hammock.gz*')[0] + File conservative_peak_hammock_tbi = glob('*conservative_peak.*.hammock.gz*')[1] + File reproducibility_qc = glob('*reproducibility.qc')[0] + # QC metrics for optimal peak + File peak_region_size_qc = glob('*.peak_region_size.qc')[0] + File peak_region_size_plot = glob('*.peak_region_size.png')[0] + File num_peak_qc = glob('*.num_peak.qc')[0] + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task gc_bias { + input { + File? nodup_bam + File ref_fa + + String? 
picard_java_heap + + RuntimeEnvironment runtime_environment + } + Float mem_factor = 0.3 + Float input_file_size_gb = size(nodup_bam, "G") + Float mem_gb = 4.0 + mem_factor * input_file_size_gb + Float picard_java_heap_factor = 0.9 + + command { + set -e + python3 $(which encode_task_gc_bias.py) \ + ${'--nodup-bam ' + nodup_bam} \ + ${'--ref-fa ' + ref_fa} \ + ${'--picard-java-heap ' + if defined(picard_java_heap) then picard_java_heap else (round(mem_gb * picard_java_heap_factor) + 'G')} + } + output { + File gc_plot = glob('*.gc_plot.png')[0] + File gc_log = glob('*.gc.txt')[0] + } + runtime { + cpu : 1 + memory : '${mem_gb} GB' + time : 6 + disks : 'local-disk 250 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task qc_report { + input { + # optional metadata + String pipeline_ver + String title # name of sample + String description # description for sample + String? genome + #String? encode_accession_id # ENCODE accession ID of sample + # workflow params + Array[Boolean] paired_ends + Array[Boolean] ctl_paired_ends + String pipeline_type + String aligner + Boolean no_dup_removal + String peak_caller + Int cap_num_peak + Float idr_thresh + Float pval_thresh + Int xcor_trim_bp + Int xcor_subsample_reads + # QCs + Array[File] samstat_qcs + Array[File] nodup_samstat_qcs + Array[File] dup_qcs + Array[File] lib_complexity_qcs + Array[File] ctl_samstat_qcs + Array[File] ctl_nodup_samstat_qcs + Array[File] ctl_dup_qcs + Array[File] ctl_lib_complexity_qcs + Array[File] xcor_plots + Array[File] xcor_scores + File? jsd_plot + Array[File] jsd_qcs + Array[File] idr_plots + Array[File] idr_plots_pr + File? idr_plot_ppr + Array[File] frip_qcs + Array[File] frip_qcs_pr1 + Array[File] frip_qcs_pr2 + File? frip_qc_pooled + File? frip_qc_ppr1 + File? frip_qc_ppr2 + Array[File] frip_idr_qcs + Array[File] frip_idr_qcs_pr + File? frip_idr_qc_ppr + Array[File] frip_overlap_qcs + Array[File] frip_overlap_qcs_pr + File? frip_overlap_qc_ppr + File? idr_reproducibility_qc + File? overlap_reproducibility_qc + + Array[File] gc_plots + + Array[File] peak_region_size_qcs + Array[File] peak_region_size_plots + Array[File] num_peak_qcs + + File? idr_opt_peak_region_size_qc + File? idr_opt_peak_region_size_plot + File? idr_opt_num_peak_qc + + File? overlap_opt_peak_region_size_qc + File? overlap_opt_peak_region_size_plot + File? overlap_opt_num_peak_qc + + File? 
qc_json_ref + + RuntimeEnvironment runtime_environment + } + + command { + set -e + python3 $(which encode_task_qc_report.py) \ + --pipeline-prefix chip \ + ${'--pipeline-ver ' + pipeline_ver} \ + ${"--title '" + sub(title,"'","_") + "'"} \ + ${"--desc '" + sub(description,"'","_") + "'"} \ + ${'--genome ' + genome} \ + ${'--multimapping ' + 0} \ + --paired-ends ${sep=' ' paired_ends} \ + --ctl-paired-ends ${sep=' ' ctl_paired_ends} \ + --pipeline-type ${pipeline_type} \ + --aligner ${aligner} \ + ${if (no_dup_removal) then '--no-dup-removal ' else ''} \ + --peak-caller ${peak_caller} \ + ${'--cap-num-peak ' + cap_num_peak} \ + --idr-thresh ${idr_thresh} \ + --pval-thresh ${pval_thresh} \ + --xcor-trim-bp ${xcor_trim_bp} \ + --xcor-subsample-reads ${xcor_subsample_reads} \ + --samstat-qcs ${sep='_:_' samstat_qcs} \ + --nodup-samstat-qcs ${sep='_:_' nodup_samstat_qcs} \ + --dup-qcs ${sep='_:_' dup_qcs} \ + --lib-complexity-qcs ${sep='_:_' lib_complexity_qcs} \ + --xcor-plots ${sep='_:_' xcor_plots} \ + --xcor-scores ${sep='_:_' xcor_scores} \ + --idr-plots ${sep='_:_' idr_plots} \ + --idr-plots-pr ${sep='_:_' idr_plots_pr} \ + --ctl-samstat-qcs ${sep='_:_' ctl_samstat_qcs} \ + --ctl-nodup-samstat-qcs ${sep='_:_' ctl_nodup_samstat_qcs} \ + --ctl-dup-qcs ${sep='_:_' ctl_dup_qcs} \ + --ctl-lib-complexity-qcs ${sep='_:_' ctl_lib_complexity_qcs} \ + ${'--jsd-plot ' + jsd_plot} \ + --jsd-qcs ${sep='_:_' jsd_qcs} \ + ${'--idr-plot-ppr ' + idr_plot_ppr} \ + --frip-qcs ${sep='_:_' frip_qcs} \ + --frip-qcs-pr1 ${sep='_:_' frip_qcs_pr1} \ + --frip-qcs-pr2 ${sep='_:_' frip_qcs_pr2} \ + ${'--frip-qc-pooled ' + frip_qc_pooled} \ + ${'--frip-qc-ppr1 ' + frip_qc_ppr1} \ + ${'--frip-qc-ppr2 ' + frip_qc_ppr2} \ + --frip-idr-qcs ${sep='_:_' frip_idr_qcs} \ + --frip-idr-qcs-pr ${sep='_:_' frip_idr_qcs_pr} \ + ${'--frip-idr-qc-ppr ' + frip_idr_qc_ppr} \ + --frip-overlap-qcs ${sep='_:_' frip_overlap_qcs} \ + --frip-overlap-qcs-pr ${sep='_:_' frip_overlap_qcs_pr} \ + ${'--frip-overlap-qc-ppr ' + frip_overlap_qc_ppr} \ + ${'--idr-reproducibility-qc ' + idr_reproducibility_qc} \ + ${'--overlap-reproducibility-qc ' + overlap_reproducibility_qc} \ + --gc-plots ${sep='_:_' gc_plots} \ + --peak-region-size-qcs ${sep='_:_' peak_region_size_qcs} \ + --peak-region-size-plots ${sep='_:_' peak_region_size_plots} \ + --num-peak-qcs ${sep='_:_' num_peak_qcs} \ + ${'--idr-opt-peak-region-size-qc ' + idr_opt_peak_region_size_qc} \ + ${'--idr-opt-peak-region-size-plot ' + idr_opt_peak_region_size_plot} \ + ${'--idr-opt-num-peak-qc ' + idr_opt_num_peak_qc} \ + ${'--overlap-opt-peak-region-size-qc ' + overlap_opt_peak_region_size_qc} \ + ${'--overlap-opt-peak-region-size-plot ' + overlap_opt_peak_region_size_plot} \ + ${'--overlap-opt-num-peak-qc ' + overlap_opt_num_peak_qc} \ + --out-qc-html qc.html \ + --out-qc-json qc.json \ + ${'--qc-json-ref ' + qc_json_ref} + } + output { + File report = glob('*qc.html')[0] + File qc_json = glob('*qc.json')[0] + Boolean qc_json_ref_match = read_string('qc_json_ref_match.txt')=='True' + } + runtime { + cpu : 1 + memory : '4 GB' + time : 4 + disks : 'local-disk 50 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +### workflow system tasks +task read_genome_tsv { + input { + File? genome_tsv + String? 
null_s + + RuntimeEnvironment runtime_environment + } + command <<< + echo "$(basename ~{genome_tsv})" > genome_name + # create empty files for all entries + touch ref_fa bowtie2_idx_tar bwa_idx_tar chrsz gensz blacklist blacklist2 + touch mito_chr_name + touch regex_bfilt_peak_chr_name + + python <>> + output { + String? genome_name = read_string('genome_name') + String? ref_fa = if size('ref_fa')==0 then null_s else read_string('ref_fa') + String? bwa_idx_tar = if size('bwa_idx_tar')==0 then null_s else read_string('bwa_idx_tar') + String? bowtie2_idx_tar = if size('bowtie2_idx_tar')==0 then null_s else read_string('bowtie2_idx_tar') + String? chrsz = if size('chrsz')==0 then null_s else read_string('chrsz') + String? gensz = if size('gensz')==0 then null_s else read_string('gensz') + String? blacklist = if size('blacklist')==0 then null_s else read_string('blacklist') + String? blacklist2 = if size('blacklist2')==0 then null_s else read_string('blacklist2') + String? mito_chr_name = if size('mito_chr_name')==0 then null_s else read_string('mito_chr_name') + String? regex_bfilt_peak_chr_name = if size('regex_bfilt_peak_chr_name')==0 then 'chr[\\dXY]+' + else read_string('regex_bfilt_peak_chr_name') + } + runtime { + maxRetries : 0 + cpu : 1 + memory : '2 GB' + time : 4 + disks : 'local-disk 10 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task rounded_mean { + input { + Array[Int] ints + + RuntimeEnvironment runtime_environment + } + command <<< + python <>> + output { + Int rounded_mean = read_int('tmp.txt') + } + runtime { + cpu : 1 + memory : '2 GB' + time : 4 + disks : 'local-disk 10 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} + +task raise_exception { + input { + String msg + + RuntimeEnvironment runtime_environment + } + command { + echo -e "\n* Error: ${msg}\n" >&2 + exit 2 + } + output { + String error_msg = '${msg}' + } + runtime { + maxRetries : 0 + cpu : 1 + memory : '2 GB' + time : 4 + disks : 'local-disk 10 SSD' + + docker : runtime_environment.docker + singularity : runtime_environment.singularity + conda : runtime_environment.conda + } +} \ No newline at end of file diff --git a/wdl-format/tests/format/clays_complex_script/source.formatted.wdl b/wdl-format/tests/format/clays_complex_script/source.formatted.wdl new file mode 100644 index 00000000..589c3137 --- /dev/null +++ b/wdl-format/tests/format/clays_complex_script/source.formatted.wdl @@ -0,0 +1,186 @@ +## # Header +# regular comment +#@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing +#@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput + +## part of preamble +version 1.2 + +#@ except: MissingMetas +struct AStruct { + String member +} + +task a_task { + meta + # Here is a comment between `meta` and the open brace. + { + # Here is a comment within `meta`. + an_escaped_string: "bar \\ \n \t ' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [ + true, + -42, + "hello, world", + ] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + parameter_meta + # Here is a comment between `parameter_meta` and the open brace. + { + # Here is a comment within `parameter_meta`. 
+ an_escaped_string: "bar \\ \n \t ' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [ + true, + -42, + "hello, world", + ] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + input + # Here is a comment between `input` and the open brace. + { + Object an_object + String a_string + Boolean a_boolean + Int an_integer + Float a_float + AStruct a_struct # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + } + + command <<< >>> + + output + # Here is a comment between `output` and the open brace. + { + Object some_other_object = { + } + String some_other_string = "foo bar baz" + Boolean some_other_boolean = true + Int some_other_integer = 42 + Float some_other_float = 0e3 + # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + AStruct some_other_struct = AStruct { + } + } + + requirements + # This is a comment between `requirements` and the open brace. + { + container: "ubuntu:latest" + } + + hints { + max_cpu: 1 + } +} + +## These are double-pound-sign comments. +## blah blah blah. +workflow hello { + meta + # Here is a comment between `meta` and the open brace. + { + # Here is a comment within `meta`. + an_escaped_string: "bar \\ \n \t ' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [ + true, + -42, + "hello, world", + ] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + parameter_meta + # Here is a comment between `parameter_meta` and the open brace. + { + # Here is a comment within `parameter_meta`. + an_escaped_string: "bar \\ \n \t ' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [ + true, + -42, + "hello, world", + ] ## This is a double-pound-sign comment at the end of the line. + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + input { + Object an_object + String a_string + Boolean a_boolean + Int an_integer + Float a_float + AStruct a_struct # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + } + + call a_task { + } + + scatter (name in name_array) { + call say_task { greeting = greeting } + } + + if (some_condition_task) { + call a_task as task_two { + } + } + + output + # Here is a comment before the output. + { + Object some_other_object = { + } + String some_other_string = "foo bar baz" + Boolean some_other_boolean = true + Int some_other_integer = 42 + Float some_other_float = 0e3 + # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. 
+ AStruct some_other_struct = AStruct { + } + } +} diff --git a/wdl-format/tests/format/clays_complex_script/source.wdl b/wdl-format/tests/format/clays_complex_script/source.wdl new file mode 100644 index 00000000..b3b78ba1 --- /dev/null +++ b/wdl-format/tests/format/clays_complex_script/source.wdl @@ -0,0 +1,165 @@ +## # Header +# regular comment +#@ except: CommentWhitespace, DeprecatedObject, DescriptionMissing +#@ except: InputSorting, MatchingParameterMeta, NonmatchingOutput + +## part of preamble +version 1.2 + +#@ except: MissingMetas +struct AStruct { + String member +} + +task a_task { + meta + # Here is a comment between `meta` and the open brace. + { + # Here is a comment within `meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [true, -42, "hello, world"] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + parameter_meta + # Here is a comment between `parameter_meta` and the open brace. + { + # Here is a comment within `parameter_meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [true, -42, "hello, world"] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + input + # Here is a comment between `input` and the open brace. + { + Object an_object + String a_string + Boolean a_boolean + Int an_integer + Float a_float + AStruct a_struct # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + } + + command <<< >>> + + output + # Here is a comment between `output` and the open brace. + { + Object some_other_object = {} + String some_other_string = "foo bar baz" + Boolean some_other_boolean = true + Int some_other_integer = 42 + Float some_other_float = 0e3 + # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + AStruct some_other_struct = AStruct {} + } + + requirements + # This is a comment between `requirements` and the open brace. + { + container: "ubuntu:latest" + } + + hints { + max_cpu: 1 + } +} + +## These are double-pound-sign comments. +## blah blah blah. +workflow hello { + meta + # Here is a comment between `meta` and the open brace. + { + # Here is a comment within `meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [true, -42, "hello, world"] + an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + parameter_meta + # Here is a comment between `parameter_meta` and the open brace. + { + # Here is a comment within `parameter_meta`. + an_escaped_string: "bar \\ \n \t \' \" \~ \$ \000 \xFF \uFFFF \UFFFFFFFF" + a_true: true + a_false: false + an_integer: 42 + a_float: -0.0e123 + an_array: [true, -42, "hello, world"] ## This is a double-pound-sign comment at the end of the line. 
+ an_object: { + subkey_one: "a", + subkey_two: 73, + subkey_three: true, + subkey_four: false, + } + an_undefined_value: null + } + + input { + Object an_object + String a_string + Boolean a_boolean + Int an_integer + Float a_float + AStruct a_struct # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + } + + call a_task { + } + + scatter (name in name_array) { + call say_task { greeting = greeting } + } + + if (some_condition_task) { + call a_task as task_two {} + } + + output + # Here is a comment before the output. + { + Object some_other_object = {} + String some_other_string = "foo bar baz" + Boolean some_other_boolean = true + Int some_other_integer = 42 + Float some_other_float = 0e3 + # This should not be higlighted, as it's not known within + # the TextMate language that it's a custom struct. + AStruct some_other_struct = AStruct {} + } +} \ No newline at end of file diff --git a/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl b/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl new file mode 100644 index 00000000..5cc8e792 --- /dev/null +++ b/wdl-format/tests/format/complex_meta_and_calls/source.formatted.wdl @@ -0,0 +1,118 @@ +version 1.0 + +workflow test_wf { + meta { + a: "hello" + b: "world" + c: 5 + d: -0xf + e: 1.0e10 + f: -2. + g: true + h: false + i: null + j: { + a: [ + 1, + 2, + 3, + ], + b: [ + "hello", + "world", + "!", + ], + c: { + x: 1, + y: 2, + z: 3, + }, + } + k: [ + { + a: {}, + b: 0, + c: "", + d: "", + e: [], + }, + { + x: [ + 1.0, + 2.0, + 3.0, + ], + }, + ] + } + + parameter_meta { + out_sj_filter_overhang_min: { + type: "SpliceJunctionMotifs", + label: "Minimum overhang required to support a splicing junction", + } + } + + input { + SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs { + noncanonical_motifs: 30, + GT_AG_and_CT_AC_motif: 12, + } + } + + call no_params call with_params { input: + a, + b, + c, + d = 1, + } + call qualified.name call qualified.name { input: + a = 1, + b = 2, + c = "3", + } + call aliased as x call aliased as x { input: + } + call f after x after y call f after x after y { input: a = [] } + call f as x after x call f as x after x after y { input: name = "hello" } + call test_task as foo { input: bowchicka = "wowwow" } + if (true) { + + call test_task after foo { input: bowchicka = "bowchicka" } + scatter (i in range(3)) { + call test_task as bar { input: bowchicka = i * 42 } + } + } + + output { + SpliceJunctionMotifs KAZAM = out_sj_filter_overhang_min + String a = "friend" + Int b = 1 + 2 + String c = "Hello, ~{a}" + Map[String, Int] d = { + "a": 0, + "b": 1, + "c": 2, + } + } +} + +task test_task { + parameter_meta { + bowchicka: { + type: "String", + label: "Bowchicka", + } + } + + input { + String bowchicka + } + + command <<<>>> +} + +struct SpliceJunctionMotifs { + Int noncanonical_motifs + Int GT_AG_and_CT_AC_motif +} diff --git a/wdl-format/tests/format/complex_meta_and_calls/source.wdl b/wdl-format/tests/format/complex_meta_and_calls/source.wdl new file mode 100644 index 00000000..7e3333f0 --- /dev/null +++ b/wdl-format/tests/format/complex_meta_and_calls/source.wdl @@ -0,0 +1,106 @@ +version +1.0 +workflow +test_wf +{ +input +{ +SpliceJunctionMotifs out_sj_filter_overhang_min = SpliceJunctionMotifs { +noncanonical_motifs: 30, +GT_AG_and_CT_AC_motif: 12, +} +} +parameter_meta +{ +out_sj_filter_overhang_min: { +type: "SpliceJunctionMotifs", +label: "Minimum overhang required to support a splicing junction" 
+} +} +output +{ +SpliceJunctionMotifs KAZAM = out_sj_filter_overhang_min +String a = "friend" +Int b = 1 + 2 +String c = "Hello, ~{a}" +Map[String, Int] d = { "a": 0, "b": 1, "c": 2} +} +meta { +a: "hello" +b: 'world' +c: 5 +d: -0xf +e: 1.0e10 +f: -2. +g: true +h: false +i: null +j: { +a: [1, 2, 3], +b: ["hello", "world", "!"], +c: { +x: 1, +y: 2, +z: 3 +} +} +k: [ +{ +a: {}, +b: 0, +c: "", +d: '', +e: [], +}, +{ +x: [1.0, 2.0, 3.0] +} +] +} +call no_params +call with_params { input: a, b, c, d = 1 } +call qualified.name +call qualified.name { input: a = 1, b = 2, c = "3" } +call aliased as x +call aliased as x { input: } +call f after x after y +call f after x after y { input: a = [] } +call f as x after x +call f as x after x after y { input: name = "hello" } +call test_task as foo { +input: bowchicka = "wowwow" +} +if ( +true +) { + +call test_task after foo { +input: bowchicka = "bowchicka" +} +scatter (i in range(3)) { +call test_task as bar { +input: bowchicka = i * 42 +} +} +} + +} +task +test_task +{ +command <<<>>> +input { +String bowchicka +} +parameter_meta { +bowchicka: { +type: "String", +label: "Bowchicka" +} +} +} + +struct SpliceJunctionMotifs { +Int noncanonical_motifs +Int GT_AG_and_CT_AC_motif +} diff --git a/wdl-format/tests/format/if_then_else_exprs/source.formatted.wdl b/wdl-format/tests/format/if_then_else_exprs/source.formatted.wdl new file mode 100644 index 00000000..2042a3f7 --- /dev/null +++ b/wdl-format/tests/format/if_then_else_exprs/source.formatted.wdl @@ -0,0 +1,25 @@ +## This is a test WDL file for if-then-else expressions +version 1.0 + +workflow if_then_else_exprs { + input { + Int a + Int b + } + + Int c = ( + if (a < b) + then a + else b + ) + + Int d = ( + if (a < b) + then a + else b + ) + + output { + Int result = c + } +} diff --git a/wdl-format/tests/format/if_then_else_exprs/source.wdl b/wdl-format/tests/format/if_then_else_exprs/source.wdl new file mode 100644 index 00000000..322bf08c --- /dev/null +++ b/wdl-format/tests/format/if_then_else_exprs/source.wdl @@ -0,0 +1,23 @@ +## This is a test WDL file for if-then-else expressions +version 1.0 +workflow if_then_else_exprs { + input { + Int a + Int b + } + + Int c = ( + if (a < b) + then a + else b + ) + + Int d = + if (a < b) + then a + else b + + output { + Int result = c + } +} diff --git a/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl new file mode 100644 index 00000000..f5eb2056 --- /dev/null +++ b/wdl-format/tests/format/imports_with_both_comments/source.formatted.wdl @@ -0,0 +1,26 @@ +version 1.1 + +# fileA 1.1 +import # fileA 1.2 + # fileA 2.1 + # fileA 2.2 + "fileA.wdl" # fileA 2.3 + # fileA 3.1 + as # fileA 3.2 + # fileA 4.1 + bar # fileA 4.2 + # fileA 5.1 + alias # fileA 5.2 + # fileA 6.1 + qux # fileA 6.2 + # fileA 7.1 + as # fileA 7.2 + # fileA 8.1 + Qux # fileA 8.2 +# this comment belongs to fileB +import "fileB.wdl" as foo # also fileB +# this comment belongs to fileC +import "fileC.wdl" # also fileC + +workflow test { +} diff --git a/wdl-format/tests/format/imports_with_both_comments/source.wdl b/wdl-format/tests/format/imports_with_both_comments/source.wdl new file mode 100644 index 00000000..1c32809f --- /dev/null +++ b/wdl-format/tests/format/imports_with_both_comments/source.wdl @@ -0,0 +1,23 @@ +version 1.1 +# this comment belongs to fileB +import "fileB.wdl" as foo # also fileB +# fileA 1.1 +import # fileA 1.2 +# fileA 2.1 +# fileA 2.2 +"fileA.wdl" # fileA 2.3 +# fileA 3.1 +as 
# fileA 3.2 +# fileA 4.1 +bar # fileA 4.2 +# fileA 5.1 +alias # fileA 5.2 +# fileA 6.1 +qux # fileA 6.2 +# fileA 7.1 +as # fileA 7.2 +# fileA 8.1 +Qux # fileA 8.2 +workflow test {} +# this comment belongs to fileC +import "fileC.wdl" # also fileC diff --git a/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl new file mode 100644 index 00000000..cb225b41 --- /dev/null +++ b/wdl-format/tests/format/imports_with_inline_comments/source.formatted.wdl @@ -0,0 +1,15 @@ +version 1.0 + +import # fileA 1 + "fileA.wdl" # fileA 2 + as # fileA 3 + bar # fileA 4 + alias # fileA 5 + qux # fileA 6 + as # fileA 7 + Qux # fileA 8 +import "fileB.wdl" as foo # fileB +import "fileC.wdl" # fileC + +workflow test { +} diff --git a/wdl-format/tests/format/imports_with_inline_comments/source.wdl b/wdl-format/tests/format/imports_with_inline_comments/source.wdl new file mode 100644 index 00000000..f633e72d --- /dev/null +++ b/wdl-format/tests/format/imports_with_inline_comments/source.wdl @@ -0,0 +1,12 @@ +version 1.0 +import "fileB.wdl" as foo # fileB +workflow test {} +import "fileC.wdl" # fileC +import # fileA 1 +"fileA.wdl" # fileA 2 +as # fileA 3 +bar # fileA 4 +alias # fileA 5 +qux # fileA 6 +as # fileA 7 +Qux # fileA 8 diff --git a/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl new file mode 100644 index 00000000..c3a8d177 --- /dev/null +++ b/wdl-format/tests/format/imports_with_no_comments/source.formatted.wdl @@ -0,0 +1,8 @@ +version 1.1 + +import "fileA.wdl" as bar alias cows as horses alias cats as dogs +import "fileB.wdl" as foo +import "fileC.wdl" alias qux as Qux + +workflow test { +} diff --git a/wdl-format/tests/format/imports_with_no_comments/source.wdl b/wdl-format/tests/format/imports_with_no_comments/source.wdl new file mode 100644 index 00000000..e69a1a72 --- /dev/null +++ b/wdl-format/tests/format/imports_with_no_comments/source.wdl @@ -0,0 +1,7 @@ + version 1.1 + + import "fileB.wdl" as foo + import "fileA.wdl" as bar alias cows as horses + alias cats as dogs + workflow test {} + import "fileC.wdl" alias qux as Qux diff --git a/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl b/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl new file mode 100644 index 00000000..ab1d380a --- /dev/null +++ b/wdl-format/tests/format/imports_with_preceding_comments/source.formatted.wdl @@ -0,0 +1,26 @@ +version 1.1 + +# fileA 1 +import + # fileA 2.1 + # fileA 2.2 + "fileA.wdl" + # fileA 3 + as + # fileA 4 + bar + # fileA 5 + alias + # fileA 6 + qux + # fileA 7 + as + # fileA 8 + Qux +# this comment belongs to fileB +import "fileB.wdl" as foo +# this comment belongs to fileC +import "fileC.wdl" + +workflow test { +} diff --git a/wdl-format/tests/format/imports_with_preceding_comments/source.wdl b/wdl-format/tests/format/imports_with_preceding_comments/source.wdl new file mode 100644 index 00000000..a27e7a4f --- /dev/null +++ b/wdl-format/tests/format/imports_with_preceding_comments/source.wdl @@ -0,0 +1,23 @@ +version 1.1 +workflow test {} +# this comment belongs to fileC +import "fileC.wdl" +# this comment belongs to fileB +import "fileB.wdl" as foo +# fileA 1 +import +# fileA 2.1 +# fileA 2.2 +"fileA.wdl" +# fileA 3 +as +# fileA 4 +bar +# fileA 5 +alias +# fileA 6 +qux +# fileA 7 +as +# fileA 8 +Qux diff --git 
a/wdl-format/tests/format/interrupt_example/source.formatted.wdl b/wdl-format/tests/format/interrupt_example/source.formatted.wdl new file mode 100644 index 00000000..4797ab7c --- /dev/null +++ b/wdl-format/tests/format/interrupt_example/source.formatted.wdl @@ -0,0 +1,13 @@ +version # interrupt + 1.2 # how far should '1.2' be indented? + +workflow + # interrupt + test # should this be indented? + # interrupt +{ + meta # interrupt + { # how far should this bracket be indented? + } + +} diff --git a/wdl-format/tests/format/interrupt_example/source.wdl b/wdl-format/tests/format/interrupt_example/source.wdl new file mode 100644 index 00000000..30e66728 --- /dev/null +++ b/wdl-format/tests/format/interrupt_example/source.wdl @@ -0,0 +1,10 @@ +version # interrupt +1.2 # how far should '1.2' be indented? + +workflow +# interrupt +test # should this be indented? +# interrupt +{ meta # interrupt +{ # how far should this bracket be indented? +}} \ No newline at end of file diff --git a/wdl-format/tests/format/not_covered_by_other_tests/source.formatted.wdl b/wdl-format/tests/format/not_covered_by_other_tests/source.formatted.wdl new file mode 100644 index 00000000..a181a9fd --- /dev/null +++ b/wdl-format/tests/format/not_covered_by_other_tests/source.formatted.wdl @@ -0,0 +1,83 @@ +## This is a WDL file with Nodes not covered by other tests +version 1.2 + +task test1 { + meta { + } + + parameter_meta { + } + + input { + String? name = None + Float exponent = 2.7 ** 3 + } + + Pair[String, Float] literal = ("hello", 3.14 - 6.8) + + Boolean flag = true + Int modulo = 42 % 7 + + command # my command block + <<< + echo 'hello ~{default="world" name}' + echo '~{true="good" false="bad" flag}bye' + >>> + + output { + Int math = 42 / 7 + } + + hints { + inputs: input { + a: hints { + foo: "bar", + } + } + f: [ + 1, + 2, + 3, + ] + g: { + foo: "bar", + } + outputs: output { + foo: hints { + a: "a", + b: "b", + c: "c", + }, + baz.bar.qux: hints { + foo: "foo", + bar: "bar", + baz: "baz", + }, + } + } +} + +workflow test2 { + Pair[String, Float] literal = ("hello", 3.14 - 6.8) + + output { + Int math = 42 / 7 + } + + hints { + allow_nested_inputs: true + a: true + b: 1 + c: 1.0 + d: -1 + e: "foo" + f: [ + 1, + 2, + 3, + ] + g: { + foo: "bar" + } + } +} diff --git a/wdl-format/tests/format/not_covered_by_other_tests/source.wdl b/wdl-format/tests/format/not_covered_by_other_tests/source.wdl new file mode 100644 index 00000000..41aacd30 --- /dev/null +++ b/wdl-format/tests/format/not_covered_by_other_tests/source.wdl @@ -0,0 +1,55 @@ +## This is a WDL file with Nodes not covered by other tests +version 1.2 +task test1 { + parameter_meta {} + output {Int math = 42 / 7} + hints {inputs: input { + a: hints { + foo: "bar" + } + } + f: [1, 2, 3] + g: { foo: "bar" } + outputs: output { + foo: hints { + a: "a", + b: "b", + c: "c", + }, + baz.bar.qux: hints { + foo: "foo", + bar: "bar", + baz: "baz", + }, + }} + command # my command block + { + echo 'hello ${default='world' name}' + echo '~{false="bad" true='good' flag}bye' + } + Pair[String, Float] literal = ("hello",3.14-6.8) + + + + Boolean flag = true + Int modulo = 42 % 7 + input { + String? 
name = None + Float exponent = 2.7**3 + } + meta {} +} +workflow test2 { + output {Int math = 42 / 7} + hints { + allow_nested_inputs: true + a: true + b: 1 + c: 1.0 + d: -1 + e: "foo" + f: [1, 2, 3] + g: { foo: "bar" } + } + Pair[String, Float] literal = ("hello",3.14-6.8) +} \ No newline at end of file diff --git a/wdl-format/tests/format/seaseq-case/LICENSE.txt b/wdl-format/tests/format/seaseq-case/LICENSE.txt new file mode 100644 index 00000000..33522130 --- /dev/null +++ b/wdl-format/tests/format/seaseq-case/LICENSE.txt @@ -0,0 +1,205 @@ +'source.wdl' obtained from: https://github.com/stjude/seaseq/blob/49493a7097e655671b915171e6debe40fa284200/seaseq-case.wdl +on the date 08-05-2024. +It was accompanied by the following license: + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/wdl-format/tests/format/seaseq-case/source.formatted.wdl b/wdl-format/tests/format/seaseq-case/source.formatted.wdl new file mode 100644 index 00000000..812ee076 --- /dev/null +++ b/wdl-format/tests/format/seaseq-case/source.formatted.wdl @@ -0,0 +1,951 @@ +version 1.0 + +import "workflows/tasks/bedtools.wdl" +import "workflows/tasks/bowtie.wdl" +import "workflows/tasks/fastqc.wdl" +import "workflows/tasks/macs.wdl" +import "workflows/tasks/rose.wdl" +import "workflows/tasks/runspp.wdl" +import "workflows/tasks/samtools.wdl" +import "workflows/tasks/seaseq_util.wdl" as util +import "workflows/tasks/sicer.wdl" +import "workflows/tasks/sortbed.wdl" +import "workflows/tasks/sratoolkit.wdl" as sra +import "workflows/workflows/bamtogff.wdl" +import "workflows/workflows/mapping.wdl" +import "workflows/workflows/motifs.wdl" +import "workflows/workflows/visualization.wdl" as viz + +workflow seaseq { + meta { + title: "SEAseq Analysis" + summary: "Single-End Antibody Sequencing (SEAseq) Pipeline" + description: "A comprehensive automated computational pipeline for all ChIP-Seq/CUT&RUN data analysis." + version: "2.0.0" + details: { + citation: "https://doi.org/10.1186/s12859-022-04588-z", + contactEmail: "modupeore.adetunji@stjude.org", + contactOrg: "St Jude Children's Research Hospital", + contactUrl: "", + upstreamLicenses: "MIT", + upstreamUrl: "https://github.com/stjude/seaseq", + whatsNew: [ + { + version: "2.0", + changes: [ + "version of case/sample only", + "single-end sequencing with input/control sequencing data", + "Initial release", + ], + }, + ], + } + parameter_group: { + reference_genome: { + title: "Reference genome", + description: "Genome specific files. e.g. reference FASTA, GTF, blacklist, motif databases, FASTA index, bowtie index .", + help: "Input reference genome files as defined. 
If some genome data are missing then analyses using such data will be skipped.", + }, + input_genomic_data: { + title: "Input FASTQ data", + description: "Genomic input files for experiment.", + help: "Input one or more sample data and/or SRA identifiers.", + }, + analysis_parameter: { + title: "Analysis parameter", + description: "Analysis settings needed for experiment.", + help: "Analysis settings; such output analysis file name.", + }, + } + } + + parameter_meta { + reference: { + description: "Reference FASTA file", + group: "reference_genome", + patterns: [ + "*.fa", + "*.fasta", + "*.fa.gz", + "*.fasta.gz", + ], + } + blacklist: { + description: "Blacklist file in BED format", + group: "reference_genome", + help: "If defined, blacklist regions listed are excluded after reference alignment.", + patterns: [ + "*.bed", + "*.bed.gz", + ], + } + gtf: { + description: "gene annotation file (.gtf)", + group: "reference_genome", + help: "Input gene annotation file from RefSeq or GENCODE (.gtf).", + patterns: [ + "*.gtf", + "*.gtf.gz", + "*.gff", + "*.gff.gz", + "*.gff3", + "*.gff3.gz", + ], + } + bowtie_index: { + description: "bowtie v1 index files (*.ebwt)", + group: "reference_genome", + help: "If not defined, bowtie v1 index files are generated, will take a longer compute time.", + patterns: [ + "*.ebwt", + ], + } + motif_databases: { + description: "One or more of the MEME suite motif databases (*.meme)", + group: "reference_genome", + help: "Input one or more motif databases available from the MEME suite (https://meme-suite.org/meme/db/motifs).", + patterns: [ + "*.meme", + ], + } + sample_sraid: { + description: "One or more sample SRA (Sequence Read Archive) run identifiers", + group: "input_genomic_data", + help: "Input publicly available FASTQs (SRRs). Multiple SRRs are separated by commas (,).", + example: "SRR12345678", + } + sample_fastq: { + description: "One or more sample FASTQs", + group: "input_genomic_data", + help: "Upload zipped FASTQ files.", + patterns: [ + "*.fq.gz", + "*.fastq.gz", + ], + } + results_name: { + description: "Experiment results custom name", + group: "analysis_parameter", + help: "Input preferred analysis results name (recommended if multiple FASTQs are provided).", + example: "AllMerge_mapped", + } + run_motifs: { + description: "Perform Motif Analysis", + group: "analysis_parameter", + help: "Setting this means Motif Discovery and Enrichment analysis will be performed.", + example: true, + } + } + + input { + # group: reference_genome + File reference + File? spikein_reference + File? blacklist + File gtf + Array[File]? bowtie_index + Array[File]? spikein_bowtie_index + Array[File]? motif_databases + + # group: input_genomic_data + Array[String]? sample_sraid + Array[File]? sample_fastq + + # group: analysis_parameter + String? 
results_name + Boolean run_motifs = true + } + + String pipeline_ver = "v2.0.0" + + ### ---------------------------------------- ### + ### ------------ S E C T I O N 1 ----------- ### + ### ------ Pre-process Analysis Files ------ ### + ### ---------------------------------------- ### + + # Process SRRs + if (defined(sample_sraid)) { + # Download sample file(s) from SRA database + # outputs: + # fastqdump.fastqfile : downloaded sample files in fastq.gz format + Array[String] string_sra = [ + 1, + ] #buffer to allow for sra_id optionality + Array[String] s_sraid = select_first([ + sample_sraid, + string_sra, + ]) + scatter (eachsra in s_sraid) { + call sra.fastqdump { input: + sra_id = eachsra, + cloud = false, + } + } # end scatter each sra + + Array[File] sample_srafile = flatten(fastqdump.fastqfile) + } # end if sample_sraid + + # Generating INDEX files + #1. Bowtie INDEX files if not provided + if (!defined(bowtie_index)) { + # create bowtie index when not provided + call bowtie.index as bowtie_idx { input: reference = reference } + } + #2. Make sure indexes are six else build indexes + if (defined(bowtie_index)) { + # check total number of bowtie indexes provided + Array[String] string_bowtie_index = [ + 1, + ] #buffer to allow for bowtie_index optionality + Array[File] int_bowtie_index = select_first([ + bowtie_index, + string_bowtie_index, + ]) + if (length(int_bowtie_index) != 6) { + # create bowtie index if 6 index files aren't provided + call bowtie.index as bowtie_idx_2 { input: reference = reference } + } + } + Array[File] actual_bowtie_index = select_first([ + bowtie_idx_2.bowtie_indexes, + bowtie_idx.bowtie_indexes, + bowtie_index, + ]) + + # Spike-in DNA + #3. Bowtie INDEX files if not provided + String string_spikein = "1" + Array[String] string_spikein_buffer = [ + 1, + ] + if (!defined(spikein_bowtie_index) && defined(spikein_reference)) { + # create bowtie index on spikein genome + call bowtie.index as spikein_bowtie_idx { input: reference = select_first([ + spikein_reference, + string_spikein, + ]) } + } + + #4. 
Make sure indexes are six else build indexes for Spike-in DNA + if (defined(spikein_bowtie_index)) { + # check total number of bowtie indexes provided + Array[File] int_spikein_bowtie_index = select_first([ + spikein_bowtie_index, + string_spikein_buffer, + ]) + if (length(int_spikein_bowtie_index) != 6) { + # create bowtie index if 6 index files aren't provided + call bowtie.index as spikein_bowtie_idx_2 { input: reference = select_first([ + spikein_reference, + string_spikein, + ]) } + } + } + Array[File] actual_spikein_bowtie_index = select_first([ + spikein_bowtie_idx_2.bowtie_indexes, + spikein_bowtie_idx.bowtie_indexes, + spikein_bowtie_index, + string_spikein_buffer, + ]) + + # FASTA faidx and chromsizes and effective genome size + call samtools.faidx as samtools_faidx { + # create FASTA index and chrom sizes files + input: reference = reference } + call util.effective_genome_size as egs { + # effective genome size for FASTA + input: reference = reference } + + # Process FASTQs + if (defined(sample_fastq)) { + + Array[String] string_fastq = [ + 1, + ] #buffer to allow for fastq optionality + Array[File] s_fastq = select_first([ + sample_fastq, + string_fastq, + ]) + + Array[File] sample_fastqfile = s_fastq + } + Array[File] original_fastqfiles = flatten(select_all([ + sample_srafile, + sample_fastqfile, + ])) + + ### ------------------------------------------------- ### + ### ---------------- S E C T I O N 1 ---------------- ### + ### ----------- B: remove Spike-IN reads ------------ ### + ### ------------------------------------------------- ### + + # if multiple fastqfiles are provided + Boolean multi_fastq = ( + if length(original_fastqfiles) > 1 + then true + else false + ) + Boolean one_fastq = ( + if length(original_fastqfiles) == 1 + then true + else false + ) + + if (defined(spikein_bowtie_index) || defined(spikein_reference)) { + scatter (eachfastq in original_fastqfiles) { + call fastqc.fastqc as spikein_indv_fastqc { input: + inputfile = eachfastq, + default_location = ( + if (one_fastq) + then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/FastQC" + else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/FastQC" + ), + } + call util.basicfastqstats as spikein_indv_bfs { input: + fastqfile = eachfastq, + default_location = ( + if (one_fastq) + then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" + else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" + ), + } + call bowtie.spikein_SE as spikein_indv_map { input: + fastqfile = eachfastq, + index_files = actual_spikein_bowtie_index, + metricsfile = spikein_indv_bfs.metrics_out, + default_location = ( + if (one_fastq) + then sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" + else "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/SpikeIn/SummaryStats" + ), + } + } + + Array[File] spikein_fastqfiles = spikein_indv_map.unaligned + } + Array[File] fastqfiles = select_first([ + spikein_fastqfiles, + original_fastqfiles, + ]) + + ### ------------------------------------------------- ### + ### ---------------- S E C T I O N 2 ---------------- ### + ### ---- A: analysis if multiple FASTQs provided ---- ### + ### ------------------------------------------------- ### + + if (multi_fastq) { + scatter (eachfastq in fastqfiles) { + # Execute analysis on each fastq file provided + # Analysis executed: + # FastQC + # FASTQ read length distribution + # Reference Alignment using Bowtie (-k2 -m2) + # 
Convert SAM to BAM + # FastQC on BAM files + # Remove Blacklists (if provided) + # Remove read duplicates + # Summary statistics on FASTQs + # Combine html files into one for easy viewing + + call fastqc.fastqc as indv_fastqc { input: + inputfile = eachfastq, + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/QC/FastQC", + } + + call util.basicfastqstats as indv_bfs { input: + fastqfile = eachfastq, + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/QC/SummaryStats", + } + + call mapping.mapping as indv_mapping { input: + fastqfile = eachfastq, + index_files = actual_bowtie_index, + metricsfile = indv_bfs.metrics_out, + blacklist = blacklist, + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/BAM_files", + } + + call fastqc.fastqc as indv_bamfqc { input: + inputfile = indv_mapping.sorted_bam, + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/QC/FastQC", + } + + call runspp.runspp as indv_runspp { input: bamfile = select_first([ + indv_mapping.bklist_bam, + indv_mapping.sorted_bam, + ]) } + + call bedtools.bamtobed as indv_bamtobed { input: bamfile = select_first([ + indv_mapping.bklist_bam, + indv_mapping.sorted_bam, + ]) } + + call util.evalstats as indv_summarystats { input: + fastq_type = "SEAseq Sample FASTQ", + bambed = indv_bamtobed.bedfile, + sppfile = indv_runspp.spp_out, + fastqczip = indv_fastqc.zipfile, + bamflag = indv_mapping.bam_stats, + rmdupflag = indv_mapping.mkdup_stats, + bkflag = indv_mapping.bklist_stats, + fastqmetrics = indv_bfs.metrics_out, + default_location = "SAMPLE/" + sub(basename(eachfastq), ".fastq.gz|.fq.gz", "") + "/QC/SummaryStats", + } + } # end scatter (for each sample fastq) + + # MERGE BAM FILES + # Execute analysis on merge bam file + # Analysis executed: + # Merge BAM (if more than 1 fastq is provided) + # FastQC on Merge BAM (AllMerge__mapped) + + # merge bam files and perform fasTQC if more than one is provided + call util.mergehtml { input: + htmlfiles = indv_summarystats.xhtml, + txtfiles = indv_summarystats.textfile, + default_location = "SAMPLE", + outputfile = "AllMapped_" + length(fastqfiles) + "_seaseq-summary-stats.html", + } + + call samtools.mergebam { input: + bamfiles = indv_mapping.sorted_bam, + metricsfiles = indv_bfs.metrics_out, + default_location = ( + if defined(results_name) + then results_name + "/BAM_files" + else "AllMerge_" + length(indv_mapping.sorted_bam) + "_mapped" + "/BAM_files" + ), + outputfile = ( + if defined(results_name) + then results_name + ".sorted.bam" + else "AllMerge_" + length(fastqfiles) + "_mapped.sorted.bam" + ), + } + + call fastqc.fastqc as mergebamfqc { input: + inputfile = mergebam.mergebam, + default_location = sub(basename(mergebam.mergebam), ".sorted.b.*$", "") + "/QC/FastQC", + } + + call samtools.indexstats as mergeindexstats { input: + bamfile = mergebam.mergebam, + default_location = sub(basename(mergebam.mergebam), ".sorted.b.*$", "") + "/BAM_files", + } + + if (defined(blacklist)) { + # remove blacklist regions + String string_blacklist = "" #buffer to allow for blacklist optionality + File blacklist_file = select_first([ + blacklist, + string_blacklist, + ]) + call bedtools.intersect as merge_rmblklist { input: + fileA = mergebam.mergebam, + fileB = blacklist_file, + default_location = sub(basename(mergebam.mergebam), ".sorted.b.*$", "") + "/BAM_files", + nooverlap = true, + } + call samtools.indexstats as merge_bklist { input: + bamfile = 
merge_rmblklist.intersect_out, + default_location = sub(basename(mergebam.mergebam), ".sorted.b.*$", "") + "/BAM_files", + } + } # end if blacklist provided + + File mergebam_afterbklist = select_first([ + merge_rmblklist.intersect_out, + mergebam.mergebam, + ]) + + call samtools.markdup as merge_markdup { input: + bamfile = mergebam_afterbklist, + default_location = sub(basename(mergebam_afterbklist), ".sorted.b.*$", "") + "/BAM_files", + } + + call samtools.indexstats as merge_mkdup { input: + bamfile = merge_markdup.mkdupbam, + default_location = sub(basename(mergebam_afterbklist), ".sorted.b.*$", "") + "/BAM_files", + } + } # end if length(fastqfiles) > 1: multi_fastq + + ### ---------------------------------------- ### + ### ------------ S E C T I O N 2 ----------- ### + ### -- B: analysis if one FASTQ provided --- ### + ### ---------------------------------------- ### + + # if only one fastqfile is provided + if (one_fastq) { + # Execute analysis on each fastq file provided + # Analysis executed: + # FastQC + # FASTQ read length distribution + # Reference Alignment using Bowtie (-k2 -m2) + # Convert SAM to BAM + # FastQC on BAM files + # Remove Blacklists (if provided) + # Remove read duplicates + # Summary statistics on FASTQs + # Combine html files into one for easy viewing + + call fastqc.fastqc as uno_fastqc { input: + inputfile = fastqfiles[0], + default_location = sub(basename(fastqfiles[0]), ".fastq.gz|.fq.gz", "") + "/QC/FastQC", + } + + call util.basicfastqstats as uno_bfs { input: + fastqfile = fastqfiles[0], + default_location = sub(basename(fastqfiles[0]), ".fastq.gz|.fq.gz", "") + "/QC/SummaryStats", + } + + call mapping.mapping { input: + fastqfile = fastqfiles[0], + index_files = actual_bowtie_index, + metricsfile = uno_bfs.metrics_out, + blacklist = blacklist, + default_location = sub(basename(fastqfiles[0]), ".fastq.gz|.fq.gz", "") + "/BAM_files", + } + + call fastqc.fastqc as uno_bamfqc { input: + inputfile = mapping.sorted_bam, + default_location = sub(basename(fastqfiles[0]), ".fastq.gz|.fq.gz", "") + "/QC/FastQC", + } + + call runspp.runspp as uno_runspp { input: bamfile = select_first([ + mapping.bklist_bam, + mapping.sorted_bam, + ]) } + + call bedtools.bamtobed as uno_bamtobed { input: bamfile = select_first([ + mapping.bklist_bam, + mapping.sorted_bam, + ]) } + } # end if length(fastqfiles) == 1: one_fastq + + ### ---------------------------------------- ### + ### ------------ S E C T I O N 3 ----------- ### + ### ----------- ChIP-seq analysis ---------- ### + ### ---------------------------------------- ### + + # ChIP-seq and downstream analysis + # Execute analysis on merge bam file + # Analysis executed: + # FIRST: Check if reads are mapped + # Peaks identification (SICER, MACS, ROSE) + # Motif analysis + # Complete Summary statistics + + #collate correct files for downstream analysis + File sample_bam = select_first([ + mergebam_afterbklist, + mapping.bklist_bam, + mapping.sorted_bam, + ]) + + call macs.macs { input: + bamfile = sample_bam, + pvalue = "1e-9", + keep_dup = "auto", + egs = egs.genomesize, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "-p9_kd-auto", + coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "_p9_kd-auto", + } + + call util.addreadme { input: default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS" } + + call macs.macs as all { input: + bamfile 
= sample_bam, + pvalue = "1e-9", + keep_dup = "all", + egs = egs.genomesize, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "-p9_kd-all", + coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "_p9_kd-all", + } + + call macs.macs as nomodel { input: + bamfile = sample_bam, + nomodel = true, + egs = egs.genomesize, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "-nm", + coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + basename(sample_bam, ".bam") + "_nm", + } + + call bamtogff.bamtogff { input: + gtffile = gtf, + chromsizes = samtools_faidx.chromsizes, + bamfile = select_first([ + merge_markdup.mkdupbam, + mapping.mkdup_bam, + ]), + bamindex = select_first([ + merge_mkdup.indexbam, + mapping.mkdup_index, + ]), + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/BAM_Density", + } + + call bedtools.bamtobed as forsicerbed { input: bamfile = select_first([ + merge_markdup.mkdupbam, + mapping.mkdup_bam, + ]) } + + call sicer.sicer { input: + bedfile = forsicerbed.bedfile, + chromsizes = samtools_faidx.chromsizes, + genome_fraction = egs.genomefraction, + fragmentlength = select_first([ + uno_bfs.readlength, + mergebam.avg_readlength, + ]), + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/BROAD_peaks", + coverage_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/BROAD_peaks", + } + + call rose.rose { input: + gtffile = gtf, + bamfile = select_first([ + merge_markdup.mkdupbam, + mapping.mkdup_bam, + ]), + bamindex = select_first([ + merge_mkdup.indexbam, + mapping.mkdup_index, + ]), + bedfile_auto = macs.peakbedfile, + bedfile_all = all.peakbedfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS/STITCHED_peaks", + } + + call runspp.runspp { input: bamfile = sample_bam } + + call util.peaksanno { input: + gtffile = gtf, + bedfile = macs.peakbedfile, + chromsizes = samtools_faidx.chromsizes, + summitfile = macs.summitsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/NARROW_peaks" + "/" + sub(basename(macs.peakbedfile), "_peaks.bed", ""), + } + + call util.peaksanno as all_peaksanno { input: + gtffile = gtf, + bedfile = all.peakbedfile, + chromsizes = samtools_faidx.chromsizes, + summitfile = all.summitsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/NARROW_peaks" + "/" + sub(basename(all.peakbedfile), "_peaks.bed", ""), + } + + call util.peaksanno as nomodel_peaksanno { input: + gtffile = gtf, + bedfile = nomodel.peakbedfile, + chromsizes = samtools_faidx.chromsizes, + summitfile = nomodel.summitsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/NARROW_peaks" + "/" + sub(basename(nomodel.peakbedfile), "_peaks.bed", ""), + } + + call util.peaksanno as sicer_peaksanno { input: + gtffile = gtf, + bedfile = sicer.scoreisland, + chromsizes = samtools_faidx.chromsizes, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/PEAKS_Annotation/BROAD_peaks", + } + + # Motif Analysis + if (run_motifs) { + call motifs.motifs { input: + reference = reference, + reference_index = samtools_faidx.faidx_file, + bedfile = macs.peakbedfile, + motif_databases = 
motif_databases, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/MOTIFS", + } + + call util.flankbed { input: + bedfile = macs.summitsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/MOTIFS", + } + + call motifs.motifs as flank { input: + reference = reference, + reference_index = samtools_faidx.faidx_file, + bedfile = flankbed.flankbedfile, + motif_databases = motif_databases, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/MOTIFS", + } + } + + call viz.visualization { input: + wigfile = macs.wigfile, + chromsizes = samtools_faidx.chromsizes, + xlsfile = macs.peakxlsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + sub(basename(macs.peakbedfile), "_peaks.bed", ""), + } + + call viz.visualization as vizall { input: + wigfile = all.wigfile, + chromsizes = samtools_faidx.chromsizes, + xlsfile = all.peakxlsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + sub(basename(all.peakbedfile), "_peaks.bed", ""), + } + + call viz.visualization as viznomodel { input: + wigfile = nomodel.wigfile, + chromsizes = samtools_faidx.chromsizes, + xlsfile = nomodel.peakxlsfile, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/NARROW_peaks" + "/" + sub(basename(nomodel.peakbedfile), "_peaks.bed", ""), + } + + call viz.visualization as vizsicer { input: + wigfile = sicer.wigfile, + chromsizes = samtools_faidx.chromsizes, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/COVERAGE_files/BROAD_peaks", + } + + call bedtools.bamtobed as finalbed { input: bamfile = sample_bam } + + call sortbed.sortbed { input: bedfile = finalbed.bedfile } + + call bedtools.intersect { input: + fileA = macs.peakbedfile, + fileB = sortbed.sortbed_out, + countoverlap = true, + sorted = true, + } + + ### ---------------------------------------- ### + ### ------------ S E C T I O N 4 ----------- ### + ### ---------- Summary Statistics ---------- ### + ### ---------------------------------------- ### + + String string_qual = "" #buffer to allow for optionality in if statement + + #SUMMARY STATISTICS + if (one_fastq) { + call util.evalstats as uno_summarystats { + # SUMMARY STATISTICS of sample file (only 1 sample file provided) + input: + fastq_type = "SEAseq Sample FASTQ", + bambed = finalbed.bedfile, + sppfile = runspp.spp_out, + fastqczip = select_first([ + uno_bamfqc.zipfile, + string_qual, + ]), + bamflag = mapping.bam_stats, + rmdupflag = mapping.mkdup_stats, + bkflag = mapping.bklist_stats, + fastqmetrics = uno_bfs.metrics_out, + countsfile = intersect.intersect_out, + peaksxls = macs.peakxlsfile, + enhancers = rose.enhancers, + superenhancers = rose.super_enhancers, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/QC/SummaryStats", + } + + call util.summaryreport as uno_overallsummary { + # Presenting all quality stats for the analysis + input: + overallqc_html = uno_summarystats.xhtml, + overallqc_txt = uno_summarystats.textfile, + } + } # end if one_fastq + + if (multi_fastq) { + call util.evalstats as merge_summarystats { + # SUMMARY STATISTICS of all samples files (more than 1 sample file provided) + input: + fastq_type = "SEAseq Comprehensive", + bambed = finalbed.bedfile, + sppfile = runspp.spp_out, + fastqczip = select_first([ + mergebamfqc.zipfile, + string_qual, + ]), + bamflag = mergeindexstats.flagstats, + rmdupflag = merge_mkdup.flagstats, + 
bkflag = merge_bklist.flagstats, + countsfile = intersect.intersect_out, + peaksxls = macs.peakxlsfile, + enhancers = rose.enhancers, + superenhancers = rose.super_enhancers, + default_location = sub(basename(sample_bam), ".sorted.b.*$", "") + "/QC/SummaryStats", + } + + call util.summaryreport as merge_overallsummary { + # Presenting all quality stats for the analysis + input: + sampleqc_html = mergehtml.xhtml, + overallqc_html = merge_summarystats.xhtml, + sampleqc_txt = mergehtml.mergetxt, + overallqc_txt = merge_summarystats.textfile, + } + } # end if multi_fastq + + output { + #SPIKE-IN + Array[File?]? spikein_indv_s_htmlfile = spikein_indv_fastqc.htmlfile + Array[File?]? spikein_indv_s_zipfile = spikein_indv_fastqc.zipfile + Array[File?]? spikein_s_metrics_out = spikein_indv_map.mapping_output + + #FASTQC + Array[File?]? indv_s_htmlfile = indv_fastqc.htmlfile + Array[File?]? indv_s_zipfile = indv_fastqc.zipfile + Array[File?]? indv_s_bam_htmlfile = indv_bamfqc.htmlfile + Array[File?]? indv_s_bam_zipfile = indv_bamfqc.zipfile + File? s_mergebam_htmlfile = mergebamfqc.htmlfile + File? s_mergebam_zipfile = mergebamfqc.zipfile + File? uno_s_htmlfile = uno_fastqc.htmlfile + File? uno_s_zipfile = uno_fastqc.zipfile + File? uno_s_bam_htmlfile = uno_bamfqc.htmlfile + File? uno_s_bam_zipfile = uno_bamfqc.zipfile + + #BASICMETRICS + Array[File?]? s_metrics_out = indv_bfs.metrics_out + File? uno_s_metrics_out = uno_bfs.metrics_out + + #BAMFILES + Array[File?]? indv_s_sortedbam = indv_mapping.sorted_bam + Array[File?]? indv_s_indexbam = indv_mapping.bam_index + Array[File?]? indv_s_bkbam = indv_mapping.bklist_bam + Array[File?]? indv_s_bkindexbam = indv_mapping.bklist_index + Array[File?]? indv_s_rmbam = indv_mapping.mkdup_bam + Array[File?]? indv_s_rmindexbam = indv_mapping.mkdup_index + File? uno_s_sortedbam = mapping.sorted_bam + File? uno_s_indexstatsbam = mapping.bam_index + File? uno_s_bkbam = mapping.bklist_bam + File? uno_s_bkindexbam = mapping.bklist_index + File? uno_s_rmbam = mapping.mkdup_bam + File? uno_s_rmindexbam = mapping.mkdup_index + File? s_mergebamfile = mergebam.mergebam + File? s_mergebamindex = mergeindexstats.indexbam + File? s_bkbam = merge_rmblklist.intersect_out + File? s_bkindexbam = merge_bklist.indexbam + File? s_rmbam = merge_markdup.mkdupbam + File? s_rmindexbam = merge_mkdup.indexbam + + #MACS + File? peakbedfile = macs.peakbedfile + File? peakxlsfile = macs.peakxlsfile + File? summitsfile = macs.summitsfile + File? negativexlsfile = macs.negativepeaks + File? wigfile = macs.wigfile + File? all_peakbedfile = all.peakbedfile + File? all_peakxlsfile = all.peakxlsfile + File? all_summitsfile = all.summitsfile + File? all_wigfile = all.wigfile + File? all_negativexlsfile = all.negativepeaks + File? nm_peakbedfile = nomodel.peakbedfile + File? nm_peakxlsfile = nomodel.peakxlsfile + File? nm_summitsfile = nomodel.summitsfile + File? nm_wigfile = nomodel.wigfile + File? nm_negativexlsfile = nomodel.negativepeaks + File? readme_peaks = addreadme.readme_peaks + + #SICER + File? scoreisland = sicer.scoreisland + File? sicer_wigfile = sicer.wigfile + + #ROSE + File? pngfile = rose.pngfile + File? mapped_union = rose.mapped_union + File? mapped_stitch = rose.mapped_stitch + File? enhancers = rose.enhancers + File? super_enhancers = rose.super_enhancers + File? gff_file = rose.gff_file + File? gff_union = rose.gff_union + File? union_enhancers = rose.union_enhancers + File? stitch_enhancers = rose.stitch_enhancers + File? e_to_g_enhancers = rose.e_to_g_enhancers + File? 
g_to_e_enhancers = rose.g_to_e_enhancers + File? e_to_g_super_enhancers = rose.e_to_g_super_enhancers + File? g_to_e_super_enhancers = rose.g_to_e_super_enhancers + File? supergenes = rose.super_genes + File? allgenes = rose.all_genes + + #MOTIFS + File? flankbedfile = flankbed.flankbedfile + File? ame_tsv = motifs.ame_tsv + File? ame_html = motifs.ame_html + File? ame_seq = motifs.ame_seq + File? meme = motifs.meme_out + File? meme_summary = motifs.meme_summary + File? summit_ame_tsv = flank.ame_tsv + File? summit_ame_html = flank.ame_html + File? summit_ame_seq = flank.ame_seq + File? summit_meme = flank.meme_out + File? summit_meme_summary = flank.meme_summary + + #BAM2GFF + File? s_matrices = bamtogff.s_matrices + File? densityplot = bamtogff.densityplot + File? pdf_gene = bamtogff.pdf_gene + File? pdf_h_gene = bamtogff.pdf_h_gene + File? png_h_gene = bamtogff.png_h_gene + File? jpg_h_gene = bamtogff.jpg_h_gene + File? pdf_promoters = bamtogff.pdf_promoters + File? pdf_h_promoters = bamtogff.pdf_h_promoters + File? png_h_promoters = bamtogff.png_h_promoters + File? jpg_h_promoters = bamtogff.jpg_h_promoters + + #PEAKS-ANNOTATION + File? peak_promoters = peaksanno.peak_promoters + File? peak_genebody = peaksanno.peak_genebody + File? peak_window = peaksanno.peak_window + File? peak_closest = peaksanno.peak_closest + File? peak_comparison = peaksanno.peak_comparison + File? gene_comparison = peaksanno.gene_comparison + File? pdf_comparison = peaksanno.pdf_comparison + File? all_peak_promoters = all_peaksanno.peak_promoters + File? all_peak_genebody = all_peaksanno.peak_genebody + File? all_peak_window = all_peaksanno.peak_window + File? all_peak_closest = all_peaksanno.peak_closest + File? all_peak_comparison = all_peaksanno.peak_comparison + File? all_gene_comparison = all_peaksanno.gene_comparison + File? all_pdf_comparison = all_peaksanno.pdf_comparison + File? nomodel_peak_promoters = nomodel_peaksanno.peak_promoters + File? nomodel_peak_genebody = nomodel_peaksanno.peak_genebody + File? nomodel_peak_window = nomodel_peaksanno.peak_window + File? nomodel_peak_closest = nomodel_peaksanno.peak_closest + File? nomodel_peak_comparison = nomodel_peaksanno.peak_comparison + File? nomodel_gene_comparison = nomodel_peaksanno.gene_comparison + File? nomodel_pdf_comparison = nomodel_peaksanno.pdf_comparison + File? sicer_peak_promoters = sicer_peaksanno.peak_promoters + File? sicer_peak_genebody = sicer_peaksanno.peak_genebody + File? sicer_peak_window = sicer_peaksanno.peak_window + File? sicer_peak_closest = sicer_peaksanno.peak_closest + File? sicer_peak_comparison = sicer_peaksanno.peak_comparison + File? sicer_gene_comparison = sicer_peaksanno.gene_comparison + File? sicer_pdf_comparison = sicer_peaksanno.pdf_comparison + + #VISUALIZATION + File? bigwig = visualization.bigwig + File? norm_wig = visualization.norm_wig + File? tdffile = visualization.tdffile + File? n_bigwig = viznomodel.bigwig + File? n_norm_wig = viznomodel.norm_wig + File? n_tdffile = viznomodel.tdffile + File? a_bigwig = vizall.bigwig + File? a_norm_wig = vizall.norm_wig + File? a_tdffile = vizall.tdffile + File? s_bigwig = vizsicer.bigwig + File? s_norm_wig = vizsicer.norm_wig + File? s_tdffile = vizsicer.tdffile + + #QC-STATS + Array[File?]? s_qc_statsfile = indv_summarystats.statsfile + Array[File?]? s_qc_htmlfile = indv_summarystats.htmlfile + Array[File?]? s_qc_textfile = indv_summarystats.textfile + File? s_qc_mergehtml = mergehtml.mergefile + File? s_uno_statsfile = uno_summarystats.statsfile + File? 
s_uno_htmlfile = uno_summarystats.htmlfile + File? s_uno_textfile = uno_summarystats.textfile + File? statsfile = merge_summarystats.statsfile + File? htmlfile = merge_summarystats.htmlfile + File? textfile = merge_summarystats.textfile + File? summaryhtml = select_first([ + uno_overallsummary.summaryhtml, + merge_overallsummary.summaryhtml, + ]) + File? summarytxt = select_first([ + uno_overallsummary.summarytxt, + merge_overallsummary.summarytxt, + ]) + } +} diff --git a/wdl-format/tests/format/seaseq-case/source.wdl b/wdl-format/tests/format/seaseq-case/source.wdl new file mode 100644 index 00000000..94c76656 --- /dev/null +++ b/wdl-format/tests/format/seaseq-case/source.wdl @@ -0,0 +1,898 @@ +version 1.0 +import "workflows/tasks/fastqc.wdl" +import "workflows/tasks/bedtools.wdl" +import "workflows/tasks/bowtie.wdl" +import "workflows/tasks/samtools.wdl" +import "workflows/tasks/macs.wdl" +import "workflows/workflows/bamtogff.wdl" +import "workflows/tasks/sicer.wdl" +import "workflows/workflows/motifs.wdl" +import "workflows/tasks/rose.wdl" +import "workflows/tasks/seaseq_util.wdl" as util +import "workflows/workflows/visualization.wdl" as viz +import "workflows/workflows/mapping.wdl" +import "workflows/tasks/runspp.wdl" +import "workflows/tasks/sortbed.wdl" +import "workflows/tasks/sratoolkit.wdl" as sra + +workflow seaseq { + String pipeline_ver = 'v2.0.0' + + meta { + title: 'SEAseq Analysis' + summary: 'Single-End Antibody Sequencing (SEAseq) Pipeline' + description: 'A comprehensive automated computational pipeline for all ChIP-Seq/CUT&RUN data analysis.' + version: '2.0.0' + details: { + citation: 'https://doi.org/10.1186/s12859-022-04588-z', + contactEmail: 'modupeore.adetunji@stjude.org', + contactOrg: "St Jude Children's Research Hospital", + contactUrl: "", + upstreamLicenses: "MIT", + upstreamUrl: 'https://github.com/stjude/seaseq', + whatsNew: [ + { + version: "2.0", + changes: ["version of case/sample only", "single-end sequencing with input/control sequencing data", "Initial release"] + } + ] + } + parameter_group: { + reference_genome: { + title: 'Reference genome', + description: 'Genome specific files. e.g. reference FASTA, GTF, blacklist, motif databases, FASTA index, bowtie index .', + help: 'Input reference genome files as defined. If some genome data are missing then analyses using such data will be skipped.' + }, + input_genomic_data: { + title: 'Input FASTQ data', + description: 'Genomic input files for experiment.', + help: 'Input one or more sample data and/or SRA identifiers.' + }, + analysis_parameter: { + title: 'Analysis parameter', + description: 'Analysis settings needed for experiment.', + help: 'Analysis settings; such output analysis file name.' + } + } + } + input { + # group: reference_genome + File reference + File? spikein_reference + File? blacklist + File gtf + Array[File]? bowtie_index + Array[File]? spikein_bowtie_index + Array[File]? motif_databases + + # group: input_genomic_data + Array[String]? sample_sraid + Array[File]? sample_fastq + + # group: analysis_parameter + String? 
results_name + Boolean run_motifs=true + + } + + parameter_meta { + reference: { + description: 'Reference FASTA file', + group: 'reference_genome', + patterns: ["*.fa", "*.fasta", "*.fa.gz", "*.fasta.gz"] + } + blacklist: { + description: 'Blacklist file in BED format', + group: 'reference_genome', + help: 'If defined, blacklist regions listed are excluded after reference alignment.', + patterns: ["*.bed", "*.bed.gz"] + } + gtf: { + description: 'gene annotation file (.gtf)', + group: 'reference_genome', + help: 'Input gene annotation file from RefSeq or GENCODE (.gtf).', + patterns: ["*.gtf", "*.gtf.gz", "*.gff", "*.gff.gz", "*.gff3", "*.gff3.gz"] + } + bowtie_index: { + description: 'bowtie v1 index files (*.ebwt)', + group: 'reference_genome', + help: 'If not defined, bowtie v1 index files are generated, will take a longer compute time.', + patterns: ["*.ebwt"] + } + motif_databases: { + description: 'One or more of the MEME suite motif databases (*.meme)', + group: 'reference_genome', + help: 'Input one or more motif databases available from the MEME suite (https://meme-suite.org/meme/db/motifs).', + patterns: ["*.meme"] + } + sample_sraid: { + description: 'One or more sample SRA (Sequence Read Archive) run identifiers', + group: 'input_genomic_data', + help: 'Input publicly available FASTQs (SRRs). Multiple SRRs are separated by commas (,).', + example: 'SRR12345678' + } + sample_fastq: { + description: 'One or more sample FASTQs', + group: 'input_genomic_data', + help: 'Upload zipped FASTQ files.', + patterns: ["*.fq.gz", "*.fastq.gz"] + } + results_name: { + description: 'Experiment results custom name', + group: 'analysis_parameter', + help: 'Input preferred analysis results name (recommended if multiple FASTQs are provided).', + example: 'AllMerge_mapped' + } + run_motifs: { + description: 'Perform Motif Analysis', + group: 'analysis_parameter', + help: 'Setting this means Motif Discovery and Enrichment analysis will be performed.', + example: true + } + } + +### ---------------------------------------- ### +### ------------ S E C T I O N 1 ----------- ### +### ------ Pre-process Analysis Files ------ ### +### ---------------------------------------- ### + + # Process SRRs + if ( defined(sample_sraid) ) { + # Download sample file(s) from SRA database + # outputs: + # fastqdump.fastqfile : downloaded sample files in fastq.gz format + Array[String] string_sra = [1] #buffer to allow for sra_id optionality + Array[String] s_sraid = select_first([sample_sraid, string_sra]) + scatter (eachsra in s_sraid) { + call sra.fastqdump { + input : + sra_id=eachsra, + cloud=false + } + } # end scatter each sra + + Array[File] sample_srafile = flatten(fastqdump.fastqfile) + } # end if sample_sraid + + # Generating INDEX files + #1. Bowtie INDEX files if not provided + if ( !defined(bowtie_index) ) { + # create bowtie index when not provided + call bowtie.index as bowtie_idx { + input : + reference=reference + } + } + #2. 
Make sure indexes are six else build indexes + if ( defined(bowtie_index) ) { + # check total number of bowtie indexes provided + Array[String] string_bowtie_index = [1] #buffer to allow for bowtie_index optionality + Array[File] int_bowtie_index = select_first([bowtie_index, string_bowtie_index]) + if ( length(int_bowtie_index) != 6 ) { + # create bowtie index if 6 index files aren't provided + call bowtie.index as bowtie_idx_2 { + input : + reference=reference + } + } + } + Array[File] actual_bowtie_index = select_first([bowtie_idx_2.bowtie_indexes, bowtie_idx.bowtie_indexes, bowtie_index]) + + # Spike-in DNA + #3. Bowtie INDEX files if not provided + String string_spikein = "1" + Array[String] string_spikein_buffer = [1] + if ( !defined(spikein_bowtie_index) && defined(spikein_reference) ) { + # create bowtie index on spikein genome + call bowtie.index as spikein_bowtie_idx { + input : + reference=select_first([spikein_reference, string_spikein]) + } + } + + #4. Make sure indexes are six else build indexes for Spike-in DNA + if ( defined(spikein_bowtie_index) ) { + # check total number of bowtie indexes provided + Array[File] int_spikein_bowtie_index = select_first([spikein_bowtie_index, string_spikein_buffer]) + if ( length(int_spikein_bowtie_index) != 6 ) { + # create bowtie index if 6 index files aren't provided + call bowtie.index as spikein_bowtie_idx_2 { + input : + reference=select_first([spikein_reference, string_spikein]) + } + } + } + Array[File] actual_spikein_bowtie_index = select_first([spikein_bowtie_idx_2.bowtie_indexes, spikein_bowtie_idx.bowtie_indexes, spikein_bowtie_index, string_spikein_buffer]) + + # FASTA faidx and chromsizes and effective genome size + call samtools.faidx as samtools_faidx { + # create FASTA index and chrom sizes files + input : + reference=reference + } + call util.effective_genome_size as egs { + # effective genome size for FASTA + input : + reference=reference + } + + # Process FASTQs + if ( defined(sample_fastq) ) { + + Array[String] string_fastq = [1] #buffer to allow for fastq optionality + Array[File] s_fastq = select_first([sample_fastq, string_fastq]) + + Array[File] sample_fastqfile = s_fastq + } + Array[File] original_fastqfiles = flatten(select_all([sample_srafile, sample_fastqfile])) + +### ------------------------------------------------- ### +### ---------------- S E C T I O N 1 ---------------- ### +### ----------- B: remove Spike-IN reads ------------ ### +### ------------------------------------------------- ### + + # if multiple fastqfiles are provided + Boolean multi_fastq = if length(original_fastqfiles) > 1 then true else false + Boolean one_fastq = if length(original_fastqfiles) == 1 then true else false + + if ( defined(spikein_bowtie_index) || defined(spikein_reference) ) { + scatter (eachfastq in original_fastqfiles) { + call fastqc.fastqc as spikein_indv_fastqc { + input : + inputfile=eachfastq, + default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/FastQC' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/FastQC' + } + call util.basicfastqstats as spikein_indv_bfs { + input : + fastqfile=eachfastq, + default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' + } + call bowtie.spikein_SE as spikein_indv_map { + input : + fastqfile=eachfastq, + index_files=actual_spikein_bowtie_index, + 
metricsfile=spikein_indv_bfs.metrics_out, + default_location=if (one_fastq) then sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' else 'SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/SpikeIn/SummaryStats' + } + } + + Array[File] spikein_fastqfiles = spikein_indv_map.unaligned + } + Array[File] fastqfiles = select_first([spikein_fastqfiles, original_fastqfiles]) + +### ------------------------------------------------- ### +### ---------------- S E C T I O N 2 ---------------- ### +### ---- A: analysis if multiple FASTQs provided ---- ### +### ------------------------------------------------- ### + + if ( multi_fastq ) { + scatter (eachfastq in fastqfiles) { + # Execute analysis on each fastq file provided + # Analysis executed: + # FastQC + # FASTQ read length distribution + # Reference Alignment using Bowtie (-k2 -m2) + # Convert SAM to BAM + # FastQC on BAM files + # Remove Blacklists (if provided) + # Remove read duplicates + # Summary statistics on FASTQs + # Combine html files into one for easy viewing + + call fastqc.fastqc as indv_fastqc { + input : + inputfile=eachfastq, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call util.basicfastqstats as indv_bfs { + input : + fastqfile=eachfastq, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' + } + + call mapping.mapping as indv_mapping { + input : + fastqfile=eachfastq, + index_files=actual_bowtie_index, + metricsfile=indv_bfs.metrics_out, + blacklist=blacklist, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/BAM_files' + } + + call fastqc.fastqc as indv_bamfqc { + input : + inputfile=indv_mapping.sorted_bam, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call runspp.runspp as indv_runspp { + input: + bamfile=select_first([indv_mapping.bklist_bam, indv_mapping.sorted_bam]) + } + + call bedtools.bamtobed as indv_bamtobed { + input: + bamfile=select_first([indv_mapping.bklist_bam, indv_mapping.sorted_bam]) + } + + call util.evalstats as indv_summarystats { + input: + fastq_type="SEAseq Sample FASTQ", + bambed=indv_bamtobed.bedfile, + sppfile=indv_runspp.spp_out, + fastqczip=indv_fastqc.zipfile, + bamflag=indv_mapping.bam_stats, + rmdupflag=indv_mapping.mkdup_stats, + bkflag=indv_mapping.bklist_stats, + fastqmetrics=indv_bfs.metrics_out, + default_location='SAMPLE/' + sub(basename(eachfastq),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' + } + } # end scatter (for each sample fastq) + + # MERGE BAM FILES + # Execute analysis on merge bam file + # Analysis executed: + # Merge BAM (if more than 1 fastq is provided) + # FastQC on Merge BAM (AllMerge__mapped) + + # merge bam files and perform fasTQC if more than one is provided + call util.mergehtml { + input: + htmlfiles=indv_summarystats.xhtml, + txtfiles=indv_summarystats.textfile, + default_location='SAMPLE', + outputfile = 'AllMapped_' + length(fastqfiles) + '_seaseq-summary-stats.html' + } + + call samtools.mergebam { + input: + bamfiles=indv_mapping.sorted_bam, + metricsfiles=indv_bfs.metrics_out, + default_location = if defined(results_name) then results_name + '/BAM_files' else 'AllMerge_' + length(indv_mapping.sorted_bam) + '_mapped' + '/BAM_files', + outputfile = if defined(results_name) then results_name + '.sorted.bam' else 'AllMerge_' + length(fastqfiles) + '_mapped.sorted.bam' + } + + call fastqc.fastqc as mergebamfqc { + input: + 
inputfile=mergebam.mergebam, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/QC/FastQC' + } + + call samtools.indexstats as mergeindexstats { + input: + bamfile=mergebam.mergebam, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files' + } + + if ( defined(blacklist) ) { + # remove blacklist regions + String string_blacklist = "" #buffer to allow for blacklist optionality + File blacklist_file = select_first([blacklist, string_blacklist]) + call bedtools.intersect as merge_rmblklist { + input : + fileA=mergebam.mergebam, + fileB=blacklist_file, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files', + nooverlap=true + } + call samtools.indexstats as merge_bklist { + input : + bamfile=merge_rmblklist.intersect_out, + default_location=sub(basename(mergebam.mergebam),'.sorted.b.*$','') + '/BAM_files' + } + } # end if blacklist provided + + File mergebam_afterbklist = select_first([merge_rmblklist.intersect_out, mergebam.mergebam]) + + call samtools.markdup as merge_markdup { + input : + bamfile=mergebam_afterbklist, + default_location=sub(basename(mergebam_afterbklist),'.sorted.b.*$','') + '/BAM_files' + } + + call samtools.indexstats as merge_mkdup { + input : + bamfile=merge_markdup.mkdupbam, + default_location=sub(basename(mergebam_afterbklist),'.sorted.b.*$','') + '/BAM_files' + } + } # end if length(fastqfiles) > 1: multi_fastq + +### ---------------------------------------- ### +### ------------ S E C T I O N 2 ----------- ### +### -- B: analysis if one FASTQ provided --- ### +### ---------------------------------------- ### + + # if only one fastqfile is provided + if ( one_fastq ) { + # Execute analysis on each fastq file provided + # Analysis executed: + # FastQC + # FASTQ read length distribution + # Reference Alignment using Bowtie (-k2 -m2) + # Convert SAM to BAM + # FastQC on BAM files + # Remove Blacklists (if provided) + # Remove read duplicates + # Summary statistics on FASTQs + # Combine html files into one for easy viewing + + call fastqc.fastqc as uno_fastqc { + input : + inputfile=fastqfiles[0], + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call util.basicfastqstats as uno_bfs { + input : + fastqfile=fastqfiles[0], + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/SummaryStats' + } + + call mapping.mapping { + input : + fastqfile=fastqfiles[0], + index_files=actual_bowtie_index, + metricsfile=uno_bfs.metrics_out, + blacklist=blacklist, + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/BAM_files' + } + + call fastqc.fastqc as uno_bamfqc { + input : + inputfile=mapping.sorted_bam, + default_location=sub(basename(fastqfiles[0]),'.fastq.gz|.fq.gz','') + '/QC/FastQC' + } + + call runspp.runspp as uno_runspp { + input: + bamfile=select_first([mapping.bklist_bam, mapping.sorted_bam]) + } + + call bedtools.bamtobed as uno_bamtobed { + input: + bamfile=select_first([mapping.bklist_bam, mapping.sorted_bam]) + } + } # end if length(fastqfiles) == 1: one_fastq + +### ---------------------------------------- ### +### ------------ S E C T I O N 3 ----------- ### +### ----------- ChIP-seq analysis ---------- ### +### ---------------------------------------- ### + + # ChIP-seq and downstream analysis + # Execute analysis on merge bam file + # Analysis executed: + # FIRST: Check if reads are mapped + # Peaks identification (SICER, MACS, ROSE) + # Motif analysis + # Complete Summary statistics + + #collate 
correct files for downstream analysis + File sample_bam = select_first([mergebam_afterbklist, mapping.bklist_bam, mapping.sorted_bam]) + + call macs.macs { + input : + bamfile=sample_bam, + pvalue="1e-9", + keep_dup="auto", + egs=egs.genomesize, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-p9_kd-auto', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_p9_kd-auto' + } + + call util.addreadme { + input : + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS' + } + + call macs.macs as all { + input : + bamfile=sample_bam, + pvalue="1e-9", + keep_dup="all", + egs=egs.genomesize, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-p9_kd-all', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_p9_kd-all' + } + + call macs.macs as nomodel { + input : + bamfile=sample_bam, + nomodel=true, + egs=egs.genomesize, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '-nm', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + basename(sample_bam,'.bam') + '_nm' + } + + call bamtogff.bamtogff { + input : + gtffile=gtf, + chromsizes=samtools_faidx.chromsizes, + bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]), + bamindex=select_first([merge_mkdup.indexbam, mapping.mkdup_index]), + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/BAM_Density' + } + + call bedtools.bamtobed as forsicerbed { + input : + bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]) + } + + call sicer.sicer { + input : + bedfile=forsicerbed.bedfile, + chromsizes=samtools_faidx.chromsizes, + genome_fraction=egs.genomefraction, + fragmentlength=select_first([uno_bfs.readlength, mergebam.avg_readlength]), + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/BROAD_peaks', + coverage_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/BROAD_peaks' + } + + call rose.rose { + input : + gtffile=gtf, + bamfile=select_first([merge_markdup.mkdupbam, mapping.mkdup_bam]), + bamindex=select_first([merge_mkdup.indexbam, mapping.mkdup_index]), + bedfile_auto=macs.peakbedfile, + bedfile_all=all.peakbedfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS/STITCHED_peaks' + } + + call runspp.runspp { + input: + bamfile=sample_bam + } + + call util.peaksanno { + input : + gtffile=gtf, + bedfile=macs.peakbedfile, + chromsizes=samtools_faidx.chromsizes, + summitfile=macs.summitsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(macs.peakbedfile),'_peaks.bed','') + } + + call util.peaksanno as all_peaksanno { + input : + gtffile=gtf, + bedfile=all.peakbedfile, + chromsizes=samtools_faidx.chromsizes, + summitfile=all.summitsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(all.peakbedfile),'_peaks.bed','') + } + + call util.peaksanno as nomodel_peaksanno { + input : + gtffile=gtf, + bedfile=nomodel.peakbedfile, + chromsizes=samtools_faidx.chromsizes, + summitfile=nomodel.summitsfile, + 
default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/NARROW_peaks' + '/' + sub(basename(nomodel.peakbedfile),'_peaks.bed','') + } + + call util.peaksanno as sicer_peaksanno { + input : + gtffile=gtf, + bedfile=sicer.scoreisland, + chromsizes=samtools_faidx.chromsizes, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/PEAKS_Annotation/BROAD_peaks' + } + + # Motif Analysis + if (run_motifs) { + call motifs.motifs { + input: + reference=reference, + reference_index=samtools_faidx.faidx_file, + bedfile=macs.peakbedfile, + motif_databases=motif_databases, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' + } + + call util.flankbed { + input : + bedfile=macs.summitsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' + } + + call motifs.motifs as flank { + input: + reference=reference, + reference_index=samtools_faidx.faidx_file, + bedfile=flankbed.flankbedfile, + motif_databases=motif_databases, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/MOTIFS' + } + } + + call viz.visualization { + input: + wigfile=macs.wigfile, + chromsizes=samtools_faidx.chromsizes, + xlsfile=macs.peakxlsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(macs.peakbedfile),'_peaks.bed','') + } + + call viz.visualization as vizall { + input: + wigfile=all.wigfile, + chromsizes=samtools_faidx.chromsizes, + xlsfile=all.peakxlsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(all.peakbedfile),'_peaks.bed','') + } + + call viz.visualization as viznomodel { + input: + wigfile=nomodel.wigfile, + chromsizes=samtools_faidx.chromsizes, + xlsfile=nomodel.peakxlsfile, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/NARROW_peaks' + '/' + sub(basename(nomodel.peakbedfile),'_peaks.bed','') + } + + call viz.visualization as vizsicer { + input: + wigfile=sicer.wigfile, + chromsizes=samtools_faidx.chromsizes, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/COVERAGE_files/BROAD_peaks' + } + + call bedtools.bamtobed as finalbed { + input: + bamfile=sample_bam + } + + call sortbed.sortbed { + input: + bedfile=finalbed.bedfile + } + + call bedtools.intersect { + input: + fileA=macs.peakbedfile, + fileB=sortbed.sortbed_out, + countoverlap=true, + sorted=true + } + +### ---------------------------------------- ### +### ------------ S E C T I O N 4 ----------- ### +### ---------- Summary Statistics ---------- ### +### ---------------------------------------- ### + + String string_qual = "" #buffer to allow for optionality in if statement + + #SUMMARY STATISTICS + if ( one_fastq ) { + call util.evalstats as uno_summarystats { + # SUMMARY STATISTICS of sample file (only 1 sample file provided) + input: + fastq_type="SEAseq Sample FASTQ", + bambed=finalbed.bedfile, + sppfile=runspp.spp_out, + fastqczip=select_first([uno_bamfqc.zipfile, string_qual]), + bamflag=mapping.bam_stats, + rmdupflag=mapping.mkdup_stats, + bkflag=mapping.bklist_stats, + fastqmetrics=uno_bfs.metrics_out, + countsfile=intersect.intersect_out, + peaksxls=macs.peakxlsfile, + enhancers=rose.enhancers, + superenhancers=rose.super_enhancers, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/QC/SummaryStats' + } + + call util.summaryreport as uno_overallsummary { + # Presenting all quality stats for the analysis + input: + 
overallqc_html=uno_summarystats.xhtml, + overallqc_txt=uno_summarystats.textfile + } + } # end if one_fastq + + if ( multi_fastq ) { + call util.evalstats as merge_summarystats { + # SUMMARY STATISTICS of all samples files (more than 1 sample file provided) + input: + fastq_type="SEAseq Comprehensive", + bambed=finalbed.bedfile, + sppfile=runspp.spp_out, + fastqczip=select_first([mergebamfqc.zipfile, string_qual]), + bamflag=mergeindexstats.flagstats, + rmdupflag=merge_mkdup.flagstats, + bkflag=merge_bklist.flagstats, + countsfile=intersect.intersect_out, + peaksxls=macs.peakxlsfile, + enhancers=rose.enhancers, + superenhancers=rose.super_enhancers, + default_location=sub(basename(sample_bam),'.sorted.b.*$','') + '/QC/SummaryStats' + } + + call util.summaryreport as merge_overallsummary { + # Presenting all quality stats for the analysis + input: + sampleqc_html=mergehtml.xhtml, + overallqc_html=merge_summarystats.xhtml, + sampleqc_txt=mergehtml.mergetxt, + overallqc_txt=merge_summarystats.textfile + } + } # end if multi_fastq + + output { + #SPIKE-IN + Array[File?]? spikein_indv_s_htmlfile = spikein_indv_fastqc.htmlfile + Array[File?]? spikein_indv_s_zipfile = spikein_indv_fastqc.zipfile + Array[File?]? spikein_s_metrics_out = spikein_indv_map.mapping_output + + #FASTQC + Array[File?]? indv_s_htmlfile = indv_fastqc.htmlfile + Array[File?]? indv_s_zipfile = indv_fastqc.zipfile + Array[File?]? indv_s_bam_htmlfile = indv_bamfqc.htmlfile + Array[File?]? indv_s_bam_zipfile = indv_bamfqc.zipfile + + File? s_mergebam_htmlfile = mergebamfqc.htmlfile + File? s_mergebam_zipfile = mergebamfqc.zipfile + + File? uno_s_htmlfile = uno_fastqc.htmlfile + File? uno_s_zipfile = uno_fastqc.zipfile + File? uno_s_bam_htmlfile = uno_bamfqc.htmlfile + File? uno_s_bam_zipfile = uno_bamfqc.zipfile + + #BASICMETRICS + Array[File?]? s_metrics_out = indv_bfs.metrics_out + File? uno_s_metrics_out = uno_bfs.metrics_out + + #BAMFILES + Array[File?]? indv_s_sortedbam = indv_mapping.sorted_bam + Array[File?]? indv_s_indexbam = indv_mapping.bam_index + Array[File?]? indv_s_bkbam = indv_mapping.bklist_bam + Array[File?]? indv_s_bkindexbam = indv_mapping.bklist_index + Array[File?]? indv_s_rmbam = indv_mapping.mkdup_bam + Array[File?]? indv_s_rmindexbam = indv_mapping.mkdup_index + + File? uno_s_sortedbam = mapping.sorted_bam + File? uno_s_indexstatsbam = mapping.bam_index + File? uno_s_bkbam = mapping.bklist_bam + File? uno_s_bkindexbam = mapping.bklist_index + File? uno_s_rmbam = mapping.mkdup_bam + File? uno_s_rmindexbam = mapping.mkdup_index + + File? s_mergebamfile = mergebam.mergebam + File? s_mergebamindex = mergeindexstats.indexbam + File? s_bkbam = merge_rmblklist.intersect_out + File? s_bkindexbam = merge_bklist.indexbam + File? s_rmbam = merge_markdup.mkdupbam + File? s_rmindexbam = merge_mkdup.indexbam + + #MACS + File? peakbedfile = macs.peakbedfile + File? peakxlsfile = macs.peakxlsfile + File? summitsfile = macs.summitsfile + File? negativexlsfile = macs.negativepeaks + File? wigfile = macs.wigfile + File? all_peakbedfile = all.peakbedfile + File? all_peakxlsfile = all.peakxlsfile + File? all_summitsfile = all.summitsfile + File? all_wigfile = all.wigfile + File? all_negativexlsfile = all.negativepeaks + File? nm_peakbedfile = nomodel.peakbedfile + File? nm_peakxlsfile = nomodel.peakxlsfile + File? nm_summitsfile = nomodel.summitsfile + File? nm_wigfile = nomodel.wigfile + File? nm_negativexlsfile = nomodel.negativepeaks + File? readme_peaks = addreadme.readme_peaks + + #SICER + File? 
scoreisland = sicer.scoreisland + File? sicer_wigfile = sicer.wigfile + + #ROSE + File? pngfile = rose.pngfile + File? mapped_union = rose.mapped_union + File? mapped_stitch = rose.mapped_stitch + File? enhancers = rose.enhancers + File? super_enhancers = rose.super_enhancers + File? gff_file = rose.gff_file + File? gff_union = rose.gff_union + File? union_enhancers = rose.union_enhancers + File? stitch_enhancers = rose.stitch_enhancers + File? e_to_g_enhancers = rose.e_to_g_enhancers + File? g_to_e_enhancers = rose.g_to_e_enhancers + File? e_to_g_super_enhancers = rose.e_to_g_super_enhancers + File? g_to_e_super_enhancers = rose.g_to_e_super_enhancers + File? supergenes = rose.super_genes + File? allgenes = rose.all_genes + + #MOTIFS + File? flankbedfile = flankbed.flankbedfile + + File? ame_tsv = motifs.ame_tsv + File? ame_html = motifs.ame_html + File? ame_seq = motifs.ame_seq + File? meme = motifs.meme_out + File? meme_summary = motifs.meme_summary + + File? summit_ame_tsv = flank.ame_tsv + File? summit_ame_html = flank.ame_html + File? summit_ame_seq = flank.ame_seq + File? summit_meme = flank.meme_out + File? summit_meme_summary = flank.meme_summary + + #BAM2GFF + File? s_matrices = bamtogff.s_matrices + File? densityplot = bamtogff.densityplot + File? pdf_gene = bamtogff.pdf_gene + File? pdf_h_gene = bamtogff.pdf_h_gene + File? png_h_gene = bamtogff.png_h_gene + File? jpg_h_gene = bamtogff.jpg_h_gene + File? pdf_promoters = bamtogff.pdf_promoters + File? pdf_h_promoters = bamtogff.pdf_h_promoters + File? png_h_promoters = bamtogff.png_h_promoters + File? jpg_h_promoters = bamtogff.jpg_h_promoters + + #PEAKS-ANNOTATION + File? peak_promoters = peaksanno.peak_promoters + File? peak_genebody = peaksanno.peak_genebody + File? peak_window = peaksanno.peak_window + File? peak_closest = peaksanno.peak_closest + File? peak_comparison = peaksanno.peak_comparison + File? gene_comparison = peaksanno.gene_comparison + File? pdf_comparison = peaksanno.pdf_comparison + + File? all_peak_promoters = all_peaksanno.peak_promoters + File? all_peak_genebody = all_peaksanno.peak_genebody + File? all_peak_window = all_peaksanno.peak_window + File? all_peak_closest = all_peaksanno.peak_closest + File? all_peak_comparison = all_peaksanno.peak_comparison + File? all_gene_comparison = all_peaksanno.gene_comparison + File? all_pdf_comparison = all_peaksanno.pdf_comparison + + File? nomodel_peak_promoters = nomodel_peaksanno.peak_promoters + File? nomodel_peak_genebody = nomodel_peaksanno.peak_genebody + File? nomodel_peak_window = nomodel_peaksanno.peak_window + File? nomodel_peak_closest = nomodel_peaksanno.peak_closest + File? nomodel_peak_comparison = nomodel_peaksanno.peak_comparison + File? nomodel_gene_comparison = nomodel_peaksanno.gene_comparison + File? nomodel_pdf_comparison = nomodel_peaksanno.pdf_comparison + + File? sicer_peak_promoters = sicer_peaksanno.peak_promoters + File? sicer_peak_genebody = sicer_peaksanno.peak_genebody + File? sicer_peak_window = sicer_peaksanno.peak_window + File? sicer_peak_closest = sicer_peaksanno.peak_closest + File? sicer_peak_comparison = sicer_peaksanno.peak_comparison + File? sicer_gene_comparison = sicer_peaksanno.gene_comparison + File? sicer_pdf_comparison = sicer_peaksanno.pdf_comparison + + #VISUALIZATION + File? bigwig = visualization.bigwig + File? norm_wig = visualization.norm_wig + File? tdffile = visualization.tdffile + File? n_bigwig = viznomodel.bigwig + File? n_norm_wig = viznomodel.norm_wig + File? n_tdffile = viznomodel.tdffile + File? 
a_bigwig = vizall.bigwig + File? a_norm_wig = vizall.norm_wig + File? a_tdffile = vizall.tdffile + + File? s_bigwig = vizsicer.bigwig + File? s_norm_wig = vizsicer.norm_wig + File? s_tdffile = vizsicer.tdffile + + #QC-STATS + Array[File?]? s_qc_statsfile = indv_summarystats.statsfile + Array[File?]? s_qc_htmlfile = indv_summarystats.htmlfile + Array[File?]? s_qc_textfile = indv_summarystats.textfile + File? s_qc_mergehtml = mergehtml.mergefile + + File? s_uno_statsfile = uno_summarystats.statsfile + File? s_uno_htmlfile = uno_summarystats.htmlfile + File? s_uno_textfile = uno_summarystats.textfile + + File? statsfile = merge_summarystats.statsfile + File? htmlfile = merge_summarystats.htmlfile + File? textfile = merge_summarystats.textfile + + File? summaryhtml = select_first([uno_overallsummary.summaryhtml, merge_overallsummary.summaryhtml]) + File? summarytxt = select_first([uno_overallsummary.summarytxt,merge_overallsummary.summarytxt]) + } +} \ No newline at end of file diff --git a/wdl-grammar/CHANGELOG.md b/wdl-grammar/CHANGELOG.md index 9f26fdfe..0f442eea 100644 --- a/wdl-grammar/CHANGELOG.md +++ b/wdl-grammar/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +* `SyntaxExt` (for better handling of siblings) and `SyntaxTokenExt` (for handling of associated trivia) traits ([#133](https://github.com/stjude-rust-labs/wdl/pull/133)) +* `is_trivia()` and `is_symbolic()` methods for `SyntaxKind` ([#133](https://github.com/stjude-rust-labs/wdl/pull/133)) + ### Fixed * Fixed parsing of workflow `hints` section to no longer accept expressions ([#176](https://github.com/stjude-rust-labs/wdl/pull/176)) diff --git a/wdl-grammar/Cargo.toml b/wdl-grammar/Cargo.toml index b83f50f3..c6f3ca11 100644 --- a/wdl-grammar/Cargo.toml +++ b/wdl-grammar/Cargo.toml @@ -11,8 +11,10 @@ repository = "https://github.com/stjude-rust-labs/wdl" documentation = "https://docs.rs/wdl-grammar" [dependencies] +itertools = { workspace = true } logos = { workspace = true } rowan = { workspace = true } +strum = { version = "0.26", features = ["derive"] } codespan-reporting = { workspace = true, optional = true } [dev-dependencies] @@ -24,6 +26,9 @@ codespan-reporting = { workspace = true } [features] codespan = ["dep:codespan-reporting"] +[lints] +workspace = true + [[test]] name = "parsing" required-features = ["codespan"] diff --git a/wdl-grammar/src/tree.rs b/wdl-grammar/src/tree.rs index 34fd6e91..56254057 100644 --- a/wdl-grammar/src/tree.rs +++ b/wdl-grammar/src/tree.rs @@ -3,10 +3,15 @@ pub mod dive; use std::borrow::Cow; +use std::collections::VecDeque; use std::fmt; +use std::iter; +use itertools::Either; +use rowan::Direction; use rowan::GreenNodeBuilder; use rowan::GreenNodeData; +use strum::VariantArray; use super::Diagnostic; use super::grammar; @@ -22,7 +27,7 @@ use crate::parser::Parser; /// Tokens are terminal and represent any span of the source. /// /// This enumeration is a union of all supported WDL tokens and nodes. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, VariantArray)] #[repr(u16)] pub enum SyntaxKind { /// The token is unknown to WDL. @@ -261,9 +266,9 @@ pub enum SyntaxKind { MetadataObjectNode, /// Represents a metadata array node. MetadataArrayNode, - /// Represents a literal integer node. + /// Represents a literal integer node. LiteralIntegerNode, - /// Represents a literal float node. 
+ /// Represents a literal float node. LiteralFloatNode, /// Represents a literal boolean node. LiteralBooleanNode, @@ -373,6 +378,23 @@ pub enum SyntaxKind { MAX, } +impl SyntaxKind { + /// Returns whether the token is a symbolic [`SyntaxKind`]. + /// + /// Generally speaking, symbolic [`SyntaxKind`]s have special meanings + /// during parsing—they are not real elements of the grammar but rather an + /// implementation detail. + pub fn is_symbolic(&self) -> bool { + matches!( + self, + SyntaxKind::Abandoned | SyntaxKind::Unknown | SyntaxKind::Unparsed | SyntaxKind::MAX + ) + } +} + +/// Every [`SyntaxKind`] variant. +pub static ALL_SYNTAX_KIND: &[SyntaxKind] = SyntaxKind::VARIANTS; + impl From<SyntaxKind> for rowan::SyntaxKind { fn from(kind: SyntaxKind) -> Self { rowan::SyntaxKind(kind as u16) @@ -552,6 +574,11 @@ impl SyntaxKind { SyntaxKind::MAX => unreachable!(), } } + + /// Returns whether the [`SyntaxKind`] is trivia. + pub fn is_trivia(&self) -> bool { + matches!(self, SyntaxKind::Whitespace | SyntaxKind::Comment) + } } /// Represents the Workflow Definition Language (WDL). @@ -690,3 +717,315 @@ impl fmt::Debug for SyntaxTree { self.0.fmt(f) } } + +/// An extension trait for [`SyntaxNode`]s, [`SyntaxToken`]s, and +/// [`SyntaxElement`]s. +pub trait SyntaxExt { + /// Returns whether `self` matches the provided element. + fn matches(&self, other: &SyntaxElement) -> bool; + + /// Gets the parent of the element. + /// + /// Returns `None` for the root node. + fn parent(&self) -> Option<SyntaxNode>; + + /// Gets the child index of the element. + fn index(&self) -> usize; + + /// Gets the siblings with tokens. + /// + /// **NOTE:** this is needed because Rowan does not encapsulate this + /// functionality in a trait. Once wrapped here, most of the functions + /// provided by this extension trait can just be provided, which simplifies + /// the code. Generally speaking, this should just defer to the underlying + /// `siblings_with_tokens` method for each type. + fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement>; + + /// Returns all of the siblings _before_ the current element. + /// + /// The siblings are returned in the order they were parsed. + fn preceding_siblings(&self) -> impl Iterator<Item = SyntaxElement> { + let index = self.index(); + self.parent() + .into_iter() + .flat_map(move |p| p.children_with_tokens().take(index)) + } + + /// Returns all of the siblings _after_ the current element. + /// + /// The siblings are returned in the order they were parsed. + fn succeeding_siblings(&self) -> impl Iterator<Item = SyntaxElement> { + self.siblings_with_tokens(Direction::Next) + // NOTE: this `skip` is necessary because `siblings_with_tokens` returns the current + // node. + .skip(1) + } + + /// Gets all elements that are adjacent to a particular element (not + /// including the element itself). This means in both the forward and + /// reverse direction. + /// + /// The siblings are returned in the order they were parsed.
+ fn adjacent(&self) -> impl Iterator<Item = SyntaxElement> { + self.preceding_siblings().chain(self.succeeding_siblings()) + } +} + +impl SyntaxExt for SyntaxNode { + fn matches(&self, other: &SyntaxElement) -> bool { + other.as_node().map(|n| n == self).unwrap_or(false) + } + + fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement> { + self.siblings_with_tokens(direction) + } + + fn parent(&self) -> Option<SyntaxNode> { + self.parent() + } + + fn index(&self) -> usize { + self.index() + } +} + +impl SyntaxExt for SyntaxToken { + fn matches(&self, other: &SyntaxElement) -> bool { + other.as_token().map(|n| n == self).unwrap_or(false) + } + + fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement> { + self.siblings_with_tokens(direction) + } + + fn parent(&self) -> Option<SyntaxNode> { + self.parent() + } + + fn index(&self) -> usize { + self.index() + } +} + +impl SyntaxExt for SyntaxElement { + fn matches(&self, other: &SyntaxElement) -> bool { + self == other + } + + fn siblings_with_tokens(&self, direction: Direction) -> impl Iterator<Item = SyntaxElement> { + match self { + SyntaxElement::Node(node) => Either::Left(node.siblings_with_tokens(direction)), + SyntaxElement::Token(token) => Either::Right(token.siblings_with_tokens(direction)), + } + } + + fn parent(&self) -> Option<SyntaxNode> { + self.parent() + } + + fn index(&self) -> usize { + self.index() + } +} + +/// An extension trait for [`SyntaxToken`]s. +pub trait SyntaxTokenExt { + /// Gets all of the substantial preceding trivia for an element. + fn preceding_trivia(&self) -> impl Iterator<Item = SyntaxToken>; + + /// Gets all of the substantial succeeding trivia for an element. + fn succeeding_trivia(&self) -> impl Iterator<Item = SyntaxToken>; + + /// Get any inline comment directly following an element on the + /// same line. + fn inline_comment(&self) -> Option<SyntaxToken>; +} + +impl SyntaxTokenExt for SyntaxToken { + fn preceding_trivia(&self) -> impl Iterator<Item = SyntaxToken> { + let mut tokens = VecDeque::new(); + let mut cur = self.prev_token(); + while let Some(token) = cur { + cur = token.prev_token(); + // Stop at first non-trivia + if !token.kind().is_trivia() { + break; + } + // Stop if a comment is not on its own line + if token.kind() == SyntaxKind::Comment { + if let Some(prev) = token.prev_token() { + if prev.kind() == SyntaxKind::Whitespace { + let has_newlines = prev.text().chars().any(|c| c == '\n'); + // If there are newlines in 'prev' then we know + // that the comment is on its own line. + // The comment may still be on its own line if + // 'prev' does not have newlines and nothing comes + // before 'prev'. + if !has_newlines && prev.prev_token().is_some() { + break; + } + } else { + // There is something else on this line before the comment.
+ break; + } + } + // Filter out whitespace that is not substantial + match token.kind() { + SyntaxKind::Whitespace + if token.text().chars().filter(|c| *c == '\n').count() > 1 => + { + tokens.push_front(token); + } + SyntaxKind::Comment => { + tokens.push_front(token); + } + _ => {} + } + } + tokens.into_iter() + } + + fn succeeding_trivia(&self) -> impl Iterator<Item = SyntaxToken> { + let mut next = self.next_token(); + iter::from_fn(move || { + let cur = next.clone()?; + next = cur.next_token(); + Some(cur) + }) + .take_while(|t| { + // Stop at first non-trivia + t.kind().is_trivia() + }) + .filter(|t| { + // Filter out whitespace that is not substantial + if t.kind() == SyntaxKind::Whitespace { + return t.text().chars().filter(|c| *c == '\n').count() > 1; + } + true + }) + } + + fn inline_comment(&self) -> Option<SyntaxToken> { + let mut next = self.next_token(); + iter::from_fn(move || { + let cur = next.clone()?; + next = cur.next_token(); + Some(cur) + }) + .take_while(|t| { + // Stop at non-trivia + if !t.kind().is_trivia() { + return false; + } + // Stop on first whitespace containing a newline + if t.kind() == SyntaxKind::Whitespace { + return !t.text().chars().any(|c| c == '\n'); + } + true + }) + .find(|t| t.kind() == SyntaxKind::Comment) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::SyntaxTree; + + #[test] + fn preceding_comments() { + let (tree, diagnostics) = SyntaxTree::parse( + "version 1.2 + +# This comment should not be included +task foo {} # This comment should not be included + +# Some +# comments +# are +# long + +# Others are short + +# and, yet another +workflow foo {} # This should not be collected. + +# This comment should not be included either.", + ); + + assert!(diagnostics.is_empty()); + + let workflow = tree.root().last_child().unwrap(); + assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode); + let token = workflow.first_token().unwrap(); + let mut trivia = token.preceding_trivia(); + assert_eq!(trivia.next().unwrap().text(), "\n\n"); + assert_eq!(trivia.next().unwrap().text(), "# Some"); + assert_eq!(trivia.next().unwrap().text(), "# comments"); + assert_eq!(trivia.next().unwrap().text(), "# are"); + assert_eq!(trivia.next().unwrap().text(), "# long"); + assert_eq!(trivia.next().unwrap().text(), "\n \n"); + assert_eq!(trivia.next().unwrap().text(), "# Others are short"); + assert_eq!(trivia.next().unwrap().text(), "\n\n"); + assert_eq!(trivia.next().unwrap().text(), "# and, yet another"); + assert!(trivia.next().is_none()); + } + + #[test] + fn succeeding_comments() { + let (tree, diagnostics) = SyntaxTree::parse( + "version 1.2 + +# This comment should not be included +task foo {} + +# This should not be collected. +workflow foo {} # Here is a comment that should be collected. + +# This comment should be included too.", + ); + + assert!(diagnostics.is_empty()); + + let workflow = tree.root().last_child().unwrap(); + assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode); + let token = workflow.last_token().unwrap(); + let mut trivia = token.succeeding_trivia(); + assert_eq!( + trivia.next().unwrap().text(), + "# Here is a comment that should be collected." + ); + assert_eq!(trivia.next().unwrap().text(), "\n\n"); + assert_eq!( + trivia.next().unwrap().text(), + "# This comment should be included too." + ); + assert!(trivia.next().is_none()); + } + + #[test] + fn inline_comment() { + let (tree, diagnostics) = SyntaxTree::parse( + "version 1.2 + +# This comment should not be included +task foo {} + +# This should not be collected.
+workflow foo {} # Here is a comment that should be collected. + +# This comment should not be included either.", + ); + + assert!(diagnostics.is_empty()); + + let workflow = tree.root().last_child().unwrap(); + assert_eq!(workflow.kind(), SyntaxKind::WorkflowDefinitionNode); + let comment = workflow.last_token().unwrap().inline_comment().unwrap(); + assert_eq!( + comment.text(), + "# Here is a comment that should be collected." + ); + } +} diff --git a/wdl-grammar/tests/parsing.rs b/wdl-grammar/tests/parsing.rs index a90590fe..8dda10c4 100644 --- a/wdl-grammar/tests/parsing.rs +++ b/wdl-grammar/tests/parsing.rs @@ -32,6 +32,7 @@ use rayon::prelude::*; use wdl_grammar::Diagnostic; use wdl_grammar::SyntaxTree; +/// Finds tests for this package. fn find_tests() -> Vec<PathBuf> { // Check for filter arguments consisting of test names let mut filter = HashSet::new(); @@ -59,6 +60,7 @@ fn find_tests() -> Vec<PathBuf> { tests } +/// Normalizes a path. fn normalize(s: &str, is_error: bool) -> String { if is_error { // Normalize paths in any error messages @@ -69,6 +71,7 @@ fn normalize(s: &str, is_error: bool) -> String { s.replace("\r\n", "\n") } +/// Formats diagnostics. fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String { let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source); let mut buffer = Buffer::no_color(); @@ -85,6 +88,7 @@ fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String::from_utf8(buffer.into_inner()).expect("should be UTF-8") } +/// Compares a test result. fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<(), String> { let result = normalize(result, is_error); if env::var_os("BLESS").is_some() { @@ -116,6 +120,7 @@ fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<(), Strin Ok(()) } +/// Runs a test. fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> { let path = test.join("source.wdl"); let source = std::fs::read_to_string(&path) diff --git a/wdl-lint/Cargo.toml b/wdl-lint/Cargo.toml index 289661d1..649eda3e 100644 --- a/wdl-lint/Cargo.toml +++ b/wdl-lint/Cargo.toml @@ -26,6 +26,9 @@ colored = { workspace = true } [features] codespan = ["wdl-ast/codespan"] +[lints] +workspace = true + [[test]] name = "lints" required-features = ["codespan"] diff --git a/wdl-lint/src/rules/disallowed_input_name.rs b/wdl-lint/src/rules/disallowed_input_name.rs index 588e53b5..78d3e6eb 100755 --- a/wdl-lint/src/rules/disallowed_input_name.rs +++ b/wdl-lint/src/rules/disallowed_input_name.rs @@ -1,6 +1,5 @@ //! A lint rule that disallows redundant input names. -use wdl_ast::AstNode; use wdl_ast::AstToken; use wdl_ast::Diagnostic; use wdl_ast::Diagnostics; diff --git a/wdl-lint/src/rules/disallowed_output_name.rs b/wdl-lint/src/rules/disallowed_output_name.rs index 099ffd7b..6ff11798 100644 --- a/wdl-lint/src/rules/disallowed_output_name.rs +++ b/wdl-lint/src/rules/disallowed_output_name.rs @@ -1,6 +1,5 @@ //! A lint rule that disallows redundant output names. -use wdl_ast::AstNode; use wdl_ast::AstToken; use wdl_ast::Diagnostic; use wdl_ast::Diagnostics; diff --git a/wdl-lint/src/rules/double_quotes.rs b/wdl-lint/src/rules/double_quotes.rs index 6b0c7c1b..8e23752f 100644 --- a/wdl-lint/src/rules/double_quotes.rs +++ b/wdl-lint/src/rules/double_quotes.rs @@ -1,6 +1,5 @@ //! A lint rule for using double quoted strings.
-use wdl_ast::AstNode;
 use wdl_ast::AstNodeExt;
 use wdl_ast::Diagnostic;
 use wdl_ast::Diagnostics;
diff --git a/wdl-lint/src/rules/ending_newline.rs b/wdl-lint/src/rules/ending_newline.rs
index 21813b62..a05f9328 100644
--- a/wdl-lint/src/rules/ending_newline.rs
+++ b/wdl-lint/src/rules/ending_newline.rs
@@ -1,7 +1,6 @@
 //! A lint rule for newlines at the end of the document.
 
 use wdl_ast::Ast;
-use wdl_ast::AstNode;
 use wdl_ast::Diagnostic;
 use wdl_ast::Diagnostics;
 use wdl_ast::Document;
diff --git a/wdl-lint/tests/lints.rs b/wdl-lint/tests/lints.rs
index 20a42574..6ada6439 100644
--- a/wdl-lint/tests/lints.rs
+++ b/wdl-lint/tests/lints.rs
@@ -33,6 +33,7 @@ use wdl_ast::Document;
 use wdl_ast::Validator;
 use wdl_lint::LintVisitor;
 
+/// Finds tests for this package.
 fn find_tests() -> Vec<PathBuf> {
     // Check for filter arguments consisting of test names
     let mut filter = HashSet::new();
@@ -60,6 +61,7 @@ fn find_tests() -> Vec<PathBuf> {
     tests
 }
 
+/// Normalizes a test result string.
 fn normalize(s: &str, is_error: bool) -> String {
     if is_error {
         // Normalize paths in any error messages
@@ -70,6 +72,7 @@ fn normalize(s: &str, is_error: bool) -> String {
     s.replace("\r\n", "\n")
 }
 
+/// Formats diagnostics.
 fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) -> String {
     let file = SimpleFile::new(path.as_os_str().to_str().unwrap(), source);
     let mut buffer = Buffer::no_color();
@@ -86,6 +89,7 @@ fn format_diagnostics(diagnostics: &[Diagnostic], path: &Path, source: &str) ->
     String::from_utf8(buffer.into_inner()).expect("should be UTF-8")
 }
 
+/// Compares a test result against the baseline file.
 fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<(), String> {
     let result = normalize(result, is_error);
     if env::var_os("BLESS").is_some() {
@@ -117,6 +121,7 @@ fn compare_result(path: &Path, result: &str, is_error: bool) -> Result<(), Strin
     Ok(())
 }
 
+/// Runs a test.
 fn run_test(test: &Path, ntests: &AtomicUsize) -> Result<(), String> {
     let path = test.join("source.wdl");
     let source = std::fs::read_to_string(&path).map_err(|e| {
@@ -154,6 +159,7 @@ fn main() {
 
     let ntests = AtomicUsize::new(0);
 
+    #[allow(clippy::missing_docs_in_private_items)]
     fn inner<'a>(test: &'a Path, ntests: &AtomicUsize) -> Option<(&'a str, String)> {
         let test_name = test.file_stem().and_then(OsStr::to_str).unwrap();
         match std::panic::catch_unwind(|| {
diff --git a/wdl-lsp/Cargo.toml b/wdl-lsp/Cargo.toml
index 9193e4e2..caf37c8e 100644
--- a/wdl-lsp/Cargo.toml
+++ b/wdl-lsp/Cargo.toml
@@ -24,3 +24,6 @@ line-index = { workspace = true }
 serde_json = { workspace = true }
 indexmap = { workspace = true }
 uuid = { workspace = true, features = ["v4"] }
+
+[lints]
+workspace = true
diff --git a/wdl/CHANGELOG.md b/wdl/CHANGELOG.md
index 5af004d0..6f96cb8f 100644
--- a/wdl/CHANGELOG.md
+++ b/wdl/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+* Added a `format` command to the `wdl` CLI tool ([#133](https://github.com/stjude-rust-labs/wdl/pull/133)).
 * Added a `verbosity` flag to the `wdl` CLI tool ([#199](https://github.com/stjude-rust-labs/wdl/pull/199)).
## 0.8.0 - 09-16-2024 diff --git a/wdl/Cargo.toml b/wdl/Cargo.toml index 08c3f25f..4d63fed2 100644 --- a/wdl/Cargo.toml +++ b/wdl/Cargo.toml @@ -17,6 +17,7 @@ wdl-ast = { path = "../wdl-ast", version = "0.7.1", optional = true } wdl-lint = { path = "../wdl-lint", version = "0.6.0", optional = true } wdl-analysis = { path = "../wdl-analysis", version = "0.3.0", optional = true } wdl-lsp = { path = "../wdl-lsp", version = "0.3.0", optional = true } +wdl-format = { path = "../wdl-format", version = "0.1.0", optional = true } tracing-subscriber = { workspace = true, optional = true } clap = { workspace = true, optional = true } anyhow = { workspace = true, optional = true } @@ -34,9 +35,10 @@ anyhow = { workspace = true } codespan-reporting = { workspace = true } [features] -default = ["analysis", "ast", "grammar", "lint"] +default = ["ast", "grammar", "lint"] analysis = ["dep:wdl-analysis"] ast = ["dep:wdl-ast"] +format = ["dep:wdl-format"] grammar = ["dep:wdl-grammar"] lint = ["dep:wdl-lint"] lsp = ["dep:wdl-lsp"] @@ -56,6 +58,9 @@ cli = [ "dep:tracing", ] +[lints] +workspace = true + [[example]] name = "explore" required-features = ["codespan"] diff --git a/wdl/examples/explore.rs b/wdl/examples/explore.rs index 2a360049..3199cfac 100644 --- a/wdl/examples/explore.rs +++ b/wdl/examples/explore.rs @@ -39,6 +39,7 @@ struct Args { path: PathBuf, } +/// Emits diagnostics. fn emit_diagnostics(path: &Path, source: &str, diagnostics: &[Diagnostic]) -> Result<()> { let file = SimpleFile::new(path.to_str().context("path should be UTF-8")?, source); let mut stream = StandardStream::stdout(if std::io::stdout().is_terminal() { @@ -59,6 +60,7 @@ fn emit_diagnostics(path: &Path, source: &str, diagnostics: &[Diagnostic]) -> Re Ok(()) } +/// The main function. pub fn main() -> Result<()> { let args = Args::parse(); let source = read_to_string(&args.path).with_context(|| { @@ -112,6 +114,7 @@ pub fn main() -> Result<()> { Ok(()) } +/// Explores metadata. fn explore_metadata(metadata: &MetadataSection) { for item in metadata.items() { let value = item.value().syntax().text().to_string(); @@ -123,6 +126,7 @@ fn explore_metadata(metadata: &MetadataSection) { } } +/// Explores an input. fn explore_input(input: &InputSection) { for decl in input.declarations() { println!( @@ -133,6 +137,7 @@ fn explore_input(input: &InputSection) { } } +/// Explores an output. fn explore_output(output: &OutputSection) { for decl in output.declarations() { println!( diff --git a/wdl/examples/parse.rs b/wdl/examples/parse.rs index 4e698c57..1fef8747 100644 --- a/wdl/examples/parse.rs +++ b/wdl/examples/parse.rs @@ -25,6 +25,7 @@ struct Args { path: PathBuf, } +/// Emits diagnostics. fn emit_diagnostics(path: &Path, source: &str, diagnostics: &[Diagnostic]) -> Result<()> { let file = SimpleFile::new(path.to_str().context("path should be UTF-8")?, source); let mut stream = StandardStream::stdout(if std::io::stdout().is_terminal() { @@ -45,6 +46,7 @@ fn emit_diagnostics(path: &Path, source: &str, diagnostics: &[Diagnostic]) -> Re Ok(()) } +/// The main function. pub fn main() -> Result<()> { let args = Args::parse(); let source = read_to_string(&args.path).with_context(|| { diff --git a/wdl/src/bin/wdl.rs b/wdl/src/bin/wdl.rs index 469e05b1..67ab754f 100644 --- a/wdl/src/bin/wdl.rs +++ b/wdl/src/bin/wdl.rs @@ -1,3 +1,8 @@ +//! The `wdl` command line tool. +//! +//! If you're here and not a developer of the `wdl` family of crates, you're +//! probably looking for +//! [Sprocket](https://github.com/stjude-rust-labs/sprocket) instead. 
 use std::borrow::Cow;
 use std::collections::HashSet;
 use std::fs;
@@ -32,6 +37,9 @@ use wdl_analysis::AnalysisResult;
 use wdl_analysis::Analyzer;
 use wdl_analysis::Rule;
 use wdl_analysis::rules;
+use wdl_ast::Node;
+use wdl_format::Formatter;
+use wdl_format::element::node::AstNodeFormatExt as _;
 
 /// Emits the given diagnostics to the output stream.
 ///
@@ -58,6 +66,7 @@ fn emit_diagnostics(path: &str, source: &str, diagnostics: &[Diagnostic]) -> Res
     Ok(())
 }
 
+/// Analyzes a path.
 async fn analyze<T: Into<String>>(
     rules: impl IntoIterator<Item = T>,
     path: PathBuf,
@@ -157,6 +166,7 @@ pub struct ParseCommand {
 }
 
 impl ParseCommand {
+    /// Executes the `parse` subcommand.
     async fn exec(self) -> Result<()> {
         let source = read_source(&self.path)?;
         let (document, diagnostics) = Document::parse(&source);
@@ -232,11 +242,13 @@ pub struct CheckCommand {
     #[clap(value_name = "PATH")]
     pub path: PathBuf,
 
+    /// The analysis options.
     #[clap(flatten)]
     pub options: AnalysisOptions,
 }
 
 impl CheckCommand {
+    /// Executes the `check` subcommand.
     async fn exec(self) -> Result<()> {
         self.options.check_for_conflicts()?;
         analyze(self.options.into_rules(), self.path, false).await?;
@@ -254,6 +266,7 @@ pub struct LintCommand {
 }
 
 impl LintCommand {
+    /// Executes the `lint` subcommand.
     async fn exec(self) -> Result<()> {
         let source = read_source(&self.path)?;
         let (document, diagnostics) = Document::parse(&source);
@@ -291,6 +304,7 @@ pub struct AnalyzeCommand {
     #[clap(value_name = "PATH")]
     pub path: PathBuf,
 
+    /// The analysis options.
     #[clap(flatten)]
     pub options: AnalysisOptions,
 
@@ -300,6 +314,7 @@ pub struct AnalyzeCommand {
 }
 
 impl AnalyzeCommand {
+    /// Executes the `analyze` subcommand.
     async fn exec(self) -> Result<()> {
         self.options.check_for_conflicts()?;
         let results = analyze(self.options.into_rules(), self.path, self.lint).await?;
@@ -308,6 +323,44 @@ impl AnalyzeCommand {
     }
 }
 
+/// Formats a WDL source file.
+#[derive(Args)]
+#[clap(disable_version_flag = true)]
+pub struct FormatCommand {
+    /// The path to the source WDL file.
+    #[clap(value_name = "PATH")]
+    pub path: PathBuf,
+}
+
+impl FormatCommand {
+    /// Executes the `format` subcommand.
+    async fn exec(self) -> Result<()> {
+        let source = read_source(&self.path)?;
+
+        let (document, diagnostics) = Document::parse(&source);
+
+        if !diagnostics.is_empty() {
+            emit_diagnostics(&self.path.to_string_lossy(), &source, &diagnostics)?;
+
+            bail!(
+                "aborting due to previous {count} diagnostic{s}",
+                count = diagnostics.len(),
+                s = if diagnostics.len() == 1 { "" } else { "s" }
+            );
+        }
+
+        let document = Node::Ast(document.ast().into_v1().unwrap()).into_format_element();
+        let formatter = Formatter::default();
+
+        match formatter.format(&document) {
+            Ok(formatted) => print!("{formatted}"),
+            Err(err) => bail!(err),
+        };
+
+        Ok(())
+    }
+}
+
 /// A tool for parsing, validating, and linting WDL source code.
 ///
 /// This command line tool is intended as an entrypoint to work with and develop
@@ -325,19 +379,31 @@ impl AnalyzeCommand {
     arg_required_else_help = true
 )]
 struct App {
+    /// The subcommand to use.
     #[command(subcommand)]
     command: Command,
 
+    /// The verbosity flags.
     #[command(flatten)]
     verbose: Verbosity,
 }
 
 #[derive(Subcommand)]
 enum Command {
+    /// Parses a WDL file.
     Parse(ParseCommand),
+
+    /// Checks a WDL file.
     Check(CheckCommand),
+
+    /// Lints a WDL file.
     Lint(LintCommand),
+
+    /// Analyzes a WDL workspace.
     Analyze(AnalyzeCommand),
+
+    /// Formats a WDL file.
+ Format(FormatCommand), } #[tokio::main] @@ -356,6 +422,7 @@ async fn main() -> Result<()> { Command::Check(cmd) => cmd.exec().await, Command::Lint(cmd) => cmd.exec().await, Command::Analyze(cmd) => cmd.exec().await, + Command::Format(cmd) => cmd.exec().await, } { eprintln!( "{error}: {e:?}", diff --git a/wdl/src/lib.rs b/wdl/src/lib.rs index caecfb55..d0c0bce7 100644 --- a/wdl/src/lib.rs +++ b/wdl/src/lib.rs @@ -78,6 +78,9 @@ pub use wdl_analysis as analysis; #[cfg(feature = "ast")] #[doc(inline)] pub use wdl_ast as ast; +#[cfg(feature = "format")] +#[doc(inline)] +pub use wdl_format as format; #[cfg(feature = "grammar")] #[doc(inline)] pub use wdl_grammar as grammar;
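Usage note: the new `format` feature can also be consumed as a library through the `wdl` facade crate. Below is a minimal sketch mirroring `FormatCommand::exec` from this patch; the re-export paths follow the `lib.rs` change above, while the `ast`/`format` feature combination and the displayable success value of `Formatter::format` are assumptions inferred from the code in this diff rather than documented API guarantees.

```rust
// Minimal sketch: format WDL source with the `wdl` crate.
// Assumes the `ast` and `format` features are enabled; the exact
// error/success types of `Formatter::format` are assumptions.
use anyhow::{bail, Result};
use wdl::ast::{Document, Node};
use wdl::format::element::node::AstNodeFormatExt as _;
use wdl::format::Formatter;

fn format_source(source: &str) -> Result<String> {
    // Formatting requires a diagnostic-free parse.
    let (document, diagnostics) = Document::parse(source);
    if !diagnostics.is_empty() {
        bail!("document has {count} diagnostic(s)", count = diagnostics.len());
    }

    // Convert the V1 AST into a format element and run the formatter,
    // just as the `format` subcommand does.
    let element = Node::Ast(document.ast().into_v1().expect("should be a V1 document"))
        .into_format_element();
    match Formatter::default().format(&element) {
        Ok(formatted) => Ok(formatted.to_string()),
        Err(err) => bail!(err),
    }
}

fn main() -> Result<()> {
    print!("{}", format_source("version 1.2\n\nworkflow test {}\n")?);
    Ok(())
}
```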