From 26bf56f75361cac6515c5635c63d593532c8d722 Mon Sep 17 00:00:00 2001 From: Katherine Eaton Date: Thu, 26 Mar 2020 16:28:09 -0400 Subject: [PATCH] snippy pairwise docs --- pipeline0.nf | 87 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 78 insertions(+), 9 deletions(-) diff --git a/pipeline0.nf b/pipeline0.nf index def5bc48f..0e39977de 100644 --- a/pipeline0.nf +++ b/pipeline0.nf @@ -402,18 +402,18 @@ if (!params.skip_reference_detect_low_complexity){ process reference_detect_low_complexity{ /* - Detect low complexity regions with dustmasker. - Convert the identified regions file to a bed format. + Detect low complexity regions with dustmasker. + Convert the identified regions file to a bed format. - Input: - ch_reference_genome_low_complexity (fasta): The reference genome fasta from the process reference_download. + Input: + ch_reference_genome_low_complexity (fasta): The reference genome fasta from the process reference_download. - Output: - ch_bed_ref_low_complexity (bed): A bed file containing regions of low-complexity regions. + Output: + ch_bed_ref_low_complexity (bed): A bed file containing regions of low-complexity regions. - Publish: - ${reference_genome_fna.baseName}.dustmasker.intervals (intervals) Interval file containing regions of low-complexity. - ${reference_genome_fna.baseName}.dustmasker.bed (bed) Bed file created from intervals and adjusted for 0-base system. + Publish: + ${reference_genome_fna.baseName}.dustmasker.intervals (intervals) Interval file containing regions of low-complexity. + ${reference_genome_fna.baseName}.dustmasker.bed (bed) Bed file created from intervals and adjusted for 0-base system. */ // Other variables and config tag "$reference_genome_fna" @@ -438,3 +438,72 @@ if (!params.skip_reference_detect_low_complexity){ } } + +// -------------------------------------------------------------------------- // +// Snippy Pipeline // +// -------------------------------------------------------------------------- // + +// --------------------------------Pairwise Alignment-------------------------// + +if(!params.skip_snippy_pairwise){ + + process snippy_pairwise{ + /* + Pairwise align contigs to reference genome with snippy. + + Input: + ch_assembly_fna_snippy_pairwise (fasta): The genomic assembly from process assembly_download. + ch_reference_genome_snippy_pairwise (fasta): The reference genome from process reference_download. + + Output: + ch_snippy_snps_variant_summary (text): Table of summarized SNP counts for process variant_summary. + ch_snippy_subs_vcf_detect_density (text): VCF of substitutions for process pairwise_detect_snp_high_density. + + Publish: + ${assembly_fna.baseName}_snippy.summary.txt (text): Table of summarized SNP counts. + ${assembly_fna.baseName}_snippy.subs.vcf (text): VCF of substitutions. + ${assembly_fna.baseName}_snippy.\* (text): All default snippy pipeline output. + */ + // Other variables and config + tag "$assembly_fna" + publishDir "${params.outdir}/snippy_pairwise", mode: 'copy' + echo true + + // IO and conditional behavior + input: + file assembly_fna from ch_assembly_fna_snippy_pairwise + file reference_genome_fna from ch_reference_genome_snippy_pairwise + output: + file "output${params.snippy_ctg_depth}X/*/*" + file "output${params.snippy_ctg_depth}X/*/*_snippy.summary.txt" into ch_snippy_snps_variant_summary + file "output${params.snippy_ctg_depth}X/*/*_snippy.subs.vcf" into ch_snippy_subs_vcf_detect_density + + // Shell script to execute + script: + """ + snippy \ + --prefix ${assembly_fna.baseName}_snippy \ + --cpus ${params.snippy_cpus} \ + --reference ${reference_genome_fna} \ + --outdir output${params.snippy_ctg_depth}X/${assembly_fna.baseName} \ + --ctgs ${assembly_fna} \ + --mapqual ${params.snippy_map_qual} \ + --mincov ${params.snippy_ctg_depth} \ + --minfrac ${params.snippy_min_frac} \ + --basequal ${params.snippy_base_qual} \ + --report; + + snippy_snps_in=output${params.snippy_ctg_depth}X/${assembly_fna.baseName}/${assembly_fna.baseName}_snippy.txt + snippy_snps_txt=output${params.snippy_ctg_depth}X/${assembly_fna.baseName}/${assembly_fna.baseName}_snippy.summary.txt + + COMPLEX=`awk 'BEGIN{count=0}{if (\$1 == "Variant-COMPLEX"){count=\$2}}END{print count}' \$snippy_snps_in;` + DEL=`awk 'BEGIN{count=0}{if (\$1 == "Variant-DEL"){count=\$2}}END{print count}' \$snippy_snps_in;` + INS=`awk 'BEGIN{count=0}{if (\$1 == "Variant-INS"){count=\$2}}END{print count}' \$snippy_snps_in;` + MNP=`awk 'BEGIN{count=0}{if (\$1 == "Variant-MNP"){count=\$2}}END{print count}' \$snippy_snps_in;` + SNP=`awk 'BEGIN{count=0}{if (\$1 == "Variant-SNP"){count=\$2}}END{print count}' \$snippy_snps_in;` + TOTAL=`awk 'BEGIN{count=0}{if (\$1 == "VariantTotal"){count=\$2}}END{print count}' \$snippy_snps_in;` + echo -e output${params.snippy_ctg_depth}X/${assembly_fna.baseName}"\\t"\$COMPLEX"\\t"\$DEL"\\t"\$INS"\\t"\$MNP"\\t"\$SNP"\\t"\$TOTAL >> \$snippy_snps_txt + """ + } + +}