-
Notifications
You must be signed in to change notification settings - Fork 67
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
genbank and other new WDL workflows #800
Changes from all commits
08158cb
1eaa09b
d66d255
a889ddd
ff43071
9fbefae
53e2ba4
c01526b
71fd973
63d7039
3f7b1dc
7876e65
39ad3b4
c40f5e6
e78bd37
737c830
c40f361
56a2aef
caa4e4a
b57bdf7
740151a
b02b50e
0514da4
07d77ba
083665b
b8855d8
b660097
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
import "reports.wdl" as reports | ||
|
||
# Single-call wrapper workflow: invokes the coverage_report task from "reports.wdl" (imported as reports). | ||
# No input block is given on the call, so nothing is wired to the task here. | ||
workflow coverage_table { | ||
call reports.coverage_report | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import "interhost.wdl" as interhost | ||
import "ncbi.wdl" as ncbi | ||
|
||
# Three-step workflow: align assemblies to a reference, transfer feature-table annotations | ||
# onto the alignments, then run the prepare_genbank task on the assemblies and transferred tables. | ||
# Tasks come from "interhost.wdl" (as interhost) and "ncbi.wdl" (as ncbi). | ||
workflow genbank { | ||
|
||
# Workflow-level inputs; the "+" suffix marks the arrays as non-empty. | ||
File reference_fasta | ||
Array[File]+ assemblies_fasta # one per genome | ||
Array[File]+ ref_annotations_tbl # one per chromosome | ||
|
||
# Step 1: multiple alignment of the assemblies against the reference (call aliased as mafft). | ||
call interhost.multi_align_mafft_ref as mafft { | ||
input: | ||
reference_fasta = reference_fasta, | ||
assemblies_fasta = assemblies_fasta | ||
} | ||
|
||
# Step 2: annotation transfer, fed with step 1's alignments_by_chr output, the same | ||
# reference_fasta, and the reference feature tables (call aliased as annot). | ||
call ncbi.annot_transfer as annot { | ||
input: | ||
multi_aln_fasta = mafft.alignments_by_chr, | ||
reference_fasta = reference_fasta, | ||
reference_feature_table = ref_annotations_tbl | ||
} | ||
|
||
# Step 3: prepare_genbank, fed with the original assemblies and step 2's transferred tables. | ||
# Only these two inputs are wired here; any other prepare_genbank inputs are not set by this call. | ||
call ncbi.prepare_genbank as prep_genbank { | ||
input: | ||
assemblies_fasta = assemblies_fasta, | ||
annotations_tbl = annot.transferred_feature_tables | ||
} | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
import "interhost.wdl" as interhost | ||
|
||
# Single-call wrapper workflow: invokes the multi_align_mafft task from "interhost.wdl" (imported as interhost). | ||
# No input block is given on the call, so nothing is wired to the task here. | ||
workflow mafft { | ||
call interhost.multi_align_mafft | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
import "demux.wdl" as demux | ||
|
||
# Single-call wrapper workflow: invokes the merge_and_reheader_bams task from "demux.wdl" (imported as demux). | ||
# No input block is given on the call, so nothing is wired to the task here. | ||
workflow merge_bams { | ||
call demux.merge_and_reheader_bams | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,22 +58,31 @@ task download_annotations { | |
} | ||
|
||
task annot_transfer { | ||
File chr_mutli_aln_fasta # fasta; multiple alignments of sample sequences for a single chr | ||
File reference_fasta # fasta (may contain multiple chrs, only one with the same name as reference_feature_table will be used) | ||
File reference_feature_table # feature table corresponding to the chr in the alignment | ||
Array[File]+ multi_aln_fasta # fasta; multiple alignments of sample sequences for each chromosome | ||
File reference_fasta # fasta; all chromosomes in one file | ||
Array[File]+ reference_feature_table # tbl; feature table corresponding to each chromosome in the alignment | ||
|
||
Array[Int] chr_nums=range(length(multi_aln_fasta)) | ||
|
||
command { | ||
ncbi.py tbl_transfer_prealigned \ | ||
${chr_mutli_aln_fasta} \ | ||
${reference_fasta} \ | ||
${reference_feature_table} \ | ||
. \ | ||
--oob_clip \ | ||
--loglevel DEBUG | ||
set -ex -o pipefail | ||
echo ${sep=' ' multi_aln_fasta} > alignments.txt | ||
echo ${sep=' ' reference_feature_table} > tbls.txt | ||
for i in ${sep=' ' chr_nums}; do | ||
_alignment_fasta=`cat alignments.txt | cut -f $(($i+1)) -d ' '` | ||
_feature_tbl=`cat tbls.txt | cut -f $(($i+1)) -d ' '` | ||
ncbi.py tbl_transfer_prealigned \ | ||
$_alignment_fasta \ | ||
${reference_fasta} \ | ||
$_feature_tbl \ | ||
. \ | ||
--oob_clip \ | ||
--loglevel DEBUG | ||
done | ||
} | ||
|
||
output { | ||
Array[File] transferred_feature_tables = glob("*.tbl") | ||
Array[File]+ transferred_feature_tables = glob("*.tbl") | ||
} | ||
runtime { | ||
docker: "quay.io/broadinstitute/viral-ngs" | ||
|
@@ -87,12 +96,13 @@ task prepare_genbank { | |
Array[File]+ assemblies_fasta | ||
Array[File]+ annotations_tbl | ||
File authors_sbt | ||
File? coverage_table # summary.assembly.txt (from Snakemake) | ||
File? genbankSourceTable | ||
File? biosampleMap | ||
String? sequencingTech | ||
String? comment | ||
String out_prefix = "ncbi_package" | ||
File biosampleMap | ||
File genbankSourceTable | ||
File? coverage_table # summary.assembly.txt (from Snakemake) -- change this to accept a list of mapped bam files and we can create this table ourselves | ||
String sequencingTech | ||
String comment # TO DO: make this optional | ||
String organism | ||
String molType = "cRNA" | ||
|
||
command { | ||
set -ex -o pipefail | ||
|
@@ -101,19 +111,25 @@ task prepare_genbank { | |
${authors_sbt} \ | ||
${sep=' ' assemblies_fasta} \ | ||
. \ | ||
${'--master_source_table=' + genbankSourceTable} \ | ||
${'--sequencing_tech=' + sequencingTech} \ | ||
${'--biosample_map=' + biosampleMap} \ | ||
${'--coverage_table=' + coverage_table} \ | ||
${'--comment=' + comment} \ | ||
--mol_type ${molType} \ | ||
--organism "${organism}" \ | ||
--biosample_map ${biosampleMap} \ | ||
--master_source_table ${genbankSourceTable} \ | ||
${'--coverage_table ' + coverage_table} \ | ||
--comment "${comment}" \ | ||
--sequencing_tech "${sequencingTech}" \ | ||
--loglevel DEBUG | ||
tar -czpvf ${out_prefix}.tar.gz *.val *.cmt *.fsa *.gbf *.sqn *.src *.tbl | ||
mv errorsummary.val errorsummary.val.txt # to keep it separate from the glob | ||
} | ||
|
||
output { | ||
Array[File] sequin_files = glob("*.sqn") | ||
File ncbi_package = "${out_prefix}.tar.gz" | ||
File errorSummary = "errorsummary.val" | ||
Array[File] structured_comment_files = glob("*.cmt") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should some of these be specified as one-or-more? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I played with that at one point. maybe |
||
Array[File] genbank_preview_files = glob("*.gbf") | ||
Array[File] source_table_files = glob("*.src") | ||
Array[File] fasta_per_chr_files = glob("*.fsa") | ||
Array[File] validation_files = glob("*.val") | ||
File errorSummary = "errorsummary.val.txt" | ||
} | ||
|
||
runtime { | ||
|
@@ -122,4 +138,4 @@ task prepare_genbank { | |
cpu: 2 | ||
dx_instance_type: "mem1_ssd1_x2" | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is blocking this from being optional, or having a default placeholder value?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had a lot of difficulty with the WDL syntax of specifying an optional parameter with a double-quoted string as a value (just because it passes womtool validate doesn't mean dxWDL and Cromwell agree about whether they like the syntax)... gave up and just made it a mandatory field for now since it's almost always specified anyway in a submission. Double quoting is important because people are putting sentences with spaces and punctuation here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could make it an optional text file