Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

module: deepvariant #572

Merged
merged 23 commits into from
Jan 17, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
aa98d47
Add stubs for deepvariant [ci skip]
abhi18av Jul 13, 2021
940f40c
Update the stubs for deepvariant [ci skip]
abhi18av Jul 13, 2021
3278aba
Merge branch 'master' into abhinav/deepvariant
grst Aug 3, 2021
a0434ff
Merge branch 'master' into abhinav/deepvariant
abhi18av Aug 19, 2021
54dddeb
functional with google docker image
abhi18av Aug 25, 2021
99de3a0
cleanup
abhi18av Aug 25, 2021
66ea67a
consume docker container within singularity executor
abhi18av Sep 6, 2021
c7b0c1a
update the meta.yml file and ask for review
abhi18av Sep 6, 2021
c3814a7
Merge branch 'master' into abhinav/deepvariant
maxulysse Sep 13, 2021
d4a3cb2
Merge branch 'master' into abhinav/deepvariant
maxulysse Sep 16, 2021
2bb6bfa
tweak the input channel shape and test data
abhi18av Sep 17, 2021
5a94e63
tweak input data [ci skip]
abhi18av Sep 17, 2021
fb92a64
Merge branch 'master' into abhinav/deepvariant
grst Sep 29, 2021
39a1dda
Merge branch 'master' into abhinav/deepvariant
abhi18av Oct 1, 2021
8bb19c4
Merge branch 'master' into abhinav/deepvariant
abhi18av Oct 26, 2021
57f82c1
Merge branch 'master' into abhinav/deepvariant
abhi18av Jan 10, 2022
9edc146
Merge branch 'master' into abhinav/deepvariant
FriederikeHanssen Jan 13, 2022
7f89c85
Merge branch 'master' into abhinav/deepvariant
abhi18av Jan 16, 2022
9a87d41
update for the new syntax
abhi18av Jan 16, 2022
f5bd8ab
remove the functions and rename meta vars
abhi18av Jan 16, 2022
99d58d6
Update the arguments mechanism
abhi18av Jan 16, 2022
3734309
update chr, region and checksum
abhi18av Jan 17, 2022
143d83e
Merge branch 'master' into abhinav/deepvariant
FriederikeHanssen Jan 17, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions modules/deepvariant/functions.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//
// Utility functions used in nf-core DSL2 module files
//

//
// Derive the lower-case software name from a process name such as $task.process:
// take the last ':'-separated element and keep the part before its first '_'
//
def getSoftwareName(task_process) {
    def scope_parts  = task_process.tokenize(':')
    def process_name = scope_parts[-1]
    def name_parts   = process_name.tokenize('_')
    return name_parts[0].toLowerCase()
}

//
// Build the Groovy Map of options available to nf-core modules,
// substituting a default for every key the caller left unset
//
def initOptions(Map args) {
    return [
        args            : args.args ?: '',
        args2           : args.args2 ?: '',
        args3           : args.args3 ?: '',
        publish_by_meta : args.publish_by_meta ?: [],
        publish_dir     : args.publish_dir ?: '',
        publish_files   : args.publish_files,   // copied as-is, so it stays null when unset
        suffix          : args.suffix ?: ''
    ]
}

//
// Tidy up and join elements of a list to return a path string
//
// Null, empty and whitespace-only entries are dropped. Every remaining entry is trimmed and
// stripped of leading and trailing slashes before the entries are joined with '/'.
//
def getPathFromList(path_list) {
    // Groovy truth treats both null and '' as false. The former `!item?.trim().isEmpty()`
    // failed on a null entry because `?.` does not short-circuit the rest of the call chain,
    // so `.isEmpty()` ended up being invoked on null.
    def paths = path_list.findAll { item -> item?.trim() }
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace, leading and trailing slashes
    return paths.join('/')
}

//
// Function to save/publish module results
//
// Returns the publish path for args.filename relative to the publishDir root,
// or null when the file is not to be published
//
def saveFiles(Map args) {
    // Version files are never published
    if (args.filename.endsWith('.version.txt')) {
        return null
    }

    def opts = initOptions(args.options)
    def dirs = [ opts.publish_dir ?: args.publish_dir ]

    // Optionally append one sub-directory per requested meta key
    if (opts.publish_by_meta) {
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (meta_key in meta_keys) {
            if (!(args.meta && meta_key instanceof String)) {
                continue
            }
            // Fall back to the key itself when the meta map has no such entry
            def segment = meta_key
            if (args.meta.containsKey(meta_key)) {
                def meta_value = args.meta[meta_key]
                segment = meta_value instanceof Boolean ? "${meta_key}_${meta_value}".toString() : meta_value
            }
            // Anything that is not a plain String contributes an empty segment
            dirs.add(segment instanceof String ? segment : '')
        }
    }

    // A Map of publish_files publishes only files whose name ends with one of its keys,
    // placing them in the sub-directory given by the matching value
    if (opts.publish_files instanceof Map) {
        for (rule in opts.publish_files) {
            if (args.filename.endsWith(rule.key)) {
                def target = dirs.collect()
                target.add(rule.value)
                return "${getPathFromList(target)}/$args.filename"
            }
        }
        return null
    }

    // No publish_files filter at all: publish everything
    if (opts.publish_files == null) {
        return "${getPathFromList(dirs)}/$args.filename"
    }

    // Any other non-null publish_files value publishes nothing
    return null
}
58 changes: 58 additions & 0 deletions modules/deepvariant/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName } from './functions'

// TODO nf-core: A module file SHOULD only define input and output files as command-line parameters.
// All other parameters MUST be provided as a string i.e. "options.args"
// where "params.options" is a Groovy Map that MUST be provided via the addParams section of the including workflow.
// Any parameters that need to be evaluated in the context of a particular sample
// e.g. single-end/paired-end data MUST also be defined and evaluated appropriately.

// Start from an empty option map; per the note above, the including workflow is expected
// to supply params.options through addParams
params.options = [:]
// Module-wide options with the initOptions defaults applied (options.args and options.suffix
// are read by the process script below)
options = initOptions(params.options)

process DEEPVARIANT {
tag "$meta.id"
label 'process_high'
publishDir "${params.outdir}",
mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

conda (params.enable_conda ? "bioconda::deepvariant=1.1.0" : null)
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://depot.galaxyproject.org/singularity/deepvariant:1.1.0--py36hf3e76ba_2"
} else {
// TODO update the bioconda container to work with run_deepvariant.sh script
// container "quay.io/biocontainers/deepvariant:1.1.0--py36hf3e76ba_2"
container "google/deepvariant:1.1.0"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not familiar with this but is there a reason why you commented out the container hosted by quay.io? If you use the statement google/deepvariant:1.1.0 should you include the host? docker.io/google/deepvariant:1.1.0

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason is that the quay version doesn't contain the new /opt/deepvariant/bin/run_deepvariant command which combines all other scripts.

Therefore, I was planning to get started with the official Google container and then transition to the quay one - what do you suggest?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update:

Internally, the exact commands run by the run_deepvariant wrapper seem to be the following (for the command mentioned here)

time seq 0 1 | parallel -q --halt 2 --line-buffer /opt/deepvariant/bin/make_examples --mode calling --ref "/input/ucsc.hg19.chr20.unittest.fasta" --reads "/input/NA12878_S1.chr20.10_10p1mb.bam" --examples "/output/intermediate_results_dir/make_examples.tfrecord@2.gz" --gvcf "/output/intermediate_results_dir/gvcf.tfrecord@2.gz" --regions "chr20:10,000,000-10,010,000" --task {}

time /opt/deepvariant/bin/call_variants --outfile "/output/intermediate_results_dir/call_variants_output.tfrecord.gz" --examples "/output/intermediate_results_dir/make_examples.tfrecord@2.gz" --checkpoint "/opt/models/wgs/model.ckpt" --openvino_model_dir "/output/intermediate_results_dir"

time /opt/deepvariant/bin/postprocess_variants --ref "/input/ucsc.hg19.chr20.unittest.fasta" --infile "/output/intermediate_results_dir/call_variants_output.tfrecord.gz" --outfile "/output/output.vcf.gz" --nonvariant_site_tfrecord_path "/output/intermediate_results_dir/gvcf.tfrecord@2.gz" --gvcf_outfile "/output/output.g.vcf.gz"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Further updates:

I tried to rely entirely on the native make_examples.py command; however, that doesn't seem to work with the bioconda-based distribution.

dv_make_examples.py \
    --ref ucsc.hg19.chr20.unittest.fasta \
    --sample NA12878_S1.chr20.10_10p1mb \
    --reads NA12878_S1.chr20.10_10p1mb.bam \
    --gvcf test.g.vcf.gz \
    --regions "chr20:10,000,000-10,010,000" \
    --logdir "intermediate_results_dir/logs" \
    --examples "intermediate_results_dir/make_examples.tfrecord@2.gz" 

Output:

Academic tradition requires you to cite works you base your article on.
If you use programs that use GNU Parallel to process data for an article in a
scientific publication, please cite:

  Tange, O. (2021, June 22). GNU Parallel 20210622 ('Protasevich').
  Zenodo. https://doi.org/10.5281/zenodo.5013933

This helps funding further development; AND IT WON'T COST YOU A CENT.
If you pay 10000 EUR you should feel free to use GNU Parallel without citing.

More about funding GNU Parallel and the citation notice:
https://www.gnu.org/software/parallel/parallel_design.html#Citation-notice

To silence this citation notice: run 'parallel --citation' once.

sh: /usr/local/lib/libtinfo.so.6: no version information available (required by sh)

Computers / CPU cores / Max jobs to run
1:local / 4 / 1

Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete
ETA: 0s Left: 1 AVG: 0.00s  local:1/0/100%/0.0s sh: /usr/local/lib/libtinfo.so.6: no version information available (required by sh)
ETA: 0s Left: 1 AVG: 0.00s  local:1/0/100%/0.0s /bin/bash: /usr/local/lib/libtinfo.so.6: no version information available (required by /bin/bash)
sh: /usr/local/lib/libtinfo.so.6: no version information available (required by sh)
Traceback (most recent call last):
  File "/usr/local/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/local/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/share/deepvariant-1.2.0-0/binaries/DeepVariant/1.2.0/DeepVariant-1.2.0/make_examples.zip/__main__.py", line 375, in <module>
  File "/usr/local/share/deepvariant-1.2.0-0/binaries/DeepVariant/1.2.0/DeepVariant-1.2.0/make_examples.zip/__main__.py", line 348, in Main
  File "/usr/local/lib/python3.6/subprocess.py", line 287, in call
    with Popen(*popenargs, **kwargs) as p:
  File "/usr/local/lib/python3.6/subprocess.py", line 729, in __init__
    restore_signals, start_new_session)
  File "/usr/local/lib/python3.6/subprocess.py", line 1364, in _execute_child
    raise child_exception_type(errno_num, err_msg, err_filename)
FileNotFoundError: [Errno 2] No such file or directory: '/usr/bin/python3': '/usr/bin/python3'
parallel: This job failed:
/usr/local/bin/python /usr/local/share/deepvariant-1.2.0-0/binaries/DeepVariant/1.2.0/DeepVariant-1.2.0/make_examples.zip --mode calling --ref ucsc.hg19.chr20.unittest.fasta --reads NA12878_S1.chr20.10_10p1mb.bam --regions chr20:10,000,000-10,010,000 --gvcf test.g.vcf.gz/NA12878_S1.chr20.10_10p1mb.gvcf.tfrecord@1.gz --sample_name NA12878_S1.chr20.10_10p1mb --examples intermediate_results_dir/make_examples.tfrecord@2.gz/NA12878_S1.chr20.10_10p1mb.tfrecord@1.gz --task 0

For the time being, I'll continue with the Google docker based module dev.

}

input:
    // Sample information is passed as a Groovy Map called "meta", together with the
    // alignment file and (presumably) its index
    tuple val(meta), path(bam), path(bai)
    // Reference passed to --ref, and (presumably) its .fai index staged alongside it
    path fasta
    path fai

output:
    // Exact file names are used instead of globs: "*.vcf.gz" also matches the gVCF
    // "<prefix>.g.vcf.gz", so the vcf channel previously emitted both files.
    tuple val(meta), path("${prefix}.vcf.gz")  , emit: vcf
    tuple val(meta), path("${prefix}.g.vcf.gz"), emit: gvcf
    path "*.version.txt"                       , emit: version

script:
    def software = getSoftwareName(task.process)
    // Declared without `def` so that the output declarations above can resolve ${prefix}
    prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    """
    /opt/deepvariant/bin/run_deepvariant \\
        --ref=${fasta} \\
        --reads=${bam} \\
        --output_vcf=${prefix}.vcf.gz \\
        --output_gvcf=${prefix}.g.vcf.gz \\
        --num_shards=${task.cpus} \\
        ${options.args}

    echo \$(/opt/deepvariant/bin/run_deepvariant --version) > ${software}.version.txt
    """

}
47 changes: 47 additions & 0 deletions modules/deepvariant/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
name: deepvariant
description: DeepVariant is an analysis pipeline that uses a deep neural network to call genetic variants from next-generation DNA sequencing data
keywords:
  - variant calling
  - machine learning
tools:
  - deepvariant:
      description: DeepVariant is an analysis pipeline that uses a deep neural network to call genetic variants from next-generation DNA sequencing data
      homepage: https://github.com/google/deepvariant
      documentation: https://github.com/google/deepvariant
      tool_dev_url: https://github.com/google/deepvariant
      doi: "10.1038/nbt.4235"
      licence: ['BSD-3-clause']

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: Aligned, sorted BAM/CRAM file
      pattern: "*.{bam,cram}"
  - bai:
      type: file
      description: Index of the BAM/CRAM file
      pattern: "*.{bai,crai}"
  - fasta:
      type: file
      description: Reference genome in FASTA format
      pattern: "*.{fa,fasta}"
  - fai:
      type: file
      description: Index of the reference FASTA file
      pattern: "*.fai"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: Compressed VCF file with the called variants
      pattern: "*.vcf.gz"
  - gvcf:
      type: file
      description: Compressed gVCF file
      pattern: "*.g.vcf.gz"
  - version:
      type: file
      description: File containing software version
      pattern: "*.{version.txt}"

authors:
  - "@abhi18av"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,10 @@ deeptools/plotprofile:
- modules/deeptools/plotprofile/**
- tests/modules/deeptools/plotprofile/**

deepvariant:
- modules/deepvariant/**
- tests/modules/deepvariant/**

delly/call:
- modules/delly/call/**
- tests/modules/delly/call/**
Expand Down
13 changes: 13 additions & 0 deletions tests/modules/deepvariant/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { DEEPVARIANT } from '../../../modules/deepvariant/main.nf' addParams( options: [:] )

workflow test_deepvariant {

    // DEEPVARIANT declares three inputs: [ meta, bam, bai ], the reference FASTA and its index.
    // NOTE(review): the test_data keys below are assumed to exist in the shared test config - confirm.
    bam_tuple = [ [ id:'test', single_end:false ], // meta map
                  file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
                  file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ]

    fasta = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    fai   = file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)

    DEEPVARIANT ( bam_tuple, fasta, fai )
}
9 changes: 9 additions & 0 deletions tests/modules/deepvariant/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
## TODO nf-core: Please run the following command to build this file:
# nf-core modules create-test-yml deepvariant
- name: deepvariant
  command: nextflow run ./tests/modules/deepvariant -entry test_deepvariant -c tests/config/nextflow.config
  tags:
    - deepvariant
  files:
    # The module publishes a VCF and a gVCF, not a BAM. Checksums are left out until they
    # are generated with `nf-core modules create-test-yml deepvariant`.
    - path: output/deepvariant/test.vcf.gz
    - path: output/deepvariant/test.g.vcf.gz
abhi18av marked this conversation as resolved.
Show resolved Hide resolved