Skip to content

Commit

Permalink
Add GRIDSS preprocessing (#175)
Browse files Browse the repository at this point in the history
* add gridss nf module

* add preprocess gridss BAM process and update I/O/Script section

* add gridss version and docker image

* add preprocess_BAM_GRIDSS

* add gridss_reference_dir param to template.config

* input preprocess channels; set variables; publish formatted filenames

* add gridss2 channels, ref and process call

* add gridss2 to algorithms in template

* update schema YAML

* add gridss ref dir to schema

* Update CHANGELOG.md

* update metadata.yaml

* add jvmheap memory arg to gridss

* 4 CPUs and 10GB memory for gridss preprocessing

* change gridss ref dir to ref FASTA

* change gridss ref dir to ref FASTA

* reconfigure gridss ref files variable

* fix gridss ref file path

* store intermediate files in their process dirs and store sample files in sample dirs

* save logs in task index dir

* add F32, F72 and M64 configs

* update memory in F32, F72 and M64

---------

Co-authored-by: Mootor <mmootor@ip-0A125937.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
  • Loading branch information
Faizal-Eeman and Mootor authored Oct 1, 2024
1 parent 94daa7c commit 253dd16
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 13 deletions.
5 changes: 2 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

## [Unreleased]
### Added
- Add GRIDSS2 preprocessing
- Add supported Nextflow version to `README.md`
- Add PlantUML diagram

### Changed
- Update PlantUML action to `v1.0.1`
- Update memory allocations in `M64.config`

### Added
- Add PlantUML diagram

---

## [6.1.0] - 2024-03-12
Expand Down
13 changes: 12 additions & 1 deletion config/F16.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ process {
memory = 1.GB
}

// Resource allocation for the GRIDSS2 preprocessing process on this node type:
// 4 CPUs and 10 GB of memory for the first attempt.
withName: preprocess_BAM_GRIDSS {
cpus = 4
memory = 10.GB
// Memory retry settings: 'exponential' strategy with an operand of 2.
// NOTE(review): presumably read by the pipeline's shared retry helper to scale
// memory on each retry; confirm against that helper, which is not part of this file.
retry_strategy {
memory {
strategy = 'exponential'
operand = 2
}
}
}

withName: call_sSV_Delly {
cpus = 1
memory = 16.GB
Expand Down Expand Up @@ -36,7 +47,7 @@ process {
}
}
}

withName: call_sSV_Manta {
cpus = 1
memory = 16.GB
Expand Down
13 changes: 12 additions & 1 deletion config/F32.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ process {
memory = 1.GB
}

// Resource allocation for the GRIDSS2 preprocessing process on this node type:
// 4 CPUs and 10 GB of memory for the first attempt.
withName: preprocess_BAM_GRIDSS {
cpus = 4
memory = 10.GB
// Memory retry settings: 'exponential' strategy with an operand of 2.
// NOTE(review): presumably read by the pipeline's shared retry helper to scale
// memory on each retry; confirm against that helper, which is not part of this file.
retry_strategy {
memory {
strategy = 'exponential'
operand = 2
}
}
}

withName: call_sSV_Delly {
cpus = 1
memory = 30.GB
Expand Down Expand Up @@ -36,7 +47,7 @@ process {
}
}
}

withName: call_sSV_Manta {
cpus = 1
memory = 30.GB
Expand Down
13 changes: 12 additions & 1 deletion config/F72.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ process {
memory = 1.GB
}

// Resource allocation for the GRIDSS2 preprocessing process on this node type:
// 4 CPUs and 15 GB of memory for the first attempt.
withName: preprocess_BAM_GRIDSS {
cpus = 4
memory = 15.GB
// Memory retry settings: 'exponential' strategy with an operand of 2.
// NOTE(review): presumably read by the pipeline's shared retry helper to scale
// memory on each retry; confirm against that helper, which is not part of this file.
retry_strategy {
memory {
strategy = 'exponential'
operand = 2
}
}
}

withName: call_sSV_Delly {
cpus = 1
memory = 30.GB
Expand Down Expand Up @@ -36,7 +47,7 @@ process {
}
}
}

withName: call_sSV_Manta {
cpus = 1
memory = 30.GB
Expand Down
13 changes: 12 additions & 1 deletion config/M64.config
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@ process {
memory = 1.GB
}

// Resource allocation for the GRIDSS2 preprocessing process on this node type:
// 4 CPUs and 100 GB of memory for the first attempt.
withName: preprocess_BAM_GRIDSS {
cpus = 4
memory = 100.GB
// Memory retry settings: 'exponential' strategy with an operand of 2.
// NOTE(review): presumably read by the pipeline's shared retry helper to scale
// memory on each retry; confirm against that helper, which is not part of this file.
retry_strategy {
memory {
strategy = 'exponential'
operand = 2
}
}
}

withName: call_sSV_Delly {
cpus = 1
memory = 120.GB
Expand Down Expand Up @@ -36,7 +47,7 @@ process {
}
}
}

withName: call_sSV_Manta {
cpus = 1
memory = 30.GB
Expand Down
2 changes: 2 additions & 0 deletions config/default.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,14 @@ params {
// Pipeline tool versions
delly_version = '1.2.6'
manta_version = '1.6.0'
gridss_version = '2.13.2'
bcftools_version = '1.15.1'
pipeval_version = '4.0.0-rc.2'

// Docker tool versions
docker_image_delly = "${-> params.docker_container_registry}/delly:${params.delly_version}"
docker_image_manta = "${-> params.docker_container_registry}/manta:${params.manta_version}"
docker_image_gridss = "${-> params.docker_container_registry}/gridss:${params.gridss_version}"
docker_image_bcftools = "${-> params.docker_container_registry}/bcftools:${params.bcftools_version}"
docker_image_validate = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}"
}
Expand Down
7 changes: 7 additions & 0 deletions config/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ reference_fasta:
mode: 'r'
required: true
help: 'Absolute path to a reference FASTA file'
gridss_reference_fasta:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to GRIDSS2 reference FASTA file'
exclusion_file:
type: 'Path'
mode: 'r'
Expand All @@ -20,9 +25,11 @@ algorithm:
default:
- delly
- manta
- gridss2
choices:
- delly
- manta
- gridss2
output_dir:
type: 'Path'
mode: 'w'
Expand Down
3 changes: 2 additions & 1 deletion config/template.config
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,14 @@ params {
blcds_registered_dataset = false

reference_fasta = "/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta"
gridss_reference_fasta = "/hot/ref/tool-specific-input/GRIDSS-2.13.2/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta"

exclusion_file = "/hot/ref/tool-specific-input/Delly/hg38/human.hg38.excl.tsv"

output_dir = "where/to/save/outputs/"

// select the tool(s) to run
algorithm = [] // algorithm = ['delly', 'manta']
algorithm = [] // algorithm = ['delly', 'manta', 'gridss2']

save_intermediate_files = false

Expand Down
23 changes: 20 additions & 3 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Current Configuration:
DELLY: ${params.delly_version}
BCFtools: ${params.bcftools_version}
Manta: ${params.manta_version}
GRIDSS2: ${params.gridss_version}
PipeVal: ${params.pipeval_version}
------------------------------------
Expand All @@ -54,6 +55,9 @@ include { call_sSV_Delly; filter_sSV_Delly } from './module/delly' addParams(
include { call_sSV_Manta } from './module/manta' addParams(
workflow_output_dir: "${params.output_dir_base}/Manta-${params.manta_version}"
)
include { preprocess_BAM_GRIDSS } from './module/gridss' addParams(
workflow_output_dir: "${params.output_dir_base}/GRIDSS-${params.gridss_version}"
)
include { generate_sha512 as generate_sha512_BCFtools } from './module/sha512' addParams(
workflow_output_dir: "${params.output_dir_base}/DELLY-${params.delly_version}"
)
Expand All @@ -74,10 +78,15 @@ Channel.from(params.samples_to_process)
.map{ sample -> ['index': indexFile(sample.path)] + sample }
.set{ input_ch_samples_with_index }

// Build the per-sample input for GRIDSS2 preprocessing: one
// [sample ID, BAM path, BAM index path] list per entry in samples_to_process.
gridss_ch = Channel.from(params.samples_to_process)
    .map{ entry ->
        def bam_path = entry.path
        [entry.id, bam_path, indexFile(bam_path)]
    }

input_ch_samples_with_index
.map{ sample -> [sample.path, sample.index] }
.flatten()
.set{ input_validation }

if (params.verbose){
input_validation.view()
}
Expand All @@ -96,6 +105,9 @@ if (params.verbose){

reference_fasta_index = "${params.reference_fasta}.fai"

// Gather the companion files of the GRIDSS reference FASTA, i.e. every path
// matching "<gridss_reference_fasta>.*", into a single list emission.
// The FASTA itself does not match this glob; it is passed to the process separately.
gridss_reference_files = Channel
    .fromPath( "${params.gridss_reference_fasta}.*", checkIfExists: true )
    .collect()

workflow {
/**
* Validate the input bams
Expand All @@ -106,7 +118,6 @@ workflow {
name: 'input_validation.txt',
storeDir: "${params.output_dir_base}/validation/run_validate_PipeVal"
)

/**
* Call "delly call -x hg19.excl -o t1.bcf -g hg19.fa tumor1.bam normal1.bam" per paired (tumor sample, normal sample)
* The sv are stored in call_sSV_Delly.out.nt_call_bcf
Expand Down Expand Up @@ -158,7 +169,6 @@ workflow {
call_sSV_Delly.out.nt_call_bcf_csi,
call_sSV_Delly.out.tumor_id
)

/**
* Filter the output bcf from filter_sSV_Delly.
* The default filter_condition is "FILTER=='PASS'", which filters out NonPass calls.
Expand All @@ -168,7 +178,6 @@ workflow {
params.filter_condition,
call_sSV_Delly.out.tumor_id
)

/**
* Generate one sha512 checksum for DELLY's output files.
*/
Expand All @@ -189,4 +198,12 @@ workflow {
call_sSV_Manta.out.manta_vcfs.flatten()
)
}
if ('gridss2' in params.algorithm) {
preprocess_BAM_GRIDSS(
gridss_ch,
params.gridss_reference_fasta,
gridss_reference_files
)
}

}
4 changes: 2 additions & 2 deletions metadata.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
---
category: "pipeline"
description: "Nextflow pipeline to call somatic structural variants using DELLY and Manta"
description: "Nextflow pipeline to call somatic structural variants using DELLY, Manta and GRIDSS2"
maintainers: "Boutros Lab Infrastructure <BoutrosLabInfrastructure@mednet.ucla.edu>"
languages: ["Nextflow", "Docker"]
dependencies: ["Java", "Nextflow", "Docker"]
references: "https://uclahs.box.com/s/qfzr99sc8ntmfddn30ii62wx4273utoz"
tools: ["Delly:v1.2.6", "Manta:v1.6.0", "BCFtools:v1.15.1", "PipeVal:v4.0.0-rc.2"]
tools: ["Delly:v1.2.6", "Manta:v1.6.0", "GRIDSS2:v2.13.2", "BCFtools:v1.15.1", "PipeVal:v4.0.0-rc.2"]
58 changes: 58 additions & 0 deletions module/gridss.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env nextflow

log.info """\
------------------------------------
G R I D S S 2
------------------------------------
Docker Images:
- docker_image_gridss: ${params.docker_image_gridss}
"""

include { generate_standard_filename; sanitize_string } from '../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf'

/**
 * Run the GRIDSS2 `preprocess` step on a single BAM.
 *
 * Input:
 *   - tuple of sample ID, BAM file and BAM index
 *   - gridss_reference_fasta: reference FASTA handed to `gridss -r`
 *   - gridss_reference_files: files staged into the work directory next to the FASTA
 *
 * Output:
 *   - gridss_preprocess: every file under `<BAM file name>.gridss.working/`
 *   - the `.command.*` files of the task, published as process logs
 */
process preprocess_BAM_GRIDSS {
    container params.docker_image_gridss

    // The preprocessing outputs are published under `intermediate/`, so honour the
    // pipeline-wide `save_intermediate_files` switch instead of always copying them.
    // On publish, the BAM file name prefix is swapped for the standardized filename.
    publishDir "${params.workflow_output_dir}/intermediate/${task.process.replace(':', '/')}",
        enabled: params.save_intermediate_files,
        pattern: "${bam_name}.gridss.working/*",
        mode: "copy",
        saveAs: {
            "${output_filename}.gridss.working/${output_filename}.${sanitize_string(file(it).getName().replace("${bam_name}.", ""))}"
        }

    // Task logs are always kept, grouped by process name and task index.
    publishDir "${params.log_output_dir}/process-log",
        pattern: ".command.*",
        mode: "copy",
        saveAs: { "${task.process.replace(':', '/')}/${task.process}-${task.index}/log${file(it).getName()}" }

    input:
    tuple(val(sample_id), path(sample_bam), path(sample_index))
    path(gridss_reference_fasta)
    path(gridss_reference_files)

    output:
    path "${bam_name}.gridss.working/*", emit: gridss_preprocess
    path ".command.*"

    script:
    // NOTE(review): the JVM heap is set to the task's whole memory allocation (in GB),
    // leaving no headroom for non-heap usage inside the container - confirm this is intended.
    gridss_mem = "${task.memory.toGiga()}g"
    // NOTE(review): jar location is hard-coded with a `-1` suffix; presumably it matches the
    // layout of the GRIDSS docker image - re-check whenever `gridss_version` changes.
    gridss_jar = "/usr/local/share/gridss-${params.gridss_version}-1/gridss.jar"
    // The output glob and the publish pattern are both derived from the BAM file name.
    bam_name = file(sample_bam).getName()
    output_filename = generate_standard_filename(
        "GRIDSS2-${params.gridss_version}",
        params.dataset_id,
        sample_id,
        [:]
    )

    """
    set -euo pipefail
    gridss \
        -r ${gridss_reference_fasta} \
        -j ${gridss_jar} \
        -s preprocess \
        -t ${task.cpus} \
        --jvmheap ${gridss_mem} \
        ${sample_bam}
    """
}

0 comments on commit 253dd16

Please sign in to comment.