Skip to content

Commit

Permalink
Replace input CSV with YAML and parse sample ID from BAM (#106)
Browse files Browse the repository at this point in the history
* update submodules

* remove CSV and add YAML input

* add custom schema

* update output dir structure set up

* change patient_id to sample_id

* update pipeval

* remove empty line

* update channels to parse input from YAML

* Update CHANGELOG.md

* fix YAML linting

* fix YAML linting

* update help comments

* remove redundant code

* update comment in config

* update logging

* use params.samples_to_process to parse sample id

* set sample ID from tumor ID extracted using params.samples_to_process

* remove redundant code

* check for multiple tumor samples before setting sample param

* fix typo in error

---------

Co-authored-by: Mootor <mmootor@ip-0A125250.rhxrlfvjyzbupc03cc22jkch3c.xx.internal.cloudapp.net>
  • Loading branch information
Faizal-Eeman and Mootor authored Aug 1, 2023
1 parent 0c8def0 commit 9917c17
Show file tree
Hide file tree
Showing 11 changed files with 229 additions and 131 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,15 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
---

## [Unreleased]
- Update `README.md` to clarify adjustable parameters and note lab default values.
### Added
- YAML input

### Changed
- Parse sample ID from tumor BAM for output directory naming
- Update `README.md` to clarify adjustable parameters and note lab default values

### Removed
- CSV input

---

Expand Down
91 changes: 91 additions & 0 deletions config/custom_schema_types.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/**
 * This custom schema namespace implements a custom type for checking input BAMs for call-sSV.
 * Each closure below is registered in `types` and invoked by the external schema validator
 * (schema.validate_parameter) when the matching type name appears in config/schema.yaml.
 */
custom_schema_types {
    // Top-level keys allowed under `input`
    allowed_input_types = [
        'BAM'
    ]
    // Sample types allowed under `input.BAM`
    allowed_bam_types = [
        'normal',
        'tumor'
    ]

    /**
     * Check that every given key is in the allowed list.
     * @param given   keys found in the input namespace
     * @param name    parameter name, used for the error message
     * @param choices allowed keys (defaults to allowed_input_types)
     * @throws Exception when a key is not in `choices`
     */
    check_input_type_keys = { List given, String name, List choices=custom_schema_types.allowed_input_types ->
        for (elem in given) {
            if (!(elem in choices)) {
                // Fixed typo: "paramter" -> "parameter"
                throw new Exception("Invalid parameter ${name}. Valid types: ${choices}.")
            }
        }
    }

    /**
     * Check if given input is a Namespace (Groovy Map).
     * @throws Exception when `val` is not a Map
     */
    check_if_namespace = { val, String name ->
        if (!(val in Map)) {
            throw new Exception("${name} should be a Namespace, not ${val.getClass()}.")
        }
    }

    /**
     * Check if given input is a list (List or Set both accepted).
     * @throws Exception when `val` is neither
     */
    check_if_list = { val, String name ->
        if (!(val in List || val in Set)) {
            throw new Exception("${name} should be a List, not ${val.getClass()}.")
        }
    }

    /**
     * Check that input is a namespace of expected types, then validate each
     * entry recursively against the schema's `elements` definition.
     */
    check_input_namespace = { Map options, String name, Map properties ->
        // Check parameter keys against allowed_input_types (default choices)
        custom_schema_types.check_if_namespace(options[name], name)
        def given_keys = options[name].keySet() as ArrayList
        custom_schema_types.check_input_type_keys(given_keys, name)

        options[name].each { entry ->
            def entry_as_map = [:]
            entry_as_map[entry.key] = entry.value
            schema.validate_parameter(entry_as_map, entry.key, properties.elements[entry.key])
        }
    }

    /**
     * Check the BAM namespace: must be non-empty, keys restricted to
     * allowed_bam_types ('normal'/'tumor'), each entry validated recursively.
     */
    check_bam_namespace = { Map options, String name, Map properties ->
        custom_schema_types.check_if_namespace(options[name], name)
        def given_keys = options[name].keySet() as ArrayList
        if (given_keys.size() <= 0) {
            // CSV input was removed from this pipeline; only YAML input is supported
            throw new Exception("No inputs provided! Please provide inputs in the input YAML.")
        }
        custom_schema_types.check_input_type_keys(given_keys, name, custom_schema_types.allowed_bam_types)

        options[name].each { entry ->
            def entry_as_map = [:]
            entry_as_map[entry.key] = entry.value
            schema.validate_parameter(entry_as_map, entry.key, properties.elements[entry.key])
        }
    }

    /**
     * Check that the value is a proper BAM entry list and that every
     * listed path exists and is readable.
     */
    check_bam_list = { Map options, String name, Map properties ->
        custom_schema_types.check_if_list(options[name], name)
        for (item in options[name]) {
            schema.check_path(item, 'r')
        }
    }

    // Type-name -> validator mapping consumed by schema.load_custom_types
    types = [
        'InputNamespace': custom_schema_types.check_input_namespace,
        'InputBAMNamespace': custom_schema_types.check_bam_namespace,
        'BAMEntryList': custom_schema_types.check_bam_list
    ]
}
2 changes: 1 addition & 1 deletion config/default.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ params {
delly_version = '1.1.3'
manta_version = '1.6.0'
bcftools_version = '1.15.1'
pipeval_version = '3.0.0'
pipeval_version = '4.0.0-rc.2'

// Docker tool versions
docker_image_delly = "${-> params.docker_container_registry}/delly:${params.delly_version}"
Expand Down
48 changes: 33 additions & 15 deletions config/methods.config
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
includeConfig "../external/pipeline-Nextflow-config/config/retry/retry.config"
import nextflow.util.SysHelper
includeConfig "../external/pipeline-Nextflow-config/config/bam/bam_parser.config"
includeConfig "../external/pipeline-Nextflow-config/config/methods/common_methods.config"
includeConfig "../external/pipeline-Nextflow-config/config/schema/schema.config"

includeConfig "../external/pipeline-Nextflow-config/config/retry/retry.config"

methods {
check_permissions = { path ->
Expand All @@ -19,15 +21,31 @@ methods {
}
}

set_ids_from_bams = {
params.samples_to_process = [] as Set
params.input.BAM.each { k, v ->
v.each { bam_path ->
def bam_header = bam_parser.parse_bam_header(bam_path)
def sm_tags = bam_header['read_group'].collect{ it['SM'] }.unique()

if (sm_tags.size() != 1) {
throw new Exception("${bam_path} contains multiple samples! Please run pipeline with single sample BAMs.")
}
params.samples_to_process.add(['id': sm_tags[0], 'path': bam_path, 'sample_type': k])
}
}
}

set_output_dir = {
def sample
sample = params.samples_to_process
.findAll{ it.sample_type == 'tumor' }
.collect{ it.id }

// assumes that project and samples name are in the pipeline.config
def reader = new FileReader(params.input_csv)
reader.splitEachLine(',') { parts -> [sample = parts[1].split('/')[-1].split('.bam')[0]] }
if (sample.size() != 1) {
throw new Exception("${params.samples_to_process}\n\n Multiple Tumor BAMs found in the input! Please run pipeline one Tumor sample at a time.")
}

params.sample = "${sample}"
params.sample = sample[0]

params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample}"
}
Expand Down Expand Up @@ -98,7 +116,7 @@ methods {

set_resources_allocation = {
// Function to ensure that resource requirements don't go beyond
// a maximum limit
// a maximum limit
node_cpus = params.max_cpus
node_memory_GB = params.max_memory.toGiga()
// Load base.config by default for all pipelines
Expand All @@ -125,7 +143,6 @@ methods {
}
}


/**
* Check the permissions and existence of workDir.
* If it doesn't exist, recursively find first existing directory and check write permission.
Expand Down Expand Up @@ -185,7 +202,7 @@ methods {

timeline.enabled = true
timeline.file = "${params.log_output_dir}/nextflow-log/timeline.html"

report.enabled = true
report.file = "${params.log_output_dir}/nextflow-log/report.html"
}
Expand All @@ -202,16 +219,17 @@ methods {

// Set up env, timeline, trace, and report above.
setup = {
methods.set_env()
schema.load_custom_types("${projectDir}/config/custom_schema_types.config")
schema.validate()
methods.set_ids_from_bams()
methods.set_resources_allocation()
methods.set_output_dir()
methods.set_log_output_dir()
methods.check_permissions(params.log_output_dir)
methods.set_env()
methods.set_resources_allocation()
methods.set_pipeline_logs()
methods.set_process()
methods.set_docker_sudo()
methods.set_pipeline_logs()
retry.setup_retry()
schema.validate()
}
}

99 changes: 58 additions & 41 deletions config/schema.yaml
Original file line number Diff line number Diff line change
@@ -1,51 +1,68 @@
---
input_csv:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to the input CSV file'
sample_id:
type: 'String'
required: true
help: 'Sample ID'
reference_fasta:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to a reference FASTA file'
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to a reference FASTA file'
exclusion_file:
type: 'Path'
mode: 'r'
required: true
help: 'Absoulte path to an exclusion file'
type: 'Path'
mode: 'r'
required: true
help: 'Absoulte path to an exclusion file'
algorithm:
type: 'List'
required: true
help: 'List of SV caller(s) for calling'
default:
- delly
- manta
choices:
- delly
- manta
type: 'List'
required: true
help: 'List of available somatic SV callers'
default:
- delly
- manta
choices:
- delly
- manta
output_dir:
type: 'Path'
mode: 'w'
required: true
help: 'Absolute path to output directory'
type: 'Path'
mode: 'w'
required: true
help: 'Absolute path to output directory'
dataset_id:
type: 'String'
required: true
help: 'Dataset identifier'
type: 'String'
required: true
help: 'Dataset identifier'
map_qual:
type: 'Integer'
required: true
default: 20
type: 'Integer'
required: true
default: 20
min_clique_size:
type: 'Integer'
required: true
default: 5
type: 'Integer'
required: true
default: 5
mad_cutoff:
type: 'Integer'
required: true
default: 15
type: 'Integer'
required: true
default: 15
filter_condition:
type: 'String'
required: true
default: "FILTER=='PASS'"
type: 'String'
required: true
default: "FILTER=='PASS'"
input:
type: 'InputNamespace'
required: true
help: 'Input samples'
elements:
BAM:
type: 'InputBAMNamespace'
required: true
help: 'Input BAMs for somatic structural variant calling'
elements:
normal:
type: 'BAMEntryList'
required: false
help: 'Input normal BAMs'
tumor:
type: 'BAMEntryList'
required: false
help: 'Input tumor BAMs'
2 changes: 1 addition & 1 deletion external/pipeline-Nextflow-config
2 changes: 0 additions & 2 deletions input/call-sSV-input.csv

This file was deleted.

8 changes: 8 additions & 0 deletions input/call-sSV-input.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
# Example input YAML for call-sSV; replace the placeholder paths below.
sample_id: "sample_id"
input:
  BAM:
    normal:
      - "/absolute/path/to/BAM"
    tumor:
      # Fixed typo: "abosolute" -> "absolute" (matches the normal entry)
      - "/absolute/path/to/BAM"
Loading

0 comments on commit 9917c17

Please sign in to comment.