Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace input CSV with YAML and parse sample ID from BAM #106

Merged
merged 20 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,15 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
---

## [Unreleased]
- Update `README.md` to clarify adjustable parameters and note lab default values.
### Added
- YAML input

### Changed
- Parse sample ID from tumor BAM for output directory naming
- Update `README.md` to clarify adjustable parameters and note lab default values

### Removed
- CSV input

---

Expand Down
91 changes: 91 additions & 0 deletions config/custom_schema_types.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/**
 * This custom schema namespace implements custom types for checking input BAMs for call-sSV.
 * The closures registered in `types` are invoked by the external schema validator
 * (external/pipeline-Nextflow-config/config/schema/schema.config).
 */
custom_schema_types {
    // Top-level input categories accepted under `input:` in the YAML
    allowed_input_types = [
        'BAM'
    ]
    // Sample types accepted under `input.BAM:`
    allowed_bam_types = [
        'normal',
        'tumor'
    ]

    /**
     * Check that every given key is in the allowed list of choices.
     * Throws if any key is not recognized.
     */
    check_input_type_keys = { List given, String name, List choices=custom_schema_types.allowed_input_types ->
        for (elem in given) {
            if (!(elem in choices)) {
                throw new Exception("Invalid parameter ${name}. Valid types: ${choices}.")
            }
        }
    }

    /**
     * Check if given input is a Namespace (a Map in Groovy terms).
     */
    check_if_namespace = { val, String name ->
        if (!(val in Map)) {
            throw new Exception("${name} should be a Namespace, not ${val.getClass()}.")
        }
    }

    /**
     * Check if given input is a list (List or Set).
     */
    check_if_list = { val, String name ->
        if (!(val in List || val in Set)) {
            throw new Exception("${name} should be a List, not ${val.getClass()}.")
        }
    }

    /**
     * Validate each entry of the named namespace against its schema definition.
     * Shared by the namespace checkers below to avoid duplicating the loop.
     */
    validate_namespace_entries = { Map options, String name, Map properties ->
        options[name].each { entry ->
            def entry_as_map = [:]
            entry_as_map[entry.key] = entry.value
            schema.validate_parameter(entry_as_map, entry.key, properties.elements[entry.key])
        }
    }

    /**
     * Check that input is a namespace whose keys are allowed input types,
     * then validate each entry against the schema.
     */
    check_input_namespace = { Map options, String name, Map properties ->
        // Check parameter keys
        custom_schema_types.check_if_namespace(options[name], name)
        def given_keys = options[name].keySet() as ArrayList
        custom_schema_types.check_input_type_keys(given_keys, name)

        custom_schema_types.validate_namespace_entries(options, name, properties)
    }

    /**
     * Check that the BAM namespace is non-empty, contains only allowed sample
     * types (normal/tumor), and validate each entry against the schema.
     */
    check_bam_namespace = { Map options, String name, Map properties ->
        custom_schema_types.check_if_namespace(options[name], name)
        def given_keys = options[name].keySet() as ArrayList
        if (given_keys.size() <= 0) {
            // CSV input was removed in favor of YAML; only YAML is accepted now
            throw new Exception("No inputs provided! Please provide inputs in the YAML.")
        }
        custom_schema_types.check_input_type_keys(given_keys, name, custom_schema_types.allowed_bam_types)

        custom_schema_types.validate_namespace_entries(options, name, properties)
    }

    /**
     * Check that input is a proper BAM entry list: every item must be a
     * readable path. `properties` is unused but kept for checker-signature parity.
     */
    check_bam_list = { Map options, String name, Map properties ->
        custom_schema_types.check_if_list(options[name], name)
        for (item in options[name]) {
            schema.check_path(item, 'r')
        }
    }

    // Type names referenced from config/schema.yaml, mapped to their checkers
    types = [
        'InputNamespace': custom_schema_types.check_input_namespace,
        'InputBAMNamespace': custom_schema_types.check_bam_namespace,
        'BAMEntryList': custom_schema_types.check_bam_list
    ]
}
2 changes: 1 addition & 1 deletion config/default.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ params {
delly_version = '1.1.3'
manta_version = '1.6.0'
bcftools_version = '1.15.1'
pipeval_version = '3.0.0'
pipeval_version = '4.0.0-rc.2'

// Docker tool versions
docker_image_delly = "${-> params.docker_container_registry}/delly:${params.delly_version}"
Expand Down
46 changes: 30 additions & 16 deletions config/methods.config
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
includeConfig "../external/pipeline-Nextflow-config/config/retry/retry.config"
import nextflow.util.SysHelper
includeConfig "../external/pipeline-Nextflow-config/config/bam/bam_parser.config"
includeConfig "../external/pipeline-Nextflow-config/config/methods/common_methods.config"
includeConfig "../external/pipeline-Nextflow-config/config/schema/schema.config"

includeConfig "../external/pipeline-Nextflow-config/config/retry/retry.config"

methods {
check_permissions = { path ->
Expand All @@ -19,15 +21,26 @@ methods {
}
}

set_ids_from_bams = {
params.samples_to_process = [] as Set
params.input.BAM.each { k, v ->
v.each { bam_path ->
def bam_header = bam_parser.parse_bam_header(bam_path)
def sm_tags = bam_header['read_group'].collect{ it['SM'] }.unique()

set_output_dir = {
def sample

// assumes that project and samples name are in the pipeline.config
def reader = new FileReader(params.input_csv)
reader.splitEachLine(',') { parts -> [sample = parts[1].split('/')[-1].split('.bam')[0]] }
if (sm_tags.size() != 1) {
throw new Exception("${bam_path} contains multiple samples! Please run pipeline with single sample BAMs.")
}
Faizal-Eeman marked this conversation as resolved.
Show resolved Hide resolved
params.samples_to_process.add(['id': sm_tags[0], 'path': bam_path, 'sample_type': k])
}
}
}

params.sample = "${sample}"
set_output_dir = {
params.sample = params.samples_to_process
.findAll{ it.sample_type == 'tumor' }
.collect{ it.id }
.join()

Faizal-Eeman marked this conversation as resolved.
Show resolved Hide resolved
params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample}"
}
Expand Down Expand Up @@ -98,7 +111,7 @@ methods {

set_resources_allocation = {
// Function to ensure that resource requirements don't go beyond
// a maximum limit
// a maximum limit
node_cpus = params.max_cpus
node_memory_GB = params.max_memory.toGiga()
// Load base.config by default for all pipelines
Expand Down Expand Up @@ -185,7 +198,7 @@ methods {

timeline.enabled = true
timeline.file = "${params.log_output_dir}/nextflow-log/timeline.html"

report.enabled = true
report.file = "${params.log_output_dir}/nextflow-log/report.html"
}
Expand All @@ -202,16 +215,17 @@ methods {

// Set up env, timeline, trace, and report above.
setup = {
methods.set_env()
schema.load_custom_types("${projectDir}/config/custom_schema_types.config")
schema.validate()
methods.set_ids_from_bams()
methods.set_resources_allocation()
methods.set_output_dir()
methods.set_log_output_dir()
methods.check_permissions(params.log_output_dir)
methods.set_env()
methods.set_resources_allocation()
methods.set_pipeline_logs()
methods.set_process()
methods.set_docker_sudo()
methods.set_pipeline_logs()
retry.setup_retry()
schema.validate()
}
}

99 changes: 58 additions & 41 deletions config/schema.yaml
Original file line number Diff line number Diff line change
@@ -1,51 +1,68 @@
---
input_csv:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to the input CSV file'
sample_id:
type: 'String'
required: true
help: 'Sample ID'
reference_fasta:
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to a reference FASTA file'
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to a reference FASTA file'
exclusion_file:
type: 'Path'
mode: 'r'
required: true
help: 'Absoulte path to an exclusion file'
type: 'Path'
mode: 'r'
required: true
help: 'Absolute path to an exclusion file'
algorithm:
type: 'List'
required: true
help: 'List of SV caller(s) for calling'
default:
- delly
- manta
choices:
- delly
- manta
type: 'List'
required: true
help: 'List of available somatic SV callers'
default:
- delly
- manta
choices:
- delly
- manta
output_dir:
type: 'Path'
mode: 'w'
required: true
help: 'Absolute path to output directory'
type: 'Path'
mode: 'w'
required: true
help: 'Absolute path to output directory'
dataset_id:
type: 'String'
required: true
help: 'Dataset identifier'
type: 'String'
required: true
help: 'Dataset identifier'
map_qual:
type: 'Integer'
required: true
default: 20
type: 'Integer'
required: true
default: 20
min_clique_size:
type: 'Integer'
required: true
default: 5
type: 'Integer'
required: true
default: 5
mad_cutoff:
type: 'Integer'
required: true
default: 15
type: 'Integer'
required: true
default: 15
filter_condition:
type: 'String'
required: true
default: "FILTER=='PASS'"
type: 'String'
required: true
default: "FILTER=='PASS'"
input:
type: 'InputNamespace'
required: true
help: 'Input samples'
elements:
BAM:
type: 'InputBAMNamespace'
required: true
help: 'Input BAMs for somatic structural variant calling'
elements:
normal:
type: 'BAMEntryList'
required: false
help: 'Input normal BAMs'
tumor:
type: 'BAMEntryList'
required: false
help: 'Input tumor BAMs'
2 changes: 1 addition & 1 deletion external/pipeline-Nextflow-config
2 changes: 0 additions & 2 deletions input/call-sSV-input.csv

This file was deleted.

8 changes: 8 additions & 0 deletions input/call-sSV-input.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
sample_id: "sample_id"
input:
BAM:
normal:
- "/absolute/path/to/BAM"
tumor:
- "/absolute/path/to/BAM"
tyamaguchi-ucla marked this conversation as resolved.
Show resolved Hide resolved
Loading
Loading