-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from CCBR/filter-blacklist-subwf
Create subworkflow to filter reads from blacklisted regions
- Loading branch information
Showing
19 changed files
with
206 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
|
||
|
||
Original files downloaded from https://github.com/nf-core/test-datasets/tree/modules/data/genomics/sarscov2/illumina/fastq | ||
|
||
How subsets were created: | ||
|
||
```sh | ||
grep "^@" test_1.fastq | wc -l | ||
grep -n "^@" test_1.fastq | ||
head -n 40 test_1.fastq > test_1.subset.fastq | ||
grep -n "^@" test_2.fastq | ||
head -n 40 test_2.fastq > test_2.subset.fastq | ||
``` | ||
|
||
Check tails of subset files to make sure they don't end with fastq headers: | ||
|
||
```sh | ||
tail -n 1 *subset.fastq | ||
``` |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
|
||
|
||
include { BWA_MEM } from '../../../modules/CCBR/bwa/mem' | ||
include { SAMTOOLS_FILTERALIGNED } from '../../../modules/CCBR/samtools/filteraligned' | ||
include { PICARD_SAMTOFASTQ } from '../../../modules/CCBR/picard/samtofastq' | ||
|
||
// FILTER_BLACKLIST: remove reads that align to a blacklist index.
//
//   1. BWA_MEM aligns the input reads against the blacklist BWA index.
//   2. SAMTOOLS_FILTERALIGNED filters the resulting alignments.
//   3. PICARD_SAMTOFASTQ converts the filtered alignments back to fastq.
//
// Emits the surviving reads plus the versions.yml files of all three modules.
workflow FILTER_BLACKLIST {
    take:
    ch_fastq_input     // channel: [ val(meta), path(fastq) ]
    ch_blacklist_index // channel: [ val(meta), path(bwa/*) ]

    main:
    ch_versions = Channel.empty()

    BWA_MEM ( ch_fastq_input, ch_blacklist_index )
    SAMTOOLS_FILTERALIGNED( BWA_MEM.out.bam )
    // Convert the *filtered* BAM, not the raw BWA output: feeding BWA_MEM.out.bam
    // here would bypass the filter and write blacklisted reads back to fastq.
    // NOTE(review): assumes SAMTOOLS_FILTERALIGNED emits its result as `bam` --
    // confirm against the module definition.
    PICARD_SAMTOFASTQ( SAMTOOLS_FILTERALIGNED.out.bam )

    // Collect version reports from every module that ran.
    ch_versions = ch_versions.mix(
        BWA_MEM.out.versions,
        SAMTOOLS_FILTERALIGNED.out.versions,
        PICARD_SAMTOFASTQ.out.versions
    )

    emit:
    reads    = PICARD_SAMTOFASTQ.out.reads // channel: [ val(meta), path(fastq) ]
    versions = ch_versions                 // channel: [ path(versions.yml) ]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
name: filter_blacklist | ||
description: Filter out reads that align to an index | ||
keywords: | ||
- bwa | ||
- samtools | ||
- fastq | ||
- bam | ||
- filter | ||
- blacklist | ||
components: | ||
- bwa/mem | ||
- samtools/filteraligned | ||
- picard/samtofastq | ||
input: | ||
- ch_fastq_input: | ||
description: | | ||
A channel containing fastq files | ||
- ch_blacklist_index: | ||
description: | | ||
A BWA index created by running BWA/INDEX on a fasta file of blacklisted regions. | ||
output: | ||
- reads: | ||
description: | | ||
Reads from the fastq files that do not align to the blacklist | ||
- versions: | ||
type: file | ||
description: File containing software versions | ||
pattern: "versions.yml" | ||
authors: | ||
- "@kelly-sovacool" | ||
maintainers: | ||
- "@kelly-sovacool" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
params {
    // Base URL that all test-data paths below are resolved against.
    // No trailing slash: each entry joins with '/', so a trailing slash here
    // would produce '//' in every URL.
    // NOTE(review): this is pinned to the `filter-blacklist-subwf` branch --
    // confirm whether it should track the default branch once merged.
    test_data_base = 'https://raw.githubusercontent.com/CCBR/nf-modules/filter-blacklist-subwf'

    // Named test files, looked up by the test workflows via params.test_data[...].
    test_data {
        test_1_fastq_gz        = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz"
        test_2_fastq_gz        = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz"
        test_1_subset_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_1.subset.fastq.gz"
        test_2_subset_fastq_gz = "${params.test_data_base}/data/genomics/sarscov2/illumina/fastq/test_2.subset.fastq.gz"
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
nextflow.enable.dsl = 2 | ||
|
||
include { BWA_INDEX } from "../../../../modules/CCBR/bwa/index/main" | ||
include { FILTER_BLACKLIST } from "../../../../subworkflows/CCBR/filter_blacklist/main" | ||
|
||
|
||
// Single-end test: index the subset reads with BWA_INDEX, then run
// FILTER_BLACKLIST on the full test_1 reads against that index.
workflow test_filter_blacklist_single {
    // Reads to be filtered: [ meta map, fastq ]
    reads_fastq = file(params.test_data['test_1_fastq_gz'], checkIfExists: true)
    fastq_input = [ [ id:'test', single_end:true ], reads_fastq ]

    // Sequences used to build the blacklist index: [ meta map, fastq ]
    subset_fastq = file(params.test_data['test_1_subset_fastq_gz'], checkIfExists: true)
    blacklist_input = [ [ id:'test', single_end:true ], subset_fastq ]

    BWA_INDEX(blacklist_input)
    FILTER_BLACKLIST(fastq_input, BWA_INDEX.out.index)
}
|
||
// Paired-end test: index the test_1 subset reads with BWA_INDEX, then run
// FILTER_BLACKLIST on the full test_1/test_2 read pair against that index.
workflow test_filter_blacklist_paired {
    // Reads to be filtered: [ meta map, [ fastq_1, fastq_2 ] ]
    reads_1 = file(params.test_data['test_1_fastq_gz'], checkIfExists: true)
    reads_2 = file(params.test_data['test_2_fastq_gz'], checkIfExists: true)
    fastq_input = [ [ id:'test', single_end:false ], [ reads_1, reads_2 ] ]

    // Sequences used to build the blacklist index: [ meta map, fastq ]
    subset_fastq = file(params.test_data['test_1_subset_fastq_gz'], checkIfExists: true)
    blacklist_input = [ [ id:'test', single_end:false ], subset_fastq ]

    BWA_INDEX(blacklist_input)
    FILTER_BLACKLIST(fastq_input, BWA_INDEX.out.index)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
// Default publish location for every process in the test run.
process { | ||
|
||
// Publish under params.outdir in a directory named after the process: take the
// last ':'-separated component of the process name, then its first
// '_'-separated token, lowercased.
// e.g. FILTER_BLACKLIST:PICARD_SAMTOFASTQ -> "${params.outdir}/picard"
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } | ||
|
||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
- name: filter_blacklist test_filter_blacklist_single | ||
command: nextflow run ./tests/subworkflows/CCBR/filter_blacklist/main.nf -entry test_filter_blacklist_single -c tests/config/nextflow.config -c tests/config/test_data_CCBR.config | ||
tags: | ||
- subworkflows | ||
- subworkflows/filter_blacklist | ||
- picard | ||
- picard/samtofastq | ||
- samtools | ||
- samtools/filteraligned | ||
- bwa | ||
- bwa/mem | ||
files: | ||
- path: output/picard/test.fastq.gz | ||
|
||
- name: filter_blacklist test_filter_blacklist_paired | ||
command: nextflow run ./tests/subworkflows/CCBR/filter_blacklist/main.nf -entry test_filter_blacklist_paired -c tests/config/nextflow.config -c tests/config/test_data_CCBR.config | ||
tags: | ||
- subworkflows | ||
- subworkflows/filter_blacklist | ||
- picard | ||
- picard/samtofastq | ||
- samtools | ||
- samtools/filteraligned | ||
- bwa | ||
- bwa/mem | ||
files: | ||
- path: output/picard/test_1.fastq.gz | ||
- path: output/picard/test_2.fastq.gz | ||
- path: output/picard/test.unpaired.fastq.gz | ||
|
||
- name: filter_blacklist test_filter_blacklist_single stub | ||
command: nextflow run ./tests/subworkflows/CCBR/filter_blacklist/main.nf -entry test_filter_blacklist_single -c tests/config/nextflow.config -c tests/config/test_data_CCBR.config -stub | ||
tags: | ||
- subworkflows | ||
- subworkflows/filter_blacklist | ||
- picard | ||
- picard/samtofastq | ||
- samtools | ||
- samtools/filteraligned | ||
- bwa | ||
- bwa/mem | ||
files: | ||
- path: output/picard/test.fastq.gz |
13 changes: 13 additions & 0 deletions
13
tests/subworkflows/CCBR/filter_blacklist/test_filter_blacklist.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import gzip | ||
import pathlib | ||
import pytest | ||
|
||
|
||
@pytest.mark.workflow("filter_blacklist test_filter_blacklist_paired")
def test_unpaired_is_empty(workflow_dir):
    """Check that the paired-end run wrote no reads to the unpaired fastq."""
    fastq_path = (
        pathlib.Path(workflow_dir) / "output" / "picard" / "test.unpaired.fastq.gz"
    )
    # Decompress as text; an empty payload means no unpaired reads were emitted.
    with gzip.open(fastq_path, "rt") as handle:
        contents = handle.read()
    assert contents == ""