Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
ae0f375
New sbwf
LouisLeNezet Jul 23, 2024
4c7945b
Merge branch 'master' into bam_downsample_samtools
LouisLeNezet Jul 23, 2024
9bf44c8
Update subworkflows/nf-core/bam_downsample_samtools/tests/main.nf.test
LouisLeNezet Jul 23, 2024
b7ad89a
Update subworkflows/nf-core/bam_downsample_samtools/tests/main.nf.test
LouisLeNezet Jul 23, 2024
bd3a2db
Update subworkflows/nf-core/bam_downsample_samtools/main.nf
LouisLeNezet Jul 23, 2024
bd0b4b8
Update subworkflows/nf-core/bam_downsample_samtools/main.nf
LouisLeNezet Jul 23, 2024
9c5c474
Ch_depth to value channel
LouisLeNezet Jul 23, 2024
98467e6
Merge branch 'bam_downsample_samtools' of github.com:LouisLeNezet/mod…
LouisLeNezet Jul 23, 2024
609f9d1
Fix sbwf
LouisLeNezet Jul 23, 2024
7044d67
Remove file
LouisLeNezet Jul 23, 2024
002fa51
Merge branch 'master' into bam_downsample_samtools
LouisLeNezet Jul 23, 2024
e06551e
Update subworkflows/nf-core/bam_downsample_samtools/main.nf
LouisLeNezet Jul 23, 2024
964e422
Update subworkflows/nf-core/bam_downsample_samtools/tests/nextflow.co…
LouisLeNezet Jul 23, 2024
377edab
Update subworkflows/nf-core/bam_downsample_samtools/main.nf
LouisLeNezet Jul 23, 2024
40685b8
Update subworkflows/nf-core/bam_downsample_samtools/meta.yml
LouisLeNezet Jul 23, 2024
1246a72
Update subworkflows/nf-core/bam_downsample_samtools/main.nf
LouisLeNezet Jul 23, 2024
1957ef3
Change subsample command way of adding
Jul 23, 2024
f2d71d8
Change depth channel organisation
Jul 23, 2024
fe954fd
Fix test
Jul 23, 2024
a6aa6f0
Put back yaml
Jul 23, 2024
9e7b665
Put back yaml
Jul 23, 2024
a0549d2
Update snapshot and add depth to meta
Jul 23, 2024
9bfaa1f
Update subworkflows/nf-core/bam_downsample_samtools/meta.yml
LouisLeNezet Jul 24, 2024
3f8a322
Change name and fix output channel
Jul 24, 2024
64bfdf6
Merge branch 'master' into bam_downsample_samtools
LouisLeNezet Jul 24, 2024
ac58fa4
Update meta.yml
LouisLeNezet Jul 24, 2024
285b96b
Update snapshot
LouisLeNezet Jul 24, 2024
e771210
Update meta.yml
LouisLeNezet Jul 29, 2024
6385c6b
Update main.nf
LouisLeNezet Jul 29, 2024
c6fef92
Merge branch 'master' into bam_downsample_samtools
LouisLeNezet Jul 29, 2024
ab2a6a4
Merge branch 'master' into bam_downsample_samtools
LouisLeNezet Aug 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions subworkflows/nf-core/bam_subsampledepth_samtools/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
include { SAMTOOLS_DEPTH } from '../../../modules/nf-core/samtools/depth'
include { GAWK } from '../../../modules/nf-core/gawk'
include { SAMTOOLS_VIEW } from '../../../modules/nf-core/samtools/view'

//
// Subsample alignment files to a requested mean depth.
//
// For every input alignment the mean depth is computed (SAMTOOLS_DEPTH + GAWK),
// the subsampling fraction `requested depth / mean depth` is stored in the meta
// map and SAMTOOLS_VIEW is run on the alignment.
//
// This subworkflow relies on process configuration supplied by the caller
// (see tests/nextflow.config for a working example):
//   - GAWK must be configured to print the mean depth as a single value
//   - SAMTOOLS_VIEW must read `meta.subsample_fraction` (e.g. `--subsample`)
//
workflow BAM_SUBSAMPLEDEPTH_SAMTOOLS {

    take:
    ch_bam_bai_depth // channel: [ val(meta), path(bam), path(bai), val(depth) ]
    ch_fasta         // channel: [ val(meta), path(fasta) ]

    main:
    ch_versions = Channel.empty()

    // The same alignment may be requested at several depths. Compute its
    // depth only once: drop the requested depth and de-duplicate.
    ch_bam_bai = ch_bam_bai_depth
        .map { it[0..2] }
        .unique()

    // Compute per-position depth
    SAMTOOLS_DEPTH(ch_bam_bai, [[], []])
    ch_versions = ch_versions.mix(SAMTOOLS_DEPTH.out.versions.first())

    // Use GAWK to get mean depth
    GAWK(SAMTOOLS_DEPTH.out.tsv, [])
    ch_versions = ch_versions.mix(GAWK.out.versions.first())

    // Parse the mean depth written by GAWK: [ val(meta), val(mean) ]
    ch_mean_depth = GAWK.out.output
        .splitCsv(header: false, sep: '\t')
        .map { meta, row ->
            [ meta, row[0] as Float ]
        }

    // Compute the downsampling factor and store it (with the requested depth)
    // in the meta map. `combine(by: 0)` attaches the single mean depth of a
    // sample to each of its requested depths; unlike `join` it does not rely
    // on pairing duplicate keys one-to-one.
    // NOTE(review): a requested depth above the mean depth yields a fraction
    // > 1, which `samtools view --subsample` is documented to reject — confirm
    // whether callers need a guard.
    ch_input_subsample = ch_bam_bai_depth
        .combine(ch_mean_depth, by: 0)
        .map { meta, bam, index, depth, mean ->
            def fraction = (depth as Float) / mean
            [ meta + [ 'subsample_fraction': fraction, 'depth': depth ], bam, index ]
        }

    // Downsample
    SAMTOOLS_VIEW(
        ch_input_subsample,
        ch_fasta,
        []
    )
    ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first())

    // Aggregate alignment and index, whatever the output format is.
    // The meta map now contains `subsample_fraction` and `depth`, so the
    // join key is unique per (sample, requested depth).
    ch_alignment = SAMTOOLS_VIEW.out.bam.mix(SAMTOOLS_VIEW.out.cram, SAMTOOLS_VIEW.out.sam)
    ch_index     = SAMTOOLS_VIEW.out.bai.mix(SAMTOOLS_VIEW.out.crai, SAMTOOLS_VIEW.out.csi)

    ch_bam_subsampled = ch_alignment.join(ch_index)

    emit:
    bam_subsampled = ch_bam_subsampled // channel: [ val(meta), path(bam/cram/sam), path(bai/crai/csi) ]
    versions       = ch_versions       // channel: [ path(versions.yml) ]
}
42 changes: 42 additions & 0 deletions subworkflows/nf-core/bam_subsampledepth_samtools/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
name: "bam_subsampledepth_samtools"
description: Subsample a BAM/CRAM/SAM file using samtools to a given mean depth
keywords:
- subsample
- bam
- sam
- cram
components:
- samtools/depth
- samtools/view
- gawk
input:
- ch_bam_bai_depth:
type: file
description: |
The input channel containing the BAM/CRAM/SAM files and their indexes and the depth at which to subsample them.
Structure: [ val(meta), path(bam), path(bai), val(depth) ]
pattern: "*.{bam,cram,sam}"
- ch_fasta:
type: file
description: |
The reference genome channel containing the fasta file
Structure: [ val(meta), path(fasta) ]
pattern: "*.{fa(sta)?}"
output:
- bam_subsampled:
type: file
description: |
Channel containing subsampled BAM/CRAM/SAM files and their indexes.
The meta map is extended with the keys `subsample_fraction` and `depth`.
Structure: [ val(meta), path(bam), path(index) ]
pattern: "*.{bam,cram,sam}"
- versions:
type: file
description: |
File containing software versions
Structure: [ path(versions.yml) ]
pattern: "versions.yml"
authors:
- "@louislenezet"
maintainers:
- "@louislenezet"
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// nf-test for the BAM_SUBSAMPLEDEPTH_SAMTOOLS subworkflow.
// Two samples are each subsampled to a mean depth of 2X and 4X; the snapshot
// records the workflow outputs and the number of reads of every subsampled BAM.
nextflow_workflow {

    name "Test Subworkflow BAM_SUBSAMPLEDEPTH_SAMTOOLS"
    script "../main.nf"
    config "./nextflow.config"

    workflow "BAM_SUBSAMPLEDEPTH_SAMTOOLS"

    tag "subworkflows"
    tag "subworkflows_nfcore"
    tag "subworkflows/bam_subsampledepth_samtools"

    tag "samtools"
    tag "samtools/depth"
    tag "samtools/view"
    tag "gawk"

    test("Downsample to 4X and 2X") {
        when {
            workflow {
                // `checkIfExists` is the option name understood by `file()`;
                // the previous spelling `checkIfExist` was not applied.
                """
                input[0] = Channel.fromList([
                    [
                        [id: "NA12878"],
                        file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.bam", checkIfExists: true),
                        file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA12878/NA12878.s.bam.bai", checkIfExists: true),
                    ],
                    [
                        [id: "NA19401"],
                        file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.bam", checkIfExists: true),
                        file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/individuals/NA19401/NA19401.s.bam.bai", checkIfExists: true),
                    ],
                ]).combine(Channel.of(2, 4))
                input[1] = Channel.of([
                    [id: "GRCh38"],
                    file("https://raw.githubusercontent.com/nf-core/test-datasets/phaseimpute/hum_data/reference_genome/GRCh38.s.fa.gz", checkIfExists: true),
                ]).collect()
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                { assert snapshot(
                    workflow.out,
                    // Read count of each subsampled BAM, keyed by its meta map
                    workflow.out.bam_subsampled.collect{ [it[0], bam(it[1]).getReads().size()] }
                ).match() }
            )
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
{
"Downsample to 4X and 2X": {
"content": [
{
"0": [
[
{
"id": "NA12878",
"subsample_fraction": 0.06201800538763442,
"depth": 2
},
"NA12878.bam:md5,c998482010b83365a4889c3fa75ac578",
"NA12878.bam.csi:md5,0b4abd161cdcc2b51571c9cc651df354"
],
[
{
"id": "NA12878",
"subsample_fraction": 0.12403601077526884,
"depth": 4
},
"NA12878.bam:md5,d6045df32f7c77d5c863b2068739faa9",
"NA12878.bam.csi:md5,3f8f9a17463b6c2391c632681d3f253a"
],
[
{
"id": "NA19401",
"subsample_fraction": 0.062137851766009305,
"depth": 2
},
"NA19401.bam:md5,6b144e7adc1f9e6711aea0e4772c5937",
"NA19401.bam.csi:md5,36c29896003f2de5306ff427a969058c"
],
[
{
"id": "NA19401",
"subsample_fraction": 0.12427570353201861,
"depth": 4
},
"NA19401.bam:md5,3b0d20e5f44952135a547c6230117460",
"NA19401.bam.csi:md5,b44701d7d4de827cc1b83aaeb397deaf"
]
],
"1": [
"versions.yml:md5,8019e4c6fa3c1ddea25d64d6bfe5651f",
"versions.yml:md5,b31618773ed8a31f8635bca3da001eeb",
"versions.yml:md5,f134df55b5047c5a8222ca04cf5ec501"
],
"bam_subsampled": [
[
{
"id": "NA12878",
"subsample_fraction": 0.06201800538763442,
"depth": 2
},
"NA12878.bam:md5,c998482010b83365a4889c3fa75ac578",
"NA12878.bam.csi:md5,0b4abd161cdcc2b51571c9cc651df354"
],
[
{
"id": "NA12878",
"subsample_fraction": 0.12403601077526884,
"depth": 4
},
"NA12878.bam:md5,d6045df32f7c77d5c863b2068739faa9",
"NA12878.bam.csi:md5,3f8f9a17463b6c2391c632681d3f253a"
],
[
{
"id": "NA19401",
"subsample_fraction": 0.062137851766009305,
"depth": 2
},
"NA19401.bam:md5,6b144e7adc1f9e6711aea0e4772c5937",
"NA19401.bam.csi:md5,36c29896003f2de5306ff427a969058c"
],
[
{
"id": "NA19401",
"subsample_fraction": 0.12427570353201861,
"depth": 4
},
"NA19401.bam:md5,3b0d20e5f44952135a547c6230117460",
"NA19401.bam.csi:md5,b44701d7d4de827cc1b83aaeb397deaf"
]
],
"versions": [
"versions.yml:md5,8019e4c6fa3c1ddea25d64d6bfe5651f",
"versions.yml:md5,b31618773ed8a31f8635bca3da001eeb",
"versions.yml:md5,f134df55b5047c5a8222ca04cf5ec501"
]
},
[
[
{
"id": "NA12878",
"subsample_fraction": 0.06201800538763442,
"depth": 2
},
1164
],
[
{
"id": "NA12878",
"subsample_fraction": 0.12403601077526884,
"depth": 4
},
2402
],
[
{
"id": "NA19401",
"subsample_fraction": 0.062137851766009305,
"depth": 2
},
1196
],
[
{
"id": "NA19401",
"subsample_fraction": 0.12427570353201861,
"depth": 4
},
2321
]
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
"timestamp": "2024-07-24T11:40:16.846985786"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Process settings used by the BAM_SUBSAMPLEDEPTH_SAMTOOLS test.
process {
    // Write an index next to the output and subsample with the fraction
    // that the subworkflow stored in the meta map.
    withName: 'SAMTOOLS_VIEW' {
        ext.args = { "--write-index --subsample ${meta.subsample_fraction}" }
    }

    // awk program: sum the third column of every record and print the
    // sum divided by the number of records.
    withName: 'GAWK' {
        ext.suffix = "txt"
        ext.args2 = "'{ total += \$3 } END { print total/NR }'"
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
subworkflows/bam_subsampledepth_samtools:
- subworkflows/nf-core/bam_subsampledepth_samtools/**