Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
fa0162d
fastq_shortreads_preprocess_qc init
vagkaratzas Jan 13, 2026
c363af7
skip all nf-test init
vagkaratzas Jan 13, 2026
b3a01df
fastq_qc_stats subworkflow update (#9643)
vagkaratzas Jan 13, 2026
e1a7bbb
empty skip all test working
vagkaratzas Jan 13, 2026
4447e52
meta init
vagkaratzas Jan 13, 2026
4c88066
seqera ai filled inputs and outputs of meta
vagkaratzas Jan 13, 2026
e37b11e
stub added
vagkaratzas Jan 13, 2026
dd3a374
sarscov2 - fastq - seqfu - seqkit - deacon - single_end nf-test added
vagkaratzas Jan 13, 2026
f8e0ef1
prinseqplusplus added
vagkaratzas Jan 13, 2026
c04e29a
clumpify added, cat_fastq added, nf-test added for missing tools
vagkaratzas Jan 14, 2026
912c6b7
Update WHATSHAP/PHASE container, topic versions and task.cpus (#9642)
eliottBo Jan 13, 2026
76d0119
Feature/localcdsearch (#9632)
Ales-ibt Jan 13, 2026
afa52dd
Update RGI modules to be Nextflow strict syntax compliant (#9650)
jfy133 Jan 14, 2026
5be99dd
Generate normalised matrix with `variancepartition/dream` (#9645)
delfiterradas Jan 14, 2026
77ff8c9
Add draft of FASTQ_REMOVE_ADAPTERS_AND_MERGE subworkflow with tests (…
kornkv Jan 14, 2026
32102cc
FASTQ_REMOVEADAPTERS_MERGE added
vagkaratzas Jan 15, 2026
b215d3e
paired-end, adapterremoval test added
vagkaratzas Jan 15, 2026
a928709
adapterremoval test
vagkaratzas Jan 15, 2026
c332931
comment removed
vagkaratzas Jan 15, 2026
883d909
Merge branch 'master' into fastq_shortreads_preprocess_qc
vagkaratzas Jan 15, 2026
cc64bd2
Update subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml
vagkaratzas Jan 20, 2026
c6c5edc
Update subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml
vagkaratzas Jan 20, 2026
774df05
Update subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml
vagkaratzas Jan 20, 2026
d808b21
Update subworkflows/nf-core/fastq_shortreads_preprocess_qc/meta.yml
vagkaratzas Jan 20, 2026
9a16ec1
adapter removal and merge params added to meta.yml
vagkaratzas Jan 20, 2026
de55b83
comment updated
vagkaratzas Jan 20, 2026
4c77a3b
Merge branch 'master' into fastq_shortreads_preprocess_qc
vagkaratzas Jan 20, 2026
463ffcd
Merge branch 'master' into fastq_shortreads_preprocess_qc
vagkaratzas Jan 22, 2026
a1ff0e2
fastq_complexity_filter added, nf-tests udpated, meta.yml updated
vagkaratzas Jan 22, 2026
2658ccf
Merge branch 'master' into fastq_shortreads_preprocess_qc
vagkaratzas Jan 22, 2026
0d86549
Merge branch 'master' into fastq_shortreads_preprocess_qc
vagkaratzas Jan 27, 2026
72fa54e
variable renames
vagkaratzas Jan 27, 2026
f33e1e0
dedupe flag for clumpify
vagkaratzas Jan 27, 2026
d3887fd
Merge branch 'master' into fastq_shortreads_preprocess_qc
vagkaratzas Jan 27, 2026
e3b50c5
check if deterministic output
vagkaratzas Jan 27, 2026
38c59f0
updating non-deterministic snapshot
vagkaratzas Jan 27, 2026
b42e760
Update subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf
vagkaratzas Jan 27, 2026
681ad78
Merge branch 'master' into fastq_shortreads_preprocess_qc
vagkaratzas Feb 2, 2026
b037afd
warning message added in subworkflow's description, regarding require…
vagkaratzas Feb 2, 2026
a6ea2af
CAT_FASTQ logic update
vagkaratzas Feb 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
255 changes: 255 additions & 0 deletions subworkflows/nf-core/fastq_shortreads_preprocess_qc/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
// statistics
include { FASTQ_QC_STATS as PRE_STATS } from '../fastq_qc_stats/main'
include { FASTQ_QC_STATS as POST_STATS } from '../fastq_qc_stats/main'
// preprocessing
include { FASTQ_PREPROCESS_SEQKIT } from '../fastq_preprocess_seqkit/main'
// barcoding
include { UMITOOLS_EXTRACT } from '../../../modules/nf-core/umitools/extract/main'
// adapter removal and merging
include { FASTQ_REMOVEADAPTERS_MERGE } from '../fastq_removeadapters_merge/main'
// complexity filtering
include { FASTQ_COMPLEXITY_FILTER } from '../fastq_complexity_filter/main'
// deduplication
include { BBMAP_CLUMPIFY } from '../../../modules/nf-core/bbmap/clumpify/main'
// host decontamination
include { FASTQ_DECONTAMINATE_DEACON_HOSTILE } from '../fastq_decontaminate_deacon_hostile/main'
// final concatenation
include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main'

//
// Short-read FASTQ preprocessing and QC.
//
// Steps, in order: statistics on the input reads (PRE_STATS), seqkit-based
// preprocessing, UMI extraction, adapter removal / merging, complexity filtering,
// deduplication (BBMAP_CLUMPIFY), host decontamination, per-sample concatenation
// (CAT_FASTQ) and statistics on the final reads (POST_STATS).
//
// PRE_STATS, POST_STATS and FASTQ_PREPROCESS_SEQKIT are always invoked and receive
// their `skip_*` flags as inputs; every other step is wrapped in its own
// `if (!skip_*)` guard inside this workflow.
//
workflow FASTQ_SHORTREADS_PREPROCESS_QC {

take:
ch_reads // channel: [ val(meta), [ fastq ] ]
// statistics (the same five flags are passed to both PRE_STATS and POST_STATS)
skip_fastqc // boolean
skip_seqfu_check // boolean
skip_seqfu_stats // boolean
skip_seqkit_stats // boolean
skip_seqtk_comp // boolean
// preprocessing
skip_seqkit_sana_pair // boolean
skip_seqkit_seq // boolean
skip_seqkit_replace // boolean
skip_seqkit_rmdup // boolean
// barcoding
skip_umitools_extract // boolean
val_umi_discard_read // integer: 0, 1 or 2 (only 1 and 2 cause a mate to be dropped after UMI extraction)
// adapter removal and merging
skip_adapterremoval // boolean
val_adapter_tool // string: [mandatory] tool_name // choose from: ["trimmomatic", "cutadapt", "trimgalore", "bbduk", "leehom", "fastp", "adapterremoval"]
ch_custom_adapters_file // channel: [optional] [ {fasta,txt} ] // fasta, for bbduk or fastp, or txt, for adapterremoval
val_save_merged // boolean: [mandatory] if true, will return the merged reads instead, for fastp and adapterremoval
val_fastp_discard_trimmed_pass // boolean: [mandatory] // only for fastp
val_fastp_save_trimmed_fail // boolean: [mandatory] // only for fastp
// complexity filtering
skip_complexity_filtering // boolean
val_complexity_filter_tool // string: [mandatory] tool_name // choose from: ["prinseqplusplus", "bbduk", "fastp"]
// deduplication
skip_deduplication // boolean
// host decontamination
skip_decontamination // boolean
ch_decontamination_fasta // channel: [ val(meta), [ fasta ] ] (optional)
ch_decontamination_reference // channel: [ val(reference_name), path(reference_dir) ] (optional)
val_decontamination_index_name // val (optional)
val_decontamination_tool // string (enum): 'hostile' or 'deacon'
// final concatenation
skip_final_concatenation // boolean

main:

// Accumulators and optional outputs start out empty so that every name listed
// under `emit:` is defined even when the step that would fill it is skipped.
ch_versions = channel.empty()
ch_multiqc_files = channel.empty()
ch_umi_log = channel.empty()
ch_adapterremoval_discarded_reads = channel.empty()
ch_adapterremoval_logfile = channel.empty()
ch_adapterremoval_report = channel.empty()
ch_complexity_filter_log = channel.empty()
ch_complexity_filter_report = channel.empty()
ch_clumpify_log = channel.empty()
ch_hostile_reference = channel.empty()
ch_hostile_json = channel.empty()
ch_deacon_index = channel.empty()
ch_deacon_summary = channel.empty()

// pre-statistics: computed on the reads exactly as they were passed in
PRE_STATS (
ch_reads,
skip_fastqc,
skip_seqfu_check,
skip_seqfu_stats,
skip_seqkit_stats,
skip_seqtk_comp
)
ch_pre_stats_fastqc_html = PRE_STATS.out.fastqc_html
ch_pre_stats_fastqc_zip = PRE_STATS.out.fastqc_zip
ch_pre_stats_seqfu_check = PRE_STATS.out.seqfu_check
ch_pre_stats_seqfu_stats = PRE_STATS.out.seqfu_stats
ch_pre_stats_seqkit_stats = PRE_STATS.out.seqkit_stats
ch_pre_stats_seqtk_stats = PRE_STATS.out.seqtk_stats
// Of the statistics outputs, only `seqfu_multiqc` is forwarded to MultiQC here.
ch_multiqc_files = ch_multiqc_files.mix(PRE_STATS.out.seqfu_multiqc)
ch_versions = ch_versions.mix(PRE_STATS.out.versions)

// preprocessing
FASTQ_PREPROCESS_SEQKIT (
ch_reads,
skip_seqkit_sana_pair,
skip_seqkit_seq,
skip_seqkit_replace,
skip_seqkit_rmdup
)
// From here on `ch_reads` is re-pointed at the output of the latest executed step,
// so each later step consumes the result of the previous one.
ch_reads = FASTQ_PREPROCESS_SEQKIT.out.reads
ch_versions = ch_versions.mix(FASTQ_PREPROCESS_SEQKIT.out.versions)

// barcoding
// Optional UMI extraction. When it runs, `ch_reads` is replaced by the extracted
// reads (optionally reduced to a single mate, see below).
if (!skip_umitools_extract) {
UMITOOLS_EXTRACT( ch_reads )
ch_umi_reads = UMITOOLS_EXTRACT.out.reads
ch_umi_log = UMITOOLS_EXTRACT.out.log
// `.first()` keeps only the first versions item emitted by the module.
ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first())

// Discard R1 / R2 if required
// Single-end entries pass through untouched. For paired-end entries the index
// `val_umi_discard_read % 2` is 1 when the value is 1 and 0 when the value is 2,
// i.e. it selects the mate that is NOT being discarded; the meta map is copied
// with `single_end: true` because only one file remains.
if (val_umi_discard_read in [1, 2]) {
ch_umi_reads = UMITOOLS_EXTRACT.out.reads
.map { meta, reads ->
meta.single_end ? [meta, reads] : [meta + ['single_end': true], reads[val_umi_discard_read % 2]]
}
}

ch_reads = ch_umi_reads
}

// adapter removal and merging
// Each optional step below follows the same pattern: run the tool on the current
// `ch_reads`, capture its logs/reports, then re-point `ch_reads` at its output.
if (!skip_adapterremoval) {
FASTQ_REMOVEADAPTERS_MERGE (
ch_reads,
val_adapter_tool,
ch_custom_adapters_file,
val_save_merged,
val_fastp_discard_trimmed_pass,
val_fastp_save_trimmed_fail
)
ch_adapterremoval_discarded_reads = FASTQ_REMOVEADAPTERS_MERGE.out.discarded_reads
ch_adapterremoval_logfile = FASTQ_REMOVEADAPTERS_MERGE.out.logfile
ch_adapterremoval_report = FASTQ_REMOVEADAPTERS_MERGE.out.report
ch_reads = FASTQ_REMOVEADAPTERS_MERGE.out.processed_reads
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_REMOVEADAPTERS_MERGE.out.multiqc_files)
ch_versions = ch_versions.mix(FASTQ_REMOVEADAPTERS_MERGE.out.versions)
}

// complexity filtering
if (!skip_complexity_filtering) {
FASTQ_COMPLEXITY_FILTER( ch_reads, val_complexity_filter_tool )
ch_reads = FASTQ_COMPLEXITY_FILTER.out.filtered_reads
ch_complexity_filter_log = FASTQ_COMPLEXITY_FILTER.out.logfile
ch_complexity_filter_report = FASTQ_COMPLEXITY_FILTER.out.report
ch_multiqc_files = ch_multiqc_files.mix(FASTQ_COMPLEXITY_FILTER.out.multiqc_files)
ch_versions = ch_versions.mix(FASTQ_COMPLEXITY_FILTER.out.versions)
}

// deduplication
if (!skip_deduplication) {
BBMAP_CLUMPIFY( ch_reads )
ch_reads = BBMAP_CLUMPIFY.out.reads
ch_clumpify_log = BBMAP_CLUMPIFY.out.log
// `.first()` keeps only the first versions item emitted by the module.
ch_versions = ch_versions.mix(BBMAP_CLUMPIFY.out.versions.first())
}

// host decontamination
if (!skip_decontamination) {
FASTQ_DECONTAMINATE_DEACON_HOSTILE (
ch_reads,
ch_decontamination_fasta,
ch_decontamination_reference,
val_decontamination_index_name,
val_decontamination_tool
)
ch_reads = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.fastq_filtered
// The hostile- and deacon-specific outputs are both captured regardless of
// `val_decontamination_tool`.
// NOTE(review): presumably the outputs of the tool that was not selected are
// empty channels - confirm against the subworkflow.
ch_hostile_reference = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.reference
ch_hostile_json = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.json
ch_deacon_index = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.index
ch_deacon_summary = FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.summary
ch_versions = ch_versions.mix(FASTQ_DECONTAMINATE_DEACON_HOSTILE.out.versions)
}


// final concatenation
// Merge all FASTQ files that share an identical meta map into one file (single-end)
// or one pair (paired-end). Samples that have nothing to merge bypass CAT_FASTQ.
if (!skip_final_concatenation) {
    ch_reads_for_cat_branch = ch_reads
        // Collect every entry with the same meta map, then collapse the nested
        // per-entry read lists into one flat list of files.
        .groupTuple()
        .map { meta, reads ->
            [meta, reads.flatten()]
        }
        // Only samples with more than one file (single-end) or more than one
        // pair (paired-end) need concatenating; everything else is passed through.
        .branch { meta, reads ->
            cat: (meta.single_end && reads.size() > 1) || (!meta.single_end && reads.size() > 2)
            skip: true
        }

    CAT_FASTQ(ch_reads_for_cat_branch.cat)

    ch_reads = CAT_FASTQ.out.reads
        .mix(ch_reads_for_cat_branch.skip)
        .map { meta, reads ->
            // The `skip` branch always carries a list, but a process `path` output that
            // matches exactly one file is a bare Path
            // (NOTE(review): confirm this is the case for single-end CAT_FASTQ output).
            // Indexing a Path with [0] would return its first name element instead of
            // the file, so wrap and flatten first to get a list in both cases.
            def flat_reads = [reads].flatten()
            // Single-end entries are emitted as one file, paired-end as a flat list.
            def new_reads = meta.single_end ? flat_reads[0] : flat_reads
            [meta, new_reads]
        }
}
Comment on lines 176 to 195
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Presumably this should be skipped if there isn't more than one fastq either.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I'll let the module itself handle these cases since it's been coded to do so. Else what would be the best alternative?
Branch those samples from ch_reads that have size > 1, and feed them to CAT_FASTQ and then mix back in?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the branching is what I've done elsewhere.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you take a look into the updated code for CAT_FASTQ and let me know if it's OK?
It's a mixture of mag and taxprofiler.

    // final concatenation
    if (!skip_final_concatenation) {
        ch_reads_for_cat_branch = ch_reads
            .groupTuple()
            .map { meta, reads ->
                [meta, reads.flatten()]
            }
            .branch { meta, reads ->
                cat: (meta.single_end && reads.size() > 1) || (!meta.single_end && reads.size() > 2)
                skip: true
            }

        CAT_FASTQ(ch_reads_for_cat_branch.cat)

        ch_reads = CAT_FASTQ.out.reads
            .mix(ch_reads_for_cat_branch.skip)
            .map { meta, reads ->
                def new_reads = meta.single_end ? reads[0] : reads.flatten()
                [meta, new_reads]
            }
    }

Other than this, I think the PR should be good to go..hopefully! :D

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I think that looks OK to me, best way is to test though obviously ;)


// post-statistics
// Same statistics as PRE_STATS, with the same skip flags, but computed on the reads
// that come out of the last executed processing step.
POST_STATS (
ch_reads,
skip_fastqc,
skip_seqfu_check,
skip_seqfu_stats,
skip_seqkit_stats,
skip_seqtk_comp
)
ch_post_stats_fastqc_html = POST_STATS.out.fastqc_html
ch_post_stats_fastqc_zip = POST_STATS.out.fastqc_zip
ch_post_stats_seqfu_check = POST_STATS.out.seqfu_check
ch_post_stats_seqfu_stats = POST_STATS.out.seqfu_stats
ch_post_stats_seqkit_stats = POST_STATS.out.seqkit_stats
ch_post_stats_seqtk_stats = POST_STATS.out.seqtk_stats
// Of the statistics outputs, only `seqfu_multiqc` is forwarded to MultiQC here.
ch_multiqc_files = ch_multiqc_files.mix(POST_STATS.out.seqfu_multiqc)
ch_versions = ch_versions.mix(POST_STATS.out.versions)

emit:
// Final processed reads. When the final concatenation step runs, single-end
// entries carry one file rather than a list.
reads = ch_reads // channel: [ val(meta), [ fastq ] ]

// statistics
pre_stats_fastqc_html = ch_pre_stats_fastqc_html
pre_stats_fastqc_zip = ch_pre_stats_fastqc_zip
pre_stats_seqfu_check = ch_pre_stats_seqfu_check
pre_stats_seqfu_stats = ch_pre_stats_seqfu_stats
pre_stats_seqkit_stats = ch_pre_stats_seqkit_stats
pre_stats_seqtk_stats = ch_pre_stats_seqtk_stats
post_stats_fastqc_html = ch_post_stats_fastqc_html
post_stats_fastqc_zip = ch_post_stats_fastqc_zip
post_stats_seqfu_check = ch_post_stats_seqfu_check
post_stats_seqfu_stats = ch_post_stats_seqfu_stats
post_stats_seqkit_stats = ch_post_stats_seqkit_stats
post_stats_seqtk_stats = ch_post_stats_seqtk_stats

// The outputs below are empty channels whenever the corresponding step was skipped.

// barcoding
umi_log = ch_umi_log

// adapter removal and merging
adapterremoval_discarded_reads = ch_adapterremoval_discarded_reads
adapterremoval_logfile = ch_adapterremoval_logfile
adapterremoval_report = ch_adapterremoval_report

// complexity filtering
complexity_filter_log = ch_complexity_filter_log
complexity_filter_report = ch_complexity_filter_report

// deduplication
clumpify_log = ch_clumpify_log

// host decontamination
hostile_reference = ch_hostile_reference
hostile_json = ch_hostile_json
deacon_index = ch_deacon_index
deacon_summary = ch_deacon_summary

// Accumulated across PRE_STATS, adapter removal, complexity filtering and POST_STATS.
multiqc_files = ch_multiqc_files
versions = ch_versions // channel: [ versions.yml ]
}
Loading
Loading