Merge pull request #35 from fmalmeida/dev

Release v2.6.3
fmalmeida · Feb 23, 2024 · a6e51ab · a6e51ab
2 parents dad810c + 99e3e09
commit a6e51ab
Show file tree

Hide file tree

Showing 17 changed files with 92 additions and 45 deletions.
diff --git a/.github/workflows/test_pr_illumina_docker.yml b/.github/workflows/test_pr_illumina_docker.yml
@@ -7,7 +7,7 @@
 name: Testing illumina / docker from PR
 on:
   pull_request:
-    branches: master
+    branches: [ master, dev ]
     types: [ opened, synchronize, reopened ]
 
 jobs:

diff --git a/.github/workflows/test_pr_illumina_singularity.yml b/.github/workflows/test_pr_illumina_singularity.yml
@@ -7,7 +7,7 @@
 name: Testing illumina / singularity from PR
 on:
   pull_request:
-    branches: master
+    branches: [ master, dev ]
     types: [ opened, synchronize, reopened ]
 
 jobs:

diff --git a/.github/workflows/test_pr_nanopore_docker.yml b/.github/workflows/test_pr_nanopore_docker.yml
@@ -7,7 +7,7 @@
 name: Testing nanopore / docker from PR
 on:
   pull_request:
-    branches: master
+    branches: [ master, dev ]
     types: [ opened, synchronize, reopened ]
 
 jobs:

diff --git a/.github/workflows/test_pr_nanopore_singularity.yml b/.github/workflows/test_pr_nanopore_singularity.yml
@@ -7,7 +7,7 @@
 name: Testing nanopore / singularity from PR
 on:
   pull_request:
-    branches: master
+    branches: [ master, dev ]
     types: [ opened, synchronize, reopened ]
 
 jobs:

diff --git a/.github/workflows/test_pr_pacbio_docker.yml b/.github/workflows/test_pr_pacbio_docker.yml
@@ -7,7 +7,7 @@
 name: Testing pacbio / docker from PR
 on:
   pull_request:
-    branches: master
+    branches: [ master, dev ]
     types: [ opened, synchronize, reopened ]
 
 jobs:

diff --git a/.github/workflows/test_pr_pacbio_singularity.yml b/.github/workflows/test_pr_pacbio_singularity.yml
@@ -7,7 +7,7 @@
 name: Testing  pacbio / singularity from PR
 on:
   pull_request:
-    branches: master
+    branches: [ master, dev ]
     types: [ opened, synchronize, reopened ]
 
 jobs:

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
diff --git a/.zenodo.json b/.zenodo.json
@@ -2,7 +2,7 @@
     "description": "<p>The pipeline</p>\n\n<p>ngs-preprocess is built using Nextflow, a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. It is an easy to use pipeline that uses state-of-the-art software for quality check and pre-processing ngs reads of Illumina, Pacbio and Oxford Nanopore Technologies.</p>", 
     "license": "other-open", 
     "title": "fmalmeida/ngs-preprocess: A pipeline for preprocessing short and long sequencing reads", 
-    "version": "v2.6.2", 
+    "version": "v2.6.3", 
     "upload_type": "software",
     "creators": [
         {

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 The tracking for changes started in v2.2
 
+## v2.6.3 -- [2024-Feb-23]
+
+* [[#32](https://github.com/fmalmeida/ngs-preprocess/issues/32)] - Add as output a template samplesheet that can be readily used as input for MpGAP to assemble each downloaded read on its own.
+
 ## v2.6.2 -- [2024-Jan-19]
 
 * [[#24](https://github.com/fmalmeida/ngs-preprocess/issues/24)] - Added documentation on generated outputs, as requested in paper review

diff --git a/docs/outputs.md b/docs/outputs.md
@@ -18,6 +18,8 @@ preprocessed_reads
 ├── final_output                   
 │   └── nanopore
 │       └── SRR23337893.filtered.fq.gz
+# a template input ready for MpGAP
+├── mpgap_samplesheet.yml
 # directory containing the nextflow execution reports
 ├── pipeline_info
 │   ├── ngs_preprocess_report_2023-11-18_10-07-36.html
@@ -35,6 +37,22 @@ preprocessed_reads
     └── SRR23337893_sra_runInfo.csv
 ```
 
+## The pre-formatted MpGAP input samplesheet
+
+Once finished, the pipeline also generates a file called `mpgap_samplesheet.yml` (shown below). This samplesheet contains the **minimum** set of definitions required to assemble these reads using the [MpGAP](https://mpgap.readthedocs.io/en/latest/) pipeline.
+
+```yaml
+samplesheet:
+  - id: SRR23337893
+    nanopore: /workspace/ngs-preprocess/testing/preprocessed_reads/final_output/nanopore/SRR23337893.filtered.fq.gz
+```
+
+!!! note
+
+    Keep in mind that this template samplesheet contains only the **bare minimum** needed to launch MpGAP; many other customizations are possible. For example, the generated samplesheet will assemble each read set separately, but MpGAP can also perform hybrid assemblies. Users should therefore treat this output as a template that can be easily customized to feed the results of the ngs-preprocess pipeline into the assembly pipeline.
+
+    For more information, please refer to the [MpGAP](https://mpgap.readthedocs.io/en/latest/) documentation.
+
 ## Example of QC outputs
 
 Here I am going to display just a very few examples of results produced, focusing on the QC, as the main result is a cleaned FASTQ file.

diff --git a/main.nf b/main.nf
@@ -167,6 +167,53 @@ workflow {
   )
   ILLUMINA( shortreads_ch )
 
+  /*
+   * Collect all the generated results as a samplesheet for MpGAP
+   */
+
+  // get string of final dir
+  def final_outdir = file("${params.output}").toUriString()
+  final_outdir = "${final_outdir}/final_output"
+
+  // start samplesheet channel and feed it
+  ch_mpgap_samplesheet = Channel.value( "samplesheet:" )
+  ch_mpgap_samplesheet.concat(
+
+    // short reads data
+    ILLUMINA.out.reads
+    .map{ meta, subdir, reads ->
+      def reads_list = (meta.shortreads_type == 'paired' && !params.fastp_merge_pairs) ? "\s\s\s\s\s\s- ${final_outdir}/${subdir}/${reads[0].getName()}\n\s\s\s\s\s\s- ${final_outdir}/${subdir}/${reads[1].getName()}" : "\s\s\s\s\s\s- ${final_outdir}/${subdir}/${reads.getName()}"
+      def final_string = "\n\s\s- id: ${meta.id}"
+      final_string = final_string + "\n\s\s\s\sillumina:\n"
+      final_string = final_string + reads_list
+
+      final_string
+    },
+
+    // nanopore data
+    NANOPORE.out.reads
+    .map{ meta, subdir, reads ->
+      def final_string = "\n\s\s- id: ${meta.id}"
+      final_string = final_string + "\n\s\s\s\snanopore: ${final_outdir}/${subdir}/${reads.getName()}"
+
+      final_string
+    },
+
+    // pacbio data
+    PACBIO.out.reads
+    .map{ meta, subdir, reads ->
+      def final_string = "\n\s\s- id: ${meta.id}"
+      final_string = final_string + "\n\s\s\s\spacbio: ${final_outdir}/${subdir}/${reads.getName()}"
+
+      final_string
+    },
+
+    // end line
+    Channel.value( "\n" )
+
+  )
+  .collectFile( name: 'mpgap_samplesheet.yml', storeDir: params.output, sort: false, cache: false, newLine: false )
+
 }
 
 workflow.onComplete {

diff --git a/modules/fastp.nf b/modules/fastp.nf
@@ -13,11 +13,14 @@ process FASTP {
 
     output:
     path "*"
+    tuple val(meta), val('short_reads'), path("*${reads_output}"), emit: reads
 
     script:
+    reads_output = ".preprocessed.fq.gz"
     if (meta.shortreads_type == 'paired') {
         if (params.fastp_merge_pairs) {
-            reads_param = "--in1 ${reads[0]} --in2 ${reads[1]} --out1 ${meta.id}_R1.unmerged.fq.gz --out2 ${meta.id}_R2.unmerged.fq.gz --detect_adapter_for_pe --merge --merged_out ${meta.id}.merged.fq.gz"
+            reads_param  = "--in1 ${reads[0]} --in2 ${reads[1]} --out1 ${meta.id}_R1.unmerged.fq.gz --out2 ${meta.id}_R2.unmerged.fq.gz --detect_adapter_for_pe --merge --merged_out ${meta.id}.merged.fq.gz"
+            reads_output = ".merged.fq.gz"
         } else {
             reads_param = "--in1 ${reads[0]} --in2 ${reads[1]} --out1 ${meta.id}_R1.preprocessed.fq.gz --out2 ${meta.id}_R2.preprocessed.fq.gz --detect_adapter_for_pe"
         }

diff --git a/modules/lreads_filter.nf b/modules/lreads_filter.nf
@@ -7,7 +7,7 @@ process FILTER {
     tuple val(meta), path(reads)
 
     output:
-    path "*filtered.fq.gz", emit: fastqs
+    tuple val(meta), val("${meta.longreads_type}"), path("*filtered.fq.gz"), emit: reads
 
     when:
     !(reads =~ /input.*/)

diff --git a/nextflow.config b/nextflow.config
@@ -93,5 +93,5 @@ manifest {
   homePage        = "https://github.com/fmalmeida/ngs-preprocess"
   mainScript      = "main.nf"
   nextflowVersion = ">=21.10.0"
-  version         = "2.6.2"
+  version         = "2.6.3"
 }
diff --git a/workflows/illumina.nf b/workflows/illumina.nf
diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf
diff --git a/workflows/pacbio.nf b/workflows/pacbio.nf