Skip to content

Commit

Permalink
Merge pull request #3 from TRON-Bioinformatics/integrate-fastp
Browse files Browse the repository at this point in the history
Integrate FASTP into the pipeline
  • Loading branch information
priesgo authored Nov 8, 2021
2 parents 4298391 + c3f5308 commit 1c374be
Show file tree
Hide file tree
Showing 19 changed files with 380 additions and 215 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/automated_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Continuous-integration workflow: on every push, installs Nextflow and runs `make`.
name: Automated tests

on: [push]

jobs:
  test:
    runs-on: ubuntu-20.04

    steps:
      - uses: actions/checkout@v2
      # Java 11 is set up before Nextflow is downloaded in the "Install dependencies" step.
      - uses: actions/setup-java@v2
        with:
          distribution: 'zulu' # See 'Supported distributions' for available options
          java-version: '11'
      - uses: conda-incubator/setup-miniconda@v2
      - name: Install dependencies
        # GitHub-hosted runners execute steps as an unprivileged user with
        # passwordless sudo; apt-get and writing into /usr/local/bin both
        # need elevated privileges, so they are invoked through sudo.
        run: |
          sudo apt-get update && sudo apt-get --assume-yes install wget make procps software-properties-common
          wget -qO- https://get.nextflow.io | bash && sudo cp nextflow /usr/local/bin/nextflow
      - name: Run tests
        # The default make target drives the test scripts.
        run: |
          make
13 changes: 0 additions & 13 deletions Dockerfile

This file was deleted.

37 changes: 10 additions & 27 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,35 +1,18 @@

all : clean test check
all : clean test

clean:
rm -rf output
rm -f .nextflow.log*
rm -rf .nextflow*

test:
nextflow main.nf --help
nextflow main.nf -profile test,conda --output output/test1
nextflow main.nf -profile test,conda --inception --output output/test2
nextflow main.nf -profile test,conda --library single --output output/test3
nextflow main.nf -profile test,conda --algorithm mem --output output/test4
nextflow main.nf -profile test,conda --algorithm mem --library single --output output/test5
nextflow main.nf -profile test,conda --output output/test6 --input_files false \
--input_fastq1 test_data/TESTX_S1_L001_R1_001.fastq.gz \
--input_fastq2 test_data/TESTX_S1_L001_R2_001.fastq.gz --input_name test
nextflow main.nf -profile test,conda --output output/test7 --input_files false \
--input_fastq1 test_data/TESTX_S1_L001_R1_001.fastq.gz \
--library single --input_name test

check:
test -s output/test1/TESTX_S1_L001.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test1/TESTX_S1_L002.bam || { echo "Missing test 1 output file!"; exit 1; }
test -s output/test2/TESTX_S1_L001.bam || { echo "Missing test 2 output file!"; exit 1; }
test -s output/test2/TESTX_S1_L002.bam || { echo "Missing test 2 output file!"; exit 1; }
test -s output/test3/TESTX_S1_L001.bam || { echo "Missing test 3 output file!"; exit 1; }
test -s output/test3/TESTX_S1_L002.bam || { echo "Missing test 3 output file!"; exit 1; }
test -s output/test4/TESTX_S1_L001.bam || { echo "Missing test 4 output file!"; exit 1; }
test -s output/test4/TESTX_S1_L002.bam || { echo "Missing test 4 output file!"; exit 1; }
test -s output/test5/TESTX_S1_L001.bam || { echo "Missing test 5 output file!"; exit 1; }
test -s output/test5/TESTX_S1_L002.bam || { echo "Missing test 5 output file!"; exit 1; }
test -s output/test6/test.bam || { echo "Missing test 6 output file!"; exit 1; }
test -s output/test7/test.bam || { echo "Missing test 7 output file!"; exit 1; }
bash tests/run_test_0.sh
bash tests/run_test_1.sh
bash tests/run_test_2.sh
bash tests/run_test_3.sh
bash tests/run_test_4.sh
bash tests/run_test_5.sh
bash tests/run_test_6.sh
bash tests/run_test_7.sh
bash tests/run_test_8.sh
23 changes: 12 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,37 +1,40 @@
# TronFlow BWA pipeline

![GitHub tag (latest SemVer)](https://img.shields.io/github/v/release/tron-bioinformatics/tronflow-bwa?sort=semver)
[![Run tests](https://github.com/TRON-Bioinformatics/tronflow-bwa/actions/workflows/automated_tests.yml/badge.svg?branch=master)](https://github.com/TRON-Bioinformatics/tronflow-bwa/actions/workflows/automated_tests.yml)
[![DOI](https://zenodo.org/badge/327943420.svg)](https://zenodo.org/badge/latestdoi/327943420)
[![License](https://img.shields.io/badge/license-MIT-green)](https://opensource.org/licenses/MIT)
[![Powered by Nextflow](https://img.shields.io/badge/powered%20by-Nextflow-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://www.nextflow.io/)

Nextflow pipeline for the alignment of paired and single end FASTQ files with BWA aln and mem algorithms.
It includes an initial step of read trimming using FASTP.

## Requirements

There are two packages that are required for this pipeline. Both of these are preconfigured when using the conda or docker profiles.
There are three packages that are required for this pipeline. All of them are preconfigured when using the conda profile.

- BWA 0.7.17
- samtools 1.12
- BWA
- samtools
- FASTP


## How to run it

Run it from GitHub as follows:
```
nextflow run tron-bioinformatics/tronflow-bwa --input_files $input --output $output --algorithm aln --library paired -profile conda,standard
nextflow run tron-bioinformatics/tronflow-bwa -r v1.5.0 -profile conda --input_files $input --output $output --algorithm aln --library paired
```

Otherwise download the project and run as follows:
```
nextflow main.nf --input_files $input --output $output --algorithm aln --library paired -profile conda,standard
nextflow main.nf -profile conda --input_files $input --output $output --algorithm aln --library paired
```

Find the help as follows:
```
$ nextflow run tron-bioinformatics/tronflow-bwa --help
N E X T F L O W ~ version 19.07.0
Launching `bam_preprocessing.nf` [intergalactic_shannon] - revision: e707c77d7b
Launching `main.nf` [intergalactic_shannon] - revision: e707c77d7b
Usage:
nextflow main.nf --input_files input_files [--reference reference.fasta]
Expand All @@ -53,12 +56,10 @@ Optional input:
* cpus: determines the number of CPUs for each job, with the exception of bwa sampe and samse steps which are not parallelized (default: 8)
* memory: determines the memory required by each job (default: 8g)
* inception: if enabled it uses an inception, only valid for BWA aln, it requires a fast file system such as flash (default: false)
* skip_trimming: skips the read trimming step
Output:
* A BAM file \${name}.bam
```

You can run it with a conda environment using the option `-profile` such as:
```
$ nextflow main.nf --input_files test_data/test_input.txt --reference `pwd`/test_data/ucsc.hg19.minimal.fasta -profile conda
* FASTP read trimming stats report in HTML format \${name}.fastp_stats.html
* FASTP read trimming stats report in JSON format \${name}.fastp_stats.json
```
10 changes: 0 additions & 10 deletions environment.yml

This file was deleted.

200 changes: 49 additions & 151 deletions main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { FASTP_PAIRED; FASTP_SINGLE } from './modules/fastp'
include { BWA_ALN; BWA_ALN as BWA_ALN_2; BWA_SAMPE; BWA_SAMSE; BWA_ALN_INCEPTION } from './modules/bwa_aln'
include { BWA_MEM; BWA_MEM_SE } from './modules/bwa_mem'

params.help= false
params.input_files = false
params.input_fastq1 = false
Expand All @@ -12,6 +18,7 @@ params.library = "paired"
params.cpus = 8
params.memory = "8g"
params.inception = false
params.skip_trimming = false


if (params.help) {
Expand Down Expand Up @@ -69,158 +76,49 @@ else if (params.input_files) {
exit 1, "--input_name is not provided!"
}

if (params.algorithm == "aln" && params.library == "paired" && !params.inception) {

input_files.into { input_files_1; input_files_2 }

process bwaAln1 {
cpus "${params.cpus}"
memory "${params.memory}"
tag "${name}"

input:
set name, file(fastq1), file(fastq2) from input_files_1

output:
set val("${name}"), file("${fastq1}"), file("${fastq1.baseName}.sai") into alignment_output1

"""
bwa aln -t ${task.cpus} ${params.reference} ${fastq1} > ${fastq1.baseName}.sai
"""
}

process bwaAln2 {
cpus "${params.cpus}"
memory "${params.memory}"
tag "${name}"

input:
set name, file(fastq1), file(fastq2) from input_files_2

output:
set val("${name}"), file("${fastq2}"), file("${fastq2.baseName}.sai") into alignment_output2

"""
bwa aln -t ${task.cpus} ${params.reference} ${fastq2} > ${fastq2.baseName}.sai
"""
}

process bwaSampe {
cpus 1
memory params.memory
tag "${name}"
publishDir params.output, mode: "move"

input:
// joins both channels by key using the first element in the tuple, the name
set name, file(fastq1), file(sai1), file(fastq2), file(sai2) from alignment_output1.join(alignment_output2)

output:
set val("${name}"), file("${name}.bam") into sampe_output

"""
bwa sampe ${params.reference} ${sai1} ${sai2} ${fastq1} ${fastq2} | samtools view -uS - | samtools sort - > ${name}.bam
"""
}
}
else if (params.algorithm == "aln" && params.library == "single" && !params.inception) {

process bwaAln {
cpus "${params.cpus}"
memory "${params.memory}"
tag "${name}"

input:
set name, file(fastq) from input_files

output:
set val("${name}"), file("${fastq}"), file("${fastq.baseName}.sai") into alignment_output

"""
bwa aln -t ${task.cpus} ${params.reference} ${fastq} > ${fastq.baseName}.sai
"""
}

process bwaSamse {
cpus 1
memory "${params.memory}"
tag "${name}"
publishDir params.output, mode: "move"

input:
// joins both channels by key using the first element in the tuple, the name
set name, file(fastq), file(sai) from alignment_output

output:
set val("${name}"), file("${name}.bam") into samse_output

"""
bwa samse ${params.reference} ${sai} ${fastq} | samtools view -uS - | samtools sort - > ${name}.bam
"""
}
}
else if (params.algorithm == "aln" && params.library == "paired" && params.inception) {

process bwaAlnInception {
cpus "${params.cpus}".toInteger() * 2
memory "${params.memory}"
tag "${name}"
publishDir params.output, mode: "move"

input:
// joins both channels by key using the first element in the tuple, the name
set name, file(fastq1), file(fastq2) from input_files

output:
set val("${name}"), file("${name}.bam") into sampe_output

"""
bwa sampe ${params.reference} <( bwa aln -t ${params.cpus} ${params.reference} ${fastq1} ) \
<( bwa aln -t ${params.cpus} ${params.reference} ${fastq2} ) ${fastq1} ${fastq2} \
| samtools view -uS - | samtools sort - > ${name}.bam
"""
workflow {
if (params.library == "paired") {
if (params.skip_trimming) {
trimmed_fastqs = input_files
}
else {
FASTP_PAIRED(input_files)
trimmed_fastqs = FASTP_PAIRED.out.trimmed_fastqs
}
if (params.algorithm == "aln" && !params.inception) {
BWA_ALN(trimmed_fastqs.map {name, fq1, fq2 -> tuple(name, fq1)})
BWA_ALN_2(trimmed_fastqs.map {name, fq1, fq2 -> tuple(name, fq2)})
BWA_SAMPE(BWA_ALN.out.alignment_output.join(BWA_ALN_2.out.alignment_output))
}
else if (params.algorithm == "aln" && params.inception) {
BWA_ALN_INCEPTION(trimmed_fastqs)
}
else if (params.algorithm == "mem") {
BWA_MEM(trimmed_fastqs)
}
else {
exit 1, "Unsupported configuration!"
}
}
}
else if (params.algorithm == "mem" && params.library == "paired") {

process bwaMem {
cpus "${params.cpus}"
memory "${params.memory}"
tag "${name}"
publishDir params.output, mode: "move"

input:
// joins both channels by key using the first element in the tuple, the name
set name, file(fastq1), file(fastq2) from input_files

output:
set val("${name}"), file("${name}.bam") into sampe_output

"""
bwa mem -t ${task.cpus} ${params.reference} ${fastq1} ${fastq2} | samtools view -uS - | samtools sort - > ${name}.bam
"""
else if (params.library == "single") {
if (params.skip_trimming) {
trimmed_fastqs = input_files
}
else {
FASTP_SINGLE(input_files)
trimmed_fastqs = FASTP_SINGLE.out.trimmed_fastqs
}
if (params.algorithm == "aln" && !params.inception) {
BWA_SAMSE(BWA_ALN(trimmed_fastqs))
}
else if (params.algorithm == "mem") {
BWA_MEM_SE(trimmed_fastqs)
}
else {
exit 1, "Unsupported configuration!"
}
}
}
else if (params.algorithm == "mem" && params.library == "single") {

process bwaMemSe {
cpus "${params.cpus}"
memory "${params.memory}"
tag "${name}"
publishDir params.output, mode: "move"

input:
// joins both channels by key using the first element in the tuple, the name
set name, file(fastq) from input_files

output:
set val("${name}"), file("${name}.bam") into sampe_output

"""
bwa mem -t ${task.cpus} ${params.reference} ${fastq} | samtools view -uS - | samtools sort - > ${name}.bam
"""
else {
exit 1, "Unsupported configuration!"
}
}
else {
exit 1, "Unsupported configuration!"
}
Loading

0 comments on commit 1c374be

Please sign in to comment.