Skip to content

Commit

Permalink
Implement meta.id detection for GFFREAD
Browse files Browse the repository at this point in the history
  • Loading branch information
ftabaro committed Nov 20, 2024
1 parent cb1317e commit b775286
Showing 1 changed file with 93 additions and 79 deletions.
172 changes: 93 additions & 79 deletions subworkflows/local/prepare_genome.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,35 @@
//

include {
GUNZIP as GUNZIP_FASTA
GUNZIP as GUNZIP_GTF
GUNZIP as GUNZIP_GFF
GUNZIP as GUNZIP_GENE_BED
GUNZIP as GUNZIP_BLACKLIST } from '../../modules/nf-core/gunzip/main'
GUNZIP as GUNZIP_FASTA ;
GUNZIP as GUNZIP_GTF ;
GUNZIP as GUNZIP_GFF ;
GUNZIP as GUNZIP_GENE_BED ;
GUNZIP as GUNZIP_BLACKLIST
} from '../../modules/nf-core/gunzip/main'

include {
UNTAR as UNTAR_BWA_INDEX
UNTAR as UNTAR_BOWTIE2_INDEX
UNTAR as UNTAR_STAR_INDEX } from '../../modules/nf-core/untar/main'

include { UNTARFILES } from '../../modules/nf-core/untarfiles/main'
include { GFFREAD } from '../../modules/nf-core/gffread/main'
include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main'
include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main'
include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main'
include { CHROMAP_INDEX } from '../../modules/nf-core/chromap/index/main'

include { GTF2BED } from '../../modules/local/gtf2bed'
include { GENOME_BLACKLIST_REGIONS } from '../../modules/local/genome_blacklist_regions'
include { STAR_GENOMEGENERATE } from '../../modules/local/star_genomegenerate'
UNTAR as UNTAR_BWA_INDEX ;
UNTAR as UNTAR_BOWTIE2_INDEX ;
UNTAR as UNTAR_STAR_INDEX
} from '../../modules/nf-core/untar/main'

include { UNTARFILES } from '../../modules/nf-core/untarfiles/main'
include { GFFREAD } from '../../modules/nf-core/gffread/main'
include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main'
include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main'
include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main'
include { CHROMAP_INDEX } from '../../modules/nf-core/chromap/index/main'

include { GTF2BED } from '../../modules/local/gtf2bed'
include { GENOME_BLACKLIST_REGIONS } from '../../modules/local/genome_blacklist_regions'
include { STAR_GENOMEGENERATE } from '../../modules/local/star_genomegenerate'

workflow PREPARE_GENOME {
take:
genome // string: genome name
genomes // map: genome attributes
prepare_tool_index // string : tool to prepare index for
prepare_tool_index // string : tool to prepare index for
fasta // path: path to genome fasta file
gtf // file: /path/to/genome.gtf
gff // file: /path/to/genome.gff
Expand Down Expand Up @@ -63,25 +65,25 @@ workflow PREPARE_GENOME {
if (gtf.endsWith('.gz')) {
ch_gtf = GUNZIP_GTF([[:], gtf]).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
} else {
ch_gtf = Channel.value(file(gtf))
}
else {
ch_gtf = Channel.value(file(gtf, checkIfExists: true))
}
}
else if (gff) {
if (gff.endsWith('.gz')) {
ch_gff = GUNZIP_GFF([[:], gff]).gunzip.map { it[1] }
if (gff.endsWith('.gz')) {
ch_gff = GUNZIP_GFF([[:], file(gff, checkIfExists: true)]).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions)
} else {
ch_gff = Channel.value(file(gff))
}
else {
ch_gff = Channel.value(file(gff, checkIfExists: true)).map { [[:], it] }
ch_gff = Channel.value(file(gff, checkIfExists: true))
}

ch_gtf = GFFREAD(ch_gff, []).gtf.map { it[1] }
// Derive a meta.id for GFFREAD from the GFF *file name* only.
// - Use the last path component so directory parts never end up in the id
//   (the id is presumably used by the module as an output-file prefix; a value
//   containing '/' would point outside the task work directory — verify
//   against modules/nf-core/gffread).
// - Strip a trailing '.gz' and then the final extension with anchored regexes;
//   Groovy's String minus operator removes the FIRST occurrence anywhere in the
//   string, which mangles paths such as '/data/my.gff.files/genome.gff.gz'.
// - Declare with `def` so the names stay local to this workflow.
def gff_file_name = file(gff).name
def gff_meta_id   = gff_file_name.replaceFirst(/\.gz$/, '').replaceFirst(/\.[^.]+$/, '')

ch_gtf = GFFREAD(ch_gff.map { [[id: gff_meta_id], it] }, []).gtf.map { it[1] }

ch_versions = ch_versions.mix(GFFREAD.out.versions)
}

Expand All @@ -91,9 +93,10 @@ workflow PREPARE_GENOME {
ch_blacklist = Channel.empty()
if (blacklist) {
if (blacklist.endsWith('.gz')) {
ch_blacklist = GUNZIP_BLACKLIST ( [ [:], blacklist ] ).gunzip.map{ it[1] }
ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions)
} else {
ch_blacklist = GUNZIP_BLACKLIST([[:], blacklist]).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions)
}
else {
ch_blacklist = Channel.value(file(blacklist))
}
}
Expand All @@ -107,38 +110,41 @@ workflow PREPARE_GENOME {
def make_bed = false
if (!gene_bed) {
make_bed = true
} else if (genome && gtf) {
if (genomes[ genome ].gtf != gtf) {
}
else if (genome && gtf) {
if (genomes[genome].gtf != gtf) {
make_bed = true
}
}

if (make_bed) {
ch_gene_bed = GTF2BED ( ch_gtf ).bed
ch_gene_bed = GTF2BED(ch_gtf).bed
ch_versions = ch_versions.mix(GTF2BED.out.versions)
} else {
}
else {
if (gene_bed.endsWith('.gz')) {
ch_gene_bed = GUNZIP_GENE_BED ( [ [:], gene_bed ] ).gunzip.map{ it[1] }
ch_gene_bed = GUNZIP_GENE_BED([[:], gene_bed]).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions)
} else {
}
else {
ch_gene_bed = Channel.value(file(gene_bed))
}
}

//
// Create chromosome sizes file
//
CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } )
CUSTOM_GETCHROMSIZES(ch_fasta.map { [[:], it] })
ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] }
ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map{ it[1] }
ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)
ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] }
ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)

//
// Prepare genome intervals for filtering by removing regions in blacklist file
//
ch_genome_filtered_bed = Channel.empty()

GENOME_BLACKLIST_REGIONS (
GENOME_BLACKLIST_REGIONS(
ch_chrom_sizes,
ch_blacklist.ifEmpty([])
)
Expand All @@ -152,14 +158,16 @@ workflow PREPARE_GENOME {
if (prepare_tool_index == 'bwa') {
if (bwa_index) {
if (bwa_index.endsWith('.tar.gz')) {
ch_bwa_index = UNTAR_BWA_INDEX ( [ [:], bwa_index ] ).untar
ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
} else {
ch_bwa_index = [ [:], file(bwa_index) ]
ch_bwa_index = UNTAR_BWA_INDEX([[:], bwa_index]).untar
ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions)
}
else {
ch_bwa_index = [[:], file(bwa_index)]
}
} else {
ch_bwa_index = BWA_INDEX ( ch_fasta.map { [ [:], it ] } ).index
ch_versions = ch_versions.mix(BWA_INDEX.out.versions)
}
else {
ch_bwa_index = BWA_INDEX(ch_fasta.map { [[:], it] }).index
ch_versions = ch_versions.mix(BWA_INDEX.out.versions)
}
}

Expand All @@ -170,14 +178,16 @@ workflow PREPARE_GENOME {
if (prepare_tool_index == 'bowtie2') {
if (bowtie2_index) {
if (bowtie2_index.endsWith('.tar.gz')) {
ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( [ [:], bowtie2_index ] ).untar
ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions)
} else {
ch_bowtie2_index = [ [:], file(bowtie2_index) ]
ch_bowtie2_index = UNTAR_BOWTIE2_INDEX([[:], bowtie2_index]).untar
ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions)
}
else {
ch_bowtie2_index = [[:], file(bowtie2_index)]
}
} else {
ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta.map { [ [:], it ] } ).index
ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions)
}
else {
ch_bowtie2_index = BOWTIE2_BUILD(ch_fasta.map { [[:], it] }).index
ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions)
}
}

Expand All @@ -188,14 +198,16 @@ workflow PREPARE_GENOME {
if (prepare_tool_index == 'chromap') {
if (chromap_index) {
if (chromap_index.endsWith('.tar.gz')) {
ch_chromap_index = UNTARFILES ( [ [:], chromap_index ] ).files
ch_versions = ch_versions.mix(UNTARFILES.out.versions)
} else {
ch_chromap_index = [ [:], file(chromap_index) ]
ch_chromap_index = UNTARFILES([[:], chromap_index]).files
ch_versions = ch_versions.mix(UNTARFILES.out.versions)
}
else {
ch_chromap_index = [[:], file(chromap_index)]
}
} else {
ch_chromap_index = CHROMAP_INDEX ( ch_fasta.map { [ [:], it ] } ).index
ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions)
}
else {
ch_chromap_index = CHROMAP_INDEX(ch_fasta.map { [[:], it] }).index
ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions)
}
}

Expand All @@ -206,27 +218,29 @@ workflow PREPARE_GENOME {
if (prepare_tool_index == 'star') {
if (star_index) {
if (star_index.endsWith('.tar.gz')) {
ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map{ it[1] }
ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
} else {
ch_star_index = UNTAR_STAR_INDEX([[:], star_index]).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
}
else {
ch_star_index = Channel.value(file(star_index))
}
} else {
ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf ).index
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
}
else {
ch_star_index = STAR_GENOMEGENERATE(ch_fasta, ch_gtf).index
ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions)
}
}

emit:
fasta = ch_fasta // path: genome.fasta
fai = ch_fai // path: genome.fai
gtf = ch_gtf // path: genome.gtf
gene_bed = ch_gene_bed // path: gene.bed
chrom_sizes = ch_chrom_sizes // path: genome.sizes
filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed
bwa_index = ch_bwa_index // path: bwa/index/
bowtie2_index = ch_bowtie2_index // path: bowtie2/index/
chromap_index = ch_chromap_index // path: genome.index
star_index = ch_star_index // path: star/index/
versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
fasta = ch_fasta // path: genome.fasta
fai = ch_fai // path: genome.fai
gtf = ch_gtf // path: genome.gtf
gene_bed = ch_gene_bed // path: gene.bed
chrom_sizes = ch_chrom_sizes // path: genome.sizes
filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed
bwa_index = ch_bwa_index // path: bwa/index/
bowtie2_index = ch_bowtie2_index // path: bowtie2/index/
chromap_index = ch_chromap_index // path: genome.index
star_index = ch_star_index // path: star/index/
versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
}

0 comments on commit b775286

Please sign in to comment.