Skip to content

Commit

Permalink
Make BBSPLIT indexing behave itself (#5005)
Browse files Browse the repository at this point in the history
  • Loading branch information
pinin4fjords authored Feb 28, 2024
1 parent 8f4a5d5 commit dcfa9a1
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 33 deletions.
83 changes: 51 additions & 32 deletions modules/nf-core/bbmap/bbsplit/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ process BBMAP_BBSPLIT {
tuple val(meta), path('*primary*fastq.gz'), optional:true, emit: primary_fastq
tuple val(meta), path('*fastq.gz') , optional:true, emit: all_fastq
tuple val(meta), path('*txt') , optional:true, emit: stats
tuple val(meta), path('*.log') , optional:true, emit: log
path "versions.yml" , emit: versions

when:
Expand All @@ -40,51 +41,69 @@ process BBMAP_BBSPLIT {
other_ref_names.eachWithIndex { name, index ->
other_refs << "ref_${name}=${other_ref_paths[index]}"
}

def fastq_in=''
def fastq_out=''
def index_files=''
def refstats_cmd=''

if (only_build_index) {
println("only building index")
if (primary_ref && other_ref_names && other_ref_paths) {
"""
bbsplit.sh \\
-Xmx${avail_mem}M \\
ref_primary=$primary_ref \\
${other_refs.join(' ')} \\
path=bbsplit \\
threads=$task.cpus \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset")
END_VERSIONS
"""
index_files = 'ref_primary=' +primary_ref + ' ' + other_refs.join(' ') + ' path=bbsplit'
} else {
log.error 'ERROR: Please specify as input a primary fasta file along with names and paths to non-primary fasta files.'
}
} else {
def index_files = ''
index_files = ''
if (index) {
index_files = "path=$index"
} else if (primary_ref && other_ref_names && other_ref_paths) {
index_files = "ref_primary=${primary_ref} ${other_refs.join(' ')}"
} else {
log.error 'ERROR: Please either specify a BBSplit index as input or a primary fasta file along with names and paths to non-primary fasta files.'
}
def fastq_in = meta.single_end ? "in=${reads}" : "in=${reads[0]} in2=${reads[1]}"
def fastq_out = meta.single_end ? "basename=${prefix}_%.fastq.gz" : "basename=${prefix}_%_#.fastq.gz"
"""
bbsplit.sh \\
-Xmx${avail_mem}M \\
$index_files \\
threads=$task.cpus \\
$fastq_in \\
$fastq_out \\
refstats=${prefix}.stats.txt \\
$args
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset")
END_VERSIONS
"""
fastq_in = meta.single_end ? "in=${reads}" : "in=${reads[0]} in2=${reads[1]}"
fastq_out = meta.single_end ? "basename=${prefix}_%.fastq.gz" : "basename=${prefix}_%_#.fastq.gz"
refstats_cmd = 'refstats=' + prefix + '.stats.txt'
}
"""
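# When the index files are staged in, their time stamps get disturbed, which
# bbsplit doesn't like. Rewrite the 'last modified' entry in each of the
# index's summary.txt files with the current modification time of the file
# named on its 'source' line. The time is read through Java (via jshell) so
# that it matches what bbmap does.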
if [ $index ]; then
for summary_file in \$(find $index/ref/genome -name summary.txt); do
src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|')
mod=\$(echo "System.out.println(java.nio.file.Files.getLastModifiedTime(java.nio.file.Paths.get(\\"\$src\\")).toMillis());" | jshell -J-Djdk.lang.Process.launchMechanism=vfork -)
sed "s|^last modified.*|last modified\\t\$mod|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file}
done
fi
# Run BBSplit
bbsplit.sh \\
-Xmx${avail_mem}M \\
$index_files \\
threads=$task.cpus \\
$fastq_in \\
$fastq_out \\
$refstats_cmd \\
$args 2> >(tee ${prefix}.log >&2)
# The summary files record an absolute source path, which would make the
# index impossible to use in other processes. Rewrite each 'source' entry as
# a relative path starting with 'bbsplit'.
for summary_file in \$(find bbsplit/ref/genome -name summary.txt); do
src=\$(grep '^source' "\$summary_file" | cut -f2- -d\$'\\t' | sed 's|.*/bbsplit|bbsplit|')
sed "s|^source.*|source\\t\$src|" "\$summary_file" > \${summary_file}.tmp && mv \${summary_file}.tmp \${summary_file}
done
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bbmap: \$(bbversion.sh | grep -v "Duplicate cpuset")
END_VERSIONS
"""

}
3 changes: 2 additions & 1 deletion modules/nf-core/bbmap/bbsplit/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ nextflow_process {
[ 'human' ], // meta map
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22/sequence/chr22_23800000-23980000.fa', checkIfExists: true)
])
input[4] = true
input[4] = false
"""
}
}
Expand Down Expand Up @@ -81,6 +81,7 @@ nextflow_process {

assertAll(
{ assert process.success },
{ assert path(process.out.log[0][1]).text.contains("If you wish to regenerate the index") },
{ assert snapshot(filteredFiles).match("bbsplit_index_filtered_files")},
{ assert filesExist : "One or more files to exclude do not exist" },
{ assert snapshot(process.out.versions).match("versions")}
Expand Down

0 comments on commit dcfa9a1

Please sign in to comment.