Skip to content
This repository has been archived by the owner on Jan 27, 2020. It is now read-only.

Code polishing #597

Merged
merged 5 commits into from
Jun 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions annotate.nf
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,7 @@ def sarekMessage() {

def startMessage() {
  // Start-up output, in order: the Sarek ASCII banner, the Sarek message,
  // then the minimal information message.
  SarekUtils.sarek_ascii()
  sarekMessage()
  minimalInformationMessage()
}
Expand Down
1 change: 1 addition & 0 deletions buildContainers.nf
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ def sarekMessage() {

def startMessage() {
  // Start-up output, in order: the Sarek ASCII banner, the Sarek message,
  // then the minimal information message.
  SarekUtils.sarek_ascii()
  sarekMessage()
  minimalInformationMessage()
}
Expand Down
1 change: 1 addition & 0 deletions buildReferences.nf
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,7 @@ def sarekMessage() {

def startMessage() {
  // Start-up output, in order: the Sarek ASCII banner, the Sarek message,
  // then the minimal information message.
  SarekUtils.sarek_ascii()
  sarekMessage()
  minimalInformationMessage()
}
Expand Down
106 changes: 5 additions & 101 deletions germlineVC.nf
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ directoryMap = SarekUtils.defineDirectoryMap(params.outDir)
referenceMap = defineReferenceMap()
toolList = defineToolList()

if (!checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information'
if (!checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information'
if (!SarekUtils.checkReferenceMap(referenceMap)) exit 1, 'Missing Reference file(s), see --help for more information'
if (!SarekUtils.checkParameterList(tools,toolList)) exit 1, 'Unknown tool(s), see --help for more information'

if (params.test && params.genome in ['GRCh37', 'GRCh38']) {
referenceMap.intervals = file("$workflow.projectDir/repeats/tiny_${params.genome}.list")
Expand All @@ -88,11 +88,9 @@ else tsvPath = "${directoryMap.recalibrated}/recalibrated.tsv"
bamFiles = Channel.empty()
if (tsvPath) {
tsvFile = file(tsvPath)
bamFiles = extractBams(tsvFile)
bamFiles = SarekUtils.extractBams(tsvFile, "germline")
} else exit 1, 'No sample were defined, see --help'

(patientGenders, bamFiles) = extractGenders(bamFiles)

/*
================================================================================
= P R O C E S S E S =
Expand Down Expand Up @@ -273,9 +271,7 @@ recalTables = recalTables
[patient, sample, bam, bai, intervalBed, recalTable] }

// re-associate the BAMs and samples with the recalibration table
bamsForHC = bamsForHC
.phase(recalTables) { it[0..4] }
.map { it1, it2 -> it1 + [it2[6]] }
bamsForHC = bamsForHC.join(recalTables, by:[0,1,2,3,4])

bamsAll = bamsNormal.combine(bamsTumor)

Expand Down Expand Up @@ -643,50 +639,11 @@ process GetVersionVCFtools {
================================================================================
*/

def checkFileExtension(it, extension) {
  // Stop the run when the name of `it` does not end with `extension`.
  // Both sides are lower-cased first, so the comparison ignores case.
  def fileName = it.toString().toLowerCase()
  def expectedSuffix = extension.toLowerCase()
  def extensionMatches = fileName.endsWith(expectedSuffix)
  if (!extensionMatches) exit 1, "File: ${it} has the wrong extension: ${extension} see --help for more information"
}

def checkParameterExistence(it, list) {
  // Tell whether `it` is one of the entries of `list`.
  // An unknown value is printed before false is returned.
  if (list.contains(it)) return true
  println("Unknown parameter: ${it}")
  return false
}

def checkParameterList(list, realList) {
  // True only when each entry of `list` is accepted by
  // checkParameterExistence against `realList`.
  def allKnown = list.every { candidate -> checkParameterExistence(candidate, realList) }
  return allKnown
}

def checkParamReturnFile(item) {
// Assign the `item` entry of the selected genome (params.genomes[params.genome])
// to params under the same name, then return params."item" wrapped by file().
// NOTE(review): the value is read back from params instead of being reused
// directly - presumably so a value already present on params wins; confirm
// before simplifying.
params."${item}" = params.genomes[params.genome]."${item}"
return file(params."${item}")
}

def checkReferenceMap(referenceMap) {
  // Run checkRefExistence on every (name, path) entry of the reference map;
  // the result is true only when all entries pass.
  return referenceMap.every { refName, refPath -> checkRefExistence(refName, refPath) }
}

def checkRefExistence(referenceFile, fileToCheck) {
  // Check that a reference exists.
  //   referenceFile - name of the reference, used only in the log message
  //   fileToCheck   - a path, a wildcard pattern, or a List of those
  // Returns true when the reference is present, false (after logging it)
  // when it is missing.

  // A list of paths is valid only when each of its entries is valid.
  if (fileToCheck instanceof List) return fileToCheck.every { checkRefExistence(referenceFile, it) }
  def f = file(fileToCheck)
  // file() gave back a list: this is an expanded wildcard. A non-empty
  // expansion means all listed files exist; an empty one means the pattern
  // matched nothing, which is reported as missing instead of calling
  // exists() on the list.
  def missing = (f instanceof List) ? f.isEmpty() : !f.exists()
  if (missing) {
    log.info "Missing references: ${referenceFile} ${fileToCheck}"
    return false
  }
  return true
}

def checkUppmaxProject() {
// check if UPPMAX project number is specified
return !(workflow.profile == 'slurm' && !params.project)
Expand Down Expand Up @@ -720,39 +677,6 @@ def defineToolList() {
]
}

def extractBams(tsvFile) {
  // Channeling the TSV file containing BAM.
  // Format is: "subject gender status sample bam bai"
  // Every row is emitted as
  //   [ idPatient, gender, status, idSample, bamFile, baiFile ]
  // after the row size, the status, the files and their extensions
  // have been validated by the helper functions.
  Channel
    .from(tsvFile.readLines())
    .map { line ->
      def list = returnTSV(line.split(), 6)
      def idPatient = list[0]
      def gender = list[1]
      // Convert the status only when it is numeric; any other text is handed
      // to returnStatus unchanged so that it is rejected with the regular
      // TSV error message instead of a NumberFormatException.
      def rawStatus = list[2]
      def status = returnStatus(rawStatus.isInteger() ? rawStatus.toInteger() : rawStatus)
      def idSample = list[3]
      def bamFile = returnFile(list[4])
      def baiFile = returnFile(list[5])

      checkFileExtension(bamFile, ".bam")
      checkFileExtension(baiFile, ".bai")

      [ idPatient, gender, status, idSample, bamFile, baiFile ]
    }
}

def extractGenders(channel) {
  // Split the gender (second element) off every item of the channel.
  // Returns [genders, channel]:
  //   genders - map idPatient -> gender, filled from inside the map
  //             closure, i.e. as items go through the mapped channel
  //   channel - the mapped channel, whose items no longer hold the gender
  def genderByPatient = [:]
  def withoutGender = channel.map { row ->
    genderByPatient[row[0]] = row[1]
    [row[0]] + row[2..-1]
  }
  return [genderByPatient, withoutGender]
}

def generateIntervalsForVC(bams, intervals) {
def (bamsNew, bamsForVC) = bams.into(2)
def (intervalsNew, vcIntervals) = intervals.into(2)
Expand Down Expand Up @@ -826,34 +750,14 @@ def nextflowMessage() {
log.info "N E X T F L O W ~ version ${workflow.nextflow.version} ${workflow.nextflow.build}"
}

def returnFile(it) {
  // Resolve `it` through file() and return the result;
  // the run is stopped when the resolved file does not exist.
  def resolved = file(it)
  if (!resolved.exists()) exit 1, "Missing file in TSV file: ${it}, see --help for more information"
  return resolved
}

def returnStatus(it) {
  // Validate a sample status and return it unchanged.
  // Only two values are accepted:
  //   0 - normal
  //   1 - tumor (or relapse, or anything that is not normal)
  // Any other value stops the run.
  def isKnownStatus = it in [0, 1]
  if (!isKnownStatus) exit 1, "Status is not recognized in TSV file: ${it}, see --help for more information"
  return it
}

def returnTSV(it, number) {
  // Return the row untouched when it holds exactly `number` items;
  // a row of any other size stops the run.
  def columnCount = it.size()
  if (columnCount != number) exit 1, "Malformed row in TSV file: ${it}, see --help for more information"
  return it
}

def sarekMessage() {
  // Log one line made of: the pipeline title with params.version, the
  // value of grabRevision() and, when workflow.commitId is set, that
  // commit id in square brackets.
  def title = "Sarek - Workflow For Somatic And Germline Variations ~ ${params.version} - "
  def revision = this.grabRevision()
  def commitSuffix = workflow.commitId ? " [${workflow.commitId}]" : ""
  log.info title + revision + commitSuffix
}

def startMessage() {
  // Start-up output, in order: the Sarek ASCII banner, the Sarek message,
  // then the minimal information message.
  SarekUtils.sarek_ascii()
  sarekMessage()
  minimalInformationMessage()
}
Expand Down
158 changes: 126 additions & 32 deletions lib/SarekUtils.groovy
Original file line number Diff line number Diff line change
@@ -1,5 +1,28 @@
class MyUtils {
// Check if params is in this given list
import static nextflow.Nextflow.file
import nextflow.Channel

class SarekUtils {

// Stop the run when the name of `it` does not end with `extension`
// (case-insensitive comparison).
// NOTE(review): only nextflow.Nextflow.file is statically imported in this
// file - confirm that `exit` resolves from a static method of this class.
static def checkFileExtension(it, extension) {
  def fileName = it.toString().toLowerCase()
  def expectedSuffix = extension.toLowerCase()
  def extensionMatches = fileName.endsWith(expectedSuffix)
  if (!extensionMatches) exit 1, "File: ${it} has the wrong extension: ${extension} see --help for more information"
}

// Tell whether `it` is one of the entries of `list`;
// an unknown value is printed before false is returned.
static def checkParameterExistence(it, list) {
  if (list.contains(it)) return true
  println("Unknown parameter: ${it}")
  return false
}

// True only when each entry of `list` is accepted by
// checkParameterExistence against `realList`.
static def checkParameterList(list, realList) {
  def allKnown = list.every { candidate -> checkParameterExistence(candidate, realList) }
  return allKnown
}

// Return element in list of allowed params
static def checkParams(it) {
return it in [
'annotate-tools',
Expand Down Expand Up @@ -68,15 +91,22 @@ class MyUtils {
'version']
}

// Loop through all parameters to check their existence and spelling
static def checkParameterList(list, realList) {
return list.every{ checkParameterExistence(it, realList) }
// Run checkRefExistence on every (name, path) entry of the reference map;
// the result is true only when all entries pass.
static def checkReferenceMap(referenceMap) {
  return referenceMap.every { refName, refPath -> SarekUtils.checkRefExistence(refName, refPath) }
}

// Check parameter existence
static def checkParameterExistence(it, list) {
if (!list.contains(it)) {
println("Unknown parameter: ${it}")
// Check that a reference file (or each file of a list of reference files) exists
static def checkRefExistence(referenceFile, fileToCheck) {
if (fileToCheck instanceof List) return fileToCheck.every{ SarekUtils.checkRefExistence(referenceFile, it) }
def f = file(fileToCheck)
// this is an expanded wildcard: we can assume all files exist
if (f instanceof List && f.size() > 0) return true
else if (!f.exists()) {
this.log.info "Missing references: ${referenceFile} ${fileToCheck}"
return false
}
return true
Expand All @@ -85,32 +115,66 @@ class MyUtils {
// Define map of directories
// Returns a map from a step/tool key to its output directory, each one
// located under `outDir` (Preprocessing, VariantCalling, Annotation, Reports).
static def defineDirectoryMap(outDir) {
  // Each key is listed exactly once: the entries were previously repeated
  // without a separating comma, which is not a valid map literal.
  return [
    'nonRealigned'     : "${outDir}/Preprocessing/NonRealigned",
    'nonRecalibrated'  : "${outDir}/Preprocessing/NonRecalibrated",
    'recalibrated'     : "${outDir}/Preprocessing/Recalibrated",
    'ascat'            : "${outDir}/VariantCalling/Ascat",
    'freebayes'        : "${outDir}/VariantCalling/FreeBayes",
    'gvcf-hc'          : "${outDir}/VariantCalling/HaplotypeCallerGVCF",
    'haplotypecaller'  : "${outDir}/VariantCalling/HaplotypeCaller",
    'manta'            : "${outDir}/VariantCalling/Manta",
    'mutect1'          : "${outDir}/VariantCalling/MuTect1",
    'mutect2'          : "${outDir}/VariantCalling/MuTect2",
    'strelka'          : "${outDir}/VariantCalling/Strelka",
    'strelkabp'        : "${outDir}/VariantCalling/StrelkaBP",
    'snpeff'           : "${outDir}/Annotation/SnpEff",
    'vep'              : "${outDir}/Annotation/VEP",
    'bamQC'            : "${outDir}/Reports/bamQC",
    'bcftoolsStats'    : "${outDir}/Reports/BCFToolsStats",
    'fastQC'           : "${outDir}/Reports/FastQC",
    'markDuplicatesQC' : "${outDir}/Reports/MarkDuplicates",
    'multiQC'          : "${outDir}/Reports/MultiQC",
    'samtoolsStats'    : "${outDir}/Reports/SamToolsStats",
    'snpeffReports'    : "${outDir}/Reports/SnpEff",
    'vcftools'         : "${outDir}/Reports/VCFTools",
    'version'          : "${outDir}/Reports/ToolsVersion"
  ]
}

// Channeling the TSV file containing BAM.
// Format is: "subject gender status sample bam bai"
// With mode "germline" every row is emitted as
//   [ idPatient, status, idSample, bamFile, baiFile ]
// with any other mode the gender is kept:
//   [ idPatient, gender, status, idSample, bamFile, baiFile ]
static def extractBams(tsvFile, mode) {
  Channel
    .from(tsvFile.readLines())
    .map { line ->
      def list = SarekUtils.returnTSV(line.split(), 6)
      def idPatient = list[0]
      def gender = list[1]
      // Convert the status only when it is numeric; any other text is handed
      // to returnStatus unchanged so that it is rejected with the regular
      // TSV error message instead of a NumberFormatException.
      def rawStatus = list[2]
      def status = SarekUtils.returnStatus(rawStatus.isInteger() ? rawStatus.toInteger() : rawStatus)
      def idSample = list[3]
      def bamFile = SarekUtils.returnFile(list[4])
      def baiFile = SarekUtils.returnFile(list[5])

      SarekUtils.checkFileExtension(bamFile, ".bam")
      SarekUtils.checkFileExtension(baiFile, ".bai")

      if (mode == "germline") return [ idPatient, status, idSample, bamFile, baiFile ]
      else return [ idPatient, gender, status, idSample, bamFile, baiFile ]
    }
}

// Extract gender from Channel as it's only used for CNVs
// Returns [genders, channel]:
//   genders - map idPatient -> gender, filled from inside the map closure,
//             i.e. as items go through the mapped channel
//   channel - the mapped channel, whose items no longer hold the gender
static def extractGenders(channel) {
  def genderByPatient = [:]
  def withoutGender = channel.map { row ->
    genderByPatient[row[0]] = row[1]
    [row[0]] + row[2..-1]
  }
  return [genderByPatient, withoutGender]
}

// Compare params to list of verified params
static def isAllowedParams(params) {
final test = true
Expand All @@ -122,4 +186,34 @@ class MyUtils {
}
return test
}

// Resolve `it` through file() and return the result;
// the run is stopped when the resolved file does not exist.
static def returnFile(it) {
  def resolved = file(it)
  if (!resolved.exists()) exit 1, "Missing file in TSV file: ${it}, see --help for more information"
  return resolved
}

// Validate a sample status and return it unchanged.
// Accepted values: 0 == Normal, 1 == Tumor; anything else stops the run.
static def returnStatus(it) {
  def isKnownStatus = it in [0, 1]
  if (!isKnownStatus) exit 1, "Status is not recognized in TSV file: ${it}, see --help for more information"
  return it
}

// Return the row untouched when it holds exactly `number` items;
// a row of any other size stops the run.
static def returnTSV(it, number) {
  def columnCount = it.size()
  if (columnCount != number) exit 1, "Malformed row in TSV file: ${it}, see --help for more information"
  return it
}

// Print the Sarek ASCII-art banner on standard output, one line at a time.
static def sarek_ascii() {
  def banner = [
    " ____ _____ _ ",
    " .' _ `. / ____| | | ",
    " / |\\`-_ \\ | (___ __ _ _ __ ___| | __ ",
    "| | \\ `-| \\___ \\ / _` | '__/ __| |/ / ",
    " \\ | \\ / ____) | (_| | | | __| < ",
    " `|____\\' |_____/ \\__,_|_| \\___|_|\\_\\ "
  ]
  for (bannerLine in banner) println bannerLine
}

}
Loading