Merge pull request #682 from alneberg/awsbatch

Awsbatch cpu and memory config
SciLifeLab · Dec 17, 2018 · e8469c6 · e8469c6
2 parents ac53e63 + 43a380a
commit e8469c6
Show file tree

Hide file tree

Showing 8 changed files with 79 additions and 14 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 -   [#671](https://github.com/SciLifeLab/Sarek/pull/671) - publishDir modes are now params
 -   [#677](https://github.com/SciLifeLab/Sarek/pull/677) - Update docs
 -   [#679](https://github.com/SciLifeLab/Sarek/pull/679) - Update old awsbatch configuration
+-   [#682](https://github.com/SciLifeLab/Sarek/pull/682) - Specifications for memory and cpus for awsbatch
 -   [#693](https://github.com/SciLifeLab/Sarek/pull/693) - Qualimap bamQC is now run after mapping and after recalibration for better QC
 -   [#700](https://github.com/SciLifeLab/Sarek/pull/700) - Update GATK to `4.0.9.0`
 -   [#702](https://github.com/SciLifeLab/Sarek/pull/702) - update FastQC to `0.11.8`

diff --git a/conf/aws-batch.config b/conf/aws-batch.config
@@ -10,13 +10,21 @@
 params {
   genome_base = params.genome == 'GRCh37' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh37" : params.genome == 'GRCh38' ? "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38" : "s3://sarek-references/small"
   publishDirMode = 'copy'
+  singleCPUMem  = 7.GB // Same value as in uppmax-slurm.config, so process settings copied from there keep working.
+  localReportDir = 'Reports'
 }
 
 executor {
   name = 'awsbatch'
   awscli = '/home/ec2-user/miniconda/bin/aws'
 }
 
+/* Rolling files are currently not supported on s3 */
+report.file = "${params.localReportDir}/Sarek_report.html"
+timeline.file = "${params.localReportDir}/Sarek_timeline.html"
+dag.file = "${params.localReportDir}/Sarek_DAG.svg"
+trace.file = "${params.localReportDir}/Sarek_trace.txt"
+
 process {
   queue = params.awsqueue
 
@@ -26,4 +34,29 @@ process {
   cpus = 2
   memory = 8.GB
 
+  withName:RunBcftoolsStats {
+    cpus = 1
+    memory = {params.singleCPUMem * 2} // Memory is doubled so that it won't run two on the same instance
+    // Use a tiny queue for this one, so storage doesn't run out
+    queue = params.awsqueue_tiny
+  }
+  withName:RunVcftools {
+    cpus = 1
+    memory = {params.singleCPUMem * 2} // Memory is doubled so that it won't run two on the same instance
+    // Use a tiny queue for this one, so storage doesn't run out
+    queue = params.awsqueue_tiny
+  }
+  withName:RunHaplotypecaller {
+    cpus = 1
+    // Fixed allocation: memory is not scaled with task.attempt here
+    memory = {params.singleCPUMem * 2} // Memory is doubled so that it won't run two on the same instance
+    // Use a tiny queue for this one, so storage doesn't run out
+    queue = params.awsqueue_tiny
+  }
+  withName:RunGenotypeGVCFs {
+    cpus = 1
+    memory = {params.singleCPUMem * 2} // Memory is doubled so that it won't run two on the same instance
+    // Use a tiny queue for this one, so storage doesn't run out
+    queue = params.awsqueue_tiny
+  }
 }
diff --git a/conf/base.config b/conf/base.config
@@ -38,6 +38,8 @@ params {
   test = false // Not testing by default
   verbose = false // Enable for more verbose information
   awsqueue = false // Queue has to be provided when using awsbatch executor
+  awsqueue_tiny = params.awsqueue // A separate queue with smaller instance types
+  localReportDir = false // Used by AWS since reporting is not fully supported on s3 buckets
 }
 
 process {
@@ -67,6 +69,6 @@ dag { // Turning on dag by default
 
 trace { // Turning on trace tracking by default
   enabled = true
-  fields = 'process,task_id,hash,name,attempt,status,exit,realtime,%cpu,vmem,rss,submit,start,complete,duration,realtime,rchar,wchar'
+  fields = 'process,task_id,hash,name,attempt,status,exit,realtime,cpus,memory,%cpu,vmem,rss,submit,start,complete,duration,realtime,rchar,wchar'
   file = "${params.outDir}/Reports/Sarek_trace.txt"
 }
diff --git a/conf/resources.config b/conf/resources.config
@@ -21,25 +21,27 @@ process {
 
   withName:MapReads {
     memory = { check_max( 60.GB * task.attempt, 'memory' ) }
-    cpus = { check_max( 10, 'cpus' ) }
+    cpus = { check_max( 16, 'cpus' ) }
   } 
   withName:CreateRecalibrationTable {
-    cpus = { check_max( 12, 'cpus' ) }
-    memory = {params.singleCPUMem * 8 * task.attempt}
+    cpus = { check_max( 1, 'cpus' ) }
+    memory = { check_max( 60.GB * task.attempt, 'memory') }
   }
   withName:MarkDuplicates {
-		// Actually the -Xmx value should be kept lower
+	// Actually the -Xmx value should be kept lower,
+    // and is set through the markdup_java_options
     cpus = { check_max( 8, 'cpus' ) }
     memory = { check_max( 8.GB * task.attempt, 'memory' ) }
   }
   withName:MergeBams {
+    cpus = { check_max( 4, 'cpus') }
     memory = {params.singleCPUMem * task.attempt}
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
   withName:RecalibrateBam {
-    cpus = { check_max( 12, 'cpus' ) }
-    memory = { check_max( 7.GB * 8 * task.attempt, 'memory' ) }
-    time = { check_max( 5.h * task.attempt, 'time' ) }
+    cpus = { check_max( 2, 'cpus' ) }
+    memory = { check_max( 7.GB * 2 * task.attempt, 'memory' ) }
+    time = { check_max( 10.h * task.attempt, 'time' ) }
   }
   withName:RunAlleleCount {
     cpus = { check_max( 1, 'cpus' ) }
@@ -49,6 +51,14 @@ process {
     cpus = { check_max( 1, 'cpus' ) }
     memory = { check_max( 14.GB * task.attempt, 'memory' ) }
   }
+  withName:RunBamQCmapped {
+    cpus = { check_max( 6, 'cpus' ) }
+    memory = { check_max( 70.GB, 'memory' ) }
+  }
+  withName:RunBamQCrecalibrated {
+    cpus = { check_max( 6, 'cpus' ) }
+    memory = { check_max( 70.GB, 'memory' ) }
+  }
   withName:RunBcftoolsStats {
     cpus = { check_max( 1, 'cpus' ) }
   }
@@ -65,13 +75,13 @@ process {
     memory = { check_max( 8.GB * task.attempt, 'memory' ) }
   }
   withName:RunHaplotypecaller {
-    cpus = { check_max( 20, 'cpus' ) }
+    cpus = { check_max( 1, 'cpus' ) }
     // Increase memory linearly with each attempt
     memory = { check_max( 7.GB * 2 * task.attempt, 'memory' ) }
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
   withName:RunGenotypeGVCFs {
-    cpus = { check_max( 20, 'cpus' ) }
+    cpus = { check_max( 1, 'cpus' ) }
     memory = { check_max( 7.GB * task.attempt, 'memory' ) }
   }
   withName:RunMultiQC {
@@ -86,20 +96,24 @@ process {
     cpus = { check_max( 2, 'cpus' ) }
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
+  withName:RunSingleManta {
+    cpus = { check_max( 20, 'cpus' ) }
+    memory = { check_max( 16.GB, 'memory') }
+  }
   withName:RunSingleStrelka {
+    cpus = { check_max( 20, 'cpus' ) }
+    memory = { check_max( 16.GB, 'memory') }
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
   withName:RunSnpeff {
     cpus = { check_max( 1, 'cpus' ) }
-    errorStrategy = { task.exitStatus == 143 ? 'retry' : 'ignore' }
   }
   withName:RunStrelka {
     cpus = { check_max( 1, 'cpus' ) }
     time = { check_max( 5.h * task.attempt, 'time' ) }
   }
   withName:RunVEP {
-    cpus = { check_max( 1, 'cpus' ) }
+    cpus = { check_max( 16, 'cpus' ) }
     memory = {check_max (32.GB * task.attempt, 'memory' ) }
-    errorStrategy = { task.exitStatus == 143 ? 'retry' : 'ignore' }
   }
-}
+}
diff --git a/conf/uppmax-slurm.config b/conf/uppmax-slurm.config
@@ -12,6 +12,9 @@ params {
   singleCPUMem  = 7.GB // for processes that are using more memory but a single CPU only. Use the 'core' queue for these
 }
 
+// Extended set of fields, e.g. native_id, cpu and memory:
+trace.fields = 'process,task_id,hash,name,native_id,attempt,status,exit,realtime,cpus,memory,%cpu,vmem,rss,submit,start,complete,duration,realtime,rchar,wchar'
+
 process {
   clusterOptions = {"-A $params.project"}
   cpus = 16

diff --git a/docs/PARAMETERS.md b/docs/PARAMETERS.md
@@ -62,6 +62,14 @@ So you can write `--tools mutect2,ascat` or `--tools MuTect2,ASCAT` without worr
 
 Only required if you use the awsbatch profile. This parameter specifies the queue to which jobs are submitted in AWS Batch.
 
+### --awsqueue_tiny `BatchQueueName`
+
+Only used if you use the awsbatch profile. This parameter specifies a queue used for certain small jobs that might still require a significant amount of disk storage. 
+
+### --localReportDir `Directory`
+
+Only used if you use the awsbatch profile. This parameter specifies a local output directory for Nextflow reports, such as Sarek_timeline.html, because writing them to s3 is currently not fully supported.
+
 ### --verbose
 
 Display more information about files being processed.

diff --git a/lib/SarekUtils.groovy b/lib/SarekUtils.groovy
@@ -36,6 +36,7 @@ class SarekUtils {
       'annotateTools',
       'annotateVCF',
       'awsqueue',
+      'awsqueue_tiny',
       'build',
       'call-name',
       'callName',
@@ -52,6 +53,8 @@ class SarekUtils {
       'genome',
       'genomes',
       'help',
+      'localReportDir',
+      'local-report-dir',
       'markdup_java_options',
       'max_cpus',
       'max_memory',

diff --git a/nextflow.config b/nextflow.config
@@ -67,6 +67,7 @@ profiles {
     includeConfig 'conf/igenomes.config'
     includeConfig 'conf/aws-batch.config'
     includeConfig 'conf/docker.config'
+    includeConfig 'conf/resources.config'
     includeConfig 'conf/containers.config'
   }
   // Small testing with Singularity profile