diff --git a/conf/aws_ignite.config b/conf/aws_ignite.config new file mode 100644 index 0000000..01dc365 --- /dev/null +++ b/conf/aws_ignite.config @@ -0,0 +1,13 @@ +params { + cloud_autoscale_enabled = true + cloud_autoscale_max_instances = 2 +} + +docker.enabled = true + +cloud { + autoscale { + enabled = params.cloud_autoscale_enabled + maxInstances = params.cloud_autoscale_max_instances + } +} diff --git a/conf/awsbatch.config b/conf/awsbatch.config new file mode 100644 index 0000000..f69ffd8 --- /dev/null +++ b/conf/awsbatch.config @@ -0,0 +1,20 @@ +params { + executor = 'awsbatch' + dataLocation = 's3://lifebit-featured-datasets/pipelines/spammer-nf/input_files' + aws_batch_cliPath = '~/miniconda/bin/aws' + aws_batch_fetchInstanceType = true + aws_batch_process_queue = "nf-batch-4" + aws_batch_docker_run_options = "--ulimit nofile=65535:65535 --ulimit nproc=65535 --user 0" +} + +docker { + enabled = true + runOptions = params.aws_batch_docker_run_options +} + +process { + queue = params.aws_batch_process_queue +} + +aws.batch.cliPath = params.aws_batch_cliPath +aws.batch.fetchInstanceType = params.aws_batch_fetchInstanceType diff --git a/conf/google.config b/conf/google.config index 7597c9f..19fabb0 100644 --- a/conf/google.config +++ b/conf/google.config @@ -1,13 +1,23 @@ params { executor = 'google-lifesciences' dataLocation = 'gs://lifebit-featured-datasets/pipelines/spammer-nf/input_files' + + // Nextflow default values, corrected so that all images are pulled from gcr.io + gls_copyImage = 'gcr.io/google.com/cloudsdktool/cloud-sdk:alpine' + gls_sshImage = 'gcr.io/cloud-genomics-pipelines/tools' + container = 'gcr.io/nextflow-250616/ubuntu:latest' } google { + // both images must be hosted in gcr.io if using private IPs (gls_usePrivateAddress true) + lifeSciences.copyImage = params.gls_copyImage + lifeSciences.sshImage = params.gls_sshImage + lifeSciences.bootDiskSize = params.gls_bootDiskSize lifeSciences.preemptible = params.gls_preemptible zone = params.zone 
- network = params.network - subnetwork = params.subnetwork + lifeSciences.network = params.network + lifeSciences.subnetwork = params.subnetwork + lifeSciences.usePrivateAddress = params.gls_usePrivateAddress + lifeSciences.sshDaemon = params.gls_sshDaemon } - diff --git a/conf/qa.config b/conf/qa.config new file mode 100644 index 0000000..ea9cece --- /dev/null +++ b/conf/qa.config @@ -0,0 +1,8 @@ +params { + dataLocation = false + fileSuffix = "cram" + repsProcessA = 2 + processA_cpus = 4 + processATimeRange = "30-120" + errorStrategy = 'terminate' +} diff --git a/main.nf b/main.nf index 53c64c4..1aab7cf 100644 --- a/main.nf +++ b/main.nf @@ -4,7 +4,6 @@ fileSystem = params.dataLocation.contains(':') ? params.dataLocation.split(':')[ // Header log info log.info "\nPARAMETERS SUMMARY" log.info "mainScript : ${params.mainScript}" -log.info "defaultBranch : ${params.defaultBranch}" log.info "config : ${params.config}" log.info "fileSystem : ${fileSystem}" log.info "dataLocation : ${params.dataLocation}" @@ -27,16 +26,38 @@ log.info "container : ${params.container}" log.info "maxForks : ${params.maxForks}" log.info "queueSize : ${params.queueSize}" log.info "executor : ${params.executor}" +if(params.executor == 'awsbatch') { +log.info "aws_batch_cliPath : ${params.aws_batch_cliPath}" +log.info "aws_batch_fetchInstanceType : ${params.aws_batch_fetchInstanceType}" +log.info "aws_batch_process_queue : ${params.aws_batch_process_queue}" +log.info "aws_batch_docker_run_options : ${params.aws_batch_docker_run_options}" +} +if(params.config == 'conf/aws_ignite.config') { +log.info "cloud_autoscale_enabled : ${params.cloud_autoscale_enabled}" +log.info "cloud.autoscale.enabled : ${params.cloud_autoscale_enabled}" +log.info "cloud_autoscale_max_instances : ${params.cloud_autoscale_max_instances}" +log.info "cloud.autoscale.maxInstances : ${params.cloud_autoscale_max_instances}" +} +if(params.executor == 'google-lifesciences') { +log.info "gls_bootDiskSize : ${params.gls_bootDiskSize}" 
+log.info "gls_preemptible : ${params.gls_preemptible}" +log.info "gls_usePrivateAddress : ${params.gls_usePrivateAddress}" +log.info "zone : ${params.zone}" +log.info "network : ${params.network}" +log.info "subnetwork : ${params.subnetwork}" +log.info "lifeSciences.usePrivateAddress : ${params.gls_usePrivateAddress}" +log.info "google.lifeSciences.sshDaemon : ${params.gls_sshDaemon}" +} log.info "" numberRepetitionsForProcessA = params.repsProcessA numberFilesForProcessA = params.filesProcessA processAWriteToDiskMb = params.processAWriteToDiskMb processAInput = Channel.from([1] * numberRepetitionsForProcessA) -processAInputFiles = Channel.fromPath("${params.dataLocation}/*${params.fileSuffix}").take( numberRepetitionsForProcessA ) +processAInputFiles = Channel.fromPath("${params.dataLocation}/**${params.fileSuffix}").take( numberRepetitionsForProcessA ) process processA { - publishDir "${params.output}/${task.hash}", mode: 'copy' + publishDir "${params.output}/${task.hash}/", mode: 'copy' tag "cpus: ${task.cpus}, cloud storage: ${cloud_storage_file}" input: @@ -48,6 +69,7 @@ process processA { val x into processCInput val x into processDInput file "*.txt" + file("command-logs") optional true script: """ @@ -61,6 +83,8 @@ process processA { done; sleep \$timeToWait echo "task cpus: ${task.cpus}" + + ${params.savescript} """ } @@ -102,4 +126,3 @@ process processD { sleep \$timeToWait """ } - diff --git a/nextflow.config b/nextflow.config index f40e392..2f4eadf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -3,8 +3,10 @@ docker.enabled = true // NOTE: // Initialise the values of the params to the preferred default value or to false params { + processA_memory = '4.GB' + processA_cpus = 1 + processA_disk = '20.GB' mainScript = 'main.nf' - defaultBranch = 'stage-s3-files' config = 'conf/standard.config' dataLocation = 's3://lifebit-featured-datasets/pipelines/spammer-nf/input_files' fileSuffix = '' @@ -13,6 +15,7 @@ params { processATimeRange = "1-2" 
filesProcessA = 1 processATimeBetweenFileCreationInSecs = 0 + savescript = 'mkdir command-logs; cp .command.* command-logs' processBTimeRange = "2-3" processBWriteToDiskMb = 10 @@ -23,7 +26,6 @@ params { echo = false cpus = 1 - processA_cpus = 1 errorStrategy = 'ignore' container = 'quay.io/lifebitai/ubuntu:18.10' maxForks = 200 @@ -32,11 +34,21 @@ params { executor = 'ignite' // google-lifesciences + gls_usePrivateAddress = false + gls_sshDaemon = false gls_bootDiskSize = '50.GB' gls_preemptible = true zone = 'us-east1-b' network = 'default' subnetwork = 'default' + + // disabled by default; actual values are defined in conf/google.config + gls_copyImage = false + gls_sshImage = false + + // ignite on AWS (see conf/aws_ignite.config) + cloud_autoscale_enabled = false + cloud_autoscale_max_instances = 2 } // Do not update the order because the values set in params scope will not be overwritten @@ -56,7 +68,9 @@ process { errorStrategy = params.errorStrategy withName: processA { + disk = params.processA_disk cpus = params.processA_cpus + memory = params.processA_memory } } @@ -69,5 +83,4 @@ manifest { homePage = 'https://github.com/lifebit-ai/spammer-nf' description = 'A stampede of processes at your fingertips' mainScript = params.mainScript - defaultBranch = params.defaultBranch -} \ No newline at end of file +}