Fixed job creation (#138)

brnleehng · web-flow · commit 169e75f9b4bb · 2017-10-19T09:47:18.000-07:00
diff --git a/R/doAzureParallel.R b/R/doAzureParallel.R
@@ -253,174 +253,183 @@ setHttpTraffic <- function(value = FALSE) {
   assign("enableCloudCombine", enableCloudCombine, envir = .doAzureBatchGlobals)
   assign("cloudCombine", cloudCombine, envir = .doAzureBatchGlobals)
 
-  retryCounter <- 0
-  maxRetryCount <- 5
-  startupFolderName <- "startup"
-  containerResponse <- NULL
-  jobquotaReachedResponse <- NULL
-  while (retryCounter < maxRetryCount) {
-    sprintf("job id is: %s", id)
-    # try to submit the job. We may run into naming conflicts. If so, try again
-    tryCatch({
-      retryCounter <- retryCounter + 1
-
-      response <- rAzureBatch::createContainer(id, content = "text")
-      if (grepl("ContainerAlreadyExists", response)) {
-        if (!is.null(obj$options$azure$job)) {
-          containerResponse <- grepl("ContainerAlreadyExists", response)
-          break
-
-        }
+  resourceFiles <- list()
+  if (!is.null(obj$options$azure$resourceFiles)) {
+    resourceFiles <- obj$options$azure$resourceFiles
+  }
 
-        stop("Container already exists. Multiple jobs may possibly be running.")
-      }
+  if (!is.null(obj$options$azure$resourcefiles)) {
+    resourceFiles <- obj$options$azure$resourcefiles
+  }
 
-      rAzureBatch::uploadBlob(id,
-                              system.file(startupFolderName, "worker.R", package = "doAzureParallel"))
-      rAzureBatch::uploadBlob(id,
-                              system.file(startupFolderName, "merger.R", package = "doAzureParallel"))
-      rAzureBatch::uploadBlob(
-        id,
-        system.file(startupFolderName, "install_github.R", package = "doAzureParallel")
-      )
-      rAzureBatch::uploadBlob(
-        id,
-        system.file(startupFolderName, "install_cran.R", package = "doAzureParallel")
-      )
-      rAzureBatch::uploadBlob(
-        id,
-        system.file(startupFolderName, "install_bioconductor.R", package = "doAzureParallel")
-      )
+  enableCloudCombineKeyValuePair <-
+    list(name = "enableCloudCombine", value = as.character(enableCloudCombine))
 
-      # Setting up common job environment for all tasks
-      jobFileName <- paste0(id, ".rds")
-      saveRDS(.doAzureBatchGlobals, file = jobFileName)
+  chunkSize <- 1  # default used when neither a foreach option nor the global environment supplies a value
 
-      rAzureBatch::uploadBlob(id, paste0(getwd(), "/", jobFileName))
+  if (!is.null(obj$options$azure$chunkSize)) {  # camelCase spelling of the option
+    chunkSize <- obj$options$azure$chunkSize
+  }
 
-      file.remove(jobFileName)
+  if (!is.null(obj$options$azure$chunksize)) {  # all-lowercase spelling; checked later, so it wins if both are set
+    chunkSize <- obj$options$azure$chunksize
+  }
 
-      resourceFiles <- list()
-      if (!is.null(obj$options$azure$resourceFiles)) {
-        resourceFiles <- obj$options$azure$resourceFiles
-      }
+  if (exists("chunkSize", envir = .doAzureBatchGlobals)) {  # a value stored in .doAzureBatchGlobals overrides both options
+    chunkSize <- get("chunkSize", envir = .doAzureBatchGlobals)
+  }
 
-      if (!is.null(obj$options$azure$resourcefiles)) {
-        resourceFiles <- obj$options$azure$resourcefiles
-      }
+  chunkSizeKeyValuePair <-  # name/value pair later placed in the job metadata list
+    list(name = "chunkSize", value = as.character(chunkSize))
+
+  if (is.null(obj$packages)) {
+    metadata <-
+      list(enableCloudCombineKeyValuePair, chunkSizeKeyValuePair)
+  } else {
+    packagesKeyValuePair <-
+      list(name = "packages",
+           value = paste(obj$packages, collapse = ";"))
+
+    metadata <-
+      list(enableCloudCombineKeyValuePair,
+           chunkSizeKeyValuePair,
+           packagesKeyValuePair)
+  }
 
-      sasToken <- rAzureBatch::createSasToken("r", "c", id)
-      workerScriptUrl <-
-        rAzureBatch::createBlobUrl(storageCredentials$name, id, "worker.R", sasToken)
-      mergerScriptUrl <-
-        rAzureBatch::createBlobUrl(storageCredentials$name, id, "merger.R", sasToken)
-      installGithubScriptUrl <-
-        rAzureBatch::createBlobUrl(storageCredentials$name,
-                                   id,
-                                   "install_github.R",
-                                   sasToken)
-      installCranScriptUrl <-
-        rAzureBatch::createBlobUrl(storageCredentials$name, id, "install_cran.R", sasToken)
-        installBioConductorScriptUrl <-
-        rAzureBatch::createBlobUrl(storageCredentials$name, id, "install_bioconductor.R", sasToken)
-      jobCommonFileUrl <-
-        rAzureBatch::createBlobUrl(storageCredentials$name, id, jobFileName, sasToken)
-
-      requiredJobResourceFiles <- list(
-        rAzureBatch::createResourceFile(url = workerScriptUrl, fileName = "worker.R"),
-        rAzureBatch::createResourceFile(url = mergerScriptUrl, fileName = "merger.R"),
-        rAzureBatch::createResourceFile(url = installGithubScriptUrl, fileName = "install_github.R"),
-        rAzureBatch::createResourceFile(url = installCranScriptUrl, fileName = "install_cran.R"),
-        rAzureBatch::createResourceFile(url = installBioConductorScriptUrl, fileName = "install_bioconductor.R"),
-        rAzureBatch::createResourceFile(url = jobCommonFileUrl, fileName = jobFileName)
+  retryCounter <- 0  # number of attempts started so far
+  maxRetryCount <- 5
+  startupFolderName <- "startup"  # folder passed to system.file() when locating the bundled worker scripts
+  repeat {
+    if (retryCounter > maxRetryCount) {  # strict '>': one initial attempt plus maxRetryCount retries run before this fires
+      stop(
+        sprintf(
+          "Error creating job: Maximum number of retries (%d) exceeded",
+          maxRetryCount
+        )
       )
+    }
+    else {
+      retryCounter <- retryCounter + 1  # also drives the quadratic Sys.sleep() back-off used on retry
+    }
 
-      # We need to merge any files passed by the calling lib with the resource files specified here
-
-      resourceFiles <-
-        append(resourceFiles, requiredJobResourceFiles)
-
-      enableCloudCombineKeyValuePair <-
-        list(name = "enableCloudCombine", value = as.character(enableCloudCombine))
+    containerResponse <- rAzureBatch::createContainer(id, content = "response")  # response object: status_code is inspected below
 
-      chunkSize <- 1
+    if (containerResponse$status_code >= 400 && containerResponse$status_code <= 499) {
+      containerContent <- xml2::as_list(httr::content(containerResponse))  # error body is read as XML: Code / Message elements
 
-      if (!is.null(obj$options$azure$chunkSize)) {
-        chunkSize <- obj$options$azure$chunkSize
+      if (!is.null(obj$options$azure$job) && containerContent$Code[[1]] == "ContainerAlreadyExists") {
+        stop(paste("Error creating job: Job's storage container already exists for an unique job id.",
+                 "Either delete the storage container or change the job argument in the foreach."))
       }
 
-      if (!is.null(obj$options$azure$chunksize)) {
-        chunkSize <- obj$options$azure$chunksize
-      }
+      Sys.sleep(retryCounter * retryCounter)  # quadratic back-off before retrying under a new id
 
-      if (exists("chunkSize", envir = .doAzureBatchGlobals)) {
-        chunkSize <- get("chunkSize", envir = .doAzureBatchGlobals)
-      }
+      time <- format(Sys.time(), "%Y%m%d%H%M%S", tz = "GMT")
+      id <-  sprintf("%s%s",
+                     "job",
+                     time)
+      next
+    }
+    else if (containerResponse$status_code >= 500 && containerResponse$status_code <= 599) {
+      containerContent <- xml2::as_list(httr::content(containerResponse))
+      stop(paste0("Error creating job: ", containerContent$Message[[1]]))  # same XML shape as the Code lookup above; $message$value was always NULL here
+    }
 
-      chunkSizeKeyValuePair <-
-        list(name = "chunkSize", value = as.character(chunkSize))
-
-      if (is.null(obj$packages)) {
-        metadata <-
-          list(enableCloudCombineKeyValuePair, chunkSizeKeyValuePair)
-      } else {
-        packagesKeyValuePair <-
-          list(name = "packages",
-               value = paste(obj$packages, collapse = ";"))
-
-        metadata <-
-          list(enableCloudCombineKeyValuePair,
-               chunkSizeKeyValuePair,
-               packagesKeyValuePair)
-      }
+    # Uploading common job files for the worker node
+    rAzureBatch::uploadBlob(id,
+                            system.file(startupFolderName, "worker.R", package = "doAzureParallel"))
+    rAzureBatch::uploadBlob(id,
+                            system.file(startupFolderName, "merger.R", package = "doAzureParallel"))
+    rAzureBatch::uploadBlob(
+      id,
+      system.file(startupFolderName, "install_github.R", package = "doAzureParallel")
+    )
+    rAzureBatch::uploadBlob(
+      id,
+      system.file(startupFolderName, "install_cran.R", package = "doAzureParallel")
+    )
+    rAzureBatch::uploadBlob(
+      id,
+      system.file(startupFolderName, "install_bioconductor.R", package = "doAzureParallel")
+    )
 
-      response <- .addJob(
-        jobId = id,
-        poolId = data$poolId,
-        resourceFiles = resourceFiles,
-        metadata = metadata,
-        packages = obj$packages
-      )
+    # Creating common job environment for all tasks
+    jobFileName <- paste0(id, ".rds")
+    saveRDS(.doAzureBatchGlobals, file = jobFileName)
+    rAzureBatch::uploadBlob(id, paste0(getwd(), "/", jobFileName))
+    file.remove(jobFileName)
+
+    # Creating read-only SAS token blob resource file urls
+    sasToken <- rAzureBatch::createSasToken("r", "c", id)
+    workerScriptUrl <-
+      rAzureBatch::createBlobUrl(storageCredentials$name, id, "worker.R", sasToken)
+    mergerScriptUrl <-
+      rAzureBatch::createBlobUrl(storageCredentials$name, id, "merger.R", sasToken)
+    installGithubScriptUrl <-
+      rAzureBatch::createBlobUrl(storageCredentials$name,
+                                 id,
+                                 "install_github.R",
+                                 sasToken)
+    installCranScriptUrl <-
+      rAzureBatch::createBlobUrl(storageCredentials$name, id, "install_cran.R", sasToken)
+    installBioConductorScriptUrl <-
+      rAzureBatch::createBlobUrl(storageCredentials$name, id, "install_bioconductor.R", sasToken)
+    jobCommonFileUrl <-
+      rAzureBatch::createBlobUrl(storageCredentials$name, id, jobFileName, sasToken)
+
+    requiredJobResourceFiles <- list(
+      rAzureBatch::createResourceFile(url = workerScriptUrl, fileName = "worker.R"),
+      rAzureBatch::createResourceFile(url = mergerScriptUrl, fileName = "merger.R"),
+      rAzureBatch::createResourceFile(url = installGithubScriptUrl, fileName = "install_github.R"),
+      rAzureBatch::createResourceFile(url = installCranScriptUrl, fileName = "install_cran.R"),
+      rAzureBatch::createResourceFile(url = installBioConductorScriptUrl, fileName = "install_bioconductor.R"),
+      rAzureBatch::createResourceFile(url = jobCommonFileUrl, fileName = jobFileName)
+    )
 
-      if (grepl("ActiveJobAndScheduleQuotaReached", response)) {
-        jobquotaReachedResponse <-
-          grepl("ActiveJobAndScheduleQuotaReached", response)
-      }
+    jobResourceFiles <-  # built fresh on every attempt so a retry does not keep entries pointing at the previous id's container
+      append(resourceFiles, requiredJobResourceFiles)  # NOTE(review): confirm nothing after the loop reads the merged list from `resourceFiles`
 
-      if (grepl("JobExists", response)) {
-        stop("The specified job already exists.")
-      }
+    response <- .addJob(
+      jobId = id,
+      poolId = data$poolId,
+      resourceFiles = jobResourceFiles,
+      metadata = metadata,
+      packages = obj$packages
+    )
 
+    if (response$status_code == 201) {  # 201: job created, leave the retry loop
       break
+    }
+    else {
+      jobContent <- httr::content(response, as = "parsed")  # `as` is the httr::content() argument name; `content =` was not a parameter
 
-    },
-    error = function(e) {
-      if (retryCounter == maxRetryCount) {
-        cat(sprintf("Error creating job: %s\n",
-                    conditionMessage(e)))
+      if (identical(jobContent$code, "JobExists") && !is.null(obj$options$azure$job)) {  # identical() tolerates a missing code field
+        stop(paste("Error in creating job: Job already exists with the unique job id.",
+                   "Either delete the job or change the job argument in the foreach loop.",
+                   jobContent$message$value))
+      }
+      else if (identical(jobContent$code, "JobExists")) {  # generated id collided: back off and retry under a new timestamped id
+        Sys.sleep(retryCounter * retryCounter)
+
+        time <- format(Sys.time(), "%Y%m%d%H%M%S", tz = "GMT")
+        id <-  sprintf("%s%s",
+                       "job",
+                       time)
+        next
       }
 
-      print(e)
-      time <- format(Sys.time(), "%Y%m%d%H%M%S", tz = "GMT")
-      id <-  sprintf("%s%s",
-                     "job",
-                     time)
-    })
-  }
-
-  if (!is.null(containerResponse)) {
-    stop(
-      "Aborted mission. The container has already exist with user's specific job id. Please use a different job id."
-    )
-  }
+      if (identical(jobContent$code, "ActiveJobAndScheduleQuotaReached")) {
+        stop(
+          paste(
+            "Error in creating job: Your active job quota has been reached.",
+            "To increase your active job quota,",
+            "go to https://docs.microsoft.com/en-us/azure/batch/batch-quota-limit"
+          )
+        )
+      }
 
-  if (!is.null(jobquotaReachedResponse)) {
-    stop(
-      paste0(
-        "Aborted mission. Your active job quota has been reached. To increase your active job quota, ",
-        "go to https://docs.microsoft.com/en-us/azure/batch/batch-quota-limit"
-      )
-    )
+      stop("Error in creating job: ", jobContent$message$value)  # any other failure: surface the service's message text
+    }
   }
 
   cat("Job Summary: ", fill = TRUE)
diff --git a/R/helpers.R b/R/helpers.R
@@ -169,7 +169,7 @@
     poolInfo = poolInfo,
     jobPreparationTask = jobPreparationTask,
     usesTaskDependencies = usesTaskDependencies,
-    content = "text",
+    content = "response",
     metadata = metadata
   )
 

Original file line number	Diff line number	Diff line change
`@@ -169,7 +169,7 @@`
`169`	`169`	`poolInfo = poolInfo,`
`170`	`170`	`jobPreparationTask = jobPreparationTask,`
`171`	`171`	`usesTaskDependencies = usesTaskDependencies,`
`172`		`- content = "text",`
	`172`	`+ content = "response",`
`173`	`173`	`metadata = metadata`
`174`	`174`	`)`
`175`	`175`