Skip to content
This repository has been archived by the owner on Oct 12, 2023. It is now read-only.

Commit

Permalink
Bundling worker scripts into zip (#212)
Browse files Browse the repository at this point in the history
* Added zip file extraction

* Fixed cluster setup

* Added cluster script and quiet zip function

* Replaced url name with correct zip file name

* Removed startup folder name

* Added apt-get install on job prep

* Fixed branch names
  • Loading branch information
brnleehng authored Feb 20, 2018
1 parent b5b01cd commit 9bba37f
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 36 deletions.
1 change: 1 addition & 0 deletions R/cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ makeCluster <-
config$containerImage <- dockerImage
installAndStartContainerCommand <- "cluster_setup.sh"

# Note: Revert the branch in the download URL below to master once the PR is approved
dockerInstallCommand <- c(
paste0(
"wget https://raw.githubusercontent.com/Azure/doAzureParallel/",
Expand Down
48 changes: 13 additions & 35 deletions R/doAzureParallel.R
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ setHttpTraffic <- function(value = FALSE) {

retryCounter <- 0
maxRetryCount <- 5
startupFolderName <- "startup"

repeat {
if (retryCounter > maxRetryCount) {
stop(
Expand Down Expand Up @@ -395,23 +395,17 @@ setHttpTraffic <- function(value = FALSE) {
stop(paste0("Error creating job: ", containerContent$message$value))
}

nodeScriptsDir <- system.file("startup", package = "doAzureParallel")
nodeScriptsFiles <- list.files(nodeScriptsDir, full.names = TRUE)

nodeScriptsZip <- "node_scripts.zip"
# Zip flags: -j stores only file names (junks directory paths); -q keeps console output quiet
utils::zip(nodeScriptsZip, files = nodeScriptsFiles, extras = "-j -q")

# Uploading common job files for the worker node
rAzureBatch::uploadBlob(id,
system.file(startupFolderName, "worker.R", package = "doAzureParallel"))
rAzureBatch::uploadBlob(id,
system.file(startupFolderName, "merger.R", package = "doAzureParallel"))
rAzureBatch::uploadBlob(
id,
system.file(startupFolderName, "install_github.R", package = "doAzureParallel")
)
rAzureBatch::uploadBlob(
id,
system.file(startupFolderName, "install_cran.R", package = "doAzureParallel")
)
rAzureBatch::uploadBlob(
id,
system.file(startupFolderName, "install_bioconductor.R", package = "doAzureParallel")
)
nodeScriptsZip)
file.remove(nodeScriptsZip)

# Creating common job environment for all tasks
jobFileName <- paste0(id, ".rds")
Expand All @@ -421,28 +415,13 @@ setHttpTraffic <- function(value = FALSE) {

# Creating read-only SAS token blob resource file urls
sasToken <- rAzureBatch::createSasToken("r", "c", id)
workerScriptUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name, id, "worker.R", sasToken)
mergerScriptUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name, id, "merger.R", sasToken)
installGithubScriptUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name,
id,
"install_github.R",
sasToken)
installCranScriptUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name, id, "install_cran.R", sasToken)
installBioConductorScriptUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name, id, "install_bioconductor.R", sasToken)
nodeScriptsZipUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name, id, nodeScriptsZip, sasToken)
jobCommonFileUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name, id, jobFileName, sasToken)

requiredJobResourceFiles <- list(
rAzureBatch::createResourceFile(url = workerScriptUrl, fileName = "worker.R"),
rAzureBatch::createResourceFile(url = mergerScriptUrl, fileName = "merger.R"),
rAzureBatch::createResourceFile(url = installGithubScriptUrl, fileName = "install_github.R"),
rAzureBatch::createResourceFile(url = installCranScriptUrl, fileName = "install_cran.R"),
rAzureBatch::createResourceFile(url = installBioConductorScriptUrl, fileName = "install_bioconductor.R"),
rAzureBatch::createResourceFile(url = nodeScriptsZipUrl, fileName = nodeScriptsZip),
rAzureBatch::createResourceFile(url = jobCommonFileUrl, fileName = jobFileName)
)

Expand Down Expand Up @@ -495,7 +474,6 @@ setHttpTraffic <- function(value = FALSE) {
}
}


job <- rAzureBatch::getJob(id)

printJobInformation(
Expand Down
8 changes: 7 additions & 1 deletion R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,13 @@
poolInfo <- list("poolId" = poolId)

# Default command for job preparation task
commands <- c("ls")
# For backwards compatibility, zip and unzip are installed first in case they are missing.
# Eventually, this apt-get install command will be deprecated.
commands <- c(
"apt-get -y install zip unzip",
"unzip -j $AZ_BATCH_JOB_PREP_WORKING_DIR/node_scripts.zip"
)

if (!is.null(packages)) {
jobPackages <-
dockerRunCommand(containerImage,
Expand Down
3 changes: 3 additions & 0 deletions inst/startup/cluster_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(
apt-get -y update
apt-get -y install docker-ce

# Install zip/unzip so that bundled resource files can be extracted
apt-get -y install zip unzip

# Check docker is running
docker info > /dev/null 2>&1
if [ $? -ne 0 ]; then
Expand Down

0 comments on commit 9bba37f

Please sign in to comment.