Skip to content
This repository was archived by the owner on Oct 12, 2023. It is now read-only.

Commit 7e8154e

Browse files
committed
merge from master to feature/longrunjob
2 parents 11f6484 + 6e78b09 commit 7e8154e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1865
-1038
lines changed

.lintr

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
exclusions: list("R/validators.R")
1+
exclusions: list("R/validationUtilities.R")

CHANGELOG.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,26 @@
11
# Change Log
2-
## [0.5.2] 2017-10-30
2+
## [0.6.1] 2017-11-10
33
### Added
44
- Support for users to delete job and terminate job
55
### Changed
66
- Add retry to get job result
77
- Add errorHandling and wait option to job metadata
88
- Save job metadata to job result storage blob
99

10+
## [0.6.0] 2017-11-03
11+
### Added
12+
- Support for users to run custom versions of R via Docker containers
13+
- GitHub and BioConductor support as parameters in the foreach
14+
15+
### Changed
16+
- [BREAKING CHANGE] Host OS distribution is now Debian instead of CentOS
17+
- [BREAKING CHANGE] Command line no longer updates the environment of R
18+
- [BREAKING CHANGE] Default version of R changed from MRO 3.3.2 to latest version of CRAN R
19+
20+
### Fixed
21+
- Packages installed in foreach are only present and visible to a single foreach loop and then deleted from the cluster
22+
- Linux clients would get stuck waiting for the job to finish when using the .packages() option in the foreach loop
23+
1024
## [0.5.1] 2017-09-28
1125
### Added
1226
- Support for users to get job and job results for long running job

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: doAzureParallel
22
Type: Package
33
Title: doAzureParallel
4-
Version: 0.5.0
4+
Version: 0.6.1
55
Author: Brian Hoang
66
Maintainer: Brian Hoang <brhoan@microsoft.com>
77
Description: The project is for data experts who use R at scale. The project
@@ -17,7 +17,7 @@ Depends:
1717
foreach (>= 1.4.3),
1818
iterators (>= 1.0.8)
1919
Imports:
20-
rAzureBatch (>= 0.5.1),
20+
rAzureBatch (>= 0.5.3),
2121
jsonlite,
2222
rjson,
2323
xml2,
@@ -27,5 +27,5 @@ Suggests:
2727
caret,
2828
plyr,
2929
lintr
30-
Remotes: Azure/rAzureBatch@v0.5.1
30+
Remotes: Azure/rAzureBatch@v0.5.3
3131
RoxygenNote: 6.0.1

R/cluster.R

Lines changed: 66 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,11 @@ generateClusterConfig <- function(fileName) {
8686
max = 3),
8787
autoscaleFormula = "QUEUE"
8888
),
89+
containerImage = "rocker/tidyverse:latest",
8990
rPackages = list(
9091
cran = vector(),
9192
github = vector(),
93+
bioconductor = vector(),
9294
githubAuthenticationToken = ""
9395
),
9496
commandLine = vector()
@@ -143,6 +145,7 @@ makeCluster <-
143145

144146
installCranCommand <- NULL
145147
installGithubCommand <- NULL
148+
installBioconductorCommand <- NULL
146149

147150
if (!is.null(poolConfig$rPackages) &&
148151
!is.null(poolConfig$rPackages$cran) &&
@@ -158,21 +161,62 @@ makeCluster <-
158161
getPoolPackageInstallationCommand("github", poolConfig$rPackages$github)
159162
}
160163

161-
packages <- NULL
162-
if (!is.null(installCranCommand)) {
163-
packages <- installCranCommand
164+
if (!is.null(poolConfig$rPackages) &&
165+
!is.null(poolConfig$rPackages$bioconductor) &&
166+
length(poolConfig$rPackages$bioconductor) > 0) {
167+
installBioconductorCommand <-
168+
getPoolPackageInstallationCommand("bioconductor", poolConfig$rPackages$bioconductor)
164169
}
165170

166-
if (!is.null(installGithubCommand) && is.null(packages)) {
167-
packages <- installGithubCommand
171+
packages <- c()
172+
if (!is.null(installCranCommand)) {
173+
packages <- c(installCranCommand, packages)
174+
}
175+
if (!is.null(installGithubCommand)) {
176+
packages <- c(installGithubCommand, packages)
177+
}
178+
if (!is.null(installBioconductorCommand)) {
179+
packages <- c(installBioconductorCommand, packages)
168180
}
169-
else if (!is.null(installGithubCommand) && !is.null(packages)) {
170-
packages <- c(installCranCommand, installGithubCommand)
181+
182+
if (length(packages) == 0) {
183+
packages <- NULL
171184
}
172185

173186
commandLine <- NULL
187+
188+
# install docker and create docker container
189+
dockerImage <- "rocker/tidyverse:latest"
190+
if (!is.null(poolConfig$containerImage)) {
191+
dockerImage <- poolConfig$containerImage
192+
}
193+
194+
config$containerImage <- dockerImage
195+
installAndStartContainerCommand <- paste("cluster_setup.sh",
196+
dockerImage,
197+
sep = " ")
198+
199+
containerInstallCommand <- c(
200+
paste0(
201+
"wget https://raw.githubusercontent.com/Azure/doAzureParallel/",
202+
"master/inst/startup/cluster_setup.sh"),
203+
"chmod u+x cluster_setup.sh",
204+
paste0(
205+
"wget https://raw.githubusercontent.com/Azure/doAzureParallel/",
206+
"master/inst/startup/install_bioconductor.R"),
207+
"chmod u+x install_bioconductor.R",
208+
installAndStartContainerCommand
209+
)
210+
174211
if (!is.null(poolConfig$commandLine)) {
175-
commandLine <- poolConfig$commandLine
212+
commandLine <- c(containerInstallCommand, poolConfig$commandLine)
213+
}
214+
215+
if (!is.null(packages)) {
216+
# install packages
217+
commandLine <-
218+
c(commandLine,
219+
dockerRunCommand(dockerImage, packages, NULL, FALSE, FALSE))
176220
}
177221

178222
environmentSettings <- NULL
@@ -189,17 +233,17 @@ makeCluster <-
189233
}
190234

191235
if (!is.null(poolConfig[["pool"]])) {
192-
validateDeprecatedClusterConfig(clusterSetting)
236+
validation$isValidDeprecatedClusterConfig(clusterSetting)
193237
poolConfig <- poolConfig[["pool"]]
194238
}
195239
else {
196-
validateClusterConfig(clusterSetting)
240+
validation$isValidClusterConfig(clusterSetting)
197241
}
198242

199243
tryCatch({
200-
`Validators`$isValidPoolName(poolConfig$name)
244+
validation$isValidPoolName(poolConfig$name)
201245
},
202-
error = function(e){
246+
error = function(e) {
203247
stop(paste("Invalid pool name: \n",
204248
e))
205249
})
@@ -219,19 +263,19 @@ makeCluster <-
219263
if (grepl("PoolBeingDeleted", response)) {
220264
pool <- rAzureBatch::getPool(poolConfig$name)
221265

222-
cat(
223-
sprintf(
224-
paste("Cluster '%s' already exists and is being deleted.",
225-
"Another cluster with the same name cannot be created",
226-
"until it is deleted. Please wait for the cluster to be deleted",
227-
"or create one with a different name"),
228-
poolConfig$name
266+
cat(sprintf(
267+
paste(
268+
"Cluster '%s' already exists and is being deleted.",
269+
"Another cluster with the same name cannot be created",
270+
"until it is deleted. Please wait for the cluster to be deleted",
271+
"or create one with a different name"
229272
),
230-
fill = TRUE
231-
)
273+
poolConfig$name
274+
),
275+
fill = TRUE)
232276

233277
while (areShallowEqual(rAzureBatch::getPool(poolConfig$name)$state,
234-
"deleting")) {
278+
"deleting")) {
235279
cat(".")
236280
Sys.sleep(10)
237281
}

R/commandLineUtilities.R

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# Build the shell command that installs packages for a job.
#
# Args:
#   type: package source, one of "cran", "github" or "bioconductor".
#   packages: character vector of packages to install (may be NULL or empty).
#
# Returns:
#   A single string invoking the matching install_*.R script located under
#   $AZ_BATCH_JOB_PREP_WORKING_DIR, followed by the space-separated packages.
#   NULL when there are no packages to install.
#
# Raises:
#   An error ("Using an incorrect package source") for any other `type`.
getJobPackageInstallationCommand <- function(type, packages) {
  installScripts <- c(
    cran = "install_cran.R",
    github = "install_github.R",
    bioconductor = "install_bioconductor.R"
  )

  # Reject anything that is not exactly one known source name (this also
  # covers NULL, NA and vectors of length > 1).
  isKnownType <- is.character(type) &&
    length(type) == 1 &&
    (type %in% names(installScripts))
  if (!isKnownType) {
    stop("Using an incorrect package source")
  }

  # Nothing to install: return NULL explicitly rather than relying on the
  # value of a trailing `if`.
  if (is.null(packages) || length(packages) == 0) {
    return(NULL)
  }

  script <- paste0("Rscript $AZ_BATCH_JOB_PREP_WORKING_DIR/",
                   installScripts[[type]])
  paste0(script, " ", paste0(packages, collapse = " "))
}
22+
23+
# Build the shell commands that install packages on the pool nodes.
#
# Args:
#   type: package source, one of "cran", "github" or "bioconductor".
#   packages: character vector of packages to install.
#
# Returns:
#   A character vector with one install command per package, in the same
#   order as `packages`; character(0) when `packages` is NULL or empty.
#
# Raises:
#   An error ("Using an incorrect package source") for any other `type`.
getPoolPackageInstallationCommand <- function(type, packages) {
  sharedPackagesDirectory <- "/mnt/batch/tasks/shared/R/packages"

  # Opening fragment of an `Rscript -e '...'` expression that prepends the
  # shared package directory to the library search path.
  libPathsCommand <- paste0('\'.libPaths( c( \\\"',
                            sharedPackagesDirectory,
                            '\\\", .libPaths()));')

  # options(warn=2) turns warnings into errors inside the Rscript call.
  installCommand <-
    paste("Rscript -e \'args <- commandArgs(TRUE)\'",
          "-e \'options(warn=2)\'")

  # At this point we cannot use install_cran.R and install_github.R because
  # they are not yet available.
  if (type == "cran") {
    script <-
      paste(installCommand,
            paste("-e",
                  libPathsCommand,
                  "install.packages(args[1])\' %s"))
  }
  else if (type == "github") {
    script <-
      paste(installCommand,
            paste("-e",
                  libPathsCommand,
                  "devtools::install_github(args[1])\' %s"))
  }
  else if (type == "bioconductor") {
    script <- "Rscript /mnt/batch/tasks/startup/wd/install_bioconductor.R %s"
  }
  else {
    stop("Using an incorrect package source")
  }

  # Guard the empty case explicitly: a `1:length(packages)` loop would
  # iterate over c(1, 0) and emit a bogus command (or fail for NULL).
  if (length(packages) == 0) {
    return(character(0))
  }

  # sprintf is vectorised over `packages`: one command per package.
  sprintf(script, packages)
}
69+
70+
# Build a `docker run` command line for the given image and command.
#
# Args:
#   containerImage: name of the Docker image to run.
#   command: command to execute inside the container. paste() is vectorised,
#     so a character vector yields one `docker run` line per element.
#   containerName: optional container name, passed as `--name`.
#   runAsDaemon: when TRUE, adds `-d`.
#   includeEnvironmentVariables: when TRUE, also forwards the task/job level
#     AZ_BATCH_* variables and BLOBXFER_SASKEY into the container.
#
# Returns:
#   A character vector of `docker run ...` command lines.
dockerRunCommand <-
  function(containerImage,
           command,
           containerName = NULL,
           runAsDaemon = FALSE,
           includeEnvironmentVariables = TRUE) {
    # Options applied to every container: remove it on exit, mount the node
    # root directory and forward the node-level directory variables.
    dockerOptions <- paste(
      "--rm",
      "-v $AZ_BATCH_NODE_ROOT_DIR:$AZ_BATCH_NODE_ROOT_DIR",
      "-e AZ_BATCH_NODE_ROOT_DIR=$AZ_BATCH_NODE_ROOT_DIR",
      "-e AZ_BATCH_NODE_STARTUP_DIR=$AZ_BATCH_NODE_STARTUP_DIR",
      sep = " "
    )

    # Append each optional flag exactly once; pasting `dockerOptions` on both
    # sides of the flag would duplicate every option gathered so far.
    if (runAsDaemon) {
      dockerOptions <- paste(dockerOptions, "-d", sep = " ")
    }

    if (!is.null(containerName)) {
      dockerOptions <-
        paste(dockerOptions, "--name", containerName, sep = " ")
    }

    if (includeEnvironmentVariables) {
      dockerOptions <-
        paste(
          dockerOptions,
          "-e AZ_BATCH_TASK_ID=$AZ_BATCH_TASK_ID",
          "-e AZ_BATCH_JOB_ID=$AZ_BATCH_JOB_ID",
          "-e AZ_BATCH_TASK_WORKING_DIR=$AZ_BATCH_TASK_WORKING_DIR",
          "-e AZ_BATCH_JOB_PREP_WORKING_DIR=$AZ_BATCH_JOB_PREP_WORKING_DIR",
          "-e BLOBXFER_SASKEY=$BLOBXFER_SASKEY",
          sep = " "
        )
    }

    paste("docker run", dockerOptions, containerImage, command, sep = " ")
  }
110+
111+
# Wrap a sequence of shell commands into a single `/bin/bash -c` invocation.
#
# Args:
#   commands: character vector (or list) of shell commands.
#
# Returns:
#   One string of the form
#   /bin/bash -c "set -e; set -o pipefail; <cmd1>; <cmd2>; wait"
#   or "" when no usable command remains.
linuxWrapCommands <- function(commands = c()) {
  # Sanitize the input: flatten lists (which also drops NULL entries) and
  # discard NA or blank commands. A blank entry would otherwise render as
  # "; ;", which bash rejects as a syntax error.
  cleanCommands <- unlist(commands, use.names = FALSE)
  if (length(cleanCommands) > 0) {
    isUsable <- !is.na(cleanCommands) & nzchar(trimws(cleanCommands))
    cleanCommands <- cleanCommands[isUsable]
  }

  if (length(cleanCommands) == 0) {
    return("")
  }

  # lintr enforces a no-absolute-paths rule; the /bin/bash path below is
  # presumably the reason this file is excluded from linting (confirm in
  # .lintr).
  joinedCommands <- paste0(paste(cleanCommands, collapse = "; "), ";")
  sprintf("/bin/bash -c \"set -e; set -o pipefail; %s wait\"",
          joinedCommands)
}

0 commit comments

Comments
 (0)