From 49bcfa42004b3a762284cc68f87cee580f1aff95 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 7 May 2018 15:58:21 -0700 Subject: [PATCH 01/11] Renamed operations --- R/autoscale.R | 2 +- R/{batchApi.R => batch-api.R} | 0 R/{logging.R => file-operations.R} | 0 R/{storage_management.R => storage-api.R} | 2 +- R/{commandLineUtilities.R => utility-commands.R} | 0 R/{jobUtilities.R => utility-job.R} | 0 R/{stringUtilities.R => utility-string.R} | 0 R/{validationUtilities.R => utility-validation.R} | 0 R/utility.R | 3 +-- docs/{02-getting-started-script.md => 02-getting-started.md} | 2 +- docs/README.md | 4 ++-- man/deleteJob.Rd | 2 +- man/deleteStorageContainer.Rd | 2 +- man/deleteStorageFile.Rd | 2 +- man/getClusterFile.Rd | 2 +- man/getJob.Rd | 2 +- man/getJobFile.Rd | 2 +- man/getJobList.Rd | 2 +- man/getJobResult.Rd | 2 +- man/getStorageFile.Rd | 2 +- man/listStorageContainers.Rd | 2 +- man/listStorageFiles.Rd | 2 +- man/terminateJob.Rd | 2 +- man/waitForTasksToComplete.Rd | 2 +- 24 files changed, 19 insertions(+), 20 deletions(-) rename R/{batchApi.R => batch-api.R} (100%) rename R/{logging.R => file-operations.R} (100%) rename R/{storage_management.R => storage-api.R} (98%) rename R/{commandLineUtilities.R => utility-commands.R} (100%) rename R/{jobUtilities.R => utility-job.R} (100%) rename R/{stringUtilities.R => utility-string.R} (100%) rename R/{validationUtilities.R => utility-validation.R} (100%) rename docs/{02-getting-started-script.md => 02-getting-started.md} (99%) diff --git a/R/autoscale.R b/R/autoscale.R index 0bfbffa0..61a16a79 100644 --- a/R/autoscale.R +++ b/R/autoscale.R @@ -108,7 +108,7 @@ resizeCluster <- function(cluster, dedicatedMax, lowPriorityMin, lowPriorityMax, - maxTasksPerNode = pool$maxTasksPerNode + maxTasksPerNode = cluster$maxTasksPerNode ), autoscaleInterval = timeInterval ) diff --git a/R/batchApi.R b/R/batch-api.R similarity index 100% rename from R/batchApi.R rename to R/batch-api.R diff --git a/R/logging.R b/R/file-operations.R 
similarity index 100% rename from R/logging.R rename to R/file-operations.R diff --git a/R/storage_management.R b/R/storage-api.R similarity index 98% rename from R/storage_management.R rename to R/storage-api.R index f95fce76..62771521 100644 --- a/R/storage_management.R +++ b/R/storage-api.R @@ -14,7 +14,7 @@ listStorageContainers <- function(prefix = "") { storageClient <- config$storageClient xmlResponse <- - storageClient$containerOperations$deleteContainer$listContainers( + storageClient$containerOperations$listContainers( prefix, content = "parsed") name <- getXmlValues(xmlResponse, ".//Container/Name") diff --git a/R/commandLineUtilities.R b/R/utility-commands.R similarity index 100% rename from R/commandLineUtilities.R rename to R/utility-commands.R diff --git a/R/jobUtilities.R b/R/utility-job.R similarity index 100% rename from R/jobUtilities.R rename to R/utility-job.R diff --git a/R/stringUtilities.R b/R/utility-string.R similarity index 100% rename from R/stringUtilities.R rename to R/utility-string.R diff --git a/R/validationUtilities.R b/R/utility-validation.R similarity index 100% rename from R/validationUtilities.R rename to R/utility-validation.R diff --git a/R/utility.R b/R/utility.R index 25260fe6..0bf689d4 100644 --- a/R/utility.R +++ b/R/utility.R @@ -178,7 +178,6 @@ createOutputFile <- function(filePattern, url) { # parsedValue[2] is "" # parsedValue[3] is "accountname.blob.core.windows.net" # parsedValue[4] is "outputs?se=2017-07-31&sr=c&st=2017-07-12" - parsedValue <- strsplit(url, "/")[[1]] baseUrl <- paste0(parsedValue[1], "//", parsedValue[3]) @@ -304,4 +303,4 @@ getHttpErrorMessage <- function(responseObj) { } detailMessage <- paste0(detailMessage, "\r\nodata.metadata: ", responseObj$odata.metadata) return(detailMessage) -} \ No newline at end of file +} diff --git a/docs/02-getting-started-script.md b/docs/02-getting-started.md similarity index 99% rename from docs/02-getting-started-script.md rename to docs/02-getting-started.md 
index 2d6d88b1..02b77ac2 100644 --- a/docs/02-getting-started-script.md +++ b/docs/02-getting-started.md @@ -1,4 +1,4 @@ -# Getting Started Script +# Getting Started The provided account setup script creates and configures all of the required Azure resources. diff --git a/docs/README.md b/docs/README.md index fd0ef229..23ae6c81 100644 --- a/docs/README.md +++ b/docs/README.md @@ -5,9 +5,9 @@ This section will provide information about how Azure works, how best to take ad Using the *Data Science Virtual Machine (DSVM)* & *Azure Batch* -2. **Getting Started Script** [(link)](./02-getting-started-script.md) +2. **Getting Started** [(link)](./02-getting-started.md) - Using the *Getting Started Script* to create credentials + Using the *Getting Started* to create credentials 3. **Virtual Machine Sizes** [(link)](./10-vm-sizes.md) diff --git a/man/deleteJob.Rd b/man/deleteJob.Rd index b691fabb..e76ee4d2 100644 --- a/man/deleteJob.Rd +++ b/man/deleteJob.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/jobUtilities.R +% Please edit documentation in R/utility-job.R \name{deleteJob} \alias{deleteJob} \title{Delete a job} diff --git a/man/deleteStorageContainer.Rd b/man/deleteStorageContainer.Rd index 513dd3a5..b043434b 100644 --- a/man/deleteStorageContainer.Rd +++ b/man/deleteStorageContainer.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/storage_management.R +% Please edit documentation in R/storage-api.R \name{deleteStorageContainer} \alias{deleteStorageContainer} \title{Delete a storage container from Azure Storage} diff --git a/man/deleteStorageFile.Rd b/man/deleteStorageFile.Rd index ce13f5df..6d20ce27 100644 --- a/man/deleteStorageFile.Rd +++ b/man/deleteStorageFile.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/storage_management.R +% Please edit documentation in R/storage-api.R \name{deleteStorageFile} 
\alias{deleteStorageFile} \title{Delete a storage file from a container.} diff --git a/man/getClusterFile.Rd b/man/getClusterFile.Rd index 9f20bb81..7da86ba1 100644 --- a/man/getClusterFile.Rd +++ b/man/getClusterFile.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/logging.R +% Please edit documentation in R/file-operations.R \name{getClusterFile} \alias{getClusterFile} \title{Get node files from compute nodes. By default, this operation will print the files on screen.} diff --git a/man/getJob.Rd b/man/getJob.Rd index 5113b368..aae18ee7 100644 --- a/man/getJob.Rd +++ b/man/getJob.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/jobUtilities.R +% Please edit documentation in R/utility-job.R \name{getJob} \alias{getJob} \title{Get a job for the given job id} diff --git a/man/getJobFile.Rd b/man/getJobFile.Rd index ecb6dd5f..1f5718e7 100644 --- a/man/getJobFile.Rd +++ b/man/getJobFile.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/logging.R +% Please edit documentation in R/file-operations.R \name{getJobFile} \alias{getJobFile} \title{Get job-related files from cluster node. 
By default, this operation will print the files on screen.} diff --git a/man/getJobList.Rd b/man/getJobList.Rd index 93313b5b..4ad8e599 100644 --- a/man/getJobList.Rd +++ b/man/getJobList.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/jobUtilities.R +% Please edit documentation in R/utility-job.R \name{getJobList} \alias{getJobList} \title{Get a list of job statuses from the given filter} diff --git a/man/getJobResult.Rd b/man/getJobResult.Rd index fa03e026..7cb48c2d 100644 --- a/man/getJobResult.Rd +++ b/man/getJobResult.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/jobUtilities.R +% Please edit documentation in R/utility-job.R \name{getJobResult} \alias{getJobResult} \title{Download the results of the job} diff --git a/man/getStorageFile.Rd b/man/getStorageFile.Rd index 75cb885e..e7800e1b 100644 --- a/man/getStorageFile.Rd +++ b/man/getStorageFile.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/storage_management.R +% Please edit documentation in R/storage-api.R \name{getStorageFile} \alias{getStorageFile} \title{Get a storage file from Azure Storage. 
By default, this operation will print the files on screen.} diff --git a/man/listStorageContainers.Rd b/man/listStorageContainers.Rd index 7676c58b..74e5033b 100644 --- a/man/listStorageContainers.Rd +++ b/man/listStorageContainers.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/storage_management.R +% Please edit documentation in R/storage-api.R \name{listStorageContainers} \alias{listStorageContainers} \title{List storage containers from Azure Storage.} diff --git a/man/listStorageFiles.Rd b/man/listStorageFiles.Rd index 8f43731f..53595cf6 100644 --- a/man/listStorageFiles.Rd +++ b/man/listStorageFiles.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/storage_management.R +% Please edit documentation in R/storage-api.R \name{listStorageFiles} \alias{listStorageFiles} \title{List storage files from Azure storage.} diff --git a/man/terminateJob.Rd b/man/terminateJob.Rd index 8f6aaedd..a2bc2486 100644 --- a/man/terminateJob.Rd +++ b/man/terminateJob.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/jobUtilities.R +% Please edit documentation in R/utility-job.R \name{terminateJob} \alias{terminateJob} \title{Terminate a job} diff --git a/man/waitForTasksToComplete.Rd b/man/waitForTasksToComplete.Rd index 47696d4b..4f05b0ba 100644 --- a/man/waitForTasksToComplete.Rd +++ b/man/waitForTasksToComplete.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/jobUtilities.R +% Please edit documentation in R/utility-job.R \name{waitForTasksToComplete} \alias{waitForTasksToComplete} \title{Wait for current tasks to complete} From d6bdcfcf0ad1fc0592b8a7f25810430089b56cf6 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 8 May 2018 16:55:39 -0700 Subject: [PATCH 02/11] Fixing docs --- README.md | 11 ----- ...tarted.md => 01-getting-started-script.md} | 2 +- docs/13-cluster-management.md | 29 
+++++++++++++ docs/31-long-running-job.md | 42 +++++++++++++++++++ docs/40-troubleshooting.md | 30 ++++++++++++- docs/50-performance-optimization.md | 0 6 files changed, 100 insertions(+), 14 deletions(-) rename docs/{02-getting-started.md => 01-getting-started-script.md} (99%) create mode 100644 docs/13-cluster-management.md create mode 100644 docs/50-performance-optimization.md diff --git a/README.md b/README.md index 2b57c20d..5d0e3a69 100644 --- a/README.md +++ b/README.md @@ -366,17 +366,6 @@ resizeCluster(cluster, lowPriorityMax = 0) ``` -### Setting Verbose Mode to Debug - -To debug your doAzureParallel jobs, you can set the package to operate on *verbose* mode: - -```R -# turn on verbose mode -setVerbose(TRUE) - -# turn off verbose mode -setVerbose(FALSE) -``` ### Bypassing merge task Skipping the merge task is useful when the tasks results don't need to be merged into a list. To bypass the merge task, you can pass the *enableMerge* flag to the foreach object: diff --git a/docs/02-getting-started.md b/docs/01-getting-started-script.md similarity index 99% rename from docs/02-getting-started.md rename to docs/01-getting-started-script.md index 02b77ac2..2d6d88b1 100644 --- a/docs/02-getting-started.md +++ b/docs/01-getting-started-script.md @@ -1,4 +1,4 @@ -# Getting Started +# Getting Started Script The provided account setup script creates and configures all of the required Azure resources. diff --git a/docs/13-cluster-management.md b/docs/13-cluster-management.md new file mode 100644 index 00000000..30114c79 --- /dev/null +++ b/docs/13-cluster-management.md @@ -0,0 +1,29 @@ +### Resizing Your Cluster + +At some point, you may also want to resize your cluster manually. You can do this simply with the command *resizeCluster*. 
+ +```R +cluster <- makeCluster("cluster.json") + +# resize so that we have a min of 10 dedicated nodes and a max of 20 dedicated nodes +# AND a min of 10 low priority nodes and a max of 20 low priority nodes +resizeCluster( + cluster, + dedicatedMin = 10, + dedicatedMax = 20, + lowPriorityMin = 10, + lowPriorityMax = 20, + algorithm = 'QUEUE', + timeInterval = '5m' ) +``` + +If your cluster is using autoscale but you want to set it to a static size of 10, you can also use this method: + +```R +# resize to a static cluster of 10 +resizeCluster(cluster, + dedicatedMin = 10, + dedicatedMax = 10, + lowPriorityMin = 0, + lowPriorityMax = 0) +``` diff --git a/docs/31-long-running-job.md b/docs/31-long-running-job.md index f9a7ebe4..37de5cf5 100644 --- a/docs/31-long-running-job.md +++ b/docs/31-long-running-job.md @@ -6,6 +6,48 @@ The doAzureParallel package allows you to manage long running jobs easily. There Long running job should run in asynchronous mode. +### Long-running Jobs + Job Management + +doAzureParallel also helps you manage your jobs so that you can run many jobs at once while managing it through a few simple methods. + + +```R +# List your jobs: +getJobList() +# Get your job by job id: +getJob(jobId = 'unique_job_id', verbose = TRUE) +``` + +This will also let you run *long running jobs* easily. + +With long running jobs, you will need to keep track of your jobs as well as set your job to a non-blocking state. You can do this with the *.options.azure* options: + +```R +# set the .options.azure option in the foreach loop +opt <- list(job = 'unique_job_id', wait = FALSE) + +# NOTE - if the option wait = FALSE, foreach will return your unique job id +job_id <- foreach(i = 1:number_of_iterations, .options.azure = opt) %dopar% { ... 
} + +# get back your job results with your unique job id +results <- getJobResult(job_id) +``` + +Finally, you may also want to track the status of jobs by state (active, completed etc): + +```R +# List jobs in completed state: +filter <- list() +filter$state <- c("active", "completed") +jobList <- getJobList(filter) +View(jobList) +``` + +You can learn more about how to execute long-running jobs [here](./docs/23-persistent-storage.md). + +With long-running jobs, you can take advantage of Azure's autoscaling capabilities to save time and/or money. Learn more about autoscale [here](./docs/11-autoscale.md). + + ## How to configure a job to run asynchronously You can configure a job to run asynchronously by specifying wait = FALSE in job options: diff --git a/docs/40-troubleshooting.md b/docs/40-troubleshooting.md index bee49c3f..6f61b876 100644 --- a/docs/40-troubleshooting.md +++ b/docs/40-troubleshooting.md @@ -1,4 +1,31 @@ -## Debugging and Troubleshooting +# Debugging and Troubleshooting + +## Debugging Tools + +### Setting Verbose Mode to Debug + +To debug your doAzureParallel jobs, you can set the package to operate on *verbose* mode: + +```R +# turn on verbose mode +setVerbose(TRUE) + +# turn off verbose mode +setVerbose(FALSE) +``` +### Setting HttpTraffic to Debug + +To debug your doAzureParallel jobs, you can set the package to operate on *verbose* mode: + +```R +# turn on verbose mode +setVerbose(TRUE) + +# turn off verbose mode +setVerbose(FALSE) +``` + +## Common Scenarios ### After creating my cluster, my nodes go to a 'startTaskFailed' state. Why? The most common case for this is that there was an issue with package installation or the custom script failed to run. To troubleshoot this you can simply download the output logs from the node. 
@@ -10,7 +37,6 @@ tvm-769611554_2-20170912t183413z-p The following steps show how to debug this by pulling logs off of the nodes: - ```r cluster <- makeCluster('myConfig.json') diff --git a/docs/50-performance-optimization.md b/docs/50-performance-optimization.md new file mode 100644 index 00000000..e69de29b From f6d81521ff2db17eecd77df6d2f882f6ff015655 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 9 May 2018 14:09:47 -0700 Subject: [PATCH 03/11] Removed stuff from README.md --- README.md | 53 ------------ ...-optimization.md => 02-getting-started.md} | 0 ...3-cluster-management.md => 13-clusters.md} | 22 ++++- docs/23-persistent-storage.md | 4 +- docs/31-long-running-job.md | 80 +++++++++--------- docs/40-troubleshooting.md | 3 + docs/42-faq.md | 3 +- docs/50-performance-tuning.md | 81 +++++++++++++++++++ 8 files changed, 149 insertions(+), 97 deletions(-) rename docs/{50-performance-optimization.md => 02-getting-started.md} (100%) rename docs/{13-cluster-management.md => 13-clusters.md} (73%) create mode 100644 docs/50-performance-tuning.md diff --git a/README.md b/README.md index 5d0e3a69..ee25f6c1 100644 --- a/README.md +++ b/README.md @@ -313,59 +313,6 @@ You can learn more about how to execute long-running jobs [here](./docs/23-persi With long-running jobs, you can take advantage of Azure's autoscaling capabilities to save time and/or money. Learn more about autoscale [here](./docs/11-autoscale.md). -### Using the 'chunkSize' option - -doAzureParallel also supports custom chunk sizes. This option allows you to group iterations of the foreach loop together and execute them in a single R session. - -```R -# set the chunkSize option -opt <- list(chunkSize = 3) -results <- foreach(i = 1:number_of_iterations, .options.azure = opt) %dopar% { ... } -``` - -You should consider using the chunkSize if each iteration in the loop executes very quickly. 
- -If you have a static cluster and want to have a single chunk for each worker, you can compute the chunkSize as follows: - -```R -# compute the chunk size -cs <- ceiling(number_of_iterations / getDoParWorkers()) - -# run the foreach loop with chunkSize optimized -opt <- list(chunkSize = cs) -results <- foreach(i = 1:number_of_iterations, .options.azure = opt) %dopar% { ... } -``` - -### Resizing Your Cluster - -At some point, you may also want to resize your cluster manually. You can do this simply with the command *resizeCluster*. - -```R -cluster <- makeCluster("cluster.json") - -# resize so that we have a min of 10 dedicated nodes and a max of 20 dedicated nodes -# AND a min of 10 low priority nodes and a max of 20 low priority nodes -resizeCluster( - cluster, - dedicatedMin = 10, - dedicatedMax = 20, - lowPriorityMin = 10, - lowPriorityMax = 20, - algorithm = 'QUEUE', - timeInterval = '5m' ) -``` - -If your cluster is using autoscale but you want to set it to a static size of 10, you can also use this method: - -```R -# resize to a static cluster of 10 -resizeCluster(cluster, - dedicatedMin = 10, - dedicatedMax = 10, - lowPriorityMin = 0, - lowPriorityMax = 0) -``` - ### Bypassing merge task Skipping the merge task is useful when the tasks results don't need to be merged into a list. 
To bypass the merge task, you can pass the *enableMerge* flag to the foreach object: diff --git a/docs/50-performance-optimization.md b/docs/02-getting-started.md similarity index 100% rename from docs/50-performance-optimization.md rename to docs/02-getting-started.md diff --git a/docs/13-cluster-management.md b/docs/13-clusters.md similarity index 73% rename from docs/13-cluster-management.md rename to docs/13-clusters.md index 30114c79..56c86cbc 100644 --- a/docs/13-cluster-management.md +++ b/docs/13-clusters.md @@ -1,4 +1,24 @@ -### Resizing Your Cluster +# Clusters + +## Commands + +### Listing clusters + +You can list all clusters currently running in your account by running: + +``` R +cluster <- listClusters() +``` + +### Viewing a Cluster + +To view details about your cluster: + +``` R +cluster <- getCluster("pool-001") +``` + +### Resizing a Cluster At some point, you may also want to resize your cluster manually. You can do this simply with the command *resizeCluster*. diff --git a/docs/23-persistent-storage.md b/docs/23-persistent-storage.md index 8c4c60d6..8526b4db 100644 --- a/docs/23-persistent-storage.md +++ b/docs/23-persistent-storage.md @@ -26,10 +26,10 @@ When the user is ready to get their results in a new session, the user uses the ```R my_job_id <- "my_unique_job_id" -results <- GetJobResult(my_job_id) +results <- getJobResult(my_job_id) ``` -If the job is not completed, GetJobResult will return the state of your job. Otherwise, GetJobResult will return the results. +If the job is not completed, getJobResult will return the state of your job. Otherwise, getJobResult will return the results. ### Output Files Batch will automatically handle your output files when the user assigns a file pattern and storage container url. 
diff --git a/docs/31-long-running-job.md b/docs/31-long-running-job.md index 37de5cf5..d18103ed 100644 --- a/docs/31-long-running-job.md +++ b/docs/31-long-running-job.md @@ -1,19 +1,17 @@ -# Long Running Job Management +# Job Management and Asynchronous Jobs The doAzureParallel package allows you to manage long running jobs easily. There are 2 ways to run a job: - Synchronous - Asynchronous -Long running job should run in asynchronous mode. - -### Long-running Jobs + Job Management +Long-running job should be run in non-interactive and asynchronous mode. doAzureParallel also helps you manage your jobs so that you can run many jobs at once while managing it through a few simple methods. - ```R # List your jobs: getJobList() + # Get your job by job id: getJob(jobId = 'unique_job_id', verbose = TRUE) ``` @@ -47,8 +45,7 @@ You can learn more about how to execute long-running jobs [here](./docs/23-persi With long-running jobs, you can take advantage of Azure's autoscaling capabilities to save time and/or money. Learn more about autoscale [here](./docs/11-autoscale.md). - -## How to configure a job to run asynchronously +## Configuring an asynchronous job You can configure a job to run asynchronously by specifying wait = FALSE in job options: ```R @@ -63,35 +60,14 @@ You can optionally specify the job Id in options as shown below: foreach(i = 1:number_of_iterations, .options.azure = options) %dopar% { ... 
} ``` -## Get job status +## Listing jobs +You can list all jobs currently running in your account by running: -getJob returns job metadata, such as chunk size, whether cloud combine is enabled, and packages specified for the job, it also returns task counts in different state - -```R - getJob(jobId) - getJob(jobId, verbose = TRUE) - - sample output: - -------------- - job metadata: - chunkSize: 1 - enableCloudCombine: TRUE - packages: httr - - tasks: - active: 1 - running: 0 - completed: 5 - succeeded: 0 - failed: 5 - total: 6 - - job state: completed +``` R + getJobList() ``` -## Get job list -You can use getJobList() to get a summary of all jobs. - +Example output: ```R getJobList() @@ -116,21 +92,45 @@ You can also filter job list by job state such as active or completed getJobList(filter) ``` -## Retrieve long running job result -Once job is completed successfully, you can call getJobResult to retrieve the job result: +## Viewing a Job + +getJob returns job metadata, such as chunk size, whether cloud combine is enabled, and packages specified for the job, it also returns task counts in different state ```R - jobResult <- getJobResult(jobId) + getJob(jobId) + getJob(jobId, verbose = TRUE) + + sample output: + -------------- + job metadata: + chunkSize: 1 + enableCloudCombine: TRUE + packages: httr + + tasks: + active: 1 + running: 0 + completed: 5 + succeeded: 0 + failed: 5 + total: 6 + + job state: completed ``` -### Clean up -Once you get the job result, you can delete the job and its result. +## Retrieving the Results + +Once job is completed successfully, you can call getJobResult to retrieve the job result: + ```R - deleteJob(jobId) + jobResult <- getJobResult(jobId) ``` -Please note deleteJob will delete the job at batch service and the storage container holding the job result. +### Deleting a Job + +Once you get the job result, you can delete the job and its result. 
Please note deleteJob will delete the job at batch service and the storage container holding the job result. + ```R deleteJob(jobId) ``` diff --git a/docs/40-troubleshooting.md b/docs/40-troubleshooting.md index 6f61b876..d0333c73 100644 --- a/docs/40-troubleshooting.md +++ b/docs/40-troubleshooting.md @@ -27,6 +27,9 @@ setVerbose(FALSE) ## Common Scenarios +### My job failed but I can't find my job and its result? +If you set wait = TRUE, the job and its result are automatically deleted. To keep them for investigation purposes, you can set a global option using setAutoDeleteJob(FALSE), or use the autoDeleteJob option at the foreach level. + ### After creating my cluster, my nodes go to a 'startTaskFailed' state. Why? The most common case for this is that there was an issue with package installation or the custom script failed to run. To troubleshoot this you can simply download the output logs from the node. diff --git a/docs/42-faq.md b/docs/42-faq.md index 71d2d162..0df61a5f 100644 --- a/docs/42-faq.md +++ b/docs/42-faq.md @@ -4,9 +4,10 @@ No. At the moment doAzureParallel is only being distributed via GitHub. ## Which version of R does doAzureParallel use? -By default, doAzureParallel uses Microsoft R Open 3.3. +By default, doAzureParallel uses _rocker/tidyverse:latest_, the latest R environment provided by the R Studio community pre-packaged with a large number of popular R packages. ## Does doAzureParallel support a custom version of R? + No. We are looking into support for different versions of R as well as custom versions of R but that is not supported today. ## How much does doAzureParallel cost? diff --git a/docs/50-performance-tuning.md b/docs/50-performance-tuning.md new file mode 100644 index 00000000..fd4aa9ea --- /dev/null +++ b/docs/50-performance-tuning.md @@ -0,0 +1,81 @@ + +# Performance Tuning + +## Parallelizing Cores +If you are using a VM size that has more than one core, you may want your R code running on all the cores in each VM. 
+ +There are two methods to do this today: + + +### MaxTasksPerNode +MaxTasksPerNode is a property that tells Azure how many tasks it should send to each node in your cluster. + +The maxTasksPerNode property can be configured in the configuration json file when creating your Azure pool. By default, we set this equal to 1, meaning that only one iteration of the foreach loop will execute on each node at a time. However, if you want to maximize the different cores in your cluster, you can set this number up to four times (4X) the number of cores in each node. For example, if you select the VM Size of Standard_F2 which has 2 cores, then you can set the maxTasksPerNode property up to 8. + +However, because R is single threaded, we recommend setting the maxTasksPerNode equal to the number of cores in the VM size that you selected. For example, if you select a VM Size of Standard_F2 which has 2 cores, then we recommend that you set the maxTasksPerNode property to 2. This way, Azure will know to run each iteration of the foreach loop on each core (as opposed to each node). + +Here's an example of how you may want to set your JSON configuration file: +```javascript +{ + ... + "vmSize": "Standard_F2", + "maxTasksPerNode": 2 + ... +} +``` + +### Nested doParallel +To take advantage of all the cores on each node, you can nest a *foreach* loop using the *doParallel* package inside the outer *foreach* loop that uses doAzureParallel. + +The *doParallel* package can detect the number of cores on a computer and parallelizes each iteration of the *foreach* loop across those cores. Pairing this with the doAzureParallel package, we can schedule work to each core of each VM in the pool. 
+ +```R + +# register your Azure pool as the parallel backend +registerDoAzureParallel(pool) + +# execute your outer foreach loop to schedule work to the pool +number_of_outer_iterations <- 10 +results <- foreach(i = 1:number_of_outer_iterations, .packages='doParallel') %dopar% { + + # detect the number of cores on the VM + cores <- detectCores() + + # make your 'cluster' using the nodes on the VM + cl <- makeCluster(cores) + + # register the above pool as the parallel backend within each VM + registerDoParallel(cl) + + # execute your inner foreach loop that will use all the cores in the VM + number_of_inner_iterations <- 20 + inner_results <- foreach(j = 1:number_of_inner_iterations) %dopar% { + runAlgorithm() + } + + return(inner_results) +} +``` + +## Using the 'chunkSize' option + +doAzureParallel also supports custom chunk sizes. This option allows you to group iterations of the foreach loop together and execute them in a single R session. + +```R +# set the chunkSize option +opt <- list(chunkSize = 3) +results <- foreach(i = 1:number_of_iterations, .options.azure = opt) %dopar% { ... } +``` + +You should consider using the chunkSize if each iteration in the loop executes very quickly. + +If you have a static cluster and want to have a single chunk for each worker, you can compute the chunkSize as follows: + +```R +# compute the chunk size +cs <- ceiling(number_of_iterations / getDoParWorkers()) + +# run the foreach loop with chunkSize optimized +opt <- list(chunkSize = cs) +results <- foreach(i = 1:number_of_iterations, .options.azure = opt) %dopar% { ... 
} +``` From b59c6578a15f648a7beeb50a9bd91d63a174929e Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 May 2018 09:27:23 -0700 Subject: [PATCH 04/11] Fixed links for TOC --- docs/40-troubleshooting.md | 86 +++++++++++++++++++------------------- docs/42-faq.md | 3 +- docs/README.md | 66 ++++++++++++++++++----------- 3 files changed, 84 insertions(+), 71 deletions(-) diff --git a/docs/40-troubleshooting.md b/docs/40-troubleshooting.md index d0333c73..c487ca55 100644 --- a/docs/40-troubleshooting.md +++ b/docs/40-troubleshooting.md @@ -24,6 +24,48 @@ setVerbose(TRUE) # turn off verbose mode setVerbose(FALSE) ``` +### Viewing files from Azure Storage +In every foreach run, the job will push its logs into Azure Storage that can be fetched by the user. For more information on reading log files, check out [managing storage](./41-managing-storage-via-R.md). + +By default, when wait is set to TRUE, job and its result is automatically deleted after the run is completed. To keep the job and its result for investigation purpose, you can set a global environment setting or specify an option in foreach loop to keep it. + +```R +# This will set a global setting to keep job and its result after run is completed. +setAutoDeleteJob(FALSE) + +# This will keep job and its result at each job level after run is completed. +options <- list(autoDeleteJob = FALSE) +foreach::foreach(i = 1:4, .options.azure = opt) %dopar% { ... } +``` + +### Viewing files directly from compute node +Cluster setup logs are not persisted. `getClusterFile` function will fetch any files including stdout and stderr log files in the cluster. This is particularly useful for users that utilizing [customize script](./30-customize-cluster.md) on their nodes and installing specific [packages](./20-package-management.md). + +Cluster setup files include: +File name | Description +--- | --- +stdout.txt | Contains the standard output of files. 
This includes any additional logging done during cluster setup time +stderr.txt | Contains the verbose and error logging during cluster setup + +```R +# This will download stderr.txt directly from the cluster. +getClusterFile(cluster, "tvm-1170471534_2-20170829t072146z", "stderr.txt", downloadPath = "pool-errors.txt") +``` + +When executing long-running jobs, users might want to check the status of the job by checking the logs. The logs and results are not uploaded to Azure Storage until tasks are completed. By running `getJobFile` function, the user is able to view log files in real time. + +Job-related files include: +File name | Description +--- | --- +stdout.txt | Contains the standard output of files. This includes any additional logging done during job execution +stderr.txt | Contains the verbose and error logging during job execution +[jobId]-[taskId].txt | Contains R specific output thats produced by the foreach iteration + +```R +# Allows users to read the stdout file in memory +stdoutFile <- getJobFile("job20170824195123", "job20170824195123-task1", "stdout.txt") +cat(stdoutFile) +``` ## Common Scenarios @@ -91,49 +133,5 @@ This issue is due to certain compiler flags not available in the default version ] ``` - ### Why do some of my packages install an older version of the package instead of the latest? Since doAzureParallel uses Microsoft R Open version 3.3 as the default version of R, it will automatically try to pull package from [MRAN](https://mran.microsoft.com/) rather than CRAN. This is a big benefit when wanting to use a constant version of a package but does not always contain references to the latest versions. To use a specific version from CRAN or a different MRAN snapshot date, use the [command line](./30-customize-cluster.md#running-commands-when-the-cluster-starts) in the cluster configuration to manually install the packages you need. 
- -## Viewing files from Azure Storage -In every foreach run, the job will push its logs into Azure Storage that can be fetched by the user. For more information on reading log files, check out [managing storage](./41-managing-storage-via-R.md). - -By default, when wait is set to TRUE, job and its result is automatically deleted after the run is completed. To keep the job and its result for investigation purpose, you can set a global environment setting or specify an option in foreach loop to keep it. - -```R -# This will set a global setting to keep job and its result after run is completed. -setAutoDeleteJob(FALSE) - -# This will keep job and its result at each job level after run is completed. -options <- list(autoDeleteJob = FALSE) -foreach::foreach(i = 1:4, .options.azure = opt) %dopar% { ... } -``` - -## Viewing files directly from compute node -Cluster setup logs are not persisted. `getClusterFile` function will fetch any files including stdout and stderr log files in the cluster. This is particularly useful for users that utilizing [customize script](./30-customize-cluster.md) on their nodes and installing specific [packages](./20-package-management.md). - -Cluster setup files include: -File name | Description ---- | --- -stdout.txt | Contains the standard output of files. This includes any additional logging done during cluster setup time -stderr.txt | Contains the verbose and error logging during cluster setup - -```R -# This will download stderr.txt directly from the cluster. -getClusterFile(cluster, "tvm-1170471534_2-20170829t072146z", "stderr.txt", downloadPath = "pool-errors.txt") -``` - -When executing long-running jobs, users might want to check the status of the job by checking the logs. The logs and results are not uploaded to Azure Storage until tasks are completed. By running `getJobFile` function, the user is able to view log files in real time. 
- -Job-related files include: -File name | Description ---- | --- -stdout.txt | Contains the standard output of files. This includes any additional logging done during job execution -stderr.txt | Contains the verbose and error logging during job execution -[jobId]-[taskId].txt | Contains R specific output thats produced by the foreach iteration - -```R -# Allows users to read the stdout file in memory -stdoutFile <- getJobFile("job20170824195123", "job20170824195123-task1", "stdout.txt") -cat(stdoutFile) -``` diff --git a/docs/42-faq.md b/docs/42-faq.md index 0df61a5f..5d665b8f 100644 --- a/docs/42-faq.md +++ b/docs/42-faq.md @@ -7,7 +7,6 @@ No. At the moment doAzureParallel is only being distributed via GitHub. By default, doAzureParallel uses _rocker/tidyverse:latest_, the latest R environment provided by the R Studio community pre-packaged with a large number of popular R packages. ## Does doAzureParallel support a custom version of R? - No. We are looking into support for different versions of R as well as custom versions of R but that is not supported today. ## How much does doAzureParallel cost? @@ -17,7 +16,7 @@ doAzureParallel itself is free to use and is built on top of the Azure Batch ser Yes. The [command line](./30-customize-cluster.md#running-commands-when-the-cluster-starts) feature in the cluster configuration enables running custom commands on each node in the cluster before it is ready to do work. Leverage this mechanism to do any custom installations such as installing custom software or mounting network drives. ## Does doAzureParallel work with Windows-specific packages? -No. doAzureParallel is built on top of the Linux CentOS distribution and will not work with Windows-specific packages. +No. doAzureParallel is built on top of the Linux Ubuntu distribution and will not work with Windows-specific packages. ## Why am I getting the error: could not find function "startsWith"? 
doAzureParallel requires you to run R 3.3 or greater on you local machine. diff --git a/docs/README.md b/docs/README.md index 23ae6c81..252b0417 100644 --- a/docs/README.md +++ b/docs/README.md @@ -7,53 +7,69 @@ This section will provide information about how Azure works, how best to take ad 2. **Getting Started** [(link)](./02-getting-started.md) - Using the *Getting Started* to create credentials + Using the *Getting Started* to create credentials + + a. + + b. **Programmatically** [(link)](./33-programmatically-generate-config.md) + Generate credentials and cluster config at runtime programmatically + + c. **National Cloud Support** [(link)](./34-national-clouds.md) -3. **Virtual Machine Sizes** [(link)](./10-vm-sizes.md) + How to run workload in Azure national clouds - How do you choose the best VM type/size for your workload? +3. **Customize Cluster** [(link)](./30-customize-cluster.md) -4. **Autoscale** [(link)](./11-autoscale.md) + Setting up your cluster to user's specific needs + a. **Virtual Machine Sizes** [(link)](./10-vm-sizes.md) + How do you choose the best VM type/size for your workload? + b. **Autoscale** [(link)](./11-autoscale.md) + Automatically scale up/down your cluster to save time and/or money. + c. **Building Containers** [(link)](./32-building-containers.md) - Automatically scale up/down your cluster to save time and/or money. +4. **Managing Cluster** [(link)](./33-clusters.md) -5. **Azure Limitations** [(link)](./12-quota-limitations.md) + Setting up your cluster to user's specific needs + + a. Setting up your cluster to user's specific needs - Learn about the limitations around the size of your cluster and the number of foreach jobs you can run in Azure. - 6. **Package Management** [(link)](./20-package-management.md) Best practices for managing your R packages in code. This includes installation at the cluster or job level as well as how to use different package providers. - -7. 
**Distributing your Data** [(link)](./21-distributing-data.md) - Best practices and limitations for working with distributed data. - -8. **Parallelizing on each VM Core** [(link)](./22-parallelizing-cores.md) +7. **Storage Management** [(link)](./31-long-running-job.md) - Best practices and limitations for parallelizing your R code to each core in each VM in your pool + a. **Distributing your Data** [(link)](./21-distributing-data.md) -9. **Persistent Storage** [(link)](./23-persistent-storage.md) + Best practices and limitations for working with distributed data. + + b. **Persistent Storage** [(link)](./23-persistent-storage.md) Taking advantage of persistent storage for long-running jobs + + c. **Accessing Azure Storage through R** [(link)](./23-persistent-storage.md) -10. **Customize Cluster** [(link)](./30-customize-cluster.md) + Taking advantage of persistent storage for long-running jobs - Setting up your cluster to user's specific needs +8. **Performance Tuning** [(link)](./30-customize-cluster.md) -11. **Long Running Job** [(link)](./31-long-running-job.md) + Setting up your cluster to user's specific needs + + a. **Parallelizing on each VM Core** [(link)](./22-parallelizing-cores.md) + Best practices and limitations for parallelizing your R code to each core in each VM in your pool + b. - Best practices for managing long running jobs +9. **Asynchronous Jobs** [(link)](./31-long-running-job.md) -12. **Programmatically generated config** [(link)](./33-programmatically-generate-config.md) + Best practices for managing long running jobs - Generate credentials and cluster config at runtime programmatically +10. **Debugging and Troubleshooting** [(link)](./40-troubleshooting.md) -13. **National Cloud configuration" [(link)](./34-national-clouds.md) + Best practices on diagnosing common issues - How to run workload in Azure national clouds +5. 
**Azure Limitations** [(link)](./12-quota-limitations.md) + Learn about the limitations around the size of your cluster and the number of foreach jobs you can run in Azure. + ## Additional Documentation -Take a look at our [**Troubleshooting Guide**](./40-troubleshooting.md) for information on how to diagnose common issues. - Read our [**FAQ**](./42-faq.md) for known issues and common questions. From be4ac16f916323abda0ad6e82393f45b2a40e8c6 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 May 2018 10:04:20 -0700 Subject: [PATCH 05/11] Added descriptions for TOC --- docs/README.md | 82 +++++++++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/docs/README.md b/docs/README.md index 252b0417..98b173b6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -7,69 +7,81 @@ This section will provide information about how Azure works, how best to take ad 2. **Getting Started** [(link)](./02-getting-started.md) - Using the *Getting Started* to create credentials + Using the *Getting Started* to create credentials - a. - - b. **Programmatically** [(link)](./33-programmatically-generate-config.md) - Generate credentials and cluster config at runtime programmatically + i. **Cluster and Credentials Objects** [(link)](./33-programmatically-generate-config.md) - c. **National Cloud Support** [(link)](./34-national-clouds.md) - - How to run workload in Azure national clouds + Generate credentials and cluster configuration objects at runtime programmatically + + ii. **National Cloud Support** [(link)](./34-national-clouds.md) + + How to run workload in Azure national clouds 3. **Customize Cluster** [(link)](./30-customize-cluster.md) Setting up your cluster to user's specific needs - a. **Virtual Machine Sizes** [(link)](./10-vm-sizes.md) - How do you choose the best VM type/size for your workload? - b. **Autoscale** [(link)](./11-autoscale.md) - Automatically scale up/down your cluster to save time and/or money. - c. 
**Building Containers** [(link)](./32-building-containers.md) + + i. **Virtual Machine Sizes** [(link)](./10-vm-sizes.md) + + How do you choose the best VM type/size for your workload? + + ii. **Autoscale** [(link)](./11-autoscale.md) + + Automatically scale up/down your cluster to save time and/or money. + + iii. **Building Containers** [(link)](./32-building-containers.md) + + Creating your own Docker containers for reproducibility 4. **Managing Cluster** [(link)](./33-clusters.md) - Setting up your cluster to user's specific needs + Managing your cluster's lifespan + +5. **Customize Job** + + Setting up your job to user's specific needs - a. Setting up your cluster to user's specific needs + i. **Asynchronous Jobs** [(link)](./31-long-running-job.md) + Best practices for managing long running jobs + + ii. **Foreach Azure Options** [(link)](./) + + Use Azure package-defined foreach options to improve performance and user experience + + iii. **Error Handling** + + How Azure handles errors in your Foreach loop? + 6. **Package Management** [(link)](./20-package-management.md) Best practices for managing your R packages in code. This includes installation at the cluster or job level as well as how to use different package providers. -7. **Storage Management** [(link)](./31-long-running-job.md) +7. **Storage Management** - a. **Distributing your Data** [(link)](./21-distributing-data.md) + i. **Distributing your Data** [(link)](./21-distributing-data.md) - Best practices and limitations for working with distributed data. + Best practices and limitations for working with distributed data. - b. **Persistent Storage** [(link)](./23-persistent-storage.md) + ii. **Persistent Storage** [(link)](./23-persistent-storage.md) - Taking advantage of persistent storage for long-running jobs + Taking advantage of persistent storage for long-running jobs - c. 
**Accessing Azure Storage through R** [(link)](./23-persistent-storage.md) - - Taking advantage of persistent storage for long-running jobs - -8. **Performance Tuning** [(link)](./30-customize-cluster.md) + iii. **Accessing Azure Storage through R** [(link)](./23-persistent-storage.md) - Setting up your cluster to user's specific needs - - a. **Parallelizing on each VM Core** [(link)](./22-parallelizing-cores.md) - Best practices and limitations for parallelizing your R code to each core in each VM in your pool - b. + Manage your Azure Storage files via R -9. **Asynchronous Jobs** [(link)](./31-long-running-job.md) +8. **Performance Tuning** [(link)](./50-performance-tuning.md) - Best practices for managing long running jobs + Best practices on optimizing your Foreach loop -10. **Debugging and Troubleshooting** [(link)](./40-troubleshooting.md) +9. **Debugging and Troubleshooting** [(link)](./40-troubleshooting.md) Best practices on diagnosing common issues -5. **Azure Limitations** [(link)](./12-quota-limitations.md) +10. **Azure Limitations** [(link)](./12-quota-limitations.md) - Learn about the limitations around the size of your cluster and the number of foreach jobs you can run in Azure. + Learn about the limitations around the size of your cluster and the number of foreach jobs you can run in Azure. ## Additional Documentation Read our [**FAQ**](./42-faq.md) for known issues and common questions. 
From 6c59033074a356d42e5275e9419789dcf16579d8 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 May 2018 10:43:03 -0700 Subject: [PATCH 06/11] Major renaming of files --- README.md | 108 ------------------ ...nerate-config.md => 01-getting-started.md} | 62 +++++++++- ...script.md => 02-getting-started-script.md} | 0 docs/02-getting-started.md | 0 ...tional-clouds.md => 03-national-clouds.md} | 0 docs/{10-vm-sizes.md => 31-vm-sizes.md} | 0 docs/{11-autoscale.md => 32-autoscale.md} | 0 ...ontainers.md => 33-building-containers.md} | 0 docs/{13-clusters.md => 40-clusters.md} | 0 ...-running-job.md => 51-long-running-job.md} | 1 - docs/52-azure-foreach-options.md | 33 ++++++ docs/53-error-handling.md | 50 ++++++++ ...buting-data.md => 71-distributing-data.md} | 0 ...nt-storage.md => 72-persistent-storage.md} | 0 ...torage-via-R.md => 73-managing-storage.md} | 0 ...nce-tuning.md => 80-performance-tuning.md} | 0 ...oubleshooting.md => 90-troubleshooting.md} | 0 ...limitations.md => 91-quota-limitations.md} | 0 docs/{42-faq.md => 92-faq.md} | 0 docs/README.md | 38 +++--- 20 files changed, 156 insertions(+), 136 deletions(-) rename docs/{33-programmatically-generate-config.md => 01-getting-started.md} (50%) rename docs/{01-getting-started-script.md => 02-getting-started-script.md} (100%) delete mode 100644 docs/02-getting-started.md rename docs/{34-national-clouds.md => 03-national-clouds.md} (100%) rename docs/{10-vm-sizes.md => 31-vm-sizes.md} (100%) rename docs/{11-autoscale.md => 32-autoscale.md} (100%) rename docs/{32-building-containers.md => 33-building-containers.md} (100%) rename docs/{13-clusters.md => 40-clusters.md} (100%) rename docs/{31-long-running-job.md => 51-long-running-job.md} (99%) create mode 100644 docs/52-azure-foreach-options.md create mode 100644 docs/53-error-handling.md rename docs/{21-distributing-data.md => 71-distributing-data.md} (100%) rename docs/{23-persistent-storage.md => 72-persistent-storage.md} (100%) rename 
docs/{41-managing-storage-via-R.md => 73-managing-storage.md} (100%) rename docs/{50-performance-tuning.md => 80-performance-tuning.md} (100%) rename docs/{40-troubleshooting.md => 90-troubleshooting.md} (100%) rename docs/{12-quota-limitations.md => 91-quota-limitations.md} (100%) rename docs/{42-faq.md => 92-faq.md} (100%) diff --git a/README.md b/README.md index ee25f6c1..19725fb4 100644 --- a/README.md +++ b/README.md @@ -207,9 +207,6 @@ For more information about low-priority VMs, please visit the [documentation](ht You can also check out information on low-priority pricing [here](https://azure.microsoft.com/en-us/pricing/details/batch/). -### Distributing Data -When developing at scale, you may also want to chunk up your data and distribute the data across your nodes. Learn more about that [here](./docs/21-distributing-data.md#chunking-data) - ### Using %do% vs %dopar% When developing at scale, it is always recommended that you test and debug your code locally first. Switch between *%dopar%* and *%do%* to toggle between running in parallel on Azure and running in sequence on your local machine. @@ -221,111 +218,6 @@ results <- foreach(i = 1:number_of_iterations) %do% { ... } results <- foreach(i = 1:number_of_iterations) %dopar% { ... } ``` -### Error Handling -The errorhandling option specifies how failed tasks should be evaluated. By default, the error handling is 'stop' to ensure users' can have reproducible results. If a combine function is assigned, it must be able to handle error objects. 
- -Error Handling Type | Description ---- | --- -stop | The execution of the foreach will stop if an error occurs -pass | The error object of the task is included the results -remove | The result of a failed task will not be returned - -```R -# Remove R error objects from the results -res <- foreach::foreach(i = 1:4, .errorhandling = "remove") %dopar% { - if (i == 2 || i == 4) { - randomObject - } - - mean(1:3) -} - -#> res -#[[1]] -#[1] 2 -# -#[[2]] -#[1] 2 -``` - -```R -# Passing R error objects into the results -res <- foreach::foreach(i = 1:4, .errorhandling = "pass") %dopar% { - if (i == 2|| i == 4) { - randomObject - } - - sum(i, 1) -} - -#> res -#[[1]] -#[1] 2 -# -#[[2]] -# -# -#[[3]] -#[1] 4 -# -#[[4]] -# -``` - -### Long-running Jobs + Job Management - -doAzureParallel also helps you manage your jobs so that you can run many jobs at once while managing it through a few simple methods. - - -```R -# List your jobs: -getJobList() -# Get your job by job id: -getJob(jobId = 'unique_job_id', verbose = TRUE) -``` - -This will also let you run *long running jobs* easily. - -With long running jobs, you will need to keep track of your jobs as well as set your job to a non-blocking state. You can do this with the *.options.azure* options: - -```R -# set the .options.azure option in the foreach loop -opt <- list(job = 'unique_job_id', wait = FALSE) - -# NOTE - if the option wait = FALSE, foreach will return your unique job id -job_id <- foreach(i = 1:number_of_iterations, .options.azure = opt) %dopar % { ... } - -# get back your job results with your unique job id -results <- getJobResult(job_id) -``` - -Finally, you may also want to track the status of jobs by state (active, completed etc): - -```R -# List jobs in completed state: -filter <- list() -filter$state <- c("active", "completed") -jobList <- getJobList(filter) -View(jobList) -``` - -You can learn more about how to execute long-running jobs [here](./docs/23-persistent-storage.md). 
- -With long-running jobs, you can take advantage of Azure's autoscaling capabilities to save time and/or money. Learn more about autoscale [here](./docs/11-autoscale.md). - -### Bypassing merge task - -Skipping the merge task is useful when the tasks results don't need to be merged into a list. To bypass the merge task, you can pass the *enableMerge* flag to the foreach object: - -```R -# Enable merge task -foreach(i = 1:3, .options.azure = list(enableMerge = TRUE)) - -# Disable merge task -foreach(i = 1:3, .options.azure = list(enableMerge = FALSE)) -``` -Note: User defined functions for the merge task is on our list of features that we are planning on doing. - ## Next Steps For more information, please visit [our documentation](./docs/README.md). diff --git a/docs/33-programmatically-generate-config.md b/docs/01-getting-started.md similarity index 50% rename from docs/33-programmatically-generate-config.md rename to docs/01-getting-started.md index 1202b386..8f68f713 100644 --- a/docs/33-programmatically-generate-config.md +++ b/docs/01-getting-started.md @@ -1,8 +1,62 @@ -# Programmatically generated credential and cluster configuration +# Azure Cluster and Credentials Objects + +### Configuration JSON files + +#### Credentials +Use your credential config JSON file to enter your credentials. + +```javascript +{ + "sharedKey": { + "batchAccount": { + "name": , + "key": , + "url": + }, + "storageAccount": { + "name": , + "key": + } + }, + "githubAuthenticationToken": {} +} +``` +Learn more: + - [Batch account / Storage account](./README.md#azure-requirements) + - [Create your secrets configuration in code](./docs/33-programmatically-generate-config.md) + + +#### Cluster Settings +Use your pool configuration JSON file to define your pool in Azure. 
+ +```javascript +{ + "name": , // example: "myazurecluster" + "vmSize": , // example: "Standard_F2" + "maxTasksPerNode": , // example: "2" + "poolSize": { + "dedicatedNodes": { // dedicated vms + "min": 2, + "max": 2 + }, + "lowPriorityNodes": { // low priority vms + "min": 1, + "max": 10 + }, + "autoscaleFormula": "QUEUE" + }, + "rPackages": { + "cran": ["some_cran_package", "some_other_cran_package"], + "github": ["username/some_github_package", "another_username/some_other_github_package"] + }, + "commandLine": [] +} +``` +NOTE: If you do **not** want your cluster to autoscale, simply set the number of min nodes equal to max nodes for low-priority and dedicated. In addition to setting credentials and cluster configuration through json files, you can specify them programmatically. This allows users to generate the configuration on the fly at runtime. -## Programmatically generated credentials +## Create Azure Cluster and Credential Objects via Programmatically You can generate credentials by creating a R object as shown below: @@ -28,11 +82,7 @@ You can generate credentials by creating a R object as shown below: doAzureParallel::setCredentials(credentials) ``` - -## Programmatically generated cluster configuration - You can generate cluster configuration by creating a R object as shown below: - ```R clusterConfig <- list( "name" = "clustername", diff --git a/docs/01-getting-started-script.md b/docs/02-getting-started-script.md similarity index 100% rename from docs/01-getting-started-script.md rename to docs/02-getting-started-script.md diff --git a/docs/02-getting-started.md b/docs/02-getting-started.md deleted file mode 100644 index e69de29b..00000000 diff --git a/docs/34-national-clouds.md b/docs/03-national-clouds.md similarity index 100% rename from docs/34-national-clouds.md rename to docs/03-national-clouds.md diff --git a/docs/10-vm-sizes.md b/docs/31-vm-sizes.md similarity index 100% rename from docs/10-vm-sizes.md rename to docs/31-vm-sizes.md diff 
--git a/docs/11-autoscale.md b/docs/32-autoscale.md similarity index 100% rename from docs/11-autoscale.md rename to docs/32-autoscale.md diff --git a/docs/32-building-containers.md b/docs/33-building-containers.md similarity index 100% rename from docs/32-building-containers.md rename to docs/33-building-containers.md diff --git a/docs/13-clusters.md b/docs/40-clusters.md similarity index 100% rename from docs/13-clusters.md rename to docs/40-clusters.md diff --git a/docs/31-long-running-job.md b/docs/51-long-running-job.md similarity index 99% rename from docs/31-long-running-job.md rename to docs/51-long-running-job.md index d18103ed..f472f044 100644 --- a/docs/31-long-running-job.md +++ b/docs/51-long-running-job.md @@ -1,5 +1,4 @@ # Job Management and Asynchronous Jobs - The doAzureParallel package allows you to manage long running jobs easily. There are 2 ways to run a job: - Synchronous - Asynchronous diff --git a/docs/52-azure-foreach-options.md b/docs/52-azure-foreach-options.md new file mode 100644 index 00000000..f6792403 --- /dev/null +++ b/docs/52-azure-foreach-options.md @@ -0,0 +1,33 @@ +## Azure-specific Optional Flags + +| Flag Name | Default | Type | Meaning | + | ------------- |:-------------:| -----:| -----:| + | chunkSize | 1 | Integer | Groups the number of foreach loop iterations into one task and execute them in a single R session. Consider using the chunkSize option if each iteration in the loop executes very quickly. | + | maxTaskRetryCount | 3 | Integer | The number of retries the task will perform. | + | enableCloudCombine | TRUE | Boolean | Enables the merge task to be performed | + | wait | TRUE | Boolean | Set the job to a non-blocking state. This allows you to perform R tasks while waiting for your results to be complete. | + | autoDeleteJob | TRUE | Boolean | Deletes the job metadata and result after the foreach loop has been executed. | + | job | The time of job creation | Character | The name of you job. 
This name will appear in the RStudio console, Azure Batch, and Azure Storage. | + +## Azure-specific Package Installation Flags + + | Flag Name | Default | Type | Meaning | + | ------------- |:-------------:| -----:| -----:| + | github | c() | Vector | A vector of github package names. The proper name format of installing a github package is the repository address: username/repo[/subdir] | + | bioconductor | c() | Vector | A vector of bioconductor package names | + + + +### Bypassing merge task + +Skipping the merge task is useful when the tasks results don't need to be merged into a list. To bypass the merge task, you can pass the *enableMerge* flag to the foreach object: + +```R +# Enable merge task +foreach(i = 1:3, .options.azure = list(enableMerge = TRUE)) + +# Disable merge task +foreach(i = 1:3, .options.azure = list(enableMerge = FALSE)) +``` +Note: User defined functions for the merge task is on our list of features that we are planning on doing. + diff --git a/docs/53-error-handling.md b/docs/53-error-handling.md new file mode 100644 index 00000000..d4b3b7c3 --- /dev/null +++ b/docs/53-error-handling.md @@ -0,0 +1,50 @@ +### Error Handling +The errorhandling option specifies how failed tasks should be evaluated. By default, the error handling is 'stop' to ensure users' can have reproducible results. If a combine function is assigned, it must be able to handle error objects. 
+ +Error Handling Type | Description +--- | --- +stop | The execution of the foreach will stop if an error occurs +pass | The error object of the task is included the results +remove | The result of a failed task will not be returned + +```R +# Remove R error objects from the results +res <- foreach::foreach(i = 1:4, .errorhandling = "remove") %dopar% { + if (i == 2 || i == 4) { + randomObject + } + + mean(1:3) +} + +#> res +#[[1]] +#[1] 2 +# +#[[2]] +#[1] 2 +``` + +```R +# Passing R error objects into the results +res <- foreach::foreach(i = 1:4, .errorhandling = "pass") %dopar% { + if (i == 2|| i == 4) { + randomObject + } + + sum(i, 1) +} + +#> res +#[[1]] +#[1] 2 +# +#[[2]] +# +# +#[[3]] +#[1] 4 +# +#[[4]] +# +``` diff --git a/docs/21-distributing-data.md b/docs/71-distributing-data.md similarity index 100% rename from docs/21-distributing-data.md rename to docs/71-distributing-data.md diff --git a/docs/23-persistent-storage.md b/docs/72-persistent-storage.md similarity index 100% rename from docs/23-persistent-storage.md rename to docs/72-persistent-storage.md diff --git a/docs/41-managing-storage-via-R.md b/docs/73-managing-storage.md similarity index 100% rename from docs/41-managing-storage-via-R.md rename to docs/73-managing-storage.md diff --git a/docs/50-performance-tuning.md b/docs/80-performance-tuning.md similarity index 100% rename from docs/50-performance-tuning.md rename to docs/80-performance-tuning.md diff --git a/docs/40-troubleshooting.md b/docs/90-troubleshooting.md similarity index 100% rename from docs/40-troubleshooting.md rename to docs/90-troubleshooting.md diff --git a/docs/12-quota-limitations.md b/docs/91-quota-limitations.md similarity index 100% rename from docs/12-quota-limitations.md rename to docs/91-quota-limitations.md diff --git a/docs/42-faq.md b/docs/92-faq.md similarity index 100% rename from docs/42-faq.md rename to docs/92-faq.md diff --git a/docs/README.md b/docs/README.md index 98b173b6..b59faaea 100644 --- 
a/docs/README.md +++ b/docs/README.md @@ -3,17 +3,13 @@ This section will provide information about how Azure works, how best to take ad 1. **Azure Introduction** [(link)](./00-azure-introduction.md) - Using the *Data Science Virtual Machine (DSVM)* & *Azure Batch* + Using *Azure Batch* -2. **Getting Started** [(link)](./02-getting-started.md) +2. **Getting Started** [(link)](./01-getting-started.md) Using the *Getting Started* to create credentials - - i. **Cluster and Credentials Objects** [(link)](./33-programmatically-generate-config.md) - - Generate credentials and cluster configuration objects at runtime programmatically - ii. **National Cloud Support** [(link)](./34-national-clouds.md) + i. **National Cloud Support** [(link)](./03-national-clouds.md) How to run workload in Azure national clouds @@ -21,19 +17,19 @@ This section will provide information about how Azure works, how best to take ad Setting up your cluster to user's specific needs - i. **Virtual Machine Sizes** [(link)](./10-vm-sizes.md) + i. **Virtual Machine Sizes** [(link)](./31-vm-sizes.md) How do you choose the best VM type/size for your workload? - ii. **Autoscale** [(link)](./11-autoscale.md) + ii. **Autoscale** [(link)](./32-autoscale.md) Automatically scale up/down your cluster to save time and/or money. - iii. **Building Containers** [(link)](./32-building-containers.md) + iii. **Building Containers** [(link)](./33-building-containers.md) Creating your own Docker containers for reproducibility -4. **Managing Cluster** [(link)](./33-clusters.md) +4. **Managing Cluster** [(link)](./40-clusters.md) Managing your cluster's lifespan @@ -41,15 +37,15 @@ This section will provide information about how Azure works, how best to take ad Setting up your job to user's specific needs - i. **Asynchronous Jobs** [(link)](./31-long-running-job.md) + i. **Asynchronous Jobs** [(link)](./51-long-running-job.md) Best practices for managing long running jobs - ii. **Foreach Azure Options** [(link)](./) + ii. 
**Foreach Azure Options** [(link)](./52-azure-foreach-options.md) Use Azure package-defined foreach options to improve performance and user experience - iii. **Error Handling** + iii. **Error Handling** [(link)](./53-error-handling.md) How Azure handles errors in your Foreach loop? @@ -59,29 +55,29 @@ This section will provide information about how Azure works, how best to take ad 7. **Storage Management** - i. **Distributing your Data** [(link)](./21-distributing-data.md) + i. **Distributing your Data** [(link)](./71-distributing-data.md) Best practices and limitations for working with distributed data. - ii. **Persistent Storage** [(link)](./23-persistent-storage.md) + ii. **Persistent Storage** [(link)](./72-persistent-storage.md) Taking advantage of persistent storage for long-running jobs - iii. **Accessing Azure Storage through R** [(link)](./23-persistent-storage.md) + iii. **Accessing Azure Storage through R** [(link)](./73-managing-storage.md) Manage your Azure Storage files via R -8. **Performance Tuning** [(link)](./50-performance-tuning.md) +8. **Performance Tuning** [(link)](./80-performance-tuning.md) Best practices on optimizing your Foreach loop -9. **Debugging and Troubleshooting** [(link)](./40-troubleshooting.md) +9. **Debugging and Troubleshooting** [(link)](./90-troubleshooting.md) Best practices on diagnosing common issues -10. **Azure Limitations** [(link)](./12-quota-limitations.md) +10. **Azure Limitations** [(link)](./91-quota-limitations.md) Learn about the limitations around the size of your cluster and the number of foreach jobs you can run in Azure. ## Additional Documentation -Read our [**FAQ**](./42-faq.md) for known issues and common questions. +Read our [**FAQ**](./92-faq.md) for known issues and common questions.
From 5b327eeec11bad2d84eb606845d800a62e1dd05d Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 May 2018 11:43:13 -0700 Subject: [PATCH 07/11] Added TOC to main README.md --- README.md | 147 ++++++++++++++++++------------- docs/01-getting-started.md | 31 ++++--- docs/52-azure-foreach-options.md | 2 - docs/90-troubleshooting.md | 11 +++ docs/README.md | 67 +++++++------- 5 files changed, 152 insertions(+), 106 deletions(-) diff --git a/README.md b/README.md index 19725fb4..615f59c0 100644 --- a/README.md +++ b/README.md @@ -127,60 +127,6 @@ After you finish running your R code in Azure, you may want to shut down your cl stopCluster(cluster) ``` -### Configuration JSON files - -#### Credentials -Use your credential config JSON file to enter your credentials. - -```javascript -{ - "sharedKey": { - "batchAccount": { - "name": , - "key": , - "url": - }, - "storageAccount": { - "name": , - "key": - } - }, - "githubAuthenticationToken": {} -} -``` -Learn more: - - [Batch account / Storage account](./README.md#azure-requirements) - - [Create your secrets configuration in code](./docs/33-programmatically-generate-config.md) - - -#### Cluster Settings -Use your pool configuration JSON file to define your pool in Azure. - -```javascript -{ - "name": , // example: "myazurecluster" - "vmSize": , // example: "Standard_F2" - "maxTasksPerNode": , // example: "2" - "poolSize": { - "dedicatedNodes": { // dedicated vms - "min": 2, - "max": 2 - }, - "lowPriorityNodes": { // low priority vms - "min": 1, - "max": 10 - }, - "autoscaleFormula": "QUEUE" - }, - "rPackages": { - "cran": ["some_cran_package", "some_other_cran_package"], - "github": ["username/some_github_package", "another_username/some_other_github_package"] - }, - "commandLine": [] -} -``` -NOTE: If you do **not** want your cluster to autoscale, simply set the number of min nodes equal to max nodes for low-priority and dedicated. 
- Learn more: - [Choosing VM size](./docs/10-vm-sizes.md#vm-size-table) - [Create your cluster configuration in code](./docs/33-programmatically-generate-config.md) @@ -207,16 +153,93 @@ For more information about low-priority VMs, please visit the [documentation](ht You can also check out information on low-priority pricing [here](https://azure.microsoft.com/en-us/pricing/details/batch/). -### Using %do% vs %dopar% -When developing at scale, it is always recommended that you test and debug your code locally first. Switch between *%dopar%* and *%do%* to toggle between running in parallel on Azure and running in sequence on your local machine. +## doAzureParallel Guide +This section will provide information about how Azure works, how best to take advantage of Azure, and best practices when using the doAzureParallel package. -```R -# run your code sequentially on your local machine -results <- foreach(i = 1:number_of_iterations) %do% { ... } +1. **Azure Introduction** [(link)](./docs/00-azure-introduction.md) -# use the doAzureParallel backend to run your code in parallel across your Azure cluster -results <- foreach(i = 1:number_of_iterations) %dopar% { ... } -``` + Using *Azure Batch* + +2. **Getting Started** [(link)](./docs/01-getting-started.md) + + Using the *Getting Started* to create credentials + + i. **Generate Credentials Script** [(link)](./docs/02-getting-started-script.md) + + - Pre-built bash script for getting Azure credentials without Azure Portal + + ii. **National Cloud Support** [(link)](./docs/03-national-clouds.md) + + - How to run workload in Azure national clouds + +3. **Customize Cluster** [(link)](./docs/30-customize-cluster.md) + + Setting up your cluster to user's specific needs + + i. **Virtual Machine Sizes** [(link)](./docs/31-vm-sizes.md) + + - How do you choose the best VM type/size for your workload? + + ii. **Autoscale** [(link)](./docs/32-autoscale.md) + + - Automatically scale up/down your cluster to save time and/or money. 
+ + iii. **Building Containers** [(link)](./docs/33-building-containers.md) + + - Creating your own Docker containers for reproducibility + +4. **Managing Cluster** [(link)](./docs/40-clusters.md) + + Managing your cluster's lifespan + +5. **Customize Job** + + Setting up your job to user's specific needs + + i. **Asynchronous Jobs** [(link)](./docs/51-long-running-job.md) + + - Best practices for managing long running jobs + + ii. **Foreach Azure Options** [(link)](./docs/52-azure-foreach-options.md) + + - Use Azure package-defined foreach options to improve performance and user experience + + iii. **Error Handling** [(link)](./docs/53-azure-foreach-options.md) + + - How Azure handles errors in your Foreach loop? + +6. **Package Management** [(link)](./docs/20-package-management.md) + + Best practices for managing your R packages in code. This includes installation at the cluster or job level as well as how to use different package providers. + +7. **Storage Management** + + i. **Distributing your Data** [(link)](./docs/71-distributing-data.md) + + - Best practices and limitations for working with distributed data. + + ii. **Persistent Storage** [(link)](./docs/72-persistent-storage.md) + + - Taking advantage of persistent storage for long-running jobs + + iii. **Accessing Azure Storage through R** [(link)](./docs/73-managing-storage.md) + + - Manage your Azure Storage files via R + +8. **Performance Tuning** [(link)](./docs/80-performance-tuning.md) + + Best practices on optimizing your Foreach loop + +9. **Debugging and Troubleshooting** [(link)](./docs/90-troubleshooting.md) + + Best practices on diagnosing common issues + +10. **Azure Limitations** [(link)](./docs/91-quota-limitations.md) + + Learn about the limitations around the size of your cluster and the number of foreach jobs you can run in Azure. + +## Additional Documentation +Read our [**FAQ**](./docs/92-faq.md) for known issues and common questions. 
## Next Steps diff --git a/docs/01-getting-started.md b/docs/01-getting-started.md index 8f68f713..1a13653a 100644 --- a/docs/01-getting-started.md +++ b/docs/01-getting-started.md @@ -1,6 +1,9 @@ -# Azure Cluster and Credentials Objects +## Cluster and Credentials Objects +To create a cluster, the user needs to set their credentials via **setCredentials** function in order to create the correct HTTP requests to the Batch service. Then the user will have to pass a cluster file/object to **makeCluster** function. The next following sections will demonstrate how JSON files can be used and how you can create them programatically. -### Configuration JSON files +Note: doAzureParallel has a bash script that will generate your credentials JSON file. For more information, see [Getting Started Scripts](./02-getting-started-script.md) + +### JSON Configuration files #### Credentials Use your credential config JSON file to enter your credentials. @@ -15,24 +18,28 @@ Use your credential config JSON file to enter your credentials. }, "storageAccount": { "name": , - "key": + "key": , + "endpointSuffix": "core.windows.net" } }, - "githubAuthenticationToken": {} + "githubAuthenticationToken": "", + "dockerAuthentication": { + "username": "", + "password": "", + "registry": "" + } } ``` Learn more: - [Batch account / Storage account](./README.md#azure-requirements) - - [Create your secrets configuration in code](./docs/33-programmatically-generate-config.md) - #### Cluster Settings -Use your pool configuration JSON file to define your pool in Azure. +Use your cluster configuration JSON file to define your cluster in Azure. ```javascript { - "name": , // example: "myazurecluster" - "vmSize": , // example: "Standard_F2" + "name": , // example: "myazurecluster" + "vmSize": , // example: "Standard_F2" "maxTasksPerNode": , // example: "2" "poolSize": { "dedicatedNodes": { // dedicated vms @@ -45,11 +52,13 @@ Use your pool configuration JSON file to define your pool in Azure. 
}, "autoscaleFormula": "QUEUE" }, + "containerImage": "rocker/tidyverse:latest", "rPackages": { "cran": ["some_cran_package", "some_other_cran_package"], "github": ["username/some_github_package", "another_username/some_other_github_package"] }, - "commandLine": [] + "commandLine": [], + "subnetId": "" } ``` NOTE: If you do **not** want your cluster to autoscale, simply set the number of min nodes equal to max nodes for low-priority and dedicated. @@ -58,6 +67,8 @@ In addition to setting credentials and cluster configuration through json files, ## Create Azure Cluster and Credential Objects via Programmatically +The JSON configuration files are essentially list of lists R objects. You can also programatically generate your own configuration files by following the list of lists format. + You can generate credentials by creating a R object as shown below: ```R diff --git a/docs/52-azure-foreach-options.md b/docs/52-azure-foreach-options.md index f6792403..2eb7b816 100644 --- a/docs/52-azure-foreach-options.md +++ b/docs/52-azure-foreach-options.md @@ -15,8 +15,6 @@ | ------------- |:-------------:| -----:| -----:| | github | c() | Vector | A vector of github package names. The proper name format of installing a github package is the repository address: username/repo[/subdir] | | bioconductor | c() | Vector | A vector of bioconductor package names | - - ### Bypassing merge task diff --git a/docs/90-troubleshooting.md b/docs/90-troubleshooting.md index c487ca55..4761137f 100644 --- a/docs/90-troubleshooting.md +++ b/docs/90-troubleshooting.md @@ -2,6 +2,17 @@ ## Debugging Tools +### Using %do% vs %dopar% +When developing at scale, it is always recommended that you test and debug your code locally first. Switch between *%dopar%* and *%do%* to toggle between running in parallel on Azure and running in sequence on your local machine. + +```R +# run your code sequentially on your local machine +results <- foreach(i = 1:number_of_iterations) %do% { ... 
} + +# use the doAzureParallel backend to run your code in parallel across your Azure cluster +results <- foreach(i = 1:number_of_iterations) %dopar% { ... } +``` + ### Setting Verbose Mode to Debug To debug your doAzureParallel jobs, you can set the package to operate on *verbose* mode: diff --git a/docs/README.md b/docs/README.md index b59faaea..2c475e98 100644 --- a/docs/README.md +++ b/docs/README.md @@ -8,27 +8,30 @@ This section will provide information about how Azure works, how best to take ad 2. **Getting Started** [(link)](./01-getting-started.md) Using the *Getting Started* to create credentials - - i. **National Cloud Support** [(link)](./03-national-clouds.md) - - How to run workload in Azure national clouds + + i. **Generate Credentials Script** [(link)](./02-getting-started-script.md) + + - Pre-built bash script for getting Azure credentials without Azure Portal + + ii. **National Cloud Support** [(link)](./03-national-clouds.md) + + - How to run workload in Azure national clouds 3. **Customize Cluster** [(link)](./30-customize-cluster.md) Setting up your cluster to user's specific needs - i. **Virtual Machine Sizes** [(link)](./31-vm-sizes.md) - - How do you choose the best VM type/size for your workload? + i. **Virtual Machine Sizes** [(link)](./31-vm-sizes.md) + + - How do you choose the best VM type/size for your workload? - ii. **Autoscale** [(link)](./32-autoscale.md) + ii. **Autoscale** [(link)](./32-autoscale.md) - Automatically scale up/down your cluster to save time and/or money. + - Automatically scale up/down your cluster to save time and/or money. - iii. **Building Containers** [(link)](./33-building-containers.md) + iii. **Building Containers** [(link)](./33-building-containers.md) - Creating your own Docker containers for reproducibility - + - Creating your own Docker containers for reproducibility 4. 
**Managing Cluster** [(link)](./40-clusters.md) Managing your cluster's lifespan @@ -37,42 +40,42 @@ This section will provide information about how Azure works, how best to take ad Setting up your job to user's specific needs - i. **Asynchronous Jobs** [(link)](./51-long-running-job.md) - - Best practices for managing long running jobs - - ii. **Foreach Azure Options** [(link)](./52-azure-foreach-options.md) + i. **Asynchronous Jobs** [(link)](./51-long-running-job.md) + + - Best practices for managing long running jobs - Use Azure package-defined foreach options to improve performance and user experience + ii. **Foreach Azure Options** [(link)](./52-azure-foreach-options.md) + + - Use Azure package-defined foreach options to improve performance and user experience - iii. **Error Handling** [(link)](./53-azure-foreach-options.md) - - How Azure handles errors in your Foreach loop? + iii. **Error Handling** [(link)](./53-azure-foreach-options.md) + + - How Azure handles errors in your Foreach loop? 6. **Package Management** [(link)](./20-package-management.md) - Best practices for managing your R packages in code. This includes installation at the cluster or job level as well as how to use different package providers. + Best practices for managing your R packages in code. This includes installation at the cluster or job level as well as how to use different package providers. 7. **Storage Management** + + i. **Distributing your Data** [(link)](./71-distributing-data.md) + + - Best practices and limitations for working with distributed data. - i. **Distributing your Data** [(link)](./71-distributing-data.md) - - Best practices and limitations for working with distributed data. - - ii. **Persistent Storage** [(link)](./72-persistent-storage.md) + ii. **Persistent Storage** [(link)](./72-persistent-storage.md) - Taking advantage of persistent storage for long-running jobs + - Taking advantage of persistent storage for long-running jobs - iii. 
**Accessing Azure Storage through R** [(link)](./73-managing-storage.md) - - Manage your Azure Storage files via R + iii. **Accessing Azure Storage through R** [(link)](./73-managing-storage.md) + + - Manage your Azure Storage files via R 8. **Performance Tuning** [(link)](./80-performance-tuning.md) Best practices on optimizing your Foreach loop 9. **Debugging and Troubleshooting** [(link)](./90-troubleshooting.md) - + Best practices on diagnosing common issues 10. **Azure Limitations** [(link)](./91-quota-limitations.md) From 3a6e4b3081ac05706005c01a86a8679fcbbfdabe Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 May 2018 12:16:56 -0700 Subject: [PATCH 08/11] Added low pri link --- README.md | 94 ++++++++++----------------------------------- docs/31-vm-sizes.md | 15 ++++++++ 2 files changed, 36 insertions(+), 73 deletions(-) diff --git a/README.md b/README.md index 615f59c0..f0822b35 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,6 @@ [![Build Status](https://travis-ci.org/Azure/doAzureParallel.svg?branch=master)](https://travis-ci.org/Azure/doAzureParallel) # doAzureParallel -```R -# set your credentials -setCredentials("credentials.json") - -# setup your cluster with a simple config file -cluster<- makeCluster("cluster.json") - -# register the cluster as your parallel backend -registerDoAzureParallel(cluster) - -# run your foreach loop on a distributed cluster in Azure -number_of_iterations <- 10 -results <- foreach(i = 1:number_of_iterations) %dopar% { - myParallelAlgorithm() -} -``` - ## Introduction The *doAzureParallel* package is a parallel backend for the widely popular *foreach* package. With *doAzureParallel*, each iteration of the *foreach* loop runs in parallel on an Azure Virtual Machine (VM), allowing users to scale up their R jobs to tens or hundreds of machines. 
@@ -26,6 +9,10 @@ The *doAzureParallel* package is a parallel backend for the widely popular *fore NOTE: The terms *pool* and *cluster* are used interchangably throughout this document. +## Notable Features +- Ability to use low-priority VMs for an 80% discount [link](./docs/31-vm-sizes.md#low-priority-vms) +- + ## Dependencies - R (>= 3.3.1) @@ -37,9 +24,9 @@ NOTE: The terms *pool* and *cluster* are used interchangably throughout this doc - iterators (>= 1.0.8) - bitops (>= 1.0.5) -## Installation +## Setup -Install doAzureParallel directly from Github. +1) Install doAzureParallel directly from Github. ```R # install the package devtools @@ -50,36 +37,23 @@ devtools::install_github("Azure/rAzureBatch") devtools::install_github("Azure/doAzureParallel") ``` -## Azure Requirements - -To run your R code across a cluster in Azure, we'll need to get keys and account information. - -### Setup Azure Account -First, set up your Azure Account ([Get started for free!](https://azure.microsoft.com/en-us/free/)) - -Once you have an Azure account, you'll need to create the following two services in the Azure portal: -- Azure Batch Account ([Create an Azure Batch Account in the Portal](https://docs.microsoft.com/en-us/azure/Batch/batch-account-create-portal)) -- Azure Storage Account (this can be created with the Batch Account) - -### Get Keys and Account Information -For your Azure Batch Account, we need to get: -- Batch Account Name -- Batch Account URL -- Batch Account Access Key - -This information can be found in the Azure Portal inside your Batch Account: - -![Azure Batch Acccount in the Portal](./vignettes/doAzureParallel-azurebatch-instructions.PNG "Azure Batch Acccount in the Portal") - -For your Azure Storage Account, we need to get: -- Storage Account Name -- Storage Account Access Key +2) Create an doAzureParallel's credentials file +``` R +library(doAzureParallel) +generateCredentials.json("credentials.json") +``` -This information can be found in the Azure Portal 
inside your Azure Storage Account: +3) Login or register for an Azure Account, navigate to [Azure Cloud Shell](https://shell.azure.com) -![Azure Storage Acccount in the Portal](./vignettes/doAzureParallel-azurestorage-instructions.PNG "Azure Storage Acccount in the Portal") +``` sh +wget -q https://raw.githubusercontent.com/Azure/doAzureParallel/master/account_setup.sh && +chmod 755 account_setup.sh && +/bin/bash account_setup.sh +``` +4) Follow the on screen prompts to create the necessary Azure resources and copy the output into your credentials file. For more information, see [Getting Started Scripts](./docs/02-getting-started-script.md). -Keep track of the above keys and account information as it will be used to connect your R session with Azure. +To Learn More: +- [Azure Account Requirements for doAzureParallel](./docs/04-azure-requirements.md) ## Getting Started @@ -127,33 +101,7 @@ After you finish running your R code in Azure, you may want to shut down your cl stopCluster(cluster) ``` -Learn more: - - [Choosing VM size](./docs/10-vm-sizes.md#vm-size-table) - - [Create your cluster configuration in code](./docs/33-programmatically-generate-config.md) - - [MaxTasksPerNode](./docs/22-parallelizing-cores.md) - - [LowPriorityNodes](#low-priority-vms) - - [Autoscale](./docs/11-autoscale.md) - - [PoolSize Limitations](./docs/12-quota-limitations.md) - - [rPackages](./docs/20-package-management.md) - -### Low Priority VMs -Low-priority VMs are a way to obtain and consume Azure compute at a much lower price using Azure Batch. Since doAzureParallel is built on top of Azure Batch, this package is able to take advantage of low-priority VMs and allocate compute resources from Azure's surplus capacity at up to **80% discount**. - -Low-priority VMs come with the understanding that when you request it, there is the possibility that we'll need to take some or all of it back. 
Hence the name *low-priority* - VMs may not be allocated or may be preempted due to higher priority allocations, which equate to full-priced VMs that have an SLA. - -And as the name suggests, this significant cost reduction is ideal for *low priority* workloads that do not have a strict performance requirement. - -With Azure Batch's first-class support for low-priority VMs, you can use them in conjunction with normal on-demand VMs (*dedicated VMs*) and enable job cost to be balanced with job execution flexibility: - - * Batch pools can contain both on-demand nodes and low-priority nodes. The two types can be independently scaled, either explicitly with the resize operation or automatically using auto-scale. Different configurations can be used, such as maximizing cost savings by always using low-priority nodes or spinning up on-demand nodes at full price, to maintain capacity by replacing any preempted low-priority nodes. - * If any low-priority nodes are preempted, then Batch will automatically attempt to replace the lost capacity, continually seeking to maintain the target amount of low-priority capacity in the pool. - * If tasks are interrupted when the node on which it is running is preempted, then the tasks are automatically re-queued to be re-run. - -For more information about low-priority VMs, please visit the [documentation](https://docs.microsoft.com/en-us/azure/batch/batch-low-pri-vms). - -You can also check out information on low-priority pricing [here](https://azure.microsoft.com/en-us/pricing/details/batch/). - -## doAzureParallel Guide +## Table of Contents This section will provide information about how Azure works, how best to take advantage of Azure, and best practices when using the doAzureParallel package. 1. 
**Azure Introduction** [(link)](./docs/00-azure-introduction.md) diff --git a/docs/31-vm-sizes.md b/docs/31-vm-sizes.md index 6d49c93f..8ff67d7d 100644 --- a/docs/31-vm-sizes.md +++ b/docs/31-vm-sizes.md @@ -63,4 +63,19 @@ The list above covers most scenarios that run R jobs. For special scenarios (suc To get a sense of what each VM costs, please visit the Azure Virtual Machine pricing page [here](https://azure.microsoft.com/en-us/pricing/details/virtual-machines/linux/). +# Low Priority VMs +Low-priority VMs are a way to obtain and consume Azure compute at a much lower price using Azure Batch. Since doAzureParallel is built on top of Azure Batch, this package is able to take advantage of low-priority VMs and allocate compute resources from Azure's surplus capacity at up to **80% discount**. +Low-priority VMs come with the understanding that when you request it, there is the possibility that we'll need to take some or all of it back. Hence the name *low-priority* - VMs may not be allocated or may be preempted due to higher priority allocations, which equate to full-priced VMs that have an SLA. + +And as the name suggests, this significant cost reduction is ideal for *low priority* workloads that do not have a strict performance requirement. + +With Azure Batch's first-class support for low-priority VMs, you can use them in conjunction with normal on-demand VMs (*dedicated VMs*) and enable job cost to be balanced with job execution flexibility: + + * Batch pools can contain both on-demand nodes and low-priority nodes. The two types can be independently scaled, either explicitly with the resize operation or automatically using auto-scale. Different configurations can be used, such as maximizing cost savings by always using low-priority nodes or spinning up on-demand nodes at full price, to maintain capacity by replacing any preempted low-priority nodes. 
+ * If any low-priority nodes are preempted, then Batch will automatically attempt to replace the lost capacity, continually seeking to maintain the target amount of low-priority capacity in the pool. + * If tasks are interrupted when the node on which it is running is preempted, then the tasks are automatically re-queued to be re-run. + +For more information about low-priority VMs, please visit the [documentation](https://docs.microsoft.com/en-us/azure/batch/batch-low-pri-vms). + +You can also check out information on low-priority pricing [here](https://azure.microsoft.com/en-us/pricing/details/batch/). From fa364c763f2874c51db8abea36666bec8e13f47f Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 May 2018 12:28:59 -0700 Subject: [PATCH 09/11] Added link to vm priority --- README.md | 5 ++--- docs/README.md | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f0822b35..b7347ebe 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,7 @@ The *doAzureParallel* package is a parallel backend for the widely popular *fore NOTE: The terms *pool* and *cluster* are used interchangably throughout this document. ## Notable Features -- Ability to use low-priority VMs for an 80% discount [link](./docs/31-vm-sizes.md#low-priority-vms) -- +- Ability to use low-priority VMs for an 80% discount [(link)](./docs/31-vm-sizes.md#low-priority-vms) ## Dependencies @@ -152,7 +151,7 @@ This section will provide information about how Azure works, how best to take ad - Use Azure package-defined foreach options to improve performance and user experience - iii. **Error Handling** [(link)](./docs/53-azure-foreach-options.md) + iii. **Error Handling** [(link)](./docs/53-error-handling.md) - How Azure handles errors in your Foreach loop? 
diff --git a/docs/README.md b/docs/README.md index 2c475e98..be42518f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -48,7 +48,7 @@ This section will provide information about how Azure works, how best to take ad - Use Azure package-defined foreach options to improve performance and user experience - iii. **Error Handling** [(link)](./53-azure-foreach-options.md) + iii. **Error Handling** [(link)](./53-error-handling.md) - How Azure handles errors in your Foreach loop? From cf3d491859a8f0d8c15551dbdc234b84e1ee3e2b Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 May 2018 12:39:46 -0700 Subject: [PATCH 10/11] Fix broken links --- docs/30-customize-cluster.md | 2 +- docs/51-long-running-job.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/30-customize-cluster.md b/docs/30-customize-cluster.md index 8d14eac2..6f96cffa 100644 --- a/docs/30-customize-cluster.md +++ b/docs/30-customize-cluster.md @@ -63,7 +63,7 @@ FROM ubuntu:16.04 CMD ["R"] ``` -For more information and samples on how to build images, deploy them to dockerhub and use them in your cluster please refer to the [Building Containers](./32-building-containers.md) documentation. +For more information and samples on how to build images, deploy them to dockerhub and use them in your cluster please refer to the [Building Containers](./33-building-containers.md) documentation. There is no requirement to be debian based. For consistency with other packages it is recommeneded though. Please note though that the container **must be based off a Linux distribution as Windows is not supported**. diff --git a/docs/51-long-running-job.md b/docs/51-long-running-job.md index f472f044..ce445752 100644 --- a/docs/51-long-running-job.md +++ b/docs/51-long-running-job.md @@ -40,9 +40,9 @@ jobList <- getJobList(filter) View(jobList) ``` -You can learn more about how to execute long-running jobs [here](./docs/23-persistent-storage.md). 
+You can learn more about how to execute long-running jobs [here](./72-persistent-storage.md). -With long-running jobs, you can take advantage of Azure's autoscaling capabilities to save time and/or money. Learn more about autoscale [here](./docs/11-autoscale.md). +With long-running jobs, you can take advantage of Azure's autoscaling capabilities to save time and/or money. Learn more about autoscale [here](./32-autoscale.md). ## Configuring an asynchronous job You can configure a job to run asynchronously by specifying wait = FALSE in job options: From c84cdc6edeafc2f3cf24d63a43a02e9979d4cc6d Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 14 May 2018 12:52:59 -0700 Subject: [PATCH 11/11] Added Notable features --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index b7347ebe..e2562c2f 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,9 @@ NOTE: The terms *pool* and *cluster* are used interchangably throughout this doc ## Notable Features - Ability to use low-priority VMs for an 80% discount [(link)](./docs/31-vm-sizes.md#low-priority-vms) +- Users can bring their own Docker Image +- AAD and VNets Support +- Built-in support for Azure Blob Storage ## Dependencies