From cea055058c9ac214a1f555f8b05f7e70492f4ae6 Mon Sep 17 00:00:00 2001 From: zfengms Date: Wed, 4 Apr 2018 15:40:57 -0700 Subject: [PATCH] Feature/nationalcloud (#239) * support national cloud * fix hardcoded domain name in createOutputFile * update rAzureBatch version etc * auto discovery of storage account endpoint suffix * styling fix * fix test failure * add back endpointSuffix for storage account * add storage account endpoint suffix to downloadBlob call * update docs --- DESCRIPTION | 4 +-- R/credentials.R | 13 +++++++-- R/doAzureParallel.R | 31 +++++++++++++++------ R/helpers.R | 10 ++++--- R/utility.R | 14 ++++++---- docs/33-programmatically-generate-config.md | 3 +- docs/34-national-clouds.md | 30 ++++++++++++++++++++ docs/README.md | 4 +++ 8 files changed, 87 insertions(+), 22 deletions(-) create mode 100644 docs/34-national-clouds.md diff --git a/DESCRIPTION b/DESCRIPTION index 952b4bd3..b5f69210 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,7 +17,7 @@ Depends: foreach (>= 1.4.3), iterators (>= 1.0.8) Imports: - rAzureBatch (>= 0.5.3), + rAzureBatch (>= 0.5.7), jsonlite, rjson, xml2, @@ -27,5 +27,5 @@ Suggests: caret, plyr, lintr -Remotes: Azure/rAzureBatch@v0.5.6 +Remotes: Azure/rAzureBatch@v0.5.7 RoxygenNote: 6.0.1 diff --git a/R/credentials.R b/R/credentials.R index a705f87c..b41d1d50 100644 --- a/R/credentials.R +++ b/R/credentials.R @@ -22,7 +22,10 @@ #' generateCredentialsConfig("test_config.json") #' generateCredentialsConfig("test_config.json", batchAccount = "testbatchaccount", #' batchKey = "test_batch_account_key", batchUrl = "http://testbatchaccount.azure.com", -#' storageAccount = "teststorageaccount", storageKey = "test_storage_account_key") +#' storageAccount = "teststorageaccount", storageKey = "test_storage_account_key", +#' storageEndpointSuffix = "core.windows.net") +#' supported storage account endpoint suffix: core.windows.net (default), +#' core.chinacloudapi.cn, core.cloudapi.de, core.usgovcloudapi.net, etc. 
#' } #' @export generateCredentialsConfig <- function(fileName, ...) { @@ -46,6 +49,11 @@ generateCredentialsConfig <- function(fileName, ...) { "storage_account_key", args$storageKey) + storageSuffix <- + ifelse(is.null(args$storageEndpointSuffix), + "core.windows.net", + args$storageEndpointSuffix) + githubAuthenticationToken <- ifelse(is.null(args$githubAuthenticationToken), "", @@ -77,7 +85,8 @@ generateCredentialsConfig <- function(fileName, ...) { key = batchKey, url = batchUrl), storageAccount = list(name = storageName, - key = storageKey), + key = storageKey, + endpointSuffix = storageSuffix), githubAuthenticationToken = githubAuthenticationToken, dockerAuthentication = list(username = dockerUsername, password = dockerPassword, diff --git a/R/doAzureParallel.R b/R/doAzureParallel.R index 1ef774f7..a5c4084f 100644 --- a/R/doAzureParallel.R +++ b/R/doAzureParallel.R @@ -272,7 +272,8 @@ setHttpTraffic <- function(value = FALSE) { assign( "inputs", list(name = storageCredentials$name, - sasToken = sasToken), + sasToken = sasToken, + endpointSuffix = storageCredentials$endpointSuffix), .doAzureBatchGlobals ) } @@ -417,20 +418,33 @@ setHttpTraffic <- function(value = FALSE) { # Creating read-only SAS token blob resource file urls sasToken <- rAzureBatch::createSasToken("r", "c", id) workerScriptUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, id, "worker.R", sasToken) + rAzureBatch::createBlobUrl(storageCredentials$name, id, "worker.R", sasToken, storageCredentials$endpointSuffix) mergerScriptUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, id, "merger.R", sasToken) + rAzureBatch::createBlobUrl(storageCredentials$name, id, "merger.R", sasToken, storageCredentials$endpointSuffix) installGithubScriptUrl <- rAzureBatch::createBlobUrl(storageCredentials$name, id, "install_github.R", - sasToken) + sasToken, + storageCredentials$endpointSuffix) installCranScriptUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, id, 
"install_cran.R", sasToken) + rAzureBatch::createBlobUrl(storageCredentials$name, + id, + "install_cran.R", + sasToken, + storageCredentials$endpointSuffix) installBioConductorScriptUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, id, "install_bioconductor.R", sasToken) + rAzureBatch::createBlobUrl(storageCredentials$name, + id, + "install_bioconductor.R", + sasToken, + storageCredentials$endpointSuffix) jobCommonFileUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, id, jobFileName, sasToken) + rAzureBatch::createBlobUrl(storageCredentials$name, + id, + jobFileName, + sasToken, + storageCredentials$endpointSuffix) requiredJobResourceFiles <- list( rAzureBatch::createResourceFile(url = workerScriptUrl, fileName = "worker.R"), @@ -608,6 +622,7 @@ setHttpTraffic <- function(value = FALSE) { paste0("result/", "merge-result.rds"), sasToken = sasToken, accountName = storageCredentials$name, + endpointSuffix = storageCredentials$endpointSuffix, downloadPath = tempFile, overwrite = TRUE ) @@ -702,7 +717,7 @@ setHttpTraffic <- function(value = FALSE) { azureStorageUrl <- paste0("http://", storageCredentials$name, - ".blob.core.windows.net/", + sprintf(".blob.%s/", storageCredentials$endpointSuffix), id) staticHtml <- "

Errors:

" diff --git a/R/helpers.R b/R/helpers.R index 275ed528..acd7f0e4 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -26,7 +26,7 @@ readToken <- rAzureBatch::createSasToken("r", "c", jobId) envFileUrl <- - rAzureBatch::createBlobUrl(storageCredentials$name, jobId, envFile, readToken) + rAzureBatch::createBlobUrl(storageCredentials$name, jobId, envFile, readToken, storageCredentials$endpointSuffix) resourceFiles <- list(rAzureBatch::createResourceFile(url = envFileUrl, fileName = envFile)) } @@ -38,10 +38,11 @@ if (!is.null(cloudCombine)) { assign("cloudCombine", cloudCombine, .doAzureBatchGlobals) copyCommand <- sprintf( - "%s %s %s --download --saskey $BLOBXFER_SASKEY --remoteresource . --include result/*.rds", + "%s %s %s --endpoint %s --download --saskey $BLOBXFER_SASKEY --remoteresource . --include result/*.rds", accountName, jobId, - "$AZ_BATCH_TASK_WORKING_DIR" + "$AZ_BATCH_TASK_WORKING_DIR", + storageCredentials$endpointSuffix ) downloadCommand <- @@ -61,7 +62,8 @@ rAzureBatch::createBlobUrl( storageAccount = storageCredentials$name, containerName = jobId, - sasToken = rAzureBatch::createSasToken("w", "c", jobId) + sasToken = rAzureBatch::createSasToken("w", "c", jobId), + storageEndpointSuffix = storageCredentials$endpointSuffix ) outputFiles <- list( diff --git a/R/utility.R b/R/utility.R index 839f8b98..9b3551b1 100644 --- a/R/utility.R +++ b/R/utility.R @@ -170,13 +170,17 @@ createOutputFile <- function(filePattern, url) { ) # Parsing url to obtain container's virtual directory path - azureDomain <- "blob.core.windows.net" - parsedValue <- strsplit(url, azureDomain)[[1]] + # sample url: "https://accountname.blob.core.windows.net/outputs?se=2017-07-31&sr=c&st=2017-07-12" + # after split by "/" + # parsedValue[1] is "https:" + # parsedValue[2] is "" + # parsedValue[3] is "accountname.blob.core.windows.net" + # parsedValue[4] is "outputs?se=2017-07-31&sr=c&st=2017-07-12" - accountName <- parsedValue[1] - urlPath <- parsedValue[2] + parsedValue <- strsplit(url, 
"/")[[1]] - baseUrl <- paste0(accountName, azureDomain) + baseUrl <- paste0(parsedValue[1], "//", parsedValue[3]) + urlPath <- sub(baseUrl, "", url) parsedUrlPath <- strsplit(urlPath, "?", fixed = TRUE)[[1]] storageContainerPath <- parsedUrlPath[1] diff --git a/docs/33-programmatically-generate-config.md b/docs/33-programmatically-generate-config.md index c4e77fc7..eafd4587 100644 --- a/docs/33-programmatically-generate-config.md +++ b/docs/33-programmatically-generate-config.md @@ -15,7 +15,8 @@ You can generate credentials by creating a R object as shown below: ), "storageAccount" = list( "name" = "storageaccountname", - "key" = "storageaccountkey" + "key" = "storageaccountkey", + "endpointSuffix" = "core.windows.net" ), "githubAuthenticationToken" = "", "dockerAuthentication" = list("username" = "", diff --git a/docs/34-national-clouds.md b/docs/34-national-clouds.md new file mode 100644 index 00000000..fa53a896 --- /dev/null +++ b/docs/34-national-clouds.md @@ -0,0 +1,30 @@ +# Configuration for national clouds + +doAzureParallel is configured to run in the public Azure cloud by default. To run workloads in national clouds, configure the endpoint suffix for the storage account in the credentials config, which tells doAzureParallel which national cloud environment the storage account resides in. + +EndpointSuffix is the last part of the connection string shown in the Storage Account Access keys blade in the Azure portal. The possible values are usually: + +Azure public cloud: core.windows.net +Azure China cloud: core.chinacloudapi.cn +Azure US government cloud: core.usgovcloudapi.net +Azure German cloud: core.cloudapi.de + +The value may be different if a DNS redirect is used, so it is better to double-check its value on the Storage Account Access keys blade. 
+ +Below is a sample credentials config with the endpoint suffix specified: + +```R +{ + "batchAccount": { + "name": "<batch account name>", + "key": "<batch account key>", + "url": "<batch account url>" + }, + "storageAccount": { + "name": "<storage account name>", + "key": "<storage account key>", + "endpointSuffix": "<storage account endpoint suffix>" + }, + "githubAuthenticationToken": {} +} +``` \ No newline at end of file diff --git a/docs/README.md b/docs/README.md index 8a0fbd58..cb6de45b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -45,6 +45,10 @@ This section will provide information about how Azure works, how best to take ad Generate credentials and cluster config at runtime programmatically +11. **National Cloud configuration** [(link)](./34-national-clouds.md) + + How to run workloads in Azure national clouds + ## Additional Documentation Take a look at our [**Troubleshooting Guide**](./40-troubleshooting.md) for information on how to diagnose common issues.