Skip to content
This repository has been archived by the owner on Oct 12, 2023. It is now read-only.

Commit

Permalink
Feature/nationalcloud (#239)
Browse files Browse the repository at this point in the history
* support national cloud

* fix hardcoded domain name in createOutputFile

* update rAzureBatch version etc

* auto discovery of storage account endpoint suffix

* styling fix

* fix test failure

* add back endpointSuffix for storage account

* add storage account endpoint suffix to downloadBlob call

* update docs
  • Loading branch information
zfengms authored Apr 4, 2018
1 parent 3d84350 commit cea0550
Show file tree
Hide file tree
Showing 8 changed files with 87 additions and 22 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ Depends:
foreach (>= 1.4.3),
iterators (>= 1.0.8)
Imports:
rAzureBatch (>= 0.5.3),
rAzureBatch (>= 0.5.7),
jsonlite,
rjson,
xml2,
Expand All @@ -27,5 +27,5 @@ Suggests:
caret,
plyr,
lintr
Remotes: Azure/rAzureBatch@v0.5.6
Remotes: Azure/rAzureBatch@v0.5.7
RoxygenNote: 6.0.1
13 changes: 11 additions & 2 deletions R/credentials.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@
#' generateCredentialsConfig("test_config.json")
#' generateCredentialsConfig("test_config.json", batchAccount = "testbatchaccount",
#' batchKey = "test_batch_account_key", batchUrl = "http://testbatchaccount.azure.com",
#' storageAccount = "teststorageaccount", storageKey = "test_storage_account_key")
#' storageAccount = "teststorageaccount", storageKey = "test_storage_account_key",
#' storageEndpointSuffix = "core.windows.net")
#' # Supported storage account endpoint suffixes: core.windows.net (default),
#' # core.chinacloudapi.cn, core.cloudapi.de, core.usgovcloudapi.net, etc.
#' }
#' @export
generateCredentialsConfig <- function(fileName, ...) {
Expand All @@ -46,6 +49,11 @@ generateCredentialsConfig <- function(fileName, ...) {
"storage_account_key",
args$storageKey)

storageSuffix <-
ifelse(is.null(args$storageEndpointSuffix),
"core.windows.net",
args$storageEndpointSuffix)

githubAuthenticationToken <-
ifelse(is.null(args$githubAuthenticationToken),
"",
Expand Down Expand Up @@ -77,7 +85,8 @@ generateCredentialsConfig <- function(fileName, ...) {
key = batchKey,
url = batchUrl),
storageAccount = list(name = storageName,
key = storageKey),
key = storageKey,
endpointSuffix = storageEndpointSuffix),
githubAuthenticationToken = githubAuthenticationToken,
dockerAuthentication = list(username = dockerUsername,
password = dockerPassword,
Expand Down
31 changes: 23 additions & 8 deletions R/doAzureParallel.R
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,8 @@ setHttpTraffic <- function(value = FALSE) {
assign(
"inputs",
list(name = storageCredentials$name,
sasToken = sasToken),
sasToken = sasToken,
endpointSuffix = storageCredentials$endpointSuffix),
.doAzureBatchGlobals
)
}
Expand Down Expand Up @@ -417,20 +418,33 @@ setHttpTraffic <- function(value = FALSE) {
# Creating read-only SAS token blob resource file urls
sasToken <- rAzureBatch::createSasToken("r", "c", id)
workerScriptUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name, id, "worker.R", sasToken)
rAzureBatch::createBlobUrl(storageCredentials$name, id, "worker.R", sasToken, storageCredentials$endpointSuffix)
mergerScriptUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name, id, "merger.R", sasToken)
rAzureBatch::createBlobUrl(storageCredentials$name, id, "merger.R", sasToken, storageCredentials$endpointSuffix)
installGithubScriptUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name,
id,
"install_github.R",
sasToken)
sasToken,
storageCredentials$endpointSuffix)
installCranScriptUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name, id, "install_cran.R", sasToken)
rAzureBatch::createBlobUrl(storageCredentials$name,
id,
"install_cran.R",
sasToken,
storageCredentials$endpointSuffix)
installBioConductorScriptUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name, id, "install_bioconductor.R", sasToken)
rAzureBatch::createBlobUrl(storageCredentials$name,
id,
"install_bioconductor.R",
sasToken,
storageCredentials$endpointSuffix)
jobCommonFileUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name, id, jobFileName, sasToken)
rAzureBatch::createBlobUrl(storageCredentials$name,
id,
jobFileName,
sasToken,
storageCredentials$endpointSuffix)

requiredJobResourceFiles <- list(
rAzureBatch::createResourceFile(url = workerScriptUrl, fileName = "worker.R"),
Expand Down Expand Up @@ -608,6 +622,7 @@ setHttpTraffic <- function(value = FALSE) {
paste0("result/", "merge-result.rds"),
sasToken = sasToken,
accountName = storageCredentials$name,
endpointSuffix = storageCredentials$endpointSuffix,
downloadPath = tempFile,
overwrite = TRUE
)
Expand Down Expand Up @@ -702,7 +717,7 @@ setHttpTraffic <- function(value = FALSE) {
azureStorageUrl <-
paste0("http://",
storageCredentials$name,
".blob.core.windows.net/",
sprintf(".blob.%s/", storageCredentials$endpointSuffix),
id)

staticHtml <- "<h1>Errors:</h1>"
Expand Down
10 changes: 6 additions & 4 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

readToken <- rAzureBatch::createSasToken("r", "c", jobId)
envFileUrl <-
rAzureBatch::createBlobUrl(storageCredentials$name, jobId, envFile, readToken)
rAzureBatch::createBlobUrl(storageCredentials$name, jobId, envFile, readToken, storageCredentials$endpointSuffix)
resourceFiles <-
list(rAzureBatch::createResourceFile(url = envFileUrl, fileName = envFile))
}
Expand All @@ -38,10 +38,11 @@
if (!is.null(cloudCombine)) {
assign("cloudCombine", cloudCombine, .doAzureBatchGlobals)
copyCommand <- sprintf(
"%s %s %s --download --saskey $BLOBXFER_SASKEY --remoteresource . --include result/*.rds",
"%s %s %s --endpoint %s --download --saskey $BLOBXFER_SASKEY --remoteresource . --include result/*.rds",
accountName,
jobId,
"$AZ_BATCH_TASK_WORKING_DIR"
"$AZ_BATCH_TASK_WORKING_DIR",
storageCredentials$endpointSuffix
)

downloadCommand <-
Expand All @@ -61,7 +62,8 @@
rAzureBatch::createBlobUrl(
storageAccount = storageCredentials$name,
containerName = jobId,
sasToken = rAzureBatch::createSasToken("w", "c", jobId)
sasToken = rAzureBatch::createSasToken("w", "c", jobId),
storageEndpointSuffix = storageCredentials$endpointSuffix
)

outputFiles <- list(
Expand Down
14 changes: 9 additions & 5 deletions R/utility.R
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,17 @@ createOutputFile <- function(filePattern, url) {
)

# Parsing url to obtain container's virtual directory path
azureDomain <- "blob.core.windows.net"
parsedValue <- strsplit(url, azureDomain)[[1]]
# sample url: "https://accountname.blob.core.windows.net/outputs?se=2017-07-31&sr=c&st=2017-07-12"
# after split by "/"
# parsedValue[1] is "https"
# parsedValue[2] is ""
# parsedValue[3] is "accountname.blob.core.windows.net"
# parsedValue[4] is "outputs?se=2017-07-31&sr=c&st=2017-07-12"

accountName <- parsedValue[1]
urlPath <- parsedValue[2]
parsedValue <- strsplit(url, "/")[[1]]

baseUrl <- paste0(accountName, azureDomain)
baseUrl <- paste0(parsedValue[1], "//", parsedValue[3])
urlPath <- sub(baseUrl, "", url)
parsedUrlPath <- strsplit(urlPath, "?", fixed = TRUE)[[1]]

storageContainerPath <- parsedUrlPath[1]
Expand Down
3 changes: 2 additions & 1 deletion docs/33-programmatically-generate-config.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ You can generate credentials by creating a R object as shown below:
),
"storageAccount" = list(
"name" = "storageaccountname",
"key" = "storageaccountkey"
"key" = "storageaccountkey",
"endpointSuffix" = "core.windows.net"
),
"githubAuthenticationToken" = "",
"dockerAuthentication" = list("username" = "",
Expand Down
30 changes: 30 additions & 0 deletions docs/34-national-clouds.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Configuration for national clouds

doAzureParallel is configured to run in the public Azure cloud by default. To run workloads in a national cloud, set the endpoint suffix for the storage account in the credentials config; this tells doAzureParallel which national cloud environment the storage account resides in.

EndpointSuffix is the last part of the connection string shown on the storage account's Access keys blade in the Azure portal. The usual values are:

- Azure public cloud: `core.windows.net`
- Azure China cloud: `core.chinacloudapi.cn`
- Azure US government cloud: `core.usgovcloudapi.net`
- Azure German cloud: `core.cloudapi.de`

The value may differ if a DNS redirect is used, so double-check it on the storage account's Access keys blade.

Below is a sample credentials config with the endpoint suffix specified:

```json
{
"batchAccount": {
"name": <Azure Batch Account Name>,
"key": <Azure Batch Account Key>,
"url": <Azure Batch Account URL>
},
"storageAccount": {
"name": <Azure Storage Account Name>,
"key": <Azure Storage Account Key>,
"endpointSuffix": <Azure Storage Account Endpoint Suffix>
},
"githubAuthenticationToken": {}
}
```
4 changes: 4 additions & 0 deletions docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ This section will provide information about how Azure works, how best to take ad

Generate credentials and cluster config at runtime programmatically

11. **National Cloud configuration** [(link)](./34-national-clouds.md)

How to run workloads in Azure national clouds

## Additional Documentation
Take a look at our [**Troubleshooting Guide**](./40-troubleshooting.md) for information on how to diagnose common issues.

Expand Down

0 comments on commit cea0550

Please sign in to comment.