diff --git a/scripts/variantstore/azure/HelloAzure.wdl b/scripts/variantstore/azure/HelloAzure.wdl
new file mode 100644
index 00000000000..3b40a5805a0
--- /dev/null
+++ b/scripts/variantstore/azure/HelloAzure.wdl
@@ -0,0 +1,127 @@
+version 1.0
+
+workflow HelloAzure {
+    input {
+        String sql_server
+        String sql_database
+        File utf8_token_file
+        File python_script
+        File ammonite_script
+    }
+    meta {
+        description: "Workflow to say Hello to Azure SQL Database from sqlcmd, Python, and Ammonite (Java ecosystem) contexts"
+    }
+    parameter_meta {
+        sql_server: {
+            description: "Name of the Azure SQL Database Server without the .database.windows.net suffix"
+        }
+        sql_database: {
+            description: "Name of the Database within the Azure SQL Database Server"
+        }
+        utf8_token_file: {
+            description: "A file containing a UTF-8 encoded access token generated with credentials that can access the Azure SQL Database Server, e.g. `az account get-access-token --resource=https://database.windows.net/ --query accessToken --output tsv > db_access_token.txt`"
+        }
+    }
+
+    call HelloFromSqlcmd {
+        input:
+            sql_server = sql_server,
+            sql_database = sql_database,
+            token_file = utf8_token_file
+    }
+
+    call HelloFromPython {
+        input:
+            sql_server = sql_server,
+            sql_database = sql_database,
+            python_script = python_script,
+            token_file = utf8_token_file
+    }
+
+    call HelloFromAmmonite {
+        input:
+            sql_server = sql_server,
+            sql_database = sql_database,
+            ammonite_script = ammonite_script,
+            token_file = utf8_token_file
+    }
+}
+
+task HelloFromSqlcmd {
+    input {
+        String sql_server
+        String sql_database
+        File token_file
+    }
+    meta {
+        description: "Say hello to Azure SQL Database from sqlcmd using a database access token"
+    }
+    command <<<
+        # Prepend date, time and pwd to xtrace log entries.
+        PS4='\D{+%F %T} \w $ '
+        set -o errexit -o nounset -o pipefail -o xtrace
+
+        # sqlcmd is particular about the formatting and encoding of its access token: no whitespace and UTF-16LE.
+        # Python is particular too, but those manipulations are sprinkled into the code. Java / Ammonite doesn't
+        # seem to care about the encoding (or autodetects and adapts).
+        cut -f 1 ~{token_file} | tr -d '\n' | iconv -f ascii -t UTF-16LE > /tmp/db_access_token.txt
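+        # If login fails mysteriously, sanity-check the conversion with e.g. `od -c /tmp/db_access_token.txt`:
+        # proper UTF-16LE output shows a NUL byte (\0) after each ASCII character.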
+
+        sqlcmd -S tcp:~{sql_server}.database.windows.net,1433 -d ~{sql_database} -G -Q 'select @@version as "Hello Azure SQL Database!"' -P /tmp/db_access_token.txt
+    >>>
+    runtime {
+        docker: "us.gcr.io/broad-dsde-methods/variantstore:coa-2023-02-22"
+    }
+    output {
+        String out = read_string(stdout())
+    }
+}
+
+task HelloFromPython {
+    input {
+        String sql_server
+        String sql_database
+        File python_script
+        File token_file
+    }
+    meta {
+        description: "Say hello to Azure SQL Database from Python -> pyodbc -> unixodbc -> MS ODBC driver"
+    }
+    command <<<
+        # Prepend date, time and pwd to xtrace log entries.
+        PS4='\D{+%F %T} \w $ '
+        set -o errexit -o nounset -o pipefail -o xtrace
+
+        python3 ~{python_script} --server ~{sql_server} --database ~{sql_database} --token-file ~{token_file}
+    >>>
+    runtime {
+        docker: "us.gcr.io/broad-dsde-methods/variantstore:coa-2023-02-22"
+    }
+    output {
+        String out = read_string(stdout())
+    }
+}
+
+task HelloFromAmmonite {
+    input {
+        String sql_server
+        String sql_database
+        File ammonite_script
+        File token_file
+    }
+    meta {
+        description: "Say hello to Azure SQL Database from Ammonite/Java -> JDBC -> MS JDBC driver"
+    }
+    command <<<
+        # Prepend date, time and pwd to xtrace log entries.
+        PS4='\D{+%F %T} \w $ '
+        set -o errexit -o nounset -o pipefail -o xtrace
+
+        amm ~{ammonite_script} --server ~{sql_server} --database ~{sql_database} --tokenFile ~{token_file}
+    >>>
+    runtime {
+        docker: "us.gcr.io/broad-dsde-methods/variantstore:coa-2023-02-22"
+    }
+    output {
+        String out = read_string(stdout())
+    }
+}
diff --git a/scripts/variantstore/azure/build_coa_docker.sh b/scripts/variantstore/azure/build_coa_docker.sh
new file mode 100755
index 00000000000..d290b68c2ca
--- /dev/null
+++ b/scripts/variantstore/azure/build_coa_docker.sh
@@ -0,0 +1,18 @@
+if [ $# -lt 1 ]; then
+    echo "USAGE: ./build_coa_docker.sh [DOCKER_TAG_STRING] [OPTIONAL:LATEST]"
+    echo " e.g.: ./build_coa_docker.sh $(date -I)"
+    exit 1
+fi
+
+set -o errexit -o nounset -o pipefail -o xtrace
+
+BASE_REPO="broad-dsde-methods/variantstore"
+REPO_WITH_TAG="${BASE_REPO}:coa-${1}"
+GCR_TAG="us.gcr.io/${REPO_WITH_TAG}"
+
+docker build . -t "${REPO_WITH_TAG}" -f cromwell_on_azure.Dockerfile
+
+docker tag "${REPO_WITH_TAG}" "${GCR_TAG}"
+docker push "${GCR_TAG}"
+
+echo "Docker image pushed to \"${GCR_TAG}\""
diff --git a/scripts/variantstore/azure/cromwell_on_azure.Dockerfile b/scripts/variantstore/azure/cromwell_on_azure.Dockerfile
new file mode 100644
index 00000000000..e4c3dedbcf3
--- /dev/null
+++ b/scripts/variantstore/azure/cromwell_on_azure.Dockerfile
@@ -0,0 +1,65 @@
+# Docker image with a grab bag of utilities for Cromwell on Azure exploration spikes. Not optimized for size or
+# anything else; for now this just includes all the potentially useful things.
+FROM ubuntu:20.04
+
+# Azure CLI
+# https://learn.microsoft.com/en-us/cli/azure/install-azure-cli-linux?pivots=apt#option-2-step-by-step-installation-instructions
+RUN apt-get update
+RUN apt-get install --assume-yes ca-certificates curl apt-transport-https lsb-release gnupg
+
+RUN mkdir -p /etc/apt/keyrings
+RUN curl -sLS https://packages.microsoft.com/keys/microsoft.asc | \
+    gpg --dearmor | \
+    tee /etc/apt/keyrings/microsoft.gpg > /dev/null
+RUN chmod go+r /etc/apt/keyrings/microsoft.gpg
+
+# ENV AZ_REPO=$(lsb_release -cs)
+# Hardcode to focal/20.04 for consistency with the base image above and the sqlcmd setup below.
+ENV AZ_REPO=focal
+RUN echo "deb [arch=`dpkg --print-architecture` signed-by=/etc/apt/keyrings/microsoft.gpg] https://packages.microsoft.com/repos/azure-cli/ $AZ_REPO main" | \
+    tee /etc/apt/sources.list.d/azure-cli.list
+
+RUN apt-get update
+RUN apt-get install --assume-yes azure-cli
+
+# Install sqlcmd (Microsoft SQL client)
+# https://learn.microsoft.com/en-us/sql/linux/sql-server-linux-setup-tools?view=sql-server-ver16&tabs=ubuntu-install%2Credhat-offline#install-tools-on-linux
+# Also sneak in an installation of the driver for Microsoft databases via `msodbcsql18`.
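+# (Note that apt-key as used below is deprecated on releases newer than focal; the /etc/apt/keyrings approach used
+# for the Azure CLI above is the forward-compatible alternative, but apt-key still works on focal/20.04.)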
+RUN curl https://packages.microsoft.com/keys/microsoft.asc | \
+    apt-key add -
+
+RUN curl https://packages.microsoft.com/config/ubuntu/20.04/prod.list | \
+    tee /etc/apt/sources.list.d/msprod.list
+
+RUN apt-get update
+
+# sneaky EULA "acceptance" https://stackoverflow.com/a/42383714
+ENV ACCEPT_EULA=Y
+
+# ODBC and Microsoft ODBC SQL driver
+RUN apt-get install --assume-yes mssql-tools unixodbc-dev msodbcsql18
+ENV PATH=$PATH:/opt/mssql-tools/bin
+
+# Python
+RUN apt-get install --assume-yes python3-pip
+COPY requirements.txt requirements.txt
+RUN pip3 install -r requirements.txt
+
+# Temurin 11 JDK
+# https://askubuntu.com/a/1386901
+RUN apt-get install --assume-yes wget
+RUN wget -O - https://packages.adoptium.net/artifactory/api/gpg/key/public | apt-key add -
+RUN echo "deb https://packages.adoptium.net/artifactory/deb $(awk -F= '/^VERSION_CODENAME/{print$2}' /etc/os-release) main" | tee /etc/apt/sources.list.d/adoptium.list
+RUN apt-get update && apt-get install --assume-yes temurin-11-jdk
+
+# Coursier / Ammonite for scripting in the Java ecosystem
+# https://get-coursier.io/docs/cli-installation#linux
+#
+# Use the statically linked version for now to get around a broken dynamically linked launcher
+# https://github.com/coursier/coursier/issues/2624
+# https://stackoverflow.com/a/75232986/21269164
+RUN curl -fL "https://github.com/coursier/launchers/raw/master/cs-x86_64-pc-linux-static.gz" | gzip -d > /usr/local/bin/cs
+RUN chmod +x /usr/local/bin/cs
+RUN mkdir -p /coursier/bin
+ENV PATH=/coursier/bin:$PATH
+RUN cs setup --install-dir /coursier/bin --yes
diff --git a/scripts/variantstore/azure/hello_from_ammonite.sc b/scripts/variantstore/azure/hello_from_ammonite.sc
new file mode 100644
index 00000000000..f282fc578e2
--- /dev/null
+++ b/scripts/variantstore/azure/hello_from_ammonite.sc
@@ -0,0 +1,65 @@
+// Ammonite script to say Hello to Azure SQL Database from the Java ecosystem.
+
+// Package management
+import $ivy.`com.microsoft.sqlserver:mssql-jdbc:12.2.0.jre11`
+import $ivy.`com.azure:azure-identity:1.4.6`
+// ANTLR 4 appears to be required by one of the above Microsoft packages, but the dependency is not expressed
+// explicitly and so is not resolved automatically. Without adding this import ourselves Ammonite fails to compile
+// this script.
+import $ivy.`org.antlr:antlr4:4.12.0`
+
+// Imports
+import com.azure.core.credential.*
+import com.azure.identity.*
+import com.microsoft.sqlserver.jdbc.SQLServerDataSource
+import java.nio.charset.StandardCharsets
+import java.nio.file.*
+import java.sql.*
+import java.util.*
+
+
+// Nearly everything taken from
+// https://learn.microsoft.com/en-us/azure/app-service/tutorial-connect-msi-azure-database?tabs=sqldatabase%2Csystemassigned%2Cjava%2Cwindowsclient#3-modify-your-code
+def getAccessTokenViaRequest(): String = {
+  val creds = new DefaultAzureCredentialBuilder().build()
+  val request = new TokenRequestContext()
+  request.addScopes("https://database.windows.net//.default")
+  val accessToken = creds.getToken(request).block()
+  accessToken.getToken()
+}
+
+// Generate a token via
+// az account get-access-token --resource=https://database.windows.net/ --query accessToken --output tsv > db_access_token.txt
+// Note this produces a token file with a confounding trailing newline, which the code below has to `trim()`.
+// Also note that unlike the sqlcmd and Python contexts, there is nothing here about UTF-16LE encoding; the Java
+// ecosystem seems to deal with the ASCII / UTF-8 access token just fine without the caller doing anything special.
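+// Presumably the JDBC driver performs whatever re-encoding it needs internally, since setAccessToken() below takes
+// a plain Java String.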
+def getAccessTokenViaFile(tokenFile: String): String = {
+  // https://www.digitalocean.com/community/tutorials/java-read-file-to-string
+  val token = new String(Files.readAllBytes(Paths.get(tokenFile)))
+  token.trim()
+}
+
+@main
+def main(server: String, database: String, tokenFile: Option[String] = None) = {
+  val ds = new SQLServerDataSource()
+
+  val token = tokenFile match {
+    case Some(file) => getAccessTokenViaFile(file)
+    case None => getAccessTokenViaRequest()
+  }
+  ds.setAccessToken(token)
+  ds.setServerName(s"${server}.database.windows.net")
+  ds.setDatabaseName(database)
+
+  val connection = ds.getConnection()
+  val statement = connection.createStatement()
+
+  val resultSet = statement.executeQuery("""
+
+    select @@version as "Hello Azure SQL Database!"
+
+  """)
+
+  resultSet.next()
+  val result = resultSet.getString(1)
+  print(result)
+}
diff --git a/scripts/variantstore/azure/hello_from_python.py b/scripts/variantstore/azure/hello_from_python.py
new file mode 100644
index 00000000000..6c64ab1f2d8
--- /dev/null
+++ b/scripts/variantstore/azure/hello_from_python.py
@@ -0,0 +1,83 @@
+from azure.identity import DefaultAzureCredential
+
+import argparse
+import pyodbc
+import struct
+
+
+def read_token_from_file(token_file_name):
+    # https://learn.microsoft.com/en-us/azure/app-service/tutorial-connect-msi-azure-database?tabs=sqldatabase%2Csystemassigned%2Cpython%2Cwindowsclient#3-modify-your-code
+    with open(token_file_name) as token_file:
+        token_str = token_file.read().rstrip().encode("UTF-16-LE")
+    return token_str
+
+
+def fetch_token():
+    credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True)
+    token_str = credential.get_token("https://database.windows.net/.default").token.encode("UTF-16-LE")
+    return token_str
+
+
+def token_to_struct(token_str: bytes):
+    token_struct = struct.pack(f'<I{len(token_str)}s', len(token_str), token_str)
+    return token_struct
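+
+
+# (Sketch, not the original code:) the rest of this script presumably follows the Microsoft-documented pyodbc
+# access-token pattern, in which the packed token struct is passed as the pre-connection attribute
+# SQL_COPT_SS_ACCESS_TOKEN (1256). Argument parsing for the --server/--database/--token-file flags used by the
+# WDL task is omitted here.
+SQL_COPT_SS_ACCESS_TOKEN = 1256
+
+
+def say_hello(server, database, token_struct):
+    connection_string = f'Driver={{ODBC Driver 18 for SQL Server}};Server=tcp:{server}.database.windows.net,1433;Database={database}'
+    connection = pyodbc.connect(connection_string, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct})
+    cursor = connection.cursor()
+    cursor.execute('select @@version as "Hello Azure SQL Database!"')
+    print(cursor.fetchone()[0])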
+
+
+# The functions below belong to the separate CoA workflow submission script described in `__main__` at the bottom.
+def exactly_one_or_die(items, kind, filter=None, describer=None):
+    found = [item for item in items if not filter or filter(item)]
+
+    if len(found) == 0:
+        raise ValueError(f"Found no {kind}!")
+
+    if len(found) > 1:
+        message = f"Found multiple {kind}!"
+        if describer:
+            message = message + " : " + ", ".join([describer(f) for f in found])
+        raise ValueError(message)
+
+    return found[0]
+
+
+def get_subscription(credentials):
+    subscription_client = SubscriptionClient(credentials)
+    return exactly_one_or_die(subscription_client.subscriptions.list(),
+                              "subscription",
+                              describer=lambda s: s.id)
+
+
+def get_resource_group_filter(resource_group_name):
+    if resource_group_name:
+        resource_group_filter = lambda g: g.name.startswith(resource_group_name)
+        resource_group_descriptor = f"resource group '{resource_group_name}'"
+    else:
+        pattern = f"{os.environ['USER']}-[a-f0-9]+$"
+        resource_group_descriptor = f"resource group matching pattern '{pattern}'"
+        resource_group_filter = lambda g: re.match(pattern, g.name)
+    return resource_group_filter, resource_group_descriptor
+
+
+def get_resource_group(credentials, subscription, resource_group_name=None):
+    resource_group_filter, resource_group_descriptor = get_resource_group_filter(resource_group_name)
+    resource_client = ResourceManagementClient(credentials, subscription.subscription_id)
+    return exactly_one_or_die(resource_client.resource_groups.list(),
+                              resource_group_descriptor,
+                              filter=resource_group_filter)
+
+
+def get_storage_account(credentials, subscription, resource_group):
+    storage_client = StorageManagementClient(credentials, subscription.subscription_id)
+    # `az` returns storage account JSONs with a `resourceGroup` attribute, but the objects returned by the Python API
+    # do not have this attribute. However the `id`s of these Python objects do contain the resource group in a
+    # predictable pattern, so look for that instead.
+    id_prefix = f"/subscriptions/{subscription.subscription_id}/resourceGroups/{resource_group.name}"
+    return exactly_one_or_die(storage_client.storage_accounts.list(), "storage account",
+                              filter=lambda a: a.id.startswith(id_prefix),
+                              describer=lambda a: a.name)
+
+
+def get_sql_server(credentials, subscription):
+    sql_management_client = SqlManagementClient(credentials, subscription.subscription_id)
+    return exactly_one_or_die(sql_management_client.servers.list(), 'Azure SQL Server')
+
+
+def get_sql_database(credentials, subscription, resource_group, server):
+    sql_management_client = SqlManagementClient(credentials, subscription.subscription_id)
+    resource_group_filter, _ = get_resource_group_filter(resource_group.name)
+    return exactly_one_or_die(sql_management_client.databases.list_by_server(resource_group.name, server.name),
+                              'Azure SQL Database',
+                              resource_group_filter)
+
+
+def get_blob_service_client():
+    return BlobServiceClient.from_connection_string(os.getenv('AZURE_CONNECTION_STRING'))
+
+
+def generate_inputs_json():
+    workflow_name = Path(args.workflow).stem
+    return f"""
+{{
+    "{workflow_name}.utf8_token_file": "{access_token_storage_path}",
+    "{workflow_name}.python_script": "{python_script_storage_path}",
+    "{workflow_name}.ammonite_script": "{ammonite_script_storage_path}",
+    "{workflow_name}.sql_server": "{args.sql_server}",
+    "{workflow_name}.sql_database": "{args.sql_database}"
+}}
+"""
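+
+
+# For the HelloAzure workflow the rendered inputs JSON comes out roughly like the following (paths illustrative):
+# {
+#     "HelloAzure.utf8_token_file": "/<storage account>/inputs/<user>/db_access_token.txt",
+#     "HelloAzure.python_script": "/<storage account>/inputs/hello_azure/hello_from_python.py",
+#     "HelloAzure.ammonite_script": "/<storage account>/inputs/hello_azure/hello_from_ammonite.sc",
+#     "HelloAzure.sql_server": "<server name>",
+#     "HelloAzure.sql_database": "<database name>"
+# }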
+
+
+def generate_trigger_json():
+    """
+    Creates a trigger JSON of the form accepted by CromwellOnAzure. This is conceptually similar to the JSON payload
+    of a workflow submission POST that would normally go to Cromwell's REST interface.
+    """
+    return f"""
+{{
+    "WorkflowUrl": "{workflow_storage_path}",
+    "WorkflowInputsUrl": "{inputs_storage_path}",
+    "WorkflowInputsUrls": null,
+    "WorkflowOptionsUrl": null,
+    "WorkflowDependenciesUrl": null
+}}
+    """.strip()
+
+
+def stage_input(input_file, blob_address=None):
+    workflow_path = Path(args.workflow)
+    input_path = Path(input_file)
+    with open(input_file, "rb") as input_bytes:
+        # `name` is the filename without leading directory components,
+        #     e.g. the name for /path/to/Hello.wdl is Hello.wdl.
+        # `stem` is the filename without leading directory components and without an extension,
+        #     e.g. the stem for /path/to/Hello.wdl is Hello.
+
+        # `inflection.underscore` snake-cases Pascal-cased workflow names. Not strictly required here but nice,
+        #     e.g. "HelloAzure" ==> "hello_azure".
+        if not blob_address:
+            blob_address = f"{inflection.underscore(workflow_path.stem)}/{input_path.name}"
+        blob_client = inputs_client.get_blob_client(blob_address)
+        blob_client.upload_blob(input_bytes, overwrite=True)
+    storage_path = f'/{storage_account.name}/inputs/{blob_address}'
+    return storage_path
+
+
+def stage_trigger_json():
+    workflow_path = Path(args.workflow)
+    # Create the trigger JSON and stage it into /<storage account>/workflows/new.
+    trigger_json = generate_trigger_json()
+    workflows_client = blob_service_client.get_container_client('workflows')
+
+    blob_address = f'new/{workflow_path.stem}-{uuid4()}.json'
+    blob_client = workflows_client.get_blob_client(blob_address)
+    blob_client.upload_blob(bytes(trigger_json, 'utf8'))
+
+    print(f"Trigger JSON staged to /{storage_account.name}/workflows/{blob_address}.")
+
+
+def stage_inputs_json():
+    workflow_path = Path(args.workflow)
+    # Stage the workflow inputs into /<storage account>/inputs/<workflow>/<workflow>.inputs.json.
+    blob_address = f"{inflection.underscore(workflow_path.stem)}/{workflow_path.stem}.inputs.json"
+    blob_client = inputs_client.get_blob_client(blob_address)
+    blob_client.upload_blob(bytes(inputs_json, 'utf8'), overwrite=True)
+    inputs_storage_path = f'/{storage_account.name}/inputs/{blob_address}'
+    return inputs_storage_path
+
+
+if __name__ == '__main__':
+    description = """
+
+    Cromwell on Azure (CoA) "Hello Azure!" workflow submission script that does the following:
+
+    1. Stages the workflow and its `File` inputs (scripts, database access token) to the CoA inputs container.
+    2. Generates an inputs JSON corresponding to the inputs staged in 1. and stages this to the CoA inputs container.
+    3. Generates a trigger JSON for this workflow + inputs and stages this to the CoA workflows container under 'new'.
+
+    The script does *not* attempt to poll the submitted workflow for status; this is simple "fire and forget".
+    Observing workflow progress involves poking around the 'workflows' and 'cromwell-executions' containers within
+    the storage account created as part of the Cromwell on Azure deployment.
+    """
+
+    parser = argparse.ArgumentParser(allow_abbrev=False, description=description)
+    parser.add_argument('--workflow', type=str, help='Workflow WDL source', required=True)
+    parser.add_argument('--python-script', type=str, help="Hello World Python script", required=True)
+    parser.add_argument('--ammonite-script', type=str, help="Hello World Ammonite script", required=True)
+    parser.add_argument('--sql-server', type=str, help='Azure SQL Server name', required=True)
+    parser.add_argument('--sql-database', type=str, help='Azure SQL Server database', required=True)
+    parser.add_argument('--utf8-access-token', type=str, help='UTF-8 encoded Azure SQL Database access token',
+                        required=True)
+    parser.add_argument('--resource-group', type=str, help='Azure Resource Group name', required=False)
+    args = parser.parse_args()
+
+    if not os.getenv('AZURE_CONNECTION_STRING'):
+        raise ValueError("Must define 'AZURE_CONNECTION_STRING' as a SAS token with write permissions to the CoA storage account, see https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string#store-a-connection-string")
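+
+    # AZURE_CONNECTION_STRING takes the documented SAS connection string form, roughly (values illustrative):
+    # export AZURE_CONNECTION_STRING='BlobEndpoint=https://<storage account>.blob.core.windows.net;SharedAccessSignature=<SAS token>'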
+
+    # The shared token cache was causing issues with attempts to use expired tokens, so disable it.
+    # `DefaultAzureCredential` appears to fall back to Azure CLI credentials, which works fine.
+    # https://github.com/Azure/azure-sdk-for-python/issues/22822#issuecomment-1024668507
+    credentials = DefaultAzureCredential(exclude_shared_token_cache_credential=True)
+
+    # Figure out the particulars of our Cromwell on Azure deployment.
+    subscription = get_subscription(credentials)
+    resource_group = get_resource_group(credentials, subscription, args.resource_group)
+    storage_account = get_storage_account(credentials, subscription, resource_group)
+    sql_server = get_sql_server(credentials, subscription)
+    sql_database = get_sql_database(credentials, subscription, resource_group, sql_server)
+
+    blob_service_client = get_blob_service_client()
+    inputs_client = blob_service_client.get_container_client('inputs')
+
+    # Stage the inputs into /<storage account>/inputs/<workflow>/.
+    workflow_storage_path = stage_input(args.workflow)
+    python_script_storage_path = stage_input(args.python_script)
+    ammonite_script_storage_path = stage_input(args.ammonite_script)
+    access_token_storage_path = stage_input(args.utf8_access_token, blob_address=f"{os.environ['USER']}/db_access_token.txt")
+
+    # Generate the inputs JSON using the values above and stage it.
+    inputs_json = generate_inputs_json()
+    inputs_storage_path = stage_inputs_json()
+
+    # Finally stage the trigger JSON that will kick off workflow execution.
+    stage_trigger_json()
diff --git a/scripts/variantstore/docs/azure/cromwell_on_azure.md b/scripts/variantstore/docs/azure/cromwell_on_azure.md
index ecdabc14ff2..63814fed2c8 100644
--- a/scripts/variantstore/docs/azure/cromwell_on_azure.md
+++ b/scripts/variantstore/docs/azure/cromwell_on_azure.md
@@ -1,3 +1,9 @@
+# **Note**
+
+The following describes setup for Microsoft's [CromwellOnAzure](https://github.com/microsoft/CromwellOnAzure) which is
+*not* the same as, or necessarily even similar to, Cromwell on Azure in Terra. Much of what follows may not be relevant
+to running Cromwell in Terra.
+
 # Setup for Mac
 
 * Install the [Azure CLI locally](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli-macos).
@@ -50,6 +56,11 @@ EXTERNAL_IP=$(curl --silent ifconfig.me)
 
 az sql server firewall-rule create -n AllowYourIp --start-ip-address $EXTERNAL_IP --end-ip-address $EXTERNAL_IP
 
+# Add an allow rule for all Azure traffic so the Azure Batch VMs spun up by Cromwell on Azure can connect.
+# Hopefully this can be done more narrowly in the future, but we likely won't know the Batch VMs' exact IP addresses in advance.
+# https://learn.microsoft.com/en-us/azure/azure-sql/database/firewall-configure?view=azuresql#connections-from-inside-azure
+az sql server firewall-rule create -n AllowAllWindowsAzureIps --start-ip-address 0.0.0.0 --end-ip-address 0.0.0.0
+
 # Create an AD admin group for the Azure SQL Database.
 AZ_SQLDB_AD_ADMIN_GROUP_ID=$(az ad group create --display-name "${RESOURCE_GROUP} Azure SQL Database AD Admin Group" --mail-nickname "${RESOURCE_GROUP}-ad-admin" | jq -r .id)
 
@@ -61,35 +72,30 @@ az ad group member add --group $AZ_SQLDB_AD_ADMIN_GROUP_ID --member-id ${VARIANT
 COA_UAMI_PRINCIPAL_ID=$(az identity list | jq -r ".[] | select(.name == \"${RESOURCE_GROUP}-identity\") | .principalId")
 az ad group member add --group $AZ_SQLDB_AD_ADMIN_GROUP_ID --member-id ${COA_UAMI_PRINCIPAL_ID}
 
+# Grab the server ID as we want to grant the SQL Security Manager role to the UAMI so it can add the Batch VM's IP
+# address to the server firewall's allowed IP addresses.
+SQL_SERVER_ID=$(az sql server list | jq -r ".[] | select(.id | test(\"${RESOURCE_GROUP}\")) | .id")
+
+az role assignment create --role "SQL Security Manager" --assignee "${COA_UAMI_PRINCIPAL_ID}" --scope "${SQL_SERVER_ID}"
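+
+# If needed, sanity-check the grant with e.g.:
+# az role assignment list --assignee "${COA_UAMI_PRINCIPAL_ID}" --scope "${SQL_SERVER_ID}"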
+
 # Make the AD Admin group the AD Admin for the Azure SQL Server. All members of this group will be able to act as
 # Azure AD Admins for this server.
 az sql server ad-admin create --object-id ${AZ_SQLDB_AD_ADMIN_GROUP_ID} --display-name "${RESOURCE_GROUP} Azure SQL Database AD Admin"
 
-# **NOTE** `sqlcmd` behavior with respect to the `-G` Azure Directory authentication parameter is incompatible between
-# Linux and Mac versions. Mac `sqlcmd` is a [port to the Go language](https://github.com/microsoft/go-sqlcmd) and thus
-# a completely different code base than the Microsoft version of `sqlcmd` for Linux.
-
 get_db_token() {
+    # Fetches a database access token with ~1 hour TTL, ASCII / UTF-8 encoded and newline terminated.
     # https://learn.microsoft.com/en-us/sql/connect/odbc/linux-mac/connecting-with-sqlcmd?view=azuresqldb-current
-    az account get-access-token --resource https://database.windows.net --output tsv | cut -f 1 | tr -d '\n' | iconv -f ascii -t UTF-16LE
+    az account get-access-token --resource https://database.windows.net --query accessToken --output tsv
 }
 
-# On Mac:
-
-# The Mac version of `sqlcmd` does not support `-G` authentication with `-P`, so use the `SQLCMDPASSWORD` environment variable.
-SQLCMDPASSWORD=$(get_db_token)
-
-# Say hello to Azure SQL Database!
-sqlcmd -S tcp:${SQL_SERVER}.database.windows.net,1433 -d ${SQL_DATABASE} -G -Q 'select @@version as "Hello Azure SQL Database!"'
-
-# On Linux:
-
-# The Linux version of `sqlcmd` does not support `-G` authentication with the `SQLCMDPASSWORD` environment variable, so use `-P`.
-# https://learn.microsoft.com/en-us/sql/connect/odbc/linux-mac/connecting-with-sqlcmd?view=azuresqldb-current
-get_db_token > db_token
+# Be aware that there are at least two versions of `sqlcmd` circulating, and they are not compatible with respect to
+# the handling of Azure Active Directory credentials. The instructions below are for the `mssql-tools` version (as the
+# package is called in both Ubuntu and Homebrew). Note that the `sqlcmd` formula in Homebrew installs a Golang-based
+# version of `sqlcmd` which uses the environment variable SQLCMDPASSWORD to hold the access token rather than a file
+# specified with the -P option.
 
 # Say hello to Azure SQL Database!
-sqlcmd -S tcp:${SQL_SERVER}.database.windows.net,1433 -d ${SQL_DATABASE} -G -Q 'select @@version as "Hello Azure SQL Database!"' -P db_token
+sqlcmd -S tcp:${SQL_SERVER}.database.windows.net,1433 -d ${SQL_DATABASE} -G -Q 'select @@version as "Hello Azure SQL Database!"' -P =(get_db_token | tr -d '\n' | iconv -f ascii -t UTF-16LE)
 
 Hello Azure SQL Database!
 
@@ -100,3 +106,7 @@ Microsoft SQL Azure (RTM) - 12.0.2000.8
 
 (1 rows affected)
 
+
+# Yes, sqlcmd (or more specifically the Microsoft ODBC Driver) really does require newline-stripped UTF-16 Little
+# Endian encoded tokens and will fail to log in with no useful diagnostics if it gets anything else.
+# =() is some zsh trickery that uses temporary files; the usual bash <() construct does not work here.
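+
+# In bash, stage the converted token in a temporary file instead:
+#     get_db_token | tr -d '\n' | iconv -f ascii -t UTF-16LE > /tmp/db_token
+#     sqlcmd -S tcp:${SQL_SERVER}.database.windows.net,1433 -d ${SQL_DATABASE} -G -Q 'select @@version as "Hello Azure SQL Database!"' -P /tmp/db_token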