Adding OHDSI Deployment #1853

Closed · wants to merge 7 commits

14 changes: 14 additions & 0 deletions Makefile
@@ -19,8 +19,10 @@ build-and-push-resource-processor: build-resource-processor-vm-porter-image push
build-and-push-gitea: build-gitea-image push-gitea-image
build-and-push-guacamole: build-guacamole-image push-guacamole-image
build-and-push-mlflow: build-mlflow-image push-mlflow-image
build-and-push-ohdsi: build-ohdsi-broadsea-methods build-ohdsi-broadsea-webtools push-ohdsi-methods push-ohdsi-webtools
build-and-push-airlock-processor: build-airlock-processor push-airlock-processor


deploy-shared-services: firewall-install
. ${MAKEFILE_DIR}/devops/scripts/load_env.sh ./templates/core/.env \
&& if [ "$${DEPLOY_GITEA}" == "true" ]; then $(MAKE) gitea-install; fi \
@@ -90,6 +92,12 @@ build-guacamole-image:
build-mlflow-image:
$(call build_image,"mlflow-server","${MAKEFILE_DIR}/templates/workspace_services/mlflow/mlflow-server/version.txt","${MAKEFILE_DIR}/templates/workspace_services/mlflow/mlflow-server/docker/Dockerfile","${MAKEFILE_DIR}/templates/workspace_services/mlflow/mlflow-server")

build-ohdsi-broadsea-methods:
$(call build_image,"broadsea-methods","${MAKEFILE_DIR}/templates/workspace_services/ohdsi/broadsea/apps/broadsea-methods/version.txt","${MAKEFILE_DIR}/templates/workspace_services/ohdsi/broadsea/apps/broadsea-methods/Dockerfile","${MAKEFILE_DIR}/templates/workspace_services/ohdsi/broadsea/apps/broadsea-methods")
build-ohdsi-broadsea-webtools:
$(call build_image,"broadsea-webtools","${MAKEFILE_DIR}/templates/workspace_services/ohdsi/broadsea/apps/broadsea-webtools/version.txt","${MAKEFILE_DIR}/templates/workspace_services/ohdsi/broadsea/apps/broadsea-webtools/Dockerfile","${MAKEFILE_DIR}/templates/workspace_services/ohdsi/broadsea/apps/broadsea-webtools")

# templates/workspace_services/ohdsi/broadsea/apps/broadsea-webtools/Dockerfile
build-airlock-processor:
$(call build_image,"airlock-processor","${MAKEFILE_DIR}/airlock_processor/_version.py","${MAKEFILE_DIR}/airlock_processor/Dockerfile","${MAKEFILE_DIR}/airlock_processor/")

@@ -125,6 +133,12 @@ push-guacamole-image:
push-mlflow-image:
$(call push_image,"mlflow-server","${MAKEFILE_DIR}/templates/workspace_services/mlflow/mlflow-server/version.txt")

push-ohdsi-methods:
$(call push_image,"broadsea-methods","${MAKEFILE_DIR}/templates/workspace_services/ohdsi/broadsea/apps/broadsea-methods/version.txt")

push-ohdsi-webtools:
$(call push_image,"broadsea-webtools","${MAKEFILE_DIR}/templates/workspace_services/ohdsi/broadsea/apps/broadsea-webtools/version.txt")

push-airlock-processor:
$(call push_image,"airlock-processor","${MAKEFILE_DIR}/airlock_processor/_version.py")

7 changes: 7 additions & 0 deletions templates/workspace_services/ohdsi/.dockerignore
@@ -0,0 +1,7 @@
# Local .terraform directories
**/.terraform/*

# TF backend files
**/*_backend.tf

Dockerfile.tmpl
32 changes: 32 additions & 0 deletions templates/workspace_services/ohdsi/Dockerfile.tmpl
@@ -0,0 +1,32 @@
FROM debian:buster

# Install Azure CLI
RUN apt-get update \
&& apt-get install -y ca-certificates jq curl apt-transport-https lsb-release gnupg \
&& curl -sL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor | tee /etc/apt/trusted.gpg.d/microsoft.gpg > /dev/null \
&& AZ_REPO=$(lsb_release -cs) \
&& echo "deb [arch=amd64] https://packages.microsoft.com/repos/azure-cli/ $AZ_REPO main" | tee /etc/apt/sources.list.d/azure-cli.list \
&& apt-get update && apt-get -y install azure-cli

RUN az config set extension.use_dynamic_install=yes_without_prompt

RUN az extension add --name azure-firewall

ARG BUNDLE_DIR

# This is a template Dockerfile for the bundle's invocation image
# You can customize it to use different base images, install tools and copy configuration files.
#
# Porter will use it as a template and append lines to it for the mixins
# and to set the CMD appropriately for the CNAB specification.
#
# Add the following line to porter.yaml to instruct Porter to use this template
# dockerfile: Dockerfile.tmpl

# You can control where the mixin's Dockerfile lines are inserted into this file by moving the "# PORTER_MIXINS" line to
# another location in this file. If you remove that line, the mixins' generated content is appended to this file.
# PORTER_MIXINS

# Use the BUNDLE_DIR build argument to copy files into the bundle

COPY . $BUNDLE_DIR
19 changes: 19 additions & 0 deletions templates/workspace_services/ohdsi/README.md
@@ -0,0 +1,19 @@
# Introduction

This folder contains the OHDSI (Broadsea) workspace service template. The commands below are useful while developing the template.

Deploy the service's Terraform directly:

```sh
make terraform-deploy DIR=./templates/workspace_services/ohdsi/
```

Build the Porter bundle:

```sh
make bundle-build DIR=./templates/workspace_services/ohdsi/
```

Install the Porter bundle:

```sh
make bundle-install DIR=./templates/workspace_services/ohdsi/
```

Invoke a custom action on an installed bundle:

```sh
porter invoke [INSTALLATION] --action ACTION [flags]
porter invoke tre-service-ohdsi-installer --action docker_stuff --debug
```
38 changes: 38 additions & 0 deletions templates/workspace_services/ohdsi/broadsea/apps/README.md
@@ -0,0 +1,38 @@
# OHDSI on Azure - Application Deployment

This guide outlines how to deploy the OHDSI applications to your CDM on Azure.

The application deployments are automated with Azure DevOps pipelines, which deploy the following applications:

- [WebAPI](https://github.com/OHDSI/WebAPI)
- [ATLAS](https://github.com/OHDSI/Atlas/)
- [ETL-Synthea](https://github.com/OHDSI/ETL-Synthea) (_optional_)
- [Achilles](https://github.com/OHDSI/Achilles)

## Prerequisites

- Azure resources spun up from Terraform (more information can be found in [Infrastructure Deployment](/infra/README.md))
- CDM Vocabulary uploaded to Storage Account
- Pipelines imported using YAML files

## Setup

The application deployment relies heavily on Docker images and Azure DevOps pipelines. Two Docker images are modified from OHDSI to be compatible with Azure: (1) the [Broadsea WebTools Library](https://github.com/OHDSI/Broadsea-WebTools) (WebAPI + ATLAS) and (2) the [Broadsea Methods Library](https://github.com/OHDSI/Broadsea-MethodsLibrary) (ETL-Synthea + Achilles).
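
For local experimentation, the following is a minimal sketch of building both customized images with plain `docker build`, using the Dockerfile locations referenced by the Makefile targets added in this PR; the image names and `local` tags are illustrative only:

```sh
# Build the customized Broadsea images locally (illustrative names/tags).
docker build -t broadsea-webtools:local \
  -f templates/workspace_services/ohdsi/broadsea/apps/broadsea-webtools/Dockerfile \
  templates/workspace_services/ohdsi/broadsea/apps/broadsea-webtools

docker build -t broadsea-methods:local \
  -f templates/workspace_services/ohdsi/broadsea/apps/broadsea-methods/Dockerfile \
  templates/workspace_services/ohdsi/broadsea/apps/broadsea-methods
```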

You can also review the [setup Atlas / WebApi notes](/docs/setup/setup_atlas_webapi.md) and [setup Achilles / Synthea notes](/docs/setup/setup_achilles_synthea.md) for more details.

### 1. Broadsea Build and Push Pipeline (CI)

- This CI pipeline publishes the [WebAPI script](/sql/scripts/Web_Api_Refresh.sql), which writes sources to the `webapi` tables, as a pipeline artifact.
- This CI pipeline also builds and pushes a custom OHDSI [Broadsea WebTools](/apps/broadsea-webtools/Dockerfile) Docker image to the environment container registry.

For more details, you can review the [broadsea build pipeline notes](/pipelines/README.md/#broadsea-build-pipeline).
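
The pipeline definition itself lives in the repository; as a rough, hedged illustration, the build-and-push step amounts to something like the following (the registry name and image tag are placeholders):

```sh
# Assumes an Azure Container Registry referenced by $ACR_NAME and an authenticated Azure CLI session.
az acr login --name "$ACR_NAME"

# Build the custom Broadsea WebTools image and push it to the environment container registry.
docker build -t "$ACR_NAME.azurecr.io/broadsea-webtools:latest" \
  -f apps/broadsea-webtools/Dockerfile apps/broadsea-webtools
docker push "$ACR_NAME.azurecr.io/broadsea-webtools:latest"
```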

### 2. Broadsea Release Pipeline (CD)

- The CD pipeline deploys the latest [Broadsea Webtools](/apps/broadsea-webtools/Dockerfile) Docker image to the App Service. This downloads and installs OHDSI WebAPI by creating the `webapi` schema in the CDM database.
- The pipeline also loads synthetic test data into an OMOP CDM database.
- To do so, it leverages OHDSI's [ETL from Synthea](https://github.com/OHDSI/ETL-Synthea) project, an R library that converts synthetic patient data from the [Synthea](https://github.com/synthetichealth/synthea) tool.
- The pipeline also runs [Achilles](https://github.com/OHDSI/Achilles) characterization on a specific data set in the OMOP CDM; this should run each time new data is imported into the CDM.

For more details, you can review the [broadsea release pipeline notes](/pipelines/README.md/#broadsea-release-pipeline).
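
As a hedged sketch only, the "deploy the image to the App Service" step can be approximated with the Azure CLI; the app, resource group, and registry names below are placeholders:

```sh
# Point the App Service at the newly pushed image (placeholder names).
az webapp config container set \
  --name "$APP_SERVICE_NAME" \
  --resource-group "$RESOURCE_GROUP" \
  --docker-custom-image-name "$ACR_NAME.azurecr.io/broadsea-webtools:latest" \
  --docker-registry-server-url "https://$ACR_NAME.azurecr.io"

# Restart so the new container is pulled.
az webapp restart --name "$APP_SERVICE_NAME" --resource-group "$RESOURCE_GROUP"
```
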
30 changes: 30 additions & 0 deletions templates/workspace_services/ohdsi/broadsea/apps/broadsea-methods/Dockerfile
@@ -0,0 +1,30 @@
# hadolint ignore=DL3006
FROM rocker/tidyverse

# hadolint ignore=DL3008,DL3009,DL3015
RUN apt-get update && apt-get install -y python-dev openjdk-8-jdk liblzma-dev libbz2-dev \
&& R CMD javareconf

## Install Rserve
RUN install2.r \
Rserve \
RSclient \
openssl \
httr \
xml2 \
remotes \
&& rm -rf /tmp/downloaded_packages/ /tmp/*.rds

## TODO - Download and unzip jdbc drivers and delete after COPY step.
# https://docs.microsoft.com/en-us/sql/connect/jdbc/download-microsoft-jdbc-driver-for-sql-server?view=sql-server-ver15

## Copy JDBC Drivers
COPY jdbc_drivers /home/jdbc_drivers

## Install OHDSI R packages
RUN installGithub.r \
OHDSI/Achilles \
OHDSI/ETL-Synthea \
&& rm -rf /tmp/downloaded_packages/ /tmp/*.rds

CMD ["Rscript"]
188 changes: 188 additions & 0 deletions templates/workspace_services/ohdsi/broadsea/apps/broadsea-methods/README.md
@@ -0,0 +1,188 @@
# Broadsea-Methods

This is a combined image for [Achilles](https://github.com/OHDSI/Achilles) and [ETL-Synthea](https://github.com/OHDSI/ETL-Synthea).

You can review how [Achilles](#achilles) and [ETL-Synthea](#synthea-etl) are used in this project.

## Achilles

[Achilles](https://github.com/OHDSI/Achilles) is an R package used to automate the characterization of OMOP CDM data. It provides descriptive statistics and data quality checks on OMOP CDM databases. Achilles generally succeeds only if data already exists in the OMOP CDM, so it is important to run this step after data has been imported.

For convenience, you may want to understand more about the [achilles.R script](#achillesr) and the [Achilles-test.R script](#achilles-testr).

### Prerequisites

In order to set up Achilles, you can work through the following steps:

## Step 1. Import Data

As a prerequisite to running Achilles, you will need OMOP CDM data. You can use the following approach to accomplish this step:

1. For development and test purposes, you can import data to your OMOP CDM using the [broadsea_release_pipeline](/pipelines/broadsea_release_pipeline.yaml) which includes a step to run [synthea-etl](/pipelines/README.md/#broadsea-release-pipeline).

## Step 2. Build broadsea-methods Image

1. You can build and push the broadsea-methods image (which includes Achilles and ETL-Synthea) to Azure Container Registry using the [broadsea_build_pipeline](/pipelines/broadsea_build_pipeline.yaml); see the CLI sketch below. You can refer to these [Pipeline Notes](/pipelines/README.md/#broadsea-build-pipeline) for more details.
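
If you prefer the command line to the Azure DevOps portal, a hedged sketch of queuing the imported pipeline looks like this; the pipeline name, organization, and project are placeholders that must match your own project:

```sh
# Queue the Broadsea build pipeline from the CLI (requires the azure-devops CLI extension).
az extension add --name azure-devops
az pipelines run --name broadsea_build_pipeline \
  --org "https://dev.azure.com/<your-organization>" --project "<your-project>"
```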

### Script Notes

The following scripts will be mounted as part of the [broadsea_release_pipeline](/pipelines/broadsea_release_pipeline.yaml).

### Achilles.R

The [achilles.R script](/apps/broadsea-methods/achilles.R) will be loaded with the Docker container. This script will connect to the OMOP CDM and perform the following steps:

1. Set database [compatibility level](https://docs.microsoft.com/en-us/sql/t-sql/statements/alter-database-transact-sql-compatibility-level?view=sql-server-ver15) to a lower level for running `achilles`

```sql
ALTER DATABASE [my_sql_database_name] SET compatibility_level = 110
```

> This step is included in the script as a workaround for the issue where ['the query processor ran out of internal resources and could not produce a query plan'](/docs/troubleshooting/troubleshooting_achilles_synthea.md#the-query-processor-ran-out-of-internal-resources-and-could-not-produce-a-query-plan).
> By setting the compatibility level to 110, Azure SQL takes the default compatibility level associated with SQL Server 2012, which causes it to use an [older query optimizer](https://docs.microsoft.com/en-us/sql/t-sql/statements/alter-database-transact-sql-compatibility-level?view=sql-server-ver15#differences-between-lower-compatibility-levels-and-level-120) to produce the query plan. The tradeoff is that Azure SQL will not be able to run SQL queries that require its default compatibility level.
2. Run [achilles](https://raw.githubusercontent.com/OHDSI/Achilles/master/extras/Achilles.pdf)
> You may run into a known issue with an [arithmetic overflow error](/docs/troubleshooting/troubleshooting_achilles_synthea.md/#arithmetic-overflow-error-converting-numeric-to-data-type-varchar). You will need to ensure you're picking up the latest changes for Achilles by [rebuilding the image](#step-2-build-broadsea-methods-image) so that it includes the [committed Achilles fix](https://github.com/OHDSI/Achilles/commit/e21c7e16cb4cbd653e3e572db86b536cdda86aca).
3. Set database compatibility level back to the default for Azure SQL

```sql
ALTER DATABASE [my_sql_database_name] SET compatibility_level = 150
```

> If Azure SQL is left at a compatibility_level lower than the default, queries that rely on features unavailable at the lower compatibility levels (including those run as part of [Prerequisite step 1](#step-1-import-data)) may fail.

This script also uses the following environment variables:

| Environment Variable Name | Description |
|--------------|-----------|
| SQL_SERVER_NAME | Azure SQL Server Name (e.g. `my-sql-server` if you are using `my-sql-server.database.windows.net`) |
| SQL_DATABASE_NAME | Azure SQL Database Name (e.g. `my-sql-server-db`) which has the CDM |
| CDM_SCHEMA | Schema for CDM (e.g. `dbo`) |
| CDM_VERSION | CDM Version (e.g. for `5.3.1` use `5.3`, see [Achilles Validation](https://github.com/OHDSI/Achilles/blob/c6b7adb6330e75c2311880db2eb3dc4c12341c4f/inst/sql/sql_server/validate_schema.sql#L501)) |
| RESULTS_SCHEMA | Schema for Results used by Achilles (e.g. `webapi`) |
| VOCAB_SCHEMA | Schema for Vocabulary (e.g. `dbo`) |
| SOURCE_NAME | CDM source name, the default is `OHDSI CDM V5 Database` |
| NUM_THREADS | Number of threads to use with Achilles, the default is `1` |
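
A minimal sketch of running achilles.R inside the broadsea-methods container, mirroring the synthea-etl example later in this document; the image name, schema values, and credentials are assumptions to adapt to your environment:

```sh
# From the /apps/broadsea-methods directory; all values below are illustrative.
export SQL_SERVER_NAME='omop-sql-server'
export SQL_DATABASE_NAME='synthea830'
export CDM_SCHEMA='cdm'
export CDM_VERSION='5.3'
export RESULTS_SCHEMA='webapi'
export VOCAB_SCHEMA='cdm'
export SOURCE_NAME='OHDSI CDM V5 Database'
export NUM_THREADS='1'

# OMOP_USER and OMOP_PASS are assumed to already be set in the environment,
# as in the synthea-etl example below.
docker run -t --rm -v "$PWD":/home/docker -w /home/docker \
  -e OMOP_USER -e OMOP_PASS -e SQL_SERVER_NAME -e SQL_DATABASE_NAME \
  -e CDM_SCHEMA -e CDM_VERSION -e RESULTS_SCHEMA -e VOCAB_SCHEMA \
  -e SOURCE_NAME -e NUM_THREADS broadsea-methods Rscript achilles.R
```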

### Achilles-test.R

The [achilles-test.R script](/apps/broadsea-methods/achilles-test.R) will perform a smoke test which checks if the `achilles_results` and `achilles_analysis` tables are populated.

This script also uses the following environment variables:

| Environment Variable Name | Description |
|--------------|-----------|
| SQL_SERVER_NAME | Azure SQL Server Name (e.g. `my-sql-server` if you are using `my-sql-server.database.windows.net`) |
| SQL_DATABASE_NAME | Azure SQL Database Name (e.g. `my-sql-server-db`) which has the CDM |
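
For a hedged, manual version of the same check (assuming `sqlcmd` is installed locally, SQL authentication is used, and the results schema is `webapi` as in the Achilles table above):

```sh
# Manually verify that the Achilles results tables are populated (illustrative values).
export SQL_SERVER_NAME='my-sql-server'
export SQL_DATABASE_NAME='my-sql-server-db'

sqlcmd -S "${SQL_SERVER_NAME}.database.windows.net" -d "$SQL_DATABASE_NAME" \
  -U "$OMOP_USER" -P "$OMOP_PASS" \
  -Q "SELECT COUNT(*) AS achilles_results_rows FROM webapi.achilles_results;
      SELECT COUNT(*) AS achilles_analysis_rows FROM webapi.achilles_analysis;"
```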

## Synthea-ETL

This [directory](/apps/broadsea-methods/) contains exploratory work for generating and loading synthetic patient data via scripts found here: [https://github.com/OHDSI/ETL-Synthea](https://github.com/OHDSI/ETL-Synthea).

For convenience, you may want to learn more about the [synthea-etl.R script](#synthea-etlr-notes), the [synthea-etl-test.R script](#synthea-etl-testr-notes), and other [local development notes](#running-r-example-packages-via-dockerfile).

### Synthea-etl.R Notes

This [script](/apps/broadsea-methods/synthea-etl.R) performs an ETL to transfer Synthea-generated data into your Azure SQL CDM.

This script uses the following environment variables:

| Environment Variable Name | Description |
|--------------|-----------|
| SQL_SERVER_NAME | Azure SQL Server Name (e.g. `my-sql-server` if you are using `my-sql-server.database.windows.net`) |
| SQL_DATABASE_NAME | Azure SQL Database Name (e.g. `my-sql-server-db`) which has the CDM |
| CDM_SCHEMA | Schema for CDM (e.g. `dbo`) |
| CDM_VERSION | CDM Version (e.g. for `5.3.1` use `5.3`, see [Synthea Validation](https://github.com/OHDSI/ETL-Synthea/blob/master/R/CreateVocabMapTables.r#L25)) |
| SYNTHEA_SCHEMA | Schema for Synthea (e.g. `synthea`) |
| SYNTHEA_VERSION | Synthea Version (e.g. `2.7.0`) |
| SYNTHEA_PATH | Synthea Directory for the synthea CSV data (e.g. `/home/docker/synthea_data/csv/`) |
| VOCAB_PATH | Vocabulary files path, e.g. `/home/docker/vocab_files` |

### Synthea-etl-test.R Notes

This [script](/apps/broadsea-methods/synthea-etl-test.R) is a smoke test to validate that the ETL from [Synthea](#synthea-etlr-notes) ran successfully in your Azure SQL CDM.

This script uses the following environment variables:

| Environment Variable Name | Description |
|--------------|-----------|
| SQL_SERVER_NAME | Azure SQL Server Name (e.g. `my-sql-server` if you are using `my-sql-server.database.windows.net`) |
| SQL_DATABASE_NAME | Azure SQL Database Name (e.g. `my-sql-server-db`) which has the CDM |

### Running R example packages via Dockerfile

You may find it helpful to review some of the local development notes (that have since been incorporated into the [broadsea_release_pipeline](/pipelines/README.md/#broadsea-release-pipeline) for environment release) for debugging or future development purposes:

1. How to use Synthea to [generate synthetic patient data](#use-synthea-to-generate-synthetic-patient-data)
2. How to [build and use the Docker Container with R Dependencies](#build-and-use-the-docker-container-with-r-dependencies)
3. Review other helpful [exploration notes](#exploration-notes)

#### Use Synthea to generate synthetic patient data

1. You can use Synthea to generate synthetic patient data (which is incorporated into the [broadsea_release_pipeline](/pipelines/README.md/#broadsea-release-pipeline))

Generate Synthea files via release jar (Synthea v2.7.0)

```sh
# From this /apps/broadsea-methods directory
wget https://github.com/synthetichealth/synthea/releases/download/v2.7.0/synthea-with-dependencies.jar

SAMPLE_SIZE=10 # Will generate 10 live patients, possibly extra dead patients as well.

java -jar synthea-with-dependencies.jar -p $SAMPLE_SIZE -c synthea-settings.conf
```

[Other generation seeds and configurations can be specified as well.](https://github.com/synthetichealth/synthea#generate-synthetic-patients)

2. You can also test the vocabulary files locally if they're unzipped and saved to `/vocab_files`

> OPTIONAL - Download Vocabulary files from [ATHENA](https://athena.ohdsi.org/vocabulary/list)

```sh
# From this /apps/broadsea-methods directory
# Download the default selected vocabulary from Athena and unzip it to: ./vocab_files/
```

- Given that the [broadsea_release_pipeline](/pipelines/README.md/#broadsea-release-pipeline) assumes the vocabulary exists in the target Azure SQL CDM, you can utilize this approach for **local development** purposes.

#### Build and use the Docker Container with R Dependencies

1. You can use the following command to build the Docker container with R dependencies:

```sh
# From this /apps/broadsea-methods directory
docker build -t achilles-synthea-etl .
```

2. You can upload to the Azure SQL DB via the R script running in the Docker container:

```sh
# From this /apps/broadsea-methods directory

export SQL_SERVER_NAME='omop-sql-server'
export SQL_DATABASE_NAME='synthea830'

# database schema used for connecting to the CDM.
export CDM_SCHEMA='cdm'
export CDM_VERSION='5.3'
export SYNTHEA_SCHEMA='synthea'
export SYNTHEA_VERSION='2.7.0'

# Location of the synthea output CSV files.
export SYNTHEA_PATH='/home/docker/synthea_data/csv/'

# TODO: Remove when no longer needed.
export VOCAB_PATH='/home/docker/vocab_files'
export CREATE_CDM_SCHEMA='true'

# Run with volume mount and env vars as parameters
docker run -t --rm -v "$PWD":/home/docker -w /home/docker \
-e OMOP_USER -e OMOP_PASS -e SQL_SERVER_NAME -e SQL_DATABASE_NAME \
-e CDM_SCHEMA -e CDM_VERSION -e SYNTHEA_SCHEMA -e SYNTHEA_VERSION \
-e SYNTHEA_PATH -e VOCAB_PATH -e CREATE_CDM_SCHEMA synthea-etl Rscript synthea-etl.R
```

#### Exploration Notes

- [Sql Server DatabaseConnector - prefix schema with database name.](https://forums.ohdsi.org/t/how-to-use-databaseconnector-createconnectiondetails-for-sql-server-to-connect-to-the-right-database/12725)
- [Learnings from using the Synthea data generator for use with ETL-Synthea](https://github.com/OHDSI/ETL-Synthea/issues/45)
- [Fixes required to load Synthea Tables](https://github.com/OHDSI/ETL-Synthea/commit/af15bc1f42097fb08b2291066daf399ed2b68fa1)