diff --git a/.github/workflows/docker_build_test_push.yml b/.github/workflows/docker_build_test_push.yml
index 829879c95b..a9127b2a53 100644
--- a/.github/workflows/docker_build_test_push.yml
+++ b/.github/workflows/docker_build_test_push.yml
@@ -10,6 +10,32 @@ on:
     branches:
       - master
 
+env:
+  # - change per env
+  TF_VAR_project_name: fffeastcd
+  # - optionally change per env
+  TERRAFORM_BACKEND_STORAGE_ACCOUNT_RESOURCE_GROUP_NAME: fffeasttf
+  # - change per env
+  # - manually create this storage account in the
+  #   resource group TERRAFORM_BACKEND_STORAGE_ACCOUNT_RESOURCE_GROUP_NAME
+  TERRAFORM_BACKEND_STORAGE_ACCOUNT_NAME: fffeastcdtf
+  # - manually create this container in the created storage account
+  TERRAFORM_BACKEND_CONTAINER_NAME: terraform
+
+  # no need to change these
+
+  # Base name for images published into ACR (with -core, -serving suffix)
+  CONTAINERREGISTRY_IMAGENAMEBASE: fffeast
+
+  # Blob names used to store state in Terraform backend
+  TERRAFORM_BACKEND_BLOB_PATH_INFRA: infra
+  TERRAFORM_BACKEND_BLOB_PATH_APP: app
+
+  # Name of end-to-end test pod
+  E2E_TEST_POD: e2e-test
+
+  DOCKER_COMPOSE: docker-compose -f infra/docker-compose/docker-compose.yml -f infra/docker-compose/docker-compose.online.yml -f infra/docker-compose/docker-compose.databricks.yml
+
 jobs:
 
   build-and-test:
@@ -25,6 +51,48 @@ jobs:
     # Checkout sources from Git
     - uses: actions/checkout@v2
 
+    - name: Setup Terraform
+      uses: hashicorp/setup-terraform@v1
+      with:
+        terraform_version: 0.12.24
+        terraform_wrapper: false
+
+    - name: Set Terraform variables infra
+      run: |
+        echo ::set-env name=ARM_CLIENT_ID::${{ secrets.ARM_CLIENT_ID }}
+        echo ::set-env name=ARM_CLIENT_SECRET::${{ secrets.ARM_CLIENT_SECRET }}
+        echo ::set-env name=ARM_SUBSCRIPTION_ID::${{ secrets.ARM_SUBSCRIPTION_ID }}
+        echo ::set-env name=ARM_TENANT_ID::${{ secrets.ARM_TENANT_ID }}
+
+    - name: Terraform init infra
+      # cache plugin locally to avoid re-downloading it at every run
+      run: TF_PLUGIN_CACHE_DIR=$HOME/.terraform.d/plugin-cache terraform init -input=false -backend-config=container_name="$TERRAFORM_BACKEND_CONTAINER_NAME" -backend-config=key="$TERRAFORM_BACKEND_BLOB_PATH_INFRA" -backend-config=storage_account_name="$TERRAFORM_BACKEND_STORAGE_ACCOUNT_NAME" -backend-config=resource_group_name="$TERRAFORM_BACKEND_STORAGE_ACCOUNT_RESOURCE_GROUP_NAME"
+      working-directory: infra/terraform/infra
+
+    - name: Terraform plan infra
+      run: terraform plan -out=tfplan -input=false
+      working-directory: infra/terraform/infra
+
+    - name: Terraform apply infra
+      run: terraform apply -input=false -auto-approve tfplan
+      working-directory: infra/terraform/infra
+
+    - name: Terraform outputs infra
+      run: |
+        # Set a TF_VAR_ environment variable for each terraform output.
+        # Mask any secret terraform output.
+        terraform output -json | jq -r '
+          . as $in
+          | keys[]
+          | ($in[.].value | tostring | gsub("\\\\"; "\\\\") | gsub("\n"; "\\n")) as $value
+          | ($in[.].sensitive | tostring) as $sensitive
+          | [
+              if $in[.].sensitive then "::add-mask::" + $value else "" end, # mask value in console output
+              "::set-env name=TF_VAR_" + . + "::" + $value                  # set value as environment variable
+            ]
+          | .[]'
+      working-directory: infra/terraform/infra
+
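The "Terraform outputs infra" step turns every `terraform output` into a `TF_VAR_`-prefixed environment variable for later steps, masking sensitive values first. A quick way to sanity-check the jq filter locally is to feed it a hand-written payload in the shape of `terraform output -json`; the output names and values below are illustrative placeholders, not real outputs of this stack:

```sh
# Hypothetical sample mimicking the shape of `terraform output -json`.
cat > sample-outputs.json <<'EOF'
{
  "acr_login_server": { "sensitive": false, "type": "string", "value": "example.azurecr.io" },
  "databricks_token": { "sensitive": true, "type": "string", "value": "dapi-example" }
}
EOF

# Same filter as in the workflow step above.
jq -r '
  . as $in
  | keys[]
  | ($in[.].value | tostring | gsub("\\\\"; "\\\\") | gsub("\n"; "\\n")) as $value
  | ($in[.].sensitive | tostring) as $sensitive
  | [
      if $in[.].sensitive then "::add-mask::" + $value else "" end,
      "::set-env name=TF_VAR_" + . + "::" + $value
    ]
  | .[]' sample-outputs.json

# Expected output includes:
#   ::set-env name=TF_VAR_acr_login_server::example.azurecr.io
#   ::add-mask::dapi-example
#   ::set-env name=TF_VAR_databricks_token::dapi-example
```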
    # Cache the Maven repository across runs.
     - name: Cache Maven repository
       id: cache-maven
@@ -43,36 +111,146 @@
       if: steps.cache-maven.outputs.cache-hit != 'true'
       run: docker run --rm --user $(id -u):$(id -g) -e MAVEN_CONFIG=/build/.m2 -v $PWD:/build maven:3.6-jdk-11 bash -c "cp -R /build /tmp && mvn -f /tmp/build -Dmaven.repo.local=/build/.m2/repository -Dgpg.skip=true -B clean verify -DskipTests"
 
+    - name: Set image name and version settings for docker-compose
+      run: |
+        echo "::set-env name=COMPOSE_PROJECT_NAME::feast"
+        echo "::set-env name=FEAST_VERSION::v${{ github.sha }}"
+        echo "::set-env name=FEAST_CORE_IMAGE::$TF_VAR_acr_login_server/$CONTAINERREGISTRY_IMAGENAMEBASE-core"
+        echo "::set-env name=FEAST_SERVING_IMAGE::$TF_VAR_acr_login_server/$CONTAINERREGISTRY_IMAGENAMEBASE-serving"
+        echo "::set-env name=FEAST_JUPYTER_IMAGE::$TF_VAR_acr_login_server/$CONTAINERREGISTRY_IMAGENAMEBASE-jupyter"
+        echo "::set-env name=FEAST_DATABRICKS_EMULATOR_IMAGE::databricks-emulator-dev"
+
     # Build images in parallel.
     - name: Build Feast runtime images
-      run: docker-compose -f infra/docker-compose/docker-compose.yml -f infra/docker-compose/docker-compose.online.yml build --parallel core online-serving jupyter
-      env:
-        FEAST_VERSION: v${{ github.sha }}
-        FEAST_CORE_IMAGE: ${{ secrets.CONTAINERREGISTRY_URL }}/${{ secrets.CONTAINERREGISTRY_IMAGENAMEBASE }}-core
-        FEAST_SERVING_IMAGE: ${{ secrets.CONTAINERREGISTRY_URL }}/${{ secrets.CONTAINERREGISTRY_IMAGENAMEBASE }}-serving
-        FEAST_JUPYTER_IMAGE: ${{ secrets.CONTAINERREGISTRY_URL }}/${{ secrets.CONTAINERREGISTRY_IMAGENAMEBASE }}-jupyter
-
-    - name: test docker compose
-      run: ./infra/scripts/test-docker-compose-databricks.sh
-      env:
-        COMPOSE_PROJECT_NAME: feast
-        FEAST_VERSION: v${{ github.sha }}
-        FEAST_CORE_IMAGE: ${{ secrets.CONTAINERREGISTRY_URL }}/${{ secrets.CONTAINERREGISTRY_IMAGENAMEBASE }}-core
-        FEAST_SERVING_IMAGE: ${{ secrets.CONTAINERREGISTRY_URL }}/${{ secrets.CONTAINERREGISTRY_IMAGENAMEBASE }}-serving
-        FEAST_JUPYTER_IMAGE: ${{ secrets.CONTAINERREGISTRY_URL }}/${{ secrets.CONTAINERREGISTRY_IMAGENAMEBASE }}-jupyter
+      run: $DOCKER_COMPOSE build --parallel core online-serving jupyter databricks-emulator
 
     # Login to Azure Container Registry.
     - name: Login to Azure Container Registry
       uses: azure/docker-login@v1
       with:
-        login-server: ${{ secrets.CONTAINERREGISTRY_URL }}
-        username: ${{ secrets.CONTAINERREGISTRY_USERNAME }}
-        password: ${{ secrets.CONTAINERREGISTRY_PASSWORD }}
+        login-server: ${{ env.TF_VAR_acr_login_server }}
+        username: ${{ env.TF_VAR_acr_admin_username }}
+        password: ${{ env.TF_VAR_acr_admin_password }}
 
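With the three compose files folded into the `DOCKER_COMPOSE` variable, the CI build can be reproduced locally by exporting the same image variables first. A minimal sketch, assuming it runs from the repo root; the image names and tag are local placeholders rather than what CI derives from Terraform outputs:

```sh
# Local stand-ins for the values CI sets from Terraform outputs / github.sha.
export COMPOSE_PROJECT_NAME=feast
export FEAST_VERSION=vlocal
export FEAST_CORE_IMAGE=feast-core
export FEAST_SERVING_IMAGE=feast-serving
export FEAST_JUPYTER_IMAGE=feast-jupyter
export FEAST_DATABRICKS_EMULATOR_IMAGE=databricks-emulator-dev

# Same file stack as the workflow's DOCKER_COMPOSE env variable.
DOCKER_COMPOSE="docker-compose \
  -f infra/docker-compose/docker-compose.yml \
  -f infra/docker-compose/docker-compose.online.yml \
  -f infra/docker-compose/docker-compose.databricks.yml"

$DOCKER_COMPOSE build --parallel core online-serving jupyter databricks-emulator
```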
    # Promote successfully tested container to Azure Container Registry.
     - name: Push image to ACR
+      run: $DOCKER_COMPOSE push core online-serving jupyter
+
+    # Install python to enable Databricks CLI install (as a workaround for
+    # the databricks provider DBFS bug)
+    - uses: actions/setup-python@v2
+      with:
+        python-version: '2.x'
+
+    - name: Upload Spark JARs
       run: |
-        docker-compose -f infra/docker-compose/docker-compose.yml push core
+        set -ex
+        docker rm spark-jobs || true
+        docker create --name spark-jobs ${FEAST_DATABRICKS_EMULATOR_IMAGE}:${FEAST_VERSION}
+        mkdir -p sparkjars
+        docker cp spark-jobs:/opt/sparkjars sparkjars
+        docker rm spark-jobs
+        echo "::set-env name=TF_VAR_spark_job_jars::$PWD/sparkjars"
+
+    - name: Set kubeconfig
+      run: |
+        base64 -d <<<"$TF_VAR_kube_config" > kube_config
+        echo "::set-env name=KUBECONFIG::$PWD/kube_config"
+
+    - name: Install Terraform databricks provider
+      # TODO: should simply run this command:
+      #   curl https://raw.githubusercontent.com/databrickslabs/databricks-terraform/master/godownloader-databricks-provider.sh | bash -xs -- -b $HOME/.terraform.d/plugins
+      # but blocked by issues:
+      # - https://github.com/databrickslabs/terraform-provider-databricks/issues/128
+      # - https://github.com/databrickslabs/terraform-provider-databricks/issues/123
+      run: |
+        PLUGINS=$HOME/.terraform.d/plugins
+        test -s $PLUGINS/terraform-provider-databricks_v0.2.0 && exit
+        wget -O databricks-provider.tgz https://github.com/databrickslabs/terraform-provider-databricks/releases/download/v0.2.0/databricks-terraform_0.2.0_Linux_64-bit.tar.gz
+        mkdir -p $PLUGINS
+        tar -C $PLUGINS -zxvf databricks-provider.tgz terraform-provider-databricks_v0.2.0
+
+    - name: Set Terraform variables app
+      run: |
+        echo "::set-env name=TF_VAR_feast_core_image_repository::$FEAST_CORE_IMAGE"
+        echo "::set-env name=TF_VAR_feast_serving_image_repository::$FEAST_SERVING_IMAGE"
+        echo "::set-env name=TF_VAR_feast_version::$FEAST_VERSION"
+        echo "::set-env name=TF_VAR_run_number::$GITHUB_RUN_NUMBER"
+
+    - name: Terraform init app
+      # cache plugin locally to avoid re-downloading it at every run
+      run: TF_PLUGIN_CACHE_DIR=$HOME/.terraform.d/plugin-cache terraform init -input=false -backend-config=container_name="$TERRAFORM_BACKEND_CONTAINER_NAME" -backend-config=key="$TERRAFORM_BACKEND_BLOB_PATH_APP" -backend-config=storage_account_name="$TERRAFORM_BACKEND_STORAGE_ACCOUNT_NAME" -backend-config=resource_group_name="$TERRAFORM_BACKEND_STORAGE_ACCOUNT_RESOURCE_GROUP_NAME"
+      working-directory: infra/terraform/app
+
+    - name: Terraform destroy app
+      run: terraform destroy -input=false -auto-approve
+      working-directory: infra/terraform/app
+
+    - name: Terraform plan app
+      run: terraform plan -out=tfplan -input=false
+      working-directory: infra/terraform/app
+
+    - name: Terraform apply app
+      run: terraform apply -input=false -auto-approve tfplan
+      working-directory: infra/terraform/app
       env:
-        FEAST_VERSION: v${{ github.sha }}
-        FEAST_CORE_IMAGE: ${{ secrets.CONTAINERREGISTRY_URL }}/${{ secrets.CONTAINERREGISTRY_IMAGENAMEBASE }}-core
+        # capture debugging info, see https://www.terraform.io/docs/internals/debugging.html
+        TF_LOG: debug
+
+    - name: Cancel any active Databricks runs
+      run: |
+        set -euo pipefail
+        export DATABRICKS_HOST="$TF_VAR_databricks_workspace_url"
+        export DATABRICKS_TOKEN=$(terraform output databricks_token)
+        set -x
+        pip install databricks-cli
+        databricks runs list --active-only | cut -f1 -d ' ' | xargs -tr -n1 databricks runs cancel --run-id
+      working-directory: infra/terraform/app
+
+    - uses: azure/setup-kubectl@v1
+
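The "Upload Spark JARs" step above uses the create/cp/rm idiom: `docker create` materializes a stopped container from the image so `docker cp` can lift the JARs out without ever starting a process. A standalone sketch of the same idiom, with a placeholder image name:

```sh
# Extract /opt/sparkjars from an image without running it.
# IMAGE is a placeholder tag; CI uses $FEAST_DATABRICKS_EMULATOR_IMAGE:$FEAST_VERSION.
IMAGE=databricks-emulator-dev:vlocal

docker rm jar-extract 2>/dev/null || true   # remove any leftover container
docker create --name jar-extract "$IMAGE"   # stopped container, nothing executes
mkdir -p sparkjars
docker cp jar-extract:/opt/sparkjars sparkjars
docker rm jar-extract

# docker cp copies a source directory *into* an existing target directory,
# so the JARs end up under sparkjars/sparkjars/.
ls sparkjars/sparkjars
```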
+    - name: Prepare e2e test pod and clear Redis data
+      run: |
+        set -ex
+        # Start e2e test pod
+        kubectl delete pod "$E2E_TEST_POD" || true
+        kubectl run "$E2E_TEST_POD" --restart=Never --image="$FEAST_JUPYTER_IMAGE:$FEAST_VERSION" --limits 'memory=4Gi'
+
+        # While the test pod is starting, clear all data from Redis
+        kubectl run -it --rm --restart=Never --image=redis redisflush$GITHUB_RUN_NUMBER --command -- redis-cli -h $TF_VAR_redis_hostname FLUSHALL
+
+        kubectl wait --timeout 45m --for=condition=ContainersReady pod "$E2E_TEST_POD"
+
+    - name: Run e2e tests for Redis
+      run: |
+        set -ex
+        kubectl exec "$E2E_TEST_POD" -- bash -c "
+          cd /feast/tests/e2e
+          pytest -x -rA -s basic-ingest-redis-serving.py --core_url feast-core:6565 --serving_url=feast-serving-feast-online-serving:6566
+        "
+
+    - name: Run ingestion tests for FF Data Science scenarios
+      run: |
+        set -ex
+        kubectl exec "$E2E_TEST_POD" -- bash -c "
+          cd /feast/tests/ds_scenarios
+          pytest -x -rA -s test-ingest.py --core_url feast-core:6565 --serving_url=feast-serving-feast-online-serving:6566
+        "
+
+    - name: Delete e2e test pod
+      run: kubectl delete pod "$E2E_TEST_POD"
+
+    - name: Output logs
+      if: ${{ always() }}
+      run: |
+        set +e
+        set -x
+        kubectl get event
+        kubectl logs svc/feast-core
+        kubectl logs svc/feast-serving-feast-online-serving
+        # Get number of Redis keys
+        kubectl run -it --rm --restart=Never --image=redis redisflush$GITHUB_RUN_NUMBER --command -- redis-cli -h $TF_VAR_redis_hostname DBSIZE
+        # Get consumer group offsets
+        kubectl exec -it svc/feast-services-kafka -- bash -c 'KCG="kafka-consumer-groups --bootstrap-server localhost:9092"; $KCG --list | xargs -rt -n1 $KCG --describe --group'
+        # Get Kafka offsets
+        kubectl exec -it svc/feast-services-kafka -- bash -c 'kafka-run-class kafka.tools.GetOffsetShell --broker-list localhost:9092 --topic feast-features'
diff --git a/.gitignore b/.gitignore
index 30916d43dc..f06ac1c349 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,6 +53,8 @@
 vendor
 .terraform/
 *.tfvars
+*.tfstate
+*.tfstate.backup
 
 # python
 # Byte-compiled / optimized / DLL files
diff --git a/core/src/main/java/feast/core/job/databricks/DatabricksJobManager.java b/core/src/main/java/feast/core/job/databricks/DatabricksJobManager.java
index a2a1e79794..1bd73175d9 100644
--- a/core/src/main/java/feast/core/job/databricks/DatabricksJobManager.java
+++ b/core/src/main/java/feast/core/job/databricks/DatabricksJobManager.java
@@ -292,9 +292,6 @@ private RunNowRequest getRunNowRequest(long databricksJobId) {
   }
 
   private JobsCreateRequest getJobRequest(String jobName, List<String> params) {
-    Arrays.stream(newClusterConfigOptions.getSparkConf().strip().split("\n"));
-    Arrays.stream(newClusterConfigOptions.getSparkConf().strip().split("\n"))
-        .map(s -> s.strip().split("\\s+", 2));
     Map<String, String> sparkConf =
         Arrays.stream(newClusterConfigOptions.getSparkConf().strip().split("\n"))
             .map(s -> s.strip().split("\\s+", 2))
diff --git a/infra/charts/feast/charts/feast-serving/templates/configmap-store.yaml b/infra/charts/feast/charts/feast-serving/templates/configmap-store.yaml
deleted file mode 100644
index 2f8d81bee6..0000000000
--- a/infra/charts/feast/charts/feast-serving/templates/configmap-store.yaml
+++ /dev/null
@@ -1,30 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ template "feast-serving.fullname" . }}-store
-  namespace: {{ .Release.Namespace }}
-  labels:
-    app: {{ template "feast-serving.name" . }}
-    component: serving
-    chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
-    release: {{ .Release.Name }}
-    heritage: {{ .Release.Service }}
-data:
-  store.yaml: |
-    {{- $store := index .Values "store.yaml"}}
-
-    {{- if and .Values.redis.enabled (eq $store.type "REDIS") }}
-
-    {{- if eq .Values.redis.master.service.type "ClusterIP" }}
-    {{- $newConfig := dict "redis_config" (dict "host" (printf "%s-redis-headless" .Release.Name) "port" .Values.redis.redisPort) }}
-    {{- $config := mergeOverwrite $store $newConfig }}
-    {{- end }}
-
-    {{- if and (eq .Values.redis.master.service.type "LoadBalancer") (not (empty .Values.redis.master.service.loadBalancerIP)) }}
-    {{- $newConfig := dict "redis_config" (dict "host" .Values.redis.master.service.loadBalancerIP "port" .Values.redis.redisPort) }}
-    {{- $config := mergeOverwrite $store $newConfig }}
-    {{- end }}
-
-    {{- end }}
-
-    {{- toYaml $store | nindent 4 }}
diff --git a/infra/docker-compose/core/databricks.yml b/infra/docker-compose/core/databricks.yml
index 6b2d569f49..9d2fd28053 100644
--- a/infra/docker-compose/core/databricks.yml
+++ b/infra/docker-compose/core/databricks.yml
@@ -9,7 +9,7 @@ feast:
       options:
         host: http://databricks-emulator:8080
         token: unused
-        jarFile: /opt/spark-ingestion-job.jar
+        jarFile: /opt/sparkjars/spark-ingestion-job.jar
         maxRetries: 0
         timeoutSeconds: -1
         newCluster:
diff --git a/infra/docker/databricks-emulator/Dockerfile b/infra/docker/databricks-emulator/Dockerfile
index 6e19bf327d..5365ecf3dc 100644
--- a/infra/docker/databricks-emulator/Dockerfile
+++ b/infra/docker/databricks-emulator/Dockerfile
@@ -62,7 +62,7 @@ ENV SPARK_HOME /spark
 COPY --from=builder /spark /spark
 COPY --from=builder /build/spark/databricks-emulator/target/databricks-emulator-$REVISION.jar /opt/databricks-emulator.jar
-COPY --from=builder /build/spark/spark-ingestion-job/target/spark-ingestion-job-$REVISION.jar /opt/spark-ingestion-job.jar
+COPY --from=builder /build/spark/spark-ingestion-job/target/spark-ingestion-job-$REVISION.jar /opt/sparkjars/spark-ingestion-job.jar
 CMD ["java",\
     "-Xms2048m",\
     "-Xmx2048m",\
diff --git a/infra/docker/jupyter/Dockerfile b/infra/docker/jupyter/Dockerfile
index 39f191be42..eb4ca763e5 100644
--- a/infra/docker/jupyter/Dockerfile
+++ b/infra/docker/jupyter/Dockerfile
@@ -10,6 +10,7 @@ RUN git init .
 COPY sdk/python sdk/python
 COPY Makefile Makefile
 COPY protos protos
+COPY tests tests
 COPY README.md README.md
 
 # Install Python dependencies
diff --git a/infra/terraform/app/backend.tf b/infra/terraform/app/backend.tf
new file mode 100644
index 0000000000..1e7dc70373
--- /dev/null
+++ b/infra/terraform/app/backend.tf
@@ -0,0 +1,4 @@
+# Set the terraform backend
+terraform {
+  backend "azurerm" {}
+}
diff --git a/infra/terraform/app/main.tf b/infra/terraform/app/main.tf
new file mode 100644
index 0000000000..cb5f47a507
--- /dev/null
+++ b/infra/terraform/app/main.tf
@@ -0,0 +1,291 @@
+data "azurerm_storage_account" "datalake" {
+  name                = var.datalake_name
+  resource_group_name = var.datalake_resource_group_name
+}
+
+data "azurerm_postgresql_server" "postgres" {
+  name                = var.postgresql_name
+  resource_group_name = var.postgresql_resource_group_name
+}
+
+locals {
+  databricks_secret_scope        = "feast"
+  databricks_secret_datalake_key = "azure_account_key"
+  databricks_dbfs_jar_folder     = "dbfs:/feast/run${var.run_number}"
+  databricks_spark_version       = "6.6.x-scala2.11"
+}
+
+resource "azurerm_postgresql_database" "feast" {
+  name                = "feast"
+  resource_group_name = var.postgresql_resource_group_name
+  server_name         = var.postgresql_name
+  charset             = "UTF8"
+  collation           = "English_United States.1252"
+}
+
+resource "databricks_token" "feast" {
+  lifetime_seconds = 315569520 # ten years
+  comment          = "Token used by CI/CD pipeline"
+}
+
+resource "databricks_secret_scope" "feast" {
+  name                     = local.databricks_secret_scope
+  initial_manage_principal = "users"
+}
+
+resource "databricks_secret" "azure_account_key" {
+  key          = local.databricks_secret_datalake_key
+  string_value = data.azurerm_storage_account.datalake.primary_access_key
+  scope        = databricks_secret_scope.feast.name
+}
+
+resource "null_resource" "dbfs-ingestion" {
+  triggers = {
+    dbfs_jar_folder = local.databricks_dbfs_jar_folder
+  }
+  provisioner "local-exec" {
+    command = <