System Tests Enterprise #1647
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2023 Iguazio | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
name: System Tests Enterprise | |
on: | |
push: | |
branches: | |
- '.+-system-tests' | |
schedule: | |
# * is a special character in YAML so you have to quote this string | |
# Run the system tests every 8 hours (cron hours should divide 24 equally, otherwise there will be an overlap at the end of the day) | |
- cron: '0 0 * * *' | |
workflow_dispatch: | |
inputs: | |
docker_registry: | |
description: 'Docker registry to pull images from (default: ghcr.io/, use registry.hub.docker.com/ for docker hub)' | |
required: true | |
default: 'ghcr.io/' | |
clean_resources_in_teardown: | |
description: 'Clean resources created by test (like project) in each test teardown (default: true - perform clean)' | |
required: true | |
default: 'true' | |
type: choice | |
options: | |
- 'true' | |
- 'false' | |
override_iguazio_version: | |
description: 'Override the configured target system iguazio version (leave empty to resolve automatically)' | |
required: false | |
concurrency: one-at-a-time | |
jobs: | |
system-test-cleanup: | |
name: System Test Cleanup | |
runs-on: [ self-hosted, Linux ] | |
container: | |
image: ubuntu:latest | |
timeout-minutes: 10 | |
# let's not run this on every fork, change to your fork when developing | |
if: github.repository == 'mlrun/mlrun' || github.event_name == 'workflow_dispatch' | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
apt-get update -qqy && apt-get install -y sshpass | |
- name: cleanup docker images from registries | |
# SSH to datanode and delete all docker images created by the system tests by restarting the docker-registry | |
# deployment and the datanode docker-registry | |
run: | | |
sshpass \ | |
-p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \ | |
ssh \ | |
-o StrictHostKeyChecking=no \ | |
-o ServerAliveInterval=180 \ | |
-o ServerAliveCountMax=3 \ | |
${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }} \ | |
kubectl -n default-tenant rollout restart deployment docker-registry | |
sshpass \ | |
-p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \ | |
scp \ | |
automation/system_test/cleanup.py \ | |
${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}:/home/iguazio/cleanup.py | |
sshpass \ | |
-p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \ | |
scp \ | |
automation/system_test/dev_utilities.py \ | |
${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}:/home/iguazio/dev_utilities.py | |
prepare-system-tests-enterprise-ci: | |
# When increasing the timeout make sure it's not larger than the schedule cron interval | |
timeout-minutes: 55 | |
name: Prepare System Tests Enterprise | |
runs-on: [ self-hosted, Linux ] | |
container: | |
image: python:3.9 | |
needs: [system-test-cleanup] | |
# let's not run this on every fork, change to your fork when developing | |
if: github.repository == 'mlrun/mlrun' || github.event_name == 'workflow_dispatch' | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Install dependencies | |
run: | | |
apt-get update -qqy && apt-get install -y sshpass jq curl gnupg nodejs | |
- uses: actions/setup-node@v4 | |
with: | |
node-version: 18 | |
- name: Copy state branch file from remote | |
run: | | |
sshpass \ | |
-p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \ | |
scp \ | |
-o StrictHostKeyChecking=no \ | |
${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}:/tmp/system-tests-branches-list.txt \ | |
system-tests-branches-list.txt | |
- name: Resolve Branch To Run System Tests | |
id: current-branch | |
shell: bash | |
# we store a file named /tmp/system-tests-branches-list.txt which contains a list of branches to run system tests | |
# on the branches are separated with commas, so each run we pop the first branch in the list and append it to the | |
# end of the list. | |
# This mechanism allows us to run on multiple branches without the need to modify the file or secrets each time | |
# a new branch is added or removed | |
run: | | |
# Read branches from local file | |
branches=$(cat system-tests-branches-list.txt) | |
echo "branches found in system-tests-branches-list.txt: $branches" | |
# Split branches into an array | |
IFS=',' read -ra branches_array <<< "$branches" | |
# Get the first branch in the list to work on | |
first_branch="${branches_array[0]}" | |
echo "working on $first_branch" | |
# Remove the first branch from the list | |
branches_array=("${branches_array[@]:1}") | |
# Add the first branch at the end of the list | |
branches_array+=("$first_branch") | |
# Join branches back into a string | |
branches=$(printf ",%s" "${branches_array[@]}") | |
branches=${branches:1} | |
# Output the new list of branches | |
echo "$branches" | |
# Write new branches order to a local file | |
echo "$branches" | cat > system-tests-branches-list.txt | |
# Set output | |
echo "name=$(echo $first_branch)" >> $GITHUB_OUTPUT | |
- name: Override remote file from local resolved branch list | |
run: | | |
# Override the remote file with the new list of branches | |
sshpass \ | |
-p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \ | |
scp \ | |
-o StrictHostKeyChecking=no system-tests-branches-list.txt \ | |
${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}:/tmp/ | |
# checking out to base branch and not the target(resolved) branch, to be able to run the changed preparation code | |
# before merging the changes to upstream. | |
- name: Checkout base branch | |
uses: actions/checkout@v4 | |
- name: Install automation scripts dependencies | |
run: | | |
python -m pip install -r automation/requirements.txt | |
- name: Extract git hashes from upstream and latest version | |
id: git_upstream_info | |
run: | | |
# Get the latest commit of mlrun/mlrun (that is older than 1 hour) | |
echo "mlrun_hash=$( \ | |
cd /tmp && \ | |
git clone --single-branch --branch ${{ steps.current-branch.outputs.name }} https://github.com/mlrun/mlrun.git mlrun-upstream 2> /dev/null && \ | |
cd mlrun-upstream && \ | |
git rev-list --until="1 hour ago" --max-count 1 --abbrev-commit --abbrev=8 HEAD && \ | |
cd .. && \ | |
rm -rf mlrun-upstream)" >> $GITHUB_OUTPUT | |
# Get the latest commit of mlrun/ui (that is older than 1 hour) | |
echo "ui_hash=$( \ | |
cd /tmp && \ | |
git clone --single-branch --branch ${{ steps.current-branch.outputs.name }} https://github.com/mlrun/ui.git mlrun-ui 2> /dev/null && \ | |
cd mlrun-ui && \ | |
git rev-list --until="1 hour ago" --max-count 1 --abbrev-commit --abbrev=8 HEAD && \ | |
cd .. && \ | |
rm -rf mlrun-ui)" >> $GITHUB_OUTPUT | |
# Get the tested mlrun version | |
echo "unstable_version_prefix=$( \ | |
curl https://raw.githubusercontent.com/mlrun/mlrun/${{ steps.current-branch.outputs.name }}/automation/version/unstable_version_prefix \ | |
)" >> $GITHUB_OUTPUT | |
- name: Set computed versions params | |
id: computed_params | |
run: | | |
action_mlrun_hash=${{ steps.git_action_info.outputs.mlrun_hash }} && \ | |
upstream_mlrun_hash=${{ steps.git_upstream_info.outputs.mlrun_hash }} && \ | |
export mlrun_hash=${upstream_mlrun_hash:-`echo $action_mlrun_hash`} | |
echo "mlrun_hash=$(echo $mlrun_hash)" >> $GITHUB_OUTPUT | |
action_mlrun_ui_hash=${{ steps.git_action_ui_info.outputs.ui_hash }} && \ | |
upstream_mlrun_ui_hash=${{ steps.git_upstream_info.outputs.ui_hash }} && \ | |
export ui_hash=${upstream_mlrun_ui_hash:-`echo $action_mlrun_ui_hash`} | |
echo "ui_hash=$(echo $ui_hash)" >> $GITHUB_OUTPUT | |
echo "mlrun_version=$(echo ${{ steps.git_upstream_info.outputs.unstable_version_prefix }}+$mlrun_hash)" >> $GITHUB_OUTPUT | |
echo "mlrun_docker_tag=$(echo ${{ steps.git_upstream_info.outputs.unstable_version_prefix }}-$mlrun_hash)" >> $GITHUB_OUTPUT | |
echo "mlrun_ui_version=${{ steps.git_upstream_info.outputs.unstable_version_prefix }}-$ui_hash" >> $GITHUB_OUTPUT | |
echo "mlrun_docker_registry=$( \ | |
input_docker_registry=$INPUT_DOCKER_REGISTRY && \ | |
echo ${input_docker_registry:-ghcr.io/})" >> $GITHUB_OUTPUT | |
echo "mlrun_system_tests_clean_resources=$( \ | |
input_system_tests_clean_resources=$INPUT_CLEAN_RESOURCES_IN_TEARDOWN && \ | |
echo ${input_system_tests_clean_resources:-true})" >> $GITHUB_OUTPUT | |
echo "override_iguazio_version=$INPUT_OVERRIDE_IGUAZIO_VERSION" >> $GITHUB_OUTPUT | |
env: | |
INPUT_DOCKER_REGISTRY: ${{ github.event.inputs.docker_registry }} | |
INPUT_OVERRIDE_IGUAZIO_VERSION: ${{ github.event.inputs.override_iguazio_version }} | |
INPUT_CLEAN_RESOURCES_IN_TEARDOWN: ${{ github.event.inputs.clean_resources_in_teardown }} | |
- name: Prepare System Test Environment and Install MLRun | |
env: | |
IP_ADDR_PREFIX: ${{ secrets.IP_ADDR_PREFIX }} | |
timeout-minutes: 50 | |
run: | | |
python automation/system_test/prepare.py run \ | |
--data-cluster-ip "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}" \ | |
--data-cluster-ssh-username "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}" \ | |
--data-cluster-ssh-password "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \ | |
--provctl-download-url "${{ secrets.LATEST_SYSTEM_TEST_PROVCTL_DOWNLOAD_PATH }}" \ | |
--provctl-download-s3-access-key "${{ secrets.LATEST_SYSTEM_TEST_PROVCTL_DOWNLOAD_URL_S3_ACCESS_KEY }}" \ | |
--provctl-download-s3-key-id "${{ secrets.LATEST_SYSTEM_TEST_PROVCTL_DOWNLOAD_URL_S3_KEY_ID }}" \ | |
--username "${{ secrets.LATEST_SYSTEM_TEST_USERNAME }}" \ | |
--access-key "${{ secrets.LATEST_SYSTEM_TEST_ACCESS_KEY }}" \ | |
--iguazio-version "${{ steps.computed_params.outputs.iguazio_version }}" \ | |
--mlrun-version "${{ steps.computed_params.outputs.mlrun_version }}" \ | |
--mlrun-ui-version "${{ steps.computed_params.outputs.mlrun_ui_version }}" \ | |
--mlrun-commit "${{ steps.computed_params.outputs.mlrun_hash }}" \ | |
--override-image-registry "${{ steps.computed_params.outputs.mlrun_docker_registry }}" | |
- name: Prepare System Test env.yml and MLRun installation from current branch | |
timeout-minutes: 5 | |
run: | | |
python automation/system_test/prepare.py env \ | |
--data-cluster-ip "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}" \ | |
--data-cluster-ssh-username "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}" \ | |
--data-cluster-ssh-password "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \ | |
--username "${{ secrets.LATEST_SYSTEM_TEST_USERNAME }}" \ | |
--access-key "${{ secrets.LATEST_SYSTEM_TEST_ACCESS_KEY }}" \ | |
--slack-webhook-url "${{ secrets.LATEST_SYSTEM_TEST_SLACK_WEBHOOK_URL }}" \ | |
--branch "${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }}" \ | |
--github-access-token "${{ secrets.SYSTEM_TEST_GITHUB_ACCESS_TOKEN }}" \ | |
--save-to-path "$GITHUB_WORKSPACE/env.yml" \ | |
--kubeconfig-content "${{ secrets.LATEST_SYSTEM_TEST_KUBECONFIG }}" | |
- name: Encrypt file | |
id: encrypt_file | |
run: | | |
gpg \ | |
--batch \ | |
--passphrase "${{ env.GPG_PASSPHRASE }}" \ | |
--output "$GITHUB_WORKSPACE/env.yml.gpg" \ | |
--symmetric "$GITHUB_WORKSPACE/env.yml" | |
echo "env_file_path=$(echo $GITHUB_WORKSPACE/env.yml.gpg)" >> $GITHUB_OUTPUT | |
env: | |
GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} | |
- name: Upload env file | |
uses: actions/upload-artifact@v4 | |
with: | |
name: env | |
path: "${{ steps.encrypt_file.outputs.env_file_path }}" | |
if-no-files-found: error | |
outputs: | |
mlrunVersion: ${{ steps.computed_params.outputs.mlrun_version }} | |
mlrunBranch: ${{ steps.current-branch.outputs.name }} | |
mlrunSystemTestsCleanResources: ${{ steps.computed_params.outputs.mlrun_system_tests_clean_resources }} | |
run-system-tests-enterprise-ci: | |
# When increasing the timeout make sure it's not larger than the schedule cron interval | |
timeout-minutes: 360 | |
name: Test ${{ matrix.test_component }} [${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }}] | |
# requires prepare to finish before starting | |
needs: [prepare-system-tests-enterprise-ci] | |
runs-on: [ self-hosted, Linux ] | |
container: | |
image: python:3.9 | |
# let's not run this on every fork, change to your fork when developing | |
if: github.repository == 'mlrun/mlrun' || github.event_name == 'workflow_dispatch' | |
strategy: | |
fail-fast: false | |
max-parallel: 1 | |
matrix: | |
test_component: | |
- api | |
- runtimes | |
- projects | |
- model_monitoring | |
- examples | |
- backwards_compatibility | |
- datastore | |
- logs | |
- feature_store | |
- alerts | |
steps: | |
- name: Install Dependencies | |
run: | | |
# gnupg for decrypting env.yml.gpg | |
# nodejs for installing github action dependencies | |
# graphviz for generating graphs (feature store tests) | |
# git-core for cloning repos and etc, required by many suites | |
# gcc, make for installing python packages | |
apt-get update -qqy && \ | |
apt-get install -y \ | |
gnupg \ | |
nodejs \ | |
graphviz \ | |
git-core \ | |
gcc \ | |
make | |
- uses: actions/setup-node@v4 | |
with: | |
node-version: 18 | |
- uses: actions/checkout@v4 | |
# checking out to the resolved branch to run system tests on, as now we run the actual tests, we don't want to run | |
# the system tests of the branch that triggered the system tests as it might be in a different version | |
# than the mlrun version we deployed on the previous job (can have features that the resolved branch doesn't have) | |
with: | |
ref: ${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }} | |
- uses: actions/download-artifact@v4 | |
with: | |
name: env | |
path: /tmp | |
- name: Decrypt file | |
run: | | |
gpg \ | |
--batch \ | |
--passphrase "${{ env.GPG_PASSPHRASE }}" \ | |
--output "$GITHUB_WORKSPACE/tests/system/env.yml" \ | |
--decrypt "/tmp/env.yml.gpg" | |
# ensure file is created | |
test -f "$GITHUB_WORKSPACE/tests/system/env.yml" | |
env: | |
GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }} | |
- name: Pre Tests Requirements | |
run: | | |
# due to | |
# https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9 | |
# ensure mlrun git is safe to use | |
git config --global --add safe.directory "$GITHUB_WORKSPACE" | |
MLRUN_VERSION="${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunVersion }}" \ | |
make install-requirements install-complete-kfp-requirements update-version-file | |
- name: Run System Tests | |
run: | | |
MLRUN_SYSTEM_TESTS_GITHUB_RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \ | |
MLRUN_SYSTEM_TESTS_CLEAN_RESOURCES="${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunSystemTestsCleanResources }}" \ | |
MLRUN_VERSION="${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunVersion }}" \ | |
MLRUN_SYSTEM_TESTS_COMPONENT="${{ matrix.test_component }}" \ | |
MLRUN_SYSTEM_TESTS_BRANCH="${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }}" \ | |
make test-system |