Use benchmarks workspace #2507

Workflow file for this run

.github/workflows/ab_tests.yml at d395d6a

	# Display name of the workflow.
	name: A/B Tests
	# Run on pushes to every branch except `main`.
	on:
	  push:
	    branches-ignore:
	      - main

	# One concurrency group per (event name, ref); a newer run in the same group
	# cancels the one still in progress.
	concurrency:
	  # Include `github.event_name` to avoid pushes to `main` and
	  # scheduled jobs canceling one another.
	  # NOTE(review): the `on:` block of this workflow only lists `push` with
	  # `main` ignored, so this rationale may be inherited from another
	  # workflow - confirm before relying on it.
	  group: ab_tests-${{ github.event_name }}-${{ github.ref }}
	  cancel-in-progress: true

	# Default settings applied to every `run` step in this workflow.
	defaults:
	  # Required shell entrypoint to have properly activated conda environments
	  run:
	    shell: bash -l {0}

	jobs:
	  # Runs ci/scripts/discover_ab_environments.py and publishes its stdout as
	  # the `matrix` job output, which the downstream jobs read via fromJson().
	  discover_ab_envs:
	    name: Discover A/B environments
	    runs-on: ubuntu-latest
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4

	      - name: Install Python
	        uses: actions/setup-python@v5
	        with:
	          python-version: "3.10"

	      - name: Install dependencies
	        run: pip install PyYaml

	      - name: Generate dynamic matrix
	        id: set-matrix
	        run: echo "matrix=$(python ci/scripts/discover_ab_environments.py)" >> "$GITHUB_OUTPUT"

	    outputs:
	      matrix: ${{ steps.set-matrix.outputs.matrix }}

	  # Everything below this point runs iff there are files matching
	  # AB_environments/AB_*.{conda,dask}.yaml
	  # and AB_environments/config.yaml set repeat > 0

	  # Runs the benchmark suite once per combination of pytest_args,
	  # runtime-version, repeat and h2o_datasets taken from the discovered matrix,
	  # then uploads the resulting database and debugging files as an artifact.
	  tests:
	    name: A/B Tests - ${{ matrix.runtime-version }}
	    needs: discover_ab_envs
	    if: fromJson(needs.discover_ab_envs.outputs.matrix).run_AB
	    runs-on: ubuntu-latest
	    timeout-minutes: 120
	    strategy:
	      fail-fast: false
	      max-parallel: ${{ fromJson(needs.discover_ab_envs.outputs.matrix).max_parallel }}
	      matrix:
	        pytest_args: ${{ fromJson(needs.discover_ab_envs.outputs.matrix).pytest_args }}
	        runtime-version: ${{ fromJson(needs.discover_ab_envs.outputs.matrix).runtime }}
	        repeat: ${{ fromJson(needs.discover_ab_envs.outputs.matrix).repeat }}
	        h2o_datasets: ${{ fromJson(needs.discover_ab_envs.outputs.matrix).h2o_datasets }}
	    env:
	      AB_TEST_NAME: ${{ matrix.runtime-version }}
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4

	      # The null hypothesis environment is not stored in the repository; it
	      # is created here as an exact copy of the baseline files.
	      - name: Create null hypothesis as a copy of baseline
	        if: matrix.runtime-version == 'AB_null_hypothesis'
	        run: |
	          cd AB_environments
	          cp AB_baseline.conda.yaml AB_null_hypothesis.conda.yaml
	          cp AB_baseline.dask.yaml AB_null_hypothesis.dask.yaml
	          cp AB_baseline.cluster.yaml AB_null_hypothesis.cluster.yaml

	      # NOTE(review): the Mambaforge installer variant is believed to be
	      # deprecated upstream in favour of Miniforge - confirm it still
	      # resolves with setup-miniconda@v3.
	      - name: Set up environment
	        uses: conda-incubator/setup-miniconda@v3
	        with:
	          miniforge-variant: Mambaforge
	          use-mamba: true
	          condarc-file: ci/condarc
	          environment-file: AB_environments/${{ matrix.runtime-version }}.conda.yaml

	      - name: Add test dependencies
	        run: mamba env update --file ci/environment-test.yml

	      - name: Dump environment
	        run: |
	          # For debugging
	          echo -e "--\n--Conda Environment (re-create this with \`conda env create --name <name> -f <output_file>\`)\n--"
	          # tee keeps the export visible in the log and also writes
	          # mamba_env_export.yml, which the upload step below lists
	          mamba env export | grep -E -v '^prefix:.*$' | tee mamba_env_export.yml

	      # Appends the script's stdout to $GITHUB_ENV so that later steps see
	      # those lines as environment variables.
	      - name: Convert dask config into environment variables
	        run: python ci/scripts/dask_config_to_env.py AB_environments/${{ matrix.runtime-version }}.dask.yaml >> "$GITHUB_ENV"

	      - name: Run Coiled Runtime Tests
	        env:
	          DASK_COILED__TOKEN: ${{ secrets.COILED_BENCHMARK_BOT_TOKEN }}
	          AWS_ACCESS_KEY_ID: ${{ secrets.RUNTIME_CI_BOT_AWS_ACCESS_KEY_ID }}
	          AWS_DEFAULT_REGION: us-east-2  # this is needed for boto for some reason
	          AWS_SECRET_ACCESS_KEY: ${{ secrets.RUNTIME_CI_BOT_AWS_SECRET_ACCESS_KEY }}
	          PYTHON_STUB_PAT: ${{ secrets.PYTHON_STUB_PAT }}
	          SNOWFLAKE_USER: ${{ secrets.SNOWFLAKE_USER }}
	          SNOWFLAKE_PASSWORD: ${{ secrets.SNOWFLAKE_PASSWORD }}
	          SNOWFLAKE_ACCOUNT: ${{ secrets.SNOWFLAKE_ACCOUNT }}
	          SNOWFLAKE_WAREHOUSE: ${{ secrets.SNOWFLAKE_WAREHOUSE }}
	          SNOWFLAKE_ROLE: ${{ secrets.SNOWFLAKE_ROLE }}
	          AB_VERSION: ${{ matrix.runtime-version }}
	          # Same file name that the upload step below collects.
	          DB_NAME: ${{ matrix.runtime-version }}-${{ matrix.repeat }}.db
	          CLUSTER_DUMP: always
	          CLUSTER_KWARGS: AB_environments/${{ matrix.runtime-version }}.cluster.yaml
	          H2O_DATASETS: ${{ matrix.h2o_datasets }}
	        run: pytest --benchmark ${{ matrix.pytest_args }}

	      - name: Dump coiled.Cluster kwargs
	        run: cat cluster_kwargs.merged.yaml

	      # Runs even when earlier steps failed so partial results are kept.
	      # NOTE(review): `cluster_kwargs.*.*` is restored from a garbled
	      # `cluster_kwargs..`; it is meant to match files such as
	      # cluster_kwargs.merged.yaml - confirm against the repository.
	      - name: Upload benchmark results
	        uses: actions/upload-artifact@v4
	        if: always()
	        with:
	          name: ${{ matrix.runtime-version }}-${{ matrix.repeat }}
	          path: |
	            ${{ matrix.runtime-version }}-${{ matrix.repeat }}.db
	            cluster_kwargs.*.*
	            mamba_env_export.yml

	  # Downloads every artifact of this run into benchmarks/, merges them with
	  # ci/scripts/combine-dbs.sh and uploads benchmark.db as the `benchmark`
	  # artifact. always() lets it run even when some test jobs failed.
	  process-results:
	    needs: [discover_ab_envs, tests]
	    name: Combine separate benchmark results
	    if: always() && fromJson(needs.discover_ab_envs.outputs.matrix).run_AB
	    runs-on: ubuntu-latest
	    concurrency:
	      # Fairly strict concurrency rule to avoid stepping on benchmark db.
	      # Could eventually replace with a real db in coiled, RDS, or litestream
	      group: process-benchmarks
	      cancel-in-progress: false
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4

	      - name: Install Python
	        uses: actions/setup-python@v5
	        with:
	          python-version: "3.10"

	      - name: Install dependencies
	        run: pip install alembic

	      # No `name` given: all artifacts of the run are fetched under benchmarks/.
	      - name: Download artifacts
	        uses: actions/download-artifact@v4
	        with:
	          path: benchmarks

	      - name: Combine benchmarks
	        run: |
	          ls -lhR benchmarks
	          bash ci/scripts/combine-dbs.sh

	      - name: Upload benchmark results as artifact
	        uses: actions/upload-artifact@v4
	        with:
	          name: benchmark
	          path: benchmark.db

	  # Renders the static dashboards from the combined benchmark.db and uploads
	  # the `static` directory as the `static-dashboard` artifact.
	  static-site:
	    needs: [discover_ab_envs, process-results]
	    # Use always() so the site is still generated when an indirect dependency
	    # (like a test run) fails; without it this job would be skipped.
	    if: always() && fromJson(needs.discover_ab_envs.outputs.matrix).run_AB
	    name: Build static dashboards
	    runs-on: ubuntu-latest
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4

	      - name: Download artifacts
	        uses: actions/download-artifact@v4
	        with:
	          name: benchmark

	      - name: Set up environment
	        uses: conda-incubator/setup-miniconda@v3
	        with:
	          miniforge-variant: Mambaforge
	          use-mamba: true
	          environment-file: ci/environment-dashboard.yml

	      - name: Generate dashboards
	        run: python dashboard.py -d benchmark.db -o static -b all

	      - name: Upload artifact
	        uses: actions/upload-artifact@v4
	        with:
	          name: static-dashboard
	          path: static

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Use benchmarks workspace #2507

Workflow file

Use benchmarks workspace #2507

Jobs

Run details

Workflow file for this run