Add Baseline for SGLang Benchmark Test #51

Workflow file for this run

.github/workflows/ci-sglang-benchmark.yml at d78ab73

	# Copyright 2024 Advanced Micro Devices, Inc.
	#
	# Licensed under the Apache License v2.0 with LLVM Exceptions.
	# See https://llvm.org/LICENSE.txt for license information.
	# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

	# =================================== README ===================================
	# The `benchmark_sglang` job in this CI is mostly dependent on code outside
	# of the `shark-ai` repo itself. By including it here, we are able to maintain
	# an apples-to-apples comparison between shortfin and SGLang performance in a
	# centralized location, as we place more effort in shortfin LLM performance, and
	# WHILE WE WORK TOWARDS A BETTER ALTERNATIVE.

	# We should not be generally repeating this pattern, and should never repeat
	# this pattern outside of specifically benchmarking shortfin apps against
	# external projects, as part of an organized and clearly defined effort.
	# ==============================================================================

	name: SGLang Llama Benchmarking Tests

	# Events that start this workflow: pull requests, manual dispatch, and a
	# weekday schedule.
	on:
	  # TODO: Remove after validating in CI
	  pull_request:
	  workflow_dispatch:
	  schedule:
	    # 04:00 UTC, Monday-Friday (UTC days). In US Pacific time this is the
	    # previous evening: 8:00 PM PST (UTC-8) or 9:00 PM PDT (UTC-7).
	    - cron: "0 4 * * 1-5"

	concurrency:
	  # A PR number if a pull request and otherwise the commit hash. This cancels
	  # queued and in-progress runs for the same PR (presubmit) or commit
	  # (postsubmit). The workflow name is prepended to avoid conflicts between
	  # different workflows.
	  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
	  cancel-in-progress: true

	jobs:
	  # Installs sharktank/shortfin plus SGLang and runs the shortfin benchmark
	  # tests, publishing the pytest HTML report as the `shortfin_benchmark`
	  # artifact.
	  benchmark_shortfin:
	    # Scheduled runs only execute for the nod-ai owner; every other event
	    # type runs regardless of owner.
	    if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }}
	    name: "SGLang Serving Benchmark With Shortfin"
	    strategy:
	      matrix:
	        # Quoted so YAML keeps the version a string instead of a float.
	        version: ["3.11"]
	      fail-fast: false
	    runs-on: mi300x-4
	    defaults:
	      run:
	        shell: bash
	    env:
	      PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache"
	    steps:
	      - name: "Setting up Python"
	        id: setup_python
	        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b  # v5.3.0
	        with:
	          python-version: ${{matrix.version}}

	      - name: "Checkout Code"
	        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2

	      - name: Cache Pip Packages
	        uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a  # v4.1.2
	        id: cache-pip
	        with:
	          path: ${{ env.PIP_CACHE_DIR }}
	          key: pip-${{ matrix.version }}-${{ hashFiles('requirements.txt','shortfin/requirements.txt','sharktank/requirements.txt') }}

	      - name: Install pip deps
	        run: |
	          python -m pip install --no-compile --upgrade pip
	          # Note: We install in three steps in order to satisfy requirements
	          # from non default locations first. Installing the PyTorch CPU
	          # wheels saves multiple minutes and a lot of bandwidth on runner setup.
	          pip install --no-compile -r pytorch-cpu-requirements.txt
	          pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \
	            -e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
	          pip install --no-compile -r requirements.txt -e sharktank/ shortfin/

	          # Try with the latest nightly releases, not what iree-turbine pins.
	          # We could also pin to a known working or stable version.
	          # This should eventually stabilize. Do the best we can for now.
	          pip install -f https://iree.dev/pip-release-links.html --upgrade --pre \
	            iree-base-compiler \
	            iree-base-runtime \
	            "numpy<2.0"

	      - name: Install SGLang
	        run: pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python"

	      - name: Run Shortfin Benchmark Tests
	        run: pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py --log-cli-level=INFO --html=shortfin_index.html --self-contained-html

	      - name: Upload pytest report
	        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
	        with:
	          name: shortfin_benchmark
	          path: shortfin_index.html

	  # Starts an SGLang server in Docker and runs the SGLang benchmark tests
	  # against it on port 30000, publishing the pytest HTML report as the
	  # `sglang_benchmark` artifact.
	  benchmark_sglang:
	    # Same owner/schedule guard as benchmark_shortfin, so both benchmark jobs
	    # run (or are skipped) together.
	    if: ${{ github.repository_owner == 'nod-ai' || github.event_name != 'schedule' }}
	    name: "SGLang Serving Benchmark With SGLang"
	    strategy:
	      matrix:
	        # Quoted so YAML keeps the version a string instead of a float.
	        version: ["3.11"]
	      fail-fast: false
	    runs-on: mi300x-4
	    defaults:
	      run:
	        shell: bash
	    env:
	      PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache"
	    steps:
	      - name: "Setting up Python"
	        id: setup_python
	        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b  # v5.3.0
	        with:
	          python-version: ${{matrix.version}}

	      - name: "Checkout Code"
	        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683  # v4.2.2

	      - name: Cache Pip Packages
	        uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a  # v4.1.2
	        id: cache-pip
	        with:
	          path: ${{ env.PIP_CACHE_DIR }}
	          key: pip-${{ matrix.version }}

	      - name: Install SGLang
	        run: |
	          python -m pip install --no-compile --upgrade pip
	          pip install "git+https://github.com/nod-ai/sglang.git#subdirectory=python"

	      # TODO: pin to a full commit SHA like the other actions in this file.
	      - name: Set up Docker Buildx
	        uses: docker/setup-buildx-action@v3

	      # Instruction for SGLang image sourced from here:
	      # https://sgl-project.github.io/start/install.html#method-3-using-docker
	      # We have to run in a docker container due to their vLLM dependency.
	      # From their pyproject.toml:
	      # HIP (Heterogeneous-computing Interface for Portability) for AMD
	      # => base docker rocm/vllm-dev:20241022, not from public vllm whl
	      # srt_hip = ["sglang[runtime_common]", "torch", "vllm==0.6.3.dev13"]
	      - name: Pull SGLang Image (Had issues with sglang:v0.3.5.post2-rocm620)
	        run: |
	          docker pull lmsysorg/sglang:v0.3.5.post1-rocm620

	      - name: Run SGLang Server
	        # The secret is handed to the step through the environment and then
	        # forwarded by name (`--env HF_TOKEN`), so its value is never
	        # spliced into the shell script text.
	        env:
	          HF_TOKEN: ${{ secrets.HF_TOKEN }}
	        run: |
	          docker run --rm -d \
	            --name=sglang-server \
	            --device=/dev/kfd \
	            --device=/dev/dri \
	            --ipc=host \
	            --shm-size 16G \
	            --group-add video \
	            --cap-add=SYS_PTRACE \
	            --security-opt seccomp=unconfined \
	            -v $HOME/dockerx:/dockerx \
	            -v /data:/data \
	            -p 30000:30000 \
	            -v ~/.cache/huggingface:/root/.cache/huggingface \
	            --env HF_TOKEN \
	            lmsysorg/sglang:v0.3.5.post1-rocm620 \
	            python3 -m sglang.launch_server \
	            --model-path meta-llama/Llama-3.1-8B-Instruct \
	            --host 0.0.0.0 \
	            --port 30000 \
	            --tp 1 \
	            --dtype float16 \
	            --disable-cuda-graph

	      - name: Run SGLang Benchmark Tests
	        run: |
	          pytest -v app_tests/benchmark_tests/llm/sglang_benchmarks/sglang_benchmark_test.py --port 30000 --log-cli-level=INFO --html=sglang_index.html --self-contained-html

	      # Runs even when an earlier step failed so the detached container is
	      # not left running on the runner.
	      - name: Stop sglang-server
	        if: always()
	        run: docker stop sglang-server || true  # Stop container if it's running

	      # Deleting image after run due to large disk space requirement (83 GB).
	      # Runs even when an earlier step failed so the image is always removed.
	      - name: Cleanup SGLang Image
	        if: always()
	        run: docker image rm lmsysorg/sglang:v0.3.5.post1-rocm620

	      - name: Upload pytest report
	        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
	        with:
	          name: sglang_benchmark
	          path: sglang_index.html

	  # Downloads whichever benchmark reports were produced, merges them into a
	  # single HTML page and publishes it under llm/sglang via GitHub Pages.
	  merge_and_upload_reports:
	    name: "Merge and upload benchmark reports"
	    # `needs` both orders this job after the benchmarks and populates the
	    # `needs.*.result` values used in the condition below.
	    needs: [benchmark_shortfin, benchmark_sglang]
	    # Run when at least one benchmark job succeeded, even if the other
	    # failed; `!cancelled()` replaces the implicit `success()` check.
	    if: ${{ !cancelled() && (needs.benchmark_shortfin.result == 'success' || needs.benchmark_sglang.result == 'success') }}
	    runs-on: ubuntu-24.04
	    defaults:
	      run:
	        shell: bash
	    steps:
	      - name: "Setting up Python"
	        id: setup_python
	        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b  # v5.3.0
	        with:
	          python-version: "3.11"

	      - name: Install pytest-html-merger
	        run: pip install pytest-html-merger

	      # Either report may be missing when its job failed, hence
	      # continue-on-error on both downloads.
	      - name: Download shortfin report
	        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16
	        with:
	          name: shortfin_benchmark
	          path: reports
	        continue-on-error: true

	      - name: Download sglang report
	        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16
	        with:
	          name: sglang_benchmark
	          path: reports
	        continue-on-error: true

	      - name: Merge html reports
	        run: |
	          mkdir -p merged_reports
	          pytest_html_merger -i reports -o merged_reports/index.html

	      - name: Deploy to GitHub Pages
	        uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e  # v4.0.0
	        with:
	          github_token: ${{ secrets.SHARK_PLATFORM_GH_TOKEN }}
	          publish_dir: merged_reports
	          destination_dir: ./llm/sglang
	          keep_files: true

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add Baseline for SGLang Benchmark Test #51

Workflow file

Add Baseline for SGLang Benchmark Test #51

Jobs

Run details

Workflow file for this run