Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions .github/workflows/nightly_benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,18 @@ jobs:
test:
if: ${{ contains(github.event.pull_request.labels.*.name, 'performance-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}

name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}
name: Benchmarks/vLLM=${{ matrix.vllm_branch }}, vLLM-Ascend=${{ matrix.vllm_ascend_branch }}, use_v1=${{ matrix.vllm_use_v1 }}
runs-on: 'linux-arm64-npu-static-8'
strategy:
matrix:
include:
- vllm_branch: v0.9.0
vllm_ascend_branch: main
vllm_use_v1: 0
- vllm_branch: v0.9.0
vllm_ascend_branch: main
vllm_use_v1: 1
max-parallel: 1
container:
image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
volumes:
Expand All @@ -71,6 +76,7 @@ jobs:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
VLLM_USE_V1: ${{ matrix.vllm_use_v1 }}
steps:
- name: Check npu and CANN info
run: |
Expand Down Expand Up @@ -140,7 +146,7 @@ jobs:
- name: Install elastic_tool
if: github.event_name != 'pull_request'
run: |
pip install escli-tool==0.2.1
pip install escli-tool==0.2.2

- name: Collect pr info from vllm-project/vllm-ascend
if: github.event_name != 'pull_request'
Expand Down Expand Up @@ -177,17 +183,17 @@ jobs:
echo "vllm branch: ${{ matrix.vllm_branch }}"
echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
echo "------------------------"

cd /github/home
bash benchmarks/scripts/run-performance-benchmarks.sh
# send the result to es
if [[ "${{ github.event_name }}" != "pull request" ]]; then
escli add --vllm_branch ${{ matrix.vllm_branch }} \
--vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
--commit_id $commit_id \
--commit_title "$commit_title" \
--created_at "$commit_time_no_tz" \
--res_dir ./benchmarks/results
rm -rf ./benchmarks/results
fi
escli add --vllm_branch ${{ matrix.vllm_branch }} \
--vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
--commit_id $commit_id \
--commit_title "$commit_title" \
--created_at "$commit_time_no_tz" \
--res_dir ./benchmarks/results \
--extra_feat '{"VLLM_USE_V1": "${{ matrix.vllm_use_v1 }}"}'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this stored as a separate column in ES?

Is it convenient to filter by it in the frontend?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. This is a set of key-value pairs that leaves room for adding more features in the future. For example, VLLM_USE_V1 can be treated as one such feature, which makes it convenient to filter on.

rm -rf ./benchmarks/results
cd -
done < commit_log.txt
10 changes: 10 additions & 0 deletions benchmarks/tests/latency-tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,15 @@
"num_iters_warmup": 5,
"num_iters": 15
}
},
{
"test_name": "latency_qwen2_5_7B_tp1",
"parameters": {
"model": "Qwen/Qwen2.5-7B-Instruct",
"tensor_parallel_size": 1,
"load_format": "dummy",
"num_iters_warmup": 5,
"num_iters": 15
}
}
]
24 changes: 24 additions & 0 deletions benchmarks/tests/serving-tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,29 @@
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
"num_prompts": 200
}
},
{
"test_name": "serving_qwen2_5_7B_tp1",
"qps_list": [
1,
4,
16,
"inf"
],
"server_parameters": {
"model": "Qwen/Qwen2.5-7B-Instruct",
"tensor_parallel_size": 1,
"swap_space": 16,
"disable_log_stats": "",
"disable_log_requests": "",
"load_format": "dummy"
},
"client_parameters": {
"model": "Qwen/Qwen2.5-7B-Instruct",
"backend": "vllm",
"dataset_name": "sharegpt",
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
"num_prompts": 200
}
}
]
11 changes: 11 additions & 0 deletions benchmarks/tests/throughput-tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@
"dataset_path": "lmarena-ai/vision-arena-bench-v0.1",
"num_prompts": 200
}
},
{
"test_name": "throughput_qwen2_5_7B_tp1",
"parameters": {
"model": "Qwen/Qwen2.5-7B-Instruct",
"tensor_parallel_size": 1,
"load_format": "dummy",
"dataset_path": "/github/home/.cache/datasets/ShareGPT_V3_unfiltered_cleaned_split.json",
"num_prompts": 200,
"backend": "vllm"
}
}
]