Commit 93ef4bb

Merge branch 'pr_1481' into pangumoe_w8a8c8
2 parents: 20deb50 + 9c40943

34 files changed: +1590 −328 lines

(new file, name not captured)

Lines changed: 100 additions & 0 deletions
```yaml
name: Release Checklist
description: Generate a release checklist issue when preparing a new release. (Used by the release team)
title: "[Release]: Release checklist for v"

body:
  - type: textarea
    attributes:
      description: >
        Brief info for the new release.
      label: Release Checklist
      value: >
        **Release Version**:

        **Release Branch**:

        **Release Date**:

        **Release Manager**:
  - type: textarea
    attributes:
      description: >
        Release notes.
      label: Prepare Release Note
      value: >
        - [ ] Create a new issue for release feedback

        - [ ] Write the release note PR.

        - [ ] Update the feedback issue link in docs/source/faqs.md

        - [ ] Add release note to docs/source/user_guide/release_notes.md

        - [ ] Update version info in docs/source/community/versioning_policy.md

        - [ ] Update contributor info in docs/source/community/contributors.md

        - [ ] Update package version in docs/conf.py
  - type: textarea
    attributes:
      description: >
        Make sure the code is merged.
      label: PR need Merge
      value: >
        - [ ] PR link1

        - [ ] PR link2

        - [ ] ...
  - type: textarea
    attributes:
      description: >
        Make sure the new Feature/Function is tested.
      label: Functional Test
      value: >
        - [ ] Feature1

        - [ ] Bug1

        - [ ] ...
  - type: textarea
    attributes:
      description: >
        Make sure the doc is updated.
      label: Doc Test
      value: >
        - [ ] Tutorial is updated.

        - [ ] User Guide is updated.

        - [ ] Developer Guide is updated.
  - type: textarea
    attributes:
      description: >
        Make sure the artifacts are ready.
      label: Prepare Artifacts
      value: >
        - [ ] Docker image is ready.

        - [ ] Wheel package is ready.
  - type: textarea
    attributes:
      description: >
        Start to release.
      label: Release Step
      value: >
        - [ ] Release note PR is merged.

        - [ ] Post the release on the GitHub release page.

        - [ ] Generate the official doc page on https://app.readthedocs.org/dashboard/

        - [ ] Wait for the wheel package to be available on https://pypi.org/project/vllm-ascend

        - [ ] Wait for the docker image to be available on https://quay.io/ascend/vllm-ascend

        - [ ] Upload the 310p wheel to the GitHub release page

        - [ ] Broadcast the release news (by message, blog, etc.)

        - [ ] Close this issue
```

.github/workflows/accuracy_test.yaml

Lines changed: 1 addition & 1 deletion

```diff
@@ -380,7 +380,7 @@ jobs:
             const pr = await github.rest.pulls.create({
               owner: 'vllm-project',
               repo: 'vllm-ascend',
-              head: `${{ github.actor }}:${{ env.BRANCH_NAME }}`,
+              head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
               base: '${{ github.event.inputs.vllm-ascend-version }}',
               title: `[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}`,
               body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for:
```
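The `head:` change matters because the report PR is opened from a fork: the GitHub API expects a cross-repository head in the `owner:branch` form, and `github.actor` (whoever triggered the run) is not necessarily the account that holds the branch. A minimal shell sketch of composing such a ref; the branch name below is a hypothetical stand-in for the workflow's `env.BRANCH_NAME`:

```shell
#!/bin/bash
# Compose a cross-repository head ref in GitHub's "owner:branch" form.
# BRANCH_NAME is a hypothetical stand-in for the workflow's env.BRANCH_NAME.
BRANCH_NAME="auto-update-accuracy-reports"
HEAD_REF="vllm-ascend-ci:${BRANCH_NAME}"
echo "$HEAD_REF"   # prints "vllm-ascend-ci:auto-update-accuracy-reports"
```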

.github/workflows/nightly_benchmarks.yaml

Lines changed: 9 additions & 5 deletions

```diff
@@ -145,8 +145,8 @@ jobs:
       - name: Install elastic_tool
         if: github.event_name != 'pull_request'
         run: |
-          pip install escli-tool==0.2.2
-
+          pip install escli-tool==0.2.3
+
       - name: Collect pr info from vllm-project/vllm-ascend
         if: github.event_name != 'pull_request'
         run: |
@@ -176,24 +176,28 @@ jobs:
           commit_time=$(git show -s --format=%cd $commit_hash --date=iso-strict)
           commit_time_no_tz=${commit_time::19}
           pip install -e .
-
+
           echo "------------------------"
           echo "commit_id: $commit_id"
           echo "commit_title: $commit_title"
           echo "commit_time: $commit_time_no_tz"
           echo "vllm branch: ${{ matrix.vllm_branch }}"
           echo "vllm-ascend branch: ${{ matrix.vllm_ascend_branch }}"
           echo "------------------------"
-
+
           cd /github/home
-          bash benchmarks/scripts/run-performance-benchmarks.sh
+          ERROR_MSG=""
+          if ! bash benchmarks/scripts/run-performance-benchmarks.sh; then
+            ERROR_MSG="Benchmark failed to run"
+          fi
           # send the result to es
           escli add --vllm_branch ${{ matrix.vllm_branch }} \
             --vllm_ascend_branch ${{ matrix.vllm_ascend_branch }} \
             --commit_id $commit_id \
             --commit_title "$commit_title" \
             --created_at "$commit_time_no_tz" \
             --res_dir ./benchmarks/results \
+            --error $ERROR_MSG \
             --extra_feat '{"VLLM_USE_V1": "${{ matrix.vllm_use_v1 }}"}'
           rm -rf ./benchmarks/results
           cd -
```
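The `if ! …` guard above lets the step record a benchmark failure in Elasticsearch instead of aborting the shell at the failing command. A standalone sketch of the same pattern, with `false` standing in for the benchmark script; note also that a multi-word message like this one would normally be quoted (`"$ERROR_MSG"`) when forwarded to another command, so it arrives as a single argument:

```shell
#!/bin/bash
# Capture a command's failure without stopping the script: a command tested
# by `if !` does not trigger errexit (`set -e`) even when it exits non-zero.
set -e

ERROR_MSG=""
if ! false; then            # `false` stands in for run-performance-benchmarks.sh
  ERROR_MSG="Benchmark failed to run"
fi

# Quote the variable when forwarding it so the multi-word message stays
# one argument (the workflow passes it on to `escli add --error`).
echo "error=${ERROR_MSG}"   # prints "error=Benchmark failed to run"
```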

.github/workflows/vllm_ascend_test.yaml

Lines changed: 100 additions & 100 deletions

```diff
@@ -144,7 +144,7 @@ jobs:
       VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [main, v0.9.1]
+        vllm_version: [main, ]
     steps:
       - name: Install packages
         run: |
@@ -193,111 +193,111 @@ jobs:
           name: vllm-ascend
           verbose: true
 
-  e2e:
-    needs: [lint]
-    # only trigger e2e test on pull request after lint passed
-    if: ${{ needs.lint.result == 'success' && github.event_name == 'pull_request' }}
-    strategy:
-      max-parallel: 2
-      matrix:
-        os: [linux-arm64-npu-1]
-        vllm_version: [main, v0.9.1]
-    name: singlecard e2e test
-    runs-on: ${{ matrix.os }}
-    container:
-      # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
-      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
-    env:
-      VLLM_LOGGING_LEVEL: ERROR
-    steps:
-      - name: Check npu and CANN info
-        run: |
-          npu-smi info
-          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
-
-      - name: Config mirrors
-        run: |
-          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
-          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
-          apt-get update -y
-          apt install git -y
-          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
-
-      - name: Checkout vllm-project/vllm-ascend repo
-        uses: actions/checkout@v4
-
-      - name: Install system dependencies
-        run: |
-          apt-get -y install `cat packages.txt`
-          apt-get -y install gcc g++ cmake libnuma-dev
-
-      - name: Checkout vllm-project/vllm repo
-        uses: actions/checkout@v4
-        with:
-          repository: vllm-project/vllm
-          ref: ${{ matrix.vllm_version }}
-          path: ./vllm-empty
-
-      - name: Install vllm-project/vllm from source
-        working-directory: ./vllm-empty
-        run: |
-          VLLM_TARGET_DEVICE=empty pip install -e .
-
-      - name: Install vllm-project/vllm-ascend
-        env:
-          PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
-        run: |
-          pip install -r requirements-dev.txt
-          pip install -v -e .
-
-      - name: Run e2e test for V1 Engine
-        env:
-          VLLM_USE_V1: 1
-          VLLM_WORKER_MULTIPROC_METHOD: spawn
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          pytest -sv tests/e2e/singlecard/test_offline_inference.py
-          # TODO: switch hf to modelscope
-          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-            pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-          pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/ \
-            --ignore=tests/e2e/singlecard/test_offline_inference.py \
-            --ignore=tests/e2e/singlecard/test_ilama_lora.py \
-            --ignore=tests/e2e/singlecard/test_guided_decoding.py \
-            --ignore=tests/e2e/singlecard/test_camem.py
-
-      - name: Run e2e test on V0 engine
-        if: ${{ github.event_name == 'schedule' }}
-        env:
-          VLLM_USE_V1: 0
-          VLLM_USE_MODELSCOPE: True
-        run: |
-          pytest -sv tests/e2e/singlecard/test_offline_inference.py
-          # TODO: switch hf to modelscope
-          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-            pytest -sv tests/e2e/singlecard/test_ilama_lora.py
-          pytest -sv tests/e2e/singlecard/test_guided_decoding.py
-          pytest -sv tests/e2e/singlecard/test_camem.py
-          pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
-          pytest -sv tests/e2e/singlecard/ \
-            --ignore=tests/e2e/singlecard/test_offline_inference.py \
-            --ignore=tests/e2e/singlecard/test_ilama_lora.py \
-            --ignore=tests/e2e/singlecard/test_guided_decoding.py \
-            --ignore=tests/e2e/singlecard/test_camem.py \
-            --ignore=tests/e2e/singlecard/test_prompt_embedding.py \
-            --ignore=tests/e2e/singlecard/core/test_ascend_scheduler.py \
-            --ignore=tests/e2e/singlecard/core/test_ascend_scheduler_e2e.py
+  # e2e:
+  #   needs: [lint]
+  #   # only trigger e2e test on pull request after lint passed
+  #   if: ${{ needs.lint.result == 'success' && github.event_name == 'pull_request' }}
+  #   strategy:
+  #     max-parallel: 2
+  #     matrix:
+  #       os: [linux-arm64-npu-1]
+  #       vllm_version: [main, ]
+  #   name: singlecard e2e test
+  #   runs-on: ${{ matrix.os }}
+  #   container:
+  #     # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
+  #     image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
+  #   env:
+  #     VLLM_LOGGING_LEVEL: ERROR
+  #   steps:
+  #     - name: Check npu and CANN info
+  #       run: |
+  #         npu-smi info
+  #         cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+  #     - name: Config mirrors
+  #       run: |
+  #         sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+  #         pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+  #         apt-get update -y
+  #         apt install git -y
+  #         git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
+
+  #     - name: Checkout vllm-project/vllm-ascend repo
+  #       uses: actions/checkout@v4
+
+  #     - name: Install system dependencies
+  #       run: |
+  #         apt-get -y install `cat packages.txt`
+  #         apt-get -y install gcc g++ cmake libnuma-dev
+
+  #     - name: Checkout vllm-project/vllm repo
+  #       uses: actions/checkout@v4
+  #       with:
+  #         repository: vllm-project/vllm
+  #         ref: ${{ matrix.vllm_version }}
+  #         path: ./vllm-empty
+
+  #     - name: Install vllm-project/vllm from source
+  #       working-directory: ./vllm-empty
+  #       run: |
+  #         VLLM_TARGET_DEVICE=empty pip install -e .
+
+  #     - name: Install vllm-project/vllm-ascend
+  #       env:
+  #         PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
+  #       run: |
+  #         pip install -r requirements-dev.txt
+  #         pip install -v -e .
+
+  #     - name: Run e2e test for V1 Engine
+  #       env:
+  #         VLLM_USE_V1: 1
+  #         VLLM_WORKER_MULTIPROC_METHOD: spawn
+  #         VLLM_USE_MODELSCOPE: True
+  #       run: |
+  #         pytest -sv tests/e2e/singlecard/test_offline_inference.py
+  #         # TODO: switch hf to modelscope
+  #         VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
+  #           pytest -sv tests/e2e/singlecard/test_ilama_lora.py
+  #         pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+  #         pytest -sv tests/e2e/singlecard/test_camem.py
+  #         pytest -sv tests/e2e/singlecard/ \
+  #           --ignore=tests/e2e/singlecard/test_offline_inference.py \
+  #           --ignore=tests/e2e/singlecard/test_ilama_lora.py \
+  #           --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+  #           --ignore=tests/e2e/singlecard/test_camem.py
+
+  #     - name: Run e2e test on V0 engine
+  #       if: ${{ github.event_name == 'schedule' }}
+  #       env:
+  #         VLLM_USE_V1: 0
+  #         VLLM_USE_MODELSCOPE: True
+  #       run: |
+  #         pytest -sv tests/e2e/singlecard/test_offline_inference.py
+  #         # TODO: switch hf to modelscope
+  #         VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
+  #           pytest -sv tests/e2e/singlecard/test_ilama_lora.py
+  #         pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+  #         pytest -sv tests/e2e/singlecard/test_camem.py
+  #         pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
+  #         pytest -sv tests/e2e/singlecard/ \
+  #           --ignore=tests/e2e/singlecard/test_offline_inference.py \
+  #           --ignore=tests/e2e/singlecard/test_ilama_lora.py \
+  #           --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+  #           --ignore=tests/e2e/singlecard/test_camem.py \
+  #           --ignore=tests/e2e/singlecard/test_prompt_embedding.py \
+  #           --ignore=tests/e2e/singlecard/core/test_ascend_scheduler.py \
+  #           --ignore=tests/e2e/singlecard/core/test_ascend_scheduler_e2e.py
 
   e2e-4-cards:
-    needs: [e2e]
-    if: ${{ needs.e2e.result == 'success' }}
+    # needs: [e2e]
+    # if: ${{ needs.e2e.result == 'success' }}
     strategy:
       max-parallel: 1
       matrix:
         os: [linux-arm64-npu-4]
-        vllm_version: [main, v0.9.1]
+        vllm_version: [main, ]
     name: multicard e2e test
     runs-on: ${{ matrix.os }}
     container:
```

.gitignore

Lines changed: 2 additions & 0 deletions

```diff
@@ -196,3 +196,5 @@ kernel_meta/
 
 # version file generated by setuptools-scm
 /vllm_ascend/_version.py
+# build info file generated by setup.py
+/vllm_ascend/_build_info.py
```

benchmarks/scripts/run-performance-benchmarks.sh

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,5 +1,5 @@
 #!/bin/bash
-
+set -e
 
 check_npus() {
   # shellcheck disable=SC2155
```
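The one-line addition of `set -e` makes bash abort the benchmark script at the first command that exits non-zero, instead of running on with partial results. A quick sketch of the effect, run in a child shell so the deliberate failure does not kill the demo itself:

```shell
#!/bin/bash
# Under `set -e`, the child shell stops at the failing `false`, so the
# second echo never runs; `|| true` keeps this outer script alive.
out=$(bash -c 'set -e; echo reached; false; echo unreachable' || true)
echo "$out"   # prints only "reached"
```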

benchmarks/scripts/run_accuracy.py

Lines changed: 0 additions & 2 deletions

````diff
@@ -138,8 +138,6 @@ def generate_md(model_name, tasks_list, args, datasets):
 ```bash
 {run_cmd}
 ```
-</div>
-<div>&nbsp;</div>
 """
 
     header = (
````
