CI for vLLM with vllm-xpu-kernel (vllm-project#372)

1pikachu · jikunshang · yma11 · jikunshang · commit 93c0b8d471c9 · 2025-11-09T19:09:29.000-08:00
* layernorm use vllm_xpu_kernels Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> * [ww34] switch silu_and_mul, reshape_and_cache_flash, rope to xpu kernel Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> * update activation kernels Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> * try remove ipex Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> * switch to xpu kernel for w8a16 gemm (vllm-project#323) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> * enable cutlass chunked-prefill (vllm-project#330) * enable cutlass chunked-prefill Signed-off-by: Yan Ma <yan.ma@intel.com> * add required pkg for xpu-kernels compilation Signed-off-by: Yan Ma <yan.ma@intel.com> --------- Signed-off-by: Yan Ma <yan.ma@intel.com> Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> * enable topk/grouped_gemm based on llama4 (vllm-project#354) * enable topk/grouped_gemm based on llama4 Signed-off-by: Yan Ma <yan.ma@intel.com> * address comments Signed-off-by: Yan Ma <yan.ma@intel.com> --------- Signed-off-by: Yan Ma <yan.ma@intel.com> Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> * enable CI * replace lora kernels (vllm-project#347) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> * remove ipex (vllm-project#370) Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> * update QA CI branch * update QA CI yaml * update QA CI yaml * update QA CI yaml * update QA CI yaml * update QA CI yaml * update QA CI yaml * fix conflict --------- Signed-off-by: Kunshang Ji <kunshang.ji@intel.com> Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> Co-authored-by: Yan Ma <yan.ma@intel.com> Co-authored-by: Liu, Wenjun <wenjun.liu@intel.com>
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -0,0 +1,187 @@
+name: Run Intel XPU BMG CI
+
+on:
+  pull_request:
+    branches:
+      - '**xpu-kernels**'  
+    types: [opened, synchronize, reopened]  # 
+
+jobs:
+  run-xpu-BMG-CI:
+    if: |
+        github.event_name == 'pull_request' ||
+        (github.event_name == 'issue_comment' &&
+        github.event.issue.pull_request &&
+        contains(github.event.comment.body, '/BMG_CI'))
+    runs-on: BMG 
+
+    steps:
+    - name: Fix workspace permissions
+      run: |
+        sudo chown -R $(whoami):$(whoami) "$GITHUB_WORKSPACE"
+        sudo chmod -R 755 "$GITHUB_WORKSPACE"
+        sudo rm -f "$GITHUB_WORKSPACE/.git/index.lock" || true
+
+    - name: Checkout QA_ci (test code)  #
+      uses: actions/checkout@v4
+      with:
+        ref: QA_ci
+        path: qa_ci_code
+
+    - name: Checkout PR + Release Branch (DUT code)
+      uses: actions/checkout@v4
+      with:
+        ref: ${{ github.event.pull_request.head.ref }}
+        path: target_code
+        fetch-depth: 0  # 
+
+    - name: Merge PR into Release Branch
+      if: github.event_name == 'pull_request'
+      run: |
+        cd target_code
+        echo "Merging PR branch into base: ${{ github.base_ref }}"
+        git fetch origin ${{ github.base_ref }}
+        git merge origin/${{ github.base_ref }} --no-commit
+      shell: bash
+
+    - name: Build docker image
+      run: |
+        echo "start to build image"
+        cd target_code
+        if [ -n "${{ github.event.pull_request.number }}" ]; then
+          image_name="vllm_xpu_ci_${{ github.event.pull_request.number }}"
+        else
+          image_name="vllm_xpu_ci_$(echo $GITHUB_REF | awk -F '/' '{print $3}')"
+        fi
+        image_name=$(echo "$image_name" | tr '[:upper:]' '[:lower:]')
+        #!/bin/bash
+
+        # Configuration
+        MAX_RETRIES=6                      # Maximum number of retry attempts
+        TIMEOUT=1800                       # 30-minute timeout per attempt (in seconds)
+        LOG_FILE="docker_build.log"         # Log file path
+
+        # Proxy configurations - add more if needed
+        PROXIES=(
+          "http://child-prc.intel.com:913"    # First fallback
+          "http://proxy.ims.intel.com:911"    # Primary proxy
+          "http://child-prc.intel.com:913"    # First fallback
+        )
+
+        # No-proxy configuration
+        NO_PROXY=".intel.com,intel.com,localhost,127.0.0.1"
+        #docker builder prune -f  #clean cache     
+        docker builder prune --all --force
+
+        #Loop through proxy configurations
+        for (( attempt=1; attempt<=$MAX_RETRIES; attempt++ )); do
+          proxy_index=$(( (attempt-1) % ${#PROXIES[@]} ))
+          proxy=${PROXIES[$proxy_index]}
+          echo "=== Attempt $attempt/$MAX_RETRIES (Proxy: $proxy) ===" | tee -a "$LOG_FILE"
+          
+          if [ $attempt -eq 1 ]; then
+            # First attempt without no_proxy
+            timeout $TIMEOUT docker build \
+              --build-arg http_proxy=$proxy \
+              --build-arg https_proxy=$proxy \
+              -f docker/Dockerfile.xpu \
+              -t "$image_name" \
+              --shm-size=4g . 2>&1 | tee -a "$LOG_FILE"
+          else
+            # Subsequent attempts with no_proxy
+            timeout $TIMEOUT docker build \
+              --build-arg http_proxy=$proxy \
+              --build-arg https_proxy=$proxy \
+              --build-arg no_proxy="$NO_PROXY" \
+              -f docker/Dockerfile.xpu \
+              -t "$image_name" \
+              --shm-size=4g . 2>&1 | tee -a "$LOG_FILE"
+          fi
+
+          # Check if build succeeded
+          if [ ${PIPESTATUS[0]} -eq 0 ]; then
+            echo "=== Build succeeded on attempt $attempt ===" | tee -a "$LOG_FILE"
+            exit 0
+          fi
+        done
+
+        echo "=== ERROR: All $MAX_RETRIES attempts failed. Check $LOG_FILE for details. ===" | tee -a "$LOG_FILE"
+        exit 1
+
+    - name: Prepare environment (clean up old processes and containers)
+      run: |
+        echo "Killing any process on port 8000..."
+        lsof -t -i:8000 | xargs -r kill -9 || true
+
+        echo "Killing old vllm server processes..."
+        pkill -f "python3 -m vllm.entrypoints.openai.api_server" || true
+
+        echo "Removing old container if exists..."
+        docker rm -f vllm_internal_ci || true
+    
+    - name: Run benchmark inside local Docker image
+      run: |
+        # Reuse the image_name from previous step
+        if [ -n "${{ github.event.pull_request.number }}" ]; then
+          image_name="vllm_xpu_ci_${{ github.event.pull_request.number }}"
+        else
+          image_name="vllm_xpu_ci_$(echo $GITHUB_REF | awk -F '/' '{print $3}')"
+        fi
+        image_name=$(echo "$image_name" | tr '[:upper:]' '[:lower:]')
+
+        echo "Running benchmark using image: $image_name"
+        docker run -t --rm --name vllm_internal_ci --shm-size 10g \
+          --net=host \
+          --ipc=host \
+          --privileged \
+          -v ${HOME}/actions-runner/_work/vllm-xpu/vllm-xpu/qa_ci_code:/WORKSPACE \
+          -v /dev/dri/by-path:/dev/dri/by-path \
+          -v ${HOME}/.cache:/root/.cache/ \
+          -e http_proxy=${http_proxy:-"http://proxy-dmz.intel.com:912"} \
+          -e https_proxy=${http_proxy:-"http://proxy-dmz.intel.com:912"} \
+          -e no_proxy=${no_proxy:-"127.0.0.1,localhost"} \
+          --device /dev/dri:/dev/dri \
+          -w /workspace \
+          --entrypoint='' \
+          --mount type=bind,source="$HOME/.secrets/my_token",target=/run/secrets/my_token,readonly \
+          $image_name \
+          bash -c "bash /WORKSPACE/.buildkite/nightly-benchmarks/scripts/CI_run_server_benchmarks.sh BMG_KERNEL || true; chown -R \$(id -u):\$(id -g) /WORKSPACE"
+
+    - name: Validate server benchmark results
+      run: |
+            python3 ${HOME}/actions-runner/_work/vllm-xpu/vllm-xpu/qa_ci_code/.buildkite/nightly-benchmarks/scripts/analyze_benchmark_results_final.py --test-selector BMG_KERNEL
+            cat ${HOME}/actions-runner/_work/vllm-xpu/vllm-xpu/qa_ci_code/benchmarks/results/benchmark_analysis_final.json
+
+    - name: Fix permissions
+      run: sudo chmod -R 755 ${{ runner.workspace }}/vllm-xpu/qa_ci_code/benchmarks/results/
+    
+    - name: Debug path
+      run: ls -la ${{ runner.workspace }}/vllm-xpu/qa_ci_code/benchmarks/results/
+
+    - name: Upload benchmark results
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+        name: benchmark-results
+        path: ${{ runner.workspace }}/vllm-xpu/qa_ci_code/benchmarks/results/
+
+    - name: Analyze and validate benchmark results
+      if: always()
+      run: |
+        RESULTS_FILE="$HOME/actions-runner/_work/vllm-xpu/vllm-xpu/qa_ci_code/benchmarks/results/benchmark_analysis_final.json"
+        if [ ! -f "$RESULTS_FILE" ]; then
+          echo "❌ Benchmark analysis file not found!"
+          exit 1
+        fi
+        
+        echo "📊 Benchmark Results:"
+        cat "$RESULTS_FILE"
+        FAILURES=$(jq -r '.[] | select(.function != "pass") | .case_name' "$RESULTS_FILE")
+        
+        if [ -n "$FAILURES" ]; then
+          echo "❌ Failed cases detected:"
+          echo "$FAILURES"
+          exit 1
+        else
+          echo "✅ All benchmarks passed"
+        fi
diff --git a/requirements/xpu.txt b/requirements/xpu.txt
@@ -15,4 +15,4 @@ torchaudio
 torchvision
 --extra-index-url=https://download.pytorch.org/whl/xpu
 
-intel-extension-for-pytorch @ https://intel-extension-for-pytorch.s3.us-east-1.amazonaws.com/ipex_dev/xpu/intel_extension_for_pytorch-2.8.10.post1%2Bxpu-cp312-cp312-linux_x86_64.whl
+vllm-xpu-kernels @ https://ubit-artifactory-ba.intel.com/artifactory/aipc_releases-ba-local/gpu/new/validation/IPEX/nightly/PVC/UBUNTU/VLLM_nightly/vllm_kernel/20251014/e50cca090e/vllm_xpu_kernels-0.0.1-cp312-cp312-linux_x86_64.whl