vllm-project · WoosukKwon · Jun 22, 2024 · Jun 18, 2024 · Jun 19, 2024 · Jun 20, 2024
diff --git a/.buildkite/test-template-aws.j2 b/.buildkite/test-template-aws.j2
@@ -42,12 +42,18 @@ steps:
     command: bash .buildkite/run-neuron-test.sh
     soft_fail: false
 
-  - label: "Intel Test"
+  - label: "Intel CPU Test"
     depends_on: ~
     agents:
-      queue: intel
+      queue: intel-cpu
     command: bash .buildkite/run-cpu-test.sh
 
+  - label: "Intel GPU Test"
+    depends_on: ~
+    agents:
+      queue: intel-gpu
+    command: bash .buildkite/run-xpu-test.sh
+
   {% for step in steps %}
   - label: "{{ step.label }}"
     agents:

diff --git a/README.md b/README.md
@@ -59,7 +59,7 @@ vLLM is flexible and easy to use with:
 - Tensor parallelism support for distributed inference
 - Streaming outputs
 - OpenAI-compatible API server
-- Support NVIDIA GPUs, AMD GPUs, and Intel CPUs
+- Support NVIDIA GPUs, AMD GPUs, and Intel CPUs and GPUs
 - (Experimental) Prefix caching support
 - (Experimental) Multi-lora support