Incorporate the new kernel changes by no longer specifying vmem_limit_bytes

vanbasten23 · vanbasten23 · commit e1be425f89ee · 2025-07-14T04:35:23.000Z
Signed-off-by: Xiongfei Wei <isaacwxf23@gmail.com>
diff --git a/requirements/tpu.txt b/requirements/tpu.txt
@@ -18,9 +18,9 @@ setuptools==78.1.0
 --find-links https://storage.googleapis.com/libtpu-releases/index.html
 --find-links https://storage.googleapis.com/jax-releases/jax_nightly_releases.html
 --find-links https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html
-torch==2.9.0.dev20250710
-torchvision==0.24.0.dev20250710
-torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.8.0.dev20250710-cp39-cp39-linux_x86_64.whl ; python_version == "3.9"
-torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.8.0.dev20250710-cp310-cp310-linux_x86_64.whl ; python_version == "3.10"
-torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.8.0.dev20250710-cp311-cp311-linux_x86_64.whl ; python_version == "3.11"
+torch==2.9.0.dev20250711
+torchvision==0.24.0.dev20250711
+torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.9.0.dev20250711-cp39-cp39-linux_x86_64.whl ; python_version == "3.9"
+torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.9.0.dev20250711-cp310-cp310-linux_x86_64.whl ; python_version == "3.10"
+torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.9.0.dev20250711-cp311-cp311-linux_x86_64.whl ; python_version == "3.11"
 
diff --git a/vllm/model_executor/layers/quantization/kernels/scaled_mm/xla.py b/vllm/model_executor/layers/quantization/kernels/scaled_mm/xla.py
@@ -97,7 +97,6 @@ def apply_weights(self,
             w_q,
             w_s,
             quantize_activation=True,
-            vmem_limit_bytes=96 * 1024 * 1024,
         )
 
         # Explicitly capture control flow to make dynamo happy.

Original file line number	Diff line number	Diff line change
`@@ -97,7 +97,6 @@ def apply_weights(self,`
`97`	`97`	`w_q,`
`98`	`98`	`w_s,`
`99`	`99`	`quantize_activation=True,`
`100`		`- vmem_limit_bytes=96 * 1024 * 1024,`
`101`	`100`	`)`
`102`	`101`
`103`	`102`	`# Explicitly capture control flow to make dynamo happy.`