
Commit ce53d12

fix lint

Signed-off-by: yyt <yangyit139@gmail.com>
1 parent 07f99cb

File tree

2 files changed: +10 -9 lines


tests/ut/quantization/test_w8a16.py

Lines changed: 3 additions & 3 deletions
@@ -1,4 +1,3 @@
-import unittest
 from unittest.mock import MagicMock, patch
 
 import torch
@@ -38,7 +37,8 @@ def test_apply_with_x_is_int8(self, mock_npu_weight_quant_batchmatmul):
     @patch('vllm_ascend.utils.get_ascend_device_type',
            return_value=AscendDeviceType._310P)
     @patch("torch_npu.npu_weight_quant_batchmatmul")
-    def test_apply_with_x_is_310p(self, mock_npu_weight_quant_batchmatmul, mock_soc_version):
+    def test_apply_with_x_is_310p(self, mock_npu_weight_quant_batchmatmul,
+                                  mock_soc_version):
         layer = MagicMock()
         layer.weight.data = torch.randn(128, 256)
         layer.weight_scale.data = torch.randn(128, 1)
@@ -87,4 +87,4 @@ def test_process_weights_after_loading_nz(self, mock_npu_format_cast,
 
         self.assertEqual(layer.weight_scale.data.shape, (128, ))
         self.assertEqual(layer.weight_offset.data.shape, (128, ))
-        mock_npu_format_cast.assert_called_once()
\ No newline at end of file
+        mock_npu_format_cast.assert_called_once()
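
An aside on the decorator ordering that the rewrapped signature preserves: stacked @patch decorators are applied bottom-up, so the bottom-most patch supplies the first mock argument (here, the npu_weight_quant_batchmatmul mock precedes the device-type mock). A minimal, self-contained sketch; the stdlib targets are illustrative stand-ins, not from this repo:

from unittest.mock import patch

# The bottom-most @patch binds to the first mock parameter, mirroring
# test_apply_with_x_is_310p above.
@patch('os.path.isdir')   # top decorator    -> second mock argument
@patch('os.path.isfile')  # bottom decorator -> first mock argument
def demo(mock_isfile, mock_isdir):
    # Two distinct MagicMock objects, injected bottom-up.
    print(mock_isfile is mock_isdir)  # False

demo()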

vllm_ascend/quantization/w8a16.py

Lines changed: 7 additions & 6 deletions
@@ -19,7 +19,9 @@
 
 import torch
 import torch_npu
-from vllm_ascend.utils import ACL_FORMAT_FRACTAL_NZ, AscendDeviceType, get_ascend_device_type, is_enable_nz
+
+from vllm_ascend.utils import (ACL_FORMAT_FRACTAL_NZ, AscendDeviceType,
+                               get_ascend_device_type, is_enable_nz)
 
 
 class AscendW8A16LinearMethod:
@@ -29,7 +31,8 @@ class AscendW8A16LinearMethod:
 
     def __init__(self) -> None:
         # aclnn quant matmul requires to transpose matrix B, set to true by default.
-        self.transpose_weight = get_ascend_device_type() != AscendDeviceType._310P
+        self.transpose_weight = get_ascend_device_type(
+        ) != AscendDeviceType._310P
 
     @staticmethod
     def get_weight(
@@ -82,16 +85,14 @@ def apply(
                 weight=layer.weight.data.transpose(0, 1),
                 antiquant_scale=layer.weight_scale,
                 antiquant_offset=layer.weight_offset,
-                bias=bias
-            )
+                bias=bias)
         else:
             output = torch_npu.npu_weight_quant_batchmatmul(
                 x=x,
                 weight=layer.weight,
                 antiquant_scale=layer.weight_scale,
                 antiquant_offset=layer.weight_offset,
-                bias=bias
-            )
+                bias=bias)
         return output
 
     def process_weights_after_loading(self, layer):
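
For readers skimming the hunks, a hedged sketch of the apply() call paths after this commit. Only the npu_weight_quant_batchmatmul keyword arguments appear verbatim in the diff; the function signature and the 310P branch condition are assumptions inferred from transpose_weight in __init__:

import torch
import torch_npu

from vllm_ascend.utils import AscendDeviceType, get_ascend_device_type


def apply(layer, x: torch.Tensor, bias=None) -> torch.Tensor:
    # Assumption: on 310P the weight is not pre-transposed at load time
    # (transpose_weight is False there), so it is transposed per call.
    if get_ascend_device_type() == AscendDeviceType._310P:
        output = torch_npu.npu_weight_quant_batchmatmul(
            x=x,
            weight=layer.weight.data.transpose(0, 1),
            antiquant_scale=layer.weight_scale,
            antiquant_offset=layer.weight_offset,
            bias=bias)
    else:
        # Non-310P: the weight was already transposed after loading.
        output = torch_npu.npu_weight_quant_batchmatmul(
            x=x,
            weight=layer.weight,
            antiquant_scale=layer.weight_scale,
            antiquant_offset=layer.weight_offset,
            bias=bias)
    return output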
