chore: ignore SIM rules
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
aarnphm committed Sep 13, 2024
1 parent 2dadfea commit c1cc0ef
Showing 8 changed files with 18 additions and 20 deletions.
4 changes: 2 additions & 2 deletions format.sh
@@ -159,7 +159,7 @@ echo 'vLLM codespell: Done'

# Lint specified files
lint() {
ruff "$@"
ruff check "$@"
}

# Lint files that differ from main branch. Ignores dirs that are not slated
@@ -175,7 +175,7 @@ lint_changed() {

if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
-ruff
+ruff check
fi

}
8 changes: 3 additions & 5 deletions pyproject.toml
@@ -12,8 +12,8 @@ requires = [
build-backend = "setuptools.build_meta"

[tool.ruff]
-# Allow lines to be as long as 80.
-line-length = 80
+# Allow lines to be as long as 119
+line-length = 119
exclude = [
# External file, leaving license intact
"examples/fp8/quantizer/quantize.py"
@@ -26,11 +26,9 @@ select = [
# Pyflakes
"F",
# pyupgrade
"UP",
# "UP",
# flake8-bugbear
"B",
-# flake8-simplify
-"SIM",
# isort
# "I",
"G",
2 changes: 1 addition & 1 deletion tests/multimodal/test_base.py
@@ -5,7 +5,7 @@

def assert_nested_tensors_equal(expected: NestedTensors,
actual: NestedTensors):
-assert type(expected) == type(actual)
+assert isinstance(expected, actual)
if isinstance(expected, torch.Tensor):
assert torch.equal(expected, actual)
else:
4 changes: 2 additions & 2 deletions tests/test_logger.py
@@ -111,7 +111,7 @@ def test_an_error_is_raised_when_custom_logging_config_file_does_not_exist():
configuration occurs."""
with pytest.raises(RuntimeError) as ex_info:
_configure_vllm_root_logger()
-assert ex_info.type == RuntimeError
+assert isinstance(ex_info.type, RuntimeError)
assert "File does not exist" in str(ex_info)


@@ -152,7 +152,7 @@ def test_an_error_is_raised_when_custom_logging_config_is_unexpected_json(
logging_config_file.name):
with pytest.raises(ValueError) as ex_info:
_configure_vllm_root_logger()
-assert ex_info.type == ValueError
+assert isinstance(ex_info.type, ValueError)
assert "Invalid logging config. Expected Dict, got" in str(ex_info)


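For reference on the attribute being asserted in these two hunks: pytest's `ExceptionInfo` exposes the raised exception class as `.type` and the raised instance as `.value`. A minimal, self-contained sketch of that API (the function and test names here are made up for illustration, not vLLM code):

```python
import pytest


def _load_config():
    # Hypothetical stand-in for _configure_vllm_root_logger().
    raise RuntimeError("File does not exist: /some/path.json")


def test_error_type_and_message():
    with pytest.raises(RuntimeError) as ex_info:
        _load_config()
    # .type is the exception class itself; .value is the raised instance.
    assert ex_info.type is RuntimeError
    assert isinstance(ex_info.value, RuntimeError)
    assert "File does not exist" in str(ex_info.value)
```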
4 changes: 2 additions & 2 deletions vllm/attention/ops/triton_flash_attention.py
@@ -126,7 +126,7 @@ def _attn_fwd_inner(
# We start from end of seqlen_k so only the first iteration would need
# to be checked for padding if it is not a multiple of block_n
# TODO: This can be optimized to only be true for the padded block.
-if MASK_STEPS: # noqa: SIM102
+if MASK_STEPS:
# If this is the last block / iteration, we want to
# mask if the sequence length is not a multiple of block size
# a solution is to always do BLOCK_M // BLOCK_N + 1 steps
@@ -621,7 +621,7 @@ def attn_fwd(
start_m_idx = start_m * BLOCK_M
causal_start_idx = seqlen_q - seqlen_k
acc = acc.to(Out.type.element_ty)
-if IS_CAUSAL: # noqa: SIM102
+if IS_CAUSAL:
if causal_start_idx > start_m_idx and causal_start_idx < end_m_idx:
out_mask_boundary = tl.full((BLOCK_DMODEL, ),
causal_start_idx,
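The second hunk above gates a partial-masking path on whether the causal boundary (`causal_start_idx = seqlen_q - seqlen_k`) falls inside the current BLOCK_M tile of query rows. A rough pure-Python illustration of that condition, with made-up sizes and assuming `end_m_idx` marks the end of the same tile:

```python
# Illustrative numbers only; not taken from a real kernel launch.
BLOCK_M = 64
seqlen_q, seqlen_k = 200, 150

start_m = 0                                # index of the query tile
start_m_idx = start_m * BLOCK_M            # 0
end_m_idx = start_m_idx + BLOCK_M          # 64, assumed end of this tile
causal_start_idx = seqlen_q - seqlen_k     # 50

# The boundary falls strictly inside [start_m_idx, end_m_idx), so this
# tile straddles the causal edge and needs per-row masking of the output.
needs_partial_mask = start_m_idx < causal_start_idx < end_m_idx
print(needs_partial_mask)                  # True
```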
6 changes: 3 additions & 3 deletions vllm/engine/async_llm_engine.py
@@ -806,7 +806,7 @@ async def generate(
request_id: The unique id of the request.
lora_request: LoRA request to use for generation, if any.
trace_headers: OpenTelemetry trace headers.
-prompt_adapter_request: Prompt Adapter request to use
+prompt_adapter_request: Prompt Adapter request to use
for generation, if any.
Yields:
@@ -1022,15 +1022,15 @@ def remove_logger(self, logger_name: str) -> None:
async def start_profile(self) -> None:
# using type instead of isinstance to check to avoid capturing
# inherited classes
-if type(self.engine.model_executor) == GPUExecutorAsync:
+if isinstance(self.engine.model_executor, GPUExecutorAsync):
self.engine.model_executor.start_profile()
else:
self.engine.model_executor._run_workers("start_profile")

async def stop_profile(self) -> None:
# using type instead of isinstance to check to avoid capturing
# inherited classes
-if type(self.engine.model_executor) == GPUExecutorAsync:
+if isinstance(self.engine.model_executor, GPUExecutorAsync):
self.engine.model_executor.stop_profile()
else:
self.engine.model_executor._run_workers("stop_profile")
6 changes: 3 additions & 3 deletions vllm/engine/llm_engine.py
@@ -144,7 +144,7 @@ class LLMEngine:
decoding.
executor_class: The model executor class for managing distributed
execution.
-prompt_adapter_config (Optional): The configuration related to serving
+prompt_adapter_config (Optional): The configuration related to serving
prompt adapters.
log_stats: Whether to log statistics.
usage_context: Specified entry point, used for usage info collection.
@@ -1600,15 +1600,15 @@ def check_health(self) -> None:
def start_profile(self) -> None:
# using type instead of isinstance to check to avoid capturing
# inherited classes (MultiprocessingGPUExecutor)
-if type(self.model_executor) == GPUExecutor:
+if isinstance(self.model_executor, GPUExecutor):
self.model_executor.start_profile()
else:
self.model_executor._run_workers("start_profile")

def stop_profile(self) -> None:
# using type instead of isinstance to check to avoid capturing
# inherited classes (MultiprocessingGPUExecutor)
-if type(self.model_executor) == GPUExecutor:
+if isinstance(self.model_executor, GPUExecutor):
self.model_executor.stop_profile()
else:
self.model_executor._run_workers("stop_profile")
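The comments in the two engine hunks above explain why the exact-type check was used originally: `type(x) == C` matches only `C` itself, while `isinstance(x, C)` also accepts subclasses such as the multiprocessing executor the comment mentions. A minimal sketch of the difference (the classes below are stand-ins, not vLLM's real executors):

```python
class GPUExecutor:                               # stand-in base class
    pass


class MultiprocessingGPUExecutor(GPUExecutor):   # stand-in subclass
    pass


executor = MultiprocessingGPUExecutor()

# Exact-type check: does not match the subclass instance.
print(type(executor) == GPUExecutor)      # False
# isinstance: matches the base class and any subclass.
print(isinstance(executor, GPUExecutor))  # True
```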
@@ -67,9 +67,9 @@ def __call__(self, input_ids: List[int],
instruction = self._guide.get_next_instruction(
state=self._fsm_state[seq_id])

-if type(instruction) == Generate:
+if isinstance(instruction, Generate):
allowed_tokens = instruction.tokens
-elif type(instruction) == Write:
+elif isinstance(instruction, Write):
# TODO: support fast forward tokens
allowed_tokens = [instruction.tokens[0]]
else:
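The last hunk branches on the kind of instruction the guide returns: a `Generate` instruction carries all tokens that may come next, while a `Write` instruction carries a fixed sequence of which only the first token is allowed per step. A self-contained sketch of that branching with placeholder dataclasses (not the real guide classes):

```python
from dataclasses import dataclass
from typing import List, Union


@dataclass
class Generate:            # placeholder: "any of these tokens may come next"
    tokens: List[int]


@dataclass
class Write:               # placeholder: "this exact sequence comes next"
    tokens: List[int]


def allowed_tokens(instruction: Union[Generate, Write]) -> List[int]:
    if isinstance(instruction, Generate):
        return instruction.tokens
    elif isinstance(instruction, Write):
        # TODO in the original: support fast-forwarding the whole sequence.
        return [instruction.tokens[0]]
    raise TypeError(f"Unsupported instruction type {type(instruction)}")


print(allowed_tokens(Generate([5, 9, 11])))  # [5, 9, 11]
print(allowed_tokens(Write([7, 8, 3])))      # [7]
```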
