diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
index 1a794af572fef..9a793fffade82 100644
--- a/.github/workflows/ruff.yml
+++ b/.github/workflows/ruff.yml
@@ -25,10 +25,10 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install ruff==0.1.5 codespell==2.3.0 tomli==2.0.1 isort==5.13.2
+        pip install ruff==0.5.5 codespell==2.3.0 tomli==2.0.1 isort==5.13.2
     - name: Analysing the code with ruff
       run: |
-        ruff .
+        ruff check .
     - name: Spelling check with codespell
       run: |
         codespell --toml pyproject.toml
diff --git a/format.sh b/format.sh
index a8fd95a1ea445..07ea45c22efb0 100755
--- a/format.sh
+++ b/format.sh
@@ -161,7 +161,7 @@ echo 'vLLM codespell: Done'
 
 # Lint specified files
 lint() {
-    ruff "$@"
+    ruff check "$@"
 }
 
 # Lint files that differ from main branch. Ignores dirs that are not slated
@@ -177,7 +177,7 @@ lint_changed() {
 
     if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
         git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
-            ruff
+            ruff check
     fi
 
 }
diff --git a/requirements-lint.txt b/requirements-lint.txt
index bd34227d3e824..e16767403299c 100644
--- a/requirements-lint.txt
+++ b/requirements-lint.txt
@@ -2,7 +2,7 @@
 yapf==0.32.0
 toml==0.10.2
 tomli==2.0.1
-ruff==0.1.5
+ruff==0.5.5
 codespell==2.3.0
 isort==5.13.2
 clang-format==18.1.5
diff --git a/tests/conftest.py b/tests/conftest.py
index c7a349f1e9e2a..3212d4307d50e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -98,11 +98,7 @@ def should_do_global_cleanup_after_test(request) -> bool:
     This can provide a ~10x speedup for non-GPU unit tests since they don't need
     to initialize torch.
     """
-
-    if request.node.get_closest_marker("skip_global_cleanup"):
-        return False
-
-    return True
+    return request.node.get_closest_marker("skip_global_cleanup") is None
 
 
 @pytest.fixture(autouse=True)
diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py
index 0bcae5b0c96dc..1c03c1515b091 100644
--- a/tests/lora/conftest.py
+++ b/tests/lora/conftest.py
@@ -64,11 +64,7 @@ def should_do_global_cleanup_after_test(request) -> bool:
     This can provide a ~10x speedup for non-GPU unit tests since they don't need
     to initialize torch.
     """
-
-    if request.node.get_closest_marker("skip_global_cleanup"):
-        return False
-
-    return True
+    return request.node.get_closest_marker("skip_global_cleanup") is None
 
 
 @pytest.fixture(autouse=True)
diff --git a/tests/spec_decode/e2e/conftest.py b/tests/spec_decode/e2e/conftest.py
index f9f246436c0f7..b46df7108f9cb 100644
--- a/tests/spec_decode/e2e/conftest.py
+++ b/tests/spec_decode/e2e/conftest.py
@@ -165,7 +165,7 @@ def create_llm_generator(baseline_or_test, request, common_llm_kwargs,
     test_name = request.node.name
 
     model = kwargs["model"]
-    draft_model = kwargs.get("speculative_model", None)
+    draft_model = kwargs.get("speculative_model")
     same_draft_target_model = (draft_model is not None
                                and draft_model == model)
 
diff --git a/tests/test_cache_block_hashing.py b/tests/test_cache_block_hashing.py
index fe413d1228021..3576a4834ebc3 100644
--- a/tests/test_cache_block_hashing.py
+++ b/tests/test_cache_block_hashing.py
@@ -66,8 +66,7 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,
 
             hashes.append([])
             prompts = [prefix + prompt for prompt in sample_prompts]
-            seq_id = 0
-            for prompt in prompts:
+            for seq_id, prompt in enumerate(prompts):
                 hashes[-1].append([])
                 prompt_token_ids = tokenizer.encode(prompt)
                 seq = Sequence(seq_id,
@@ -83,8 +82,6 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,
                 for idx in range(num_blocks):
                     hashes[-1][-1].append(seq.hash_of_block(idx))
 
-                seq_id += 1
-
     # Check that hashes made with two prefixes with different first blocks are
     # different everywhere.
     for hash0, hash1 in zip(flatten_2d(hashes[0]), flatten_2d(hashes[1])):
diff --git a/tests/test_logger.py b/tests/test_logger.py
index 52aa73761fd68..7004c64fd5fac 100644
--- a/tests/test_logger.py
+++ b/tests/test_logger.py
@@ -110,7 +110,7 @@ def test_an_error_is_raised_when_custom_logging_config_file_does_not_exist():
     configuration occurs."""
     with pytest.raises(RuntimeError) as ex_info:
         _configure_vllm_root_logger()
-    assert ex_info.type == RuntimeError
+    assert ex_info.type is RuntimeError
     assert "File does not exist" in str(ex_info)
 
 
@@ -151,7 +151,7 @@ def test_an_error_is_raised_when_custom_logging_config_is_unexpected_json(
                 logging_config_file.name):
         with pytest.raises(ValueError) as ex_info:
             _configure_vllm_root_logger()
-        assert ex_info.type == ValueError
+        assert ex_info.type is ValueError
        assert "Invalid logging config. Expected Dict, got" in str(ex_info)
 
 
diff --git a/tests/worker/test_model_runner.py b/tests/worker/test_model_runner.py
index 4a0e2b4184936..3c23ce261461c 100644
--- a/tests/worker/test_model_runner.py
+++ b/tests/worker/test_model_runner.py
@@ -235,11 +235,6 @@ def test_prepare_decode_cuda_graph(batch_size):
     torch.allclose(input_tokens, input_positions)
 
     # Verify Sampling
-    expected_selected_token_indices = []
-    selected_token_start_idx = 0
-    for _ in context_lens:
-        expected_selected_token_indices.append(selected_token_start_idx)
-        selected_token_start_idx += 1
     sampling_metadata = SamplingMetadata.prepare(
         seq_group_metadata_list,
         seq_lens,
@@ -248,7 +243,7 @@ def test_prepare_decode_cuda_graph(batch_size):
         device=model_runner.device,
         pin_memory=model_runner.pin_memory)
     actual = sampling_metadata.selected_token_indices
-    expected = torch.tensor(expected_selected_token_indices,
+    expected = torch.arange(len(context_lens),
                             device=actual.device,
                             dtype=actual.dtype)
     torch.testing.assert_close(actual, expected)
diff --git a/vllm/adapter_commons/utils.py b/vllm/adapter_commons/utils.py
index 6c5411f7d3d5c..1e9adca50093b 100644
--- a/vllm/adapter_commons/utils.py
+++ b/vllm/adapter_commons/utils.py
@@ -42,7 +42,7 @@ def list_adapters(registered_adapters: Dict[int, Any]) -> Dict[int, Any]:
 
 def get_adapter(adapter_id: int,
                 registered_adapters: Dict[int, Any]) -> Optional[Any]:
-    return registered_adapters.get(adapter_id, None)
+    return registered_adapters.get(adapter_id)
 
 
 ## worker functions
diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py
index f7cb2ee996501..0b2c065a84e32 100644
--- a/vllm/attention/backends/utils.py
+++ b/vllm/attention/backends/utils.py
@@ -23,10 +23,9 @@ def is_block_tables_empty(block_tables: Union[None, Dict]):
     """
     if block_tables is None:
         return True
-    if isinstance(block_tables, dict) and all(
-            value is None for value in block_tables.values()):
-        return True
-    return False
+
+    return isinstance(block_tables, dict) and all(
+        value is None for value in block_tables.values())
 
 
 def compute_slot_mapping_start_idx(is_prompt: bool, query_len: int,
diff --git a/vllm/core/block/prefix_caching_block.py b/vllm/core/block/prefix_caching_block.py
index d102ad4045591..08b381f352b0b 100644
--- a/vllm/core/block/prefix_caching_block.py
+++ b/vllm/core/block/prefix_caching_block.py
@@ -406,9 +406,7 @@ def all_block_ids(self) -> FrozenSet[int]:
 
     def is_block_cached(self, block: Block) -> bool:
         assert block.content_hash is not None
-        if block.content_hash in self._cached_blocks:
-            return True
-        return False
+        return block.content_hash in self._cached_blocks
 
     def promote_to_immutable_block(self, block: Block) -> BlockId:
         """Once a mutable block is full, it can be promoted to an immutable
diff --git a/vllm/core/block_manager_v2.py b/vllm/core/block_manager_v2.py
index b48ea1b19b82a..ced23c036c4dc 100644
--- a/vllm/core/block_manager_v2.py
+++ b/vllm/core/block_manager_v2.py
@@ -394,9 +394,7 @@ def can_swap_out(self, seq_group: SequenceGroup) -> bool:
         """
         alloc_status = self._can_swap(seq_group, Device.CPU,
                                       SequenceStatus.RUNNING)
-        if alloc_status == AllocStatus.OK:
-            return True
-        return False
+        return alloc_status == AllocStatus.OK
 
     def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
         """Returns the block id mapping (from GPU to CPU) generated by
diff --git a/vllm/model_executor/guided_decoding/outlines_logits_processors.py b/vllm/model_executor/guided_decoding/outlines_logits_processors.py
index 554dcc0ed43ed..be80d901d899d 100644
--- a/vllm/model_executor/guided_decoding/outlines_logits_processors.py
+++ b/vllm/model_executor/guided_decoding/outlines_logits_processors.py
@@ -67,9 +67,9 @@ def __call__(self, input_ids: List[int],
         instruction = self._guide.get_next_instruction(
             state=self._fsm_state[seq_id])
 
-        if type(instruction) == Generate:
+        if isinstance(instruction, Generate):
             allowed_tokens = instruction.tokens
-        elif type(instruction) == Write:
+        elif isinstance(instruction, Write):
             # TODO: support fast forward tokens
             allowed_tokens = [instruction.tokens[0]]
         else:
diff --git a/vllm/model_executor/layers/quantization/awq_marlin.py b/vllm/model_executor/layers/quantization/awq_marlin.py
index 2cc080608c7a9..144014f397672 100644
--- a/vllm/model_executor/layers/quantization/awq_marlin.py
+++ b/vllm/model_executor/layers/quantization/awq_marlin.py
@@ -110,9 +110,9 @@ def get_scaled_act_names(self) -> List[str]:
     def is_awq_marlin_compatible(cls, quant_config: Dict[str, Any]):
         # Extract data from quant config.
         quant_method = quant_config.get("quant_method", "").lower()
-        num_bits = quant_config.get("bits", None)
-        group_size = quant_config.get("group_size", None)
-        has_zp = quant_config.get("zero_point", None)
+        num_bits = quant_config.get("bits")
+        group_size = quant_config.get("group_size")
+        has_zp = quant_config.get("zero_point")
 
         if quant_method != "awq":
             return False
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
index 39d00bd5733ff..ed73b00ff021c 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 import torch
 from pydantic import BaseModel
@@ -25,7 +25,7 @@ class CompressedTensorsConfig(QuantizationConfig):
     def __init__(self,
                  target_scheme_map: Dict[str, Any],
                  ignore: List[str],
-                 quant_format: str,
+                 quant_format: Optional[str],
                  kv_cache_scheme: Optional[Dict[str, Any]] = None):
 
         self.ignore = ignore
@@ -67,8 +67,8 @@ def get_quant_method(
     @classmethod
     def from_config(cls, config: Dict[str, Any]) -> "CompressedTensorsConfig":
         target_scheme_map: Dict[str, Any] = dict()
-        ignore: List[str] = config.get("ignore", None)
-        quant_format: str = config.get("format", None)
+        ignore: List[str] = config.get("ignore", [])
+        quant_format: Union[str, None] = config.get("format")
 
         # The quant_config has multiple config_groups, each containing
         # an input_activations key with details about how the activations are
@@ -169,7 +169,8 @@ def _is_fp8_w8a8(self, weight_quant: BaseModel,
         is_symmetric_activation = input_quant.symmetric
         is_per_tensor_activation = (
             input_quant.strategy == QuantizationStrategy.TENSOR)
-        if not (is_symmetric_activation and is_per_tensor_activation):
+        if not (is_symmetric_activation  # noqa: SIM103
+                and is_per_tensor_activation):
             return False
 
         # All conditions satisfied.
@@ -191,7 +192,7 @@ def _is_fp8_w8a16(self, weight_quant: BaseModel,
         is_per_tensor_or_channel_weight = (weight_quant.strategy in [
             QuantizationStrategy.TENSOR, QuantizationStrategy.CHANNEL
         ])
-        if not (is_symmetric_weight and is_static_weight
+        if not (is_symmetric_weight and is_static_weight  # noqa: SIM103
                 and is_per_tensor_or_channel_weight):
             return False
 
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
index 7912cbde5721f..a9559179460a9 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
@@ -80,7 +80,7 @@ class QuantizationArgs(BaseModel):
     )
 
 
-def is_activation_quantization_format(format: str) -> bool:
+def is_activation_quantization_format(format: Optional[str]) -> bool:
     _ACTIVATION_QUANTIZATION_FORMATS = [
         CompressionFormat.naive_quantized.value,
         CompressionFormat.int_quantized.value,
diff --git a/vllm/model_executor/layers/quantization/gptq_marlin.py b/vllm/model_executor/layers/quantization/gptq_marlin.py
index 4a11b14971076..45c572c3c3e47 100644
--- a/vllm/model_executor/layers/quantization/gptq_marlin.py
+++ b/vllm/model_executor/layers/quantization/gptq_marlin.py
@@ -119,10 +119,10 @@ def get_scaled_act_names(self) -> List[str]:
     def is_gptq_marlin_compatible(cls, quant_config: Dict[str, Any]):
         # Extract data from quant config.
         quant_method = quant_config.get("quant_method", "").lower()
-        num_bits = quant_config.get("bits", None)
-        group_size = quant_config.get("group_size", None)
-        sym = quant_config.get("sym", None)
-        desc_act = quant_config.get("desc_act", None)
+        num_bits = quant_config.get("bits")
+        group_size = quant_config.get("group_size")
+        sym = quant_config.get("sym")
+        desc_act = quant_config.get("desc_act")
 
         if quant_method != "gptq":
             return False
diff --git a/vllm/model_executor/model_loader/tensorizer.py b/vllm/model_executor/model_loader/tensorizer.py
index b009ad8c882d4..2025e1fdeef81 100644
--- a/vllm/model_executor/model_loader/tensorizer.py
+++ b/vllm/model_executor/model_loader/tensorizer.py
@@ -401,9 +401,8 @@ def is_vllm_tensorized(tensorizer_config: "TensorizerConfig") -> bool:
             "inferred as vLLM models, so setting vllm_tensorized=True is "
             "only necessary for models serialized prior to this change.")
         return True
-    if (".vllm_tensorized_marker" in deserializer):
-        return True
-    return False
+
+    return ".vllm_tensorized_marker" in deserializer
 
 
 def serialize_vllm_model(
diff --git a/vllm/spec_decode/draft_model_runner.py b/vllm/spec_decode/draft_model_runner.py
index b76a1ab4cf243..03a6af7ddf0f6 100644
--- a/vllm/spec_decode/draft_model_runner.py
+++ b/vllm/spec_decode/draft_model_runner.py
@@ -241,10 +241,7 @@ def supports_gpu_multi_step(self, execute_model_req: ExecuteModelRequest):
             return False
 
         # TODO: Add soft-tuning prompt adapter support
-        if self.prompt_adapter_config:
-            return False
-
-        return True
+        return not self.prompt_adapter_config
 
     @torch.inference_mode()
     def execute_model(
diff --git a/vllm/spec_decode/metrics.py b/vllm/spec_decode/metrics.py
index 9036d117041f0..e6be820fecf43 100644
--- a/vllm/spec_decode/metrics.py
+++ b/vllm/spec_decode/metrics.py
@@ -102,10 +102,8 @@ def _should_collect_rejsample_metrics(self, now: float) -> bool:
         if self._rank != 0:
             return False
 
-        if (now - self._last_metrics_collect_time <
-                self._rejsample_metrics_collect_interval_s):
-            return False
-        return True
+        return (now - self._last_metrics_collect_time >=
+                self._rejsample_metrics_collect_interval_s)
 
     def _copy_rejsample_metrics_async(self) -> torch.cuda.Event:
         """Copy rejection/typical-acceptance sampling metrics
diff --git a/vllm/triton_utils/libentry.py b/vllm/triton_utils/libentry.py
index ae00af44a048a..3c3a8492a4de5 100644
--- a/vllm/triton_utils/libentry.py
+++ b/vllm/triton_utils/libentry.py
@@ -35,9 +35,9 @@ def key(self, spec_args, dns_args, const_args):
         dns_key = [
             arg.dtype if hasattr(
                 arg, "data_ptr") else type(arg) if not isinstance(arg, int)
-            else "i32" if -(2**31) <= arg and arg <= 2**31 -
-            1 else "u64" if 2**63 <= arg and arg <= 2**64 - 1 else "i64"
-            for arg in dns_args
+            else "i32" if -(2**31) <= arg and arg <= 2**31 - 1  # noqa: SIM300
+            else "u64" if 2**63 <= arg and arg <= 2**64 -  # noqa: SIM300
+            1 else "i64" for arg in dns_args
         ]
         # const args passed by position
         return tuple(spec_key + dns_key + const_args)