
[CI/Build] bump ruff version, fix linting issues
dtrifiro committed Aug 6, 2024
1 parent 1f26efb commit 6be352d
Showing 22 changed files with 43 additions and 69 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ruff.yml
@@ -25,10 +25,10 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
-pip install ruff==0.1.5 codespell==2.3.0 tomli==2.0.1 isort==5.13.2
+pip install ruff==0.5.5 codespell==2.3.0 tomli==2.0.1 isort==5.13.2
- name: Analysing the code with ruff
run: |
-ruff .
+ruff check .
- name: Spelling check with codespell
run: |
codespell --toml pyproject.toml
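Note on the change above: with ruff 0.5.x the bare "ruff ." invocation is gone and linting is run through the explicit "ruff check" subcommand, which is why both the workflow and format.sh below switch commands along with the version bump. A minimal sketch (assuming ruff >= 0.5 is installed) of driving the same check from Python:

import subprocess

# Run the linter the same way the CI step does; check=False so lint findings
# surface through the return code instead of raising CalledProcessError.
result = subprocess.run(["ruff", "check", "."], check=False)
print(f"ruff check exited with code {result.returncode}")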
4 changes: 2 additions & 2 deletions format.sh
@@ -161,7 +161,7 @@ echo 'vLLM codespell: Done'

# Lint specified files
lint() {
ruff "$@"
ruff check "$@"
}

# Lint files that differ from main branch. Ignores dirs that are not slated
@@ -177,7 +177,7 @@ lint_changed() {

if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
-ruff
+ruff check
fi

}
2 changes: 1 addition & 1 deletion requirements-lint.txt
@@ -2,7 +2,7 @@
yapf==0.32.0
toml==0.10.2
tomli==2.0.1
-ruff==0.1.5
+ruff==0.5.5
codespell==2.3.0
isort==5.13.2
clang-format==18.1.5
6 changes: 1 addition & 5 deletions tests/conftest.py
@@ -98,11 +98,7 @@ def should_do_global_cleanup_after_test(request) -> bool:
This can provide a ~10x speedup for non-GPU unit tests since they don't need
to initialize torch.
"""

-if request.node.get_closest_marker("skip_global_cleanup"):
-return False
-
-return True
+return request.node.get_closest_marker("skip_global_cleanup")


@pytest.fixture(autouse=True)
6 changes: 1 addition & 5 deletions tests/lora/conftest.py
@@ -64,11 +64,7 @@ def should_do_global_cleanup_after_test(request) -> bool:
This can provide a ~10x speedup for non-GPU unit tests since they don't need
to initialize torch.
"""

-if request.node.get_closest_marker("skip_global_cleanup"):
-return False
-
-return True
+return request.node.get_closest_marker("skip_global_cleanup")


@pytest.fixture(autouse=True)
2 changes: 1 addition & 1 deletion tests/spec_decode/e2e/conftest.py
@@ -165,7 +165,7 @@ def create_llm_generator(baseline_or_test, request, common_llm_kwargs,
test_name = request.node.name

model = kwargs["model"]
draft_model = kwargs.get("speculative_model", None)
draft_model = kwargs.get("speculative_model")
same_draft_target_model = (draft_model is not None
and draft_model == model)

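The kwargs.get change above (and the matching ones in awq_marlin.py, gptq_marlin.py and adapter_commons/utils.py further down) drops a redundant default: dict.get already returns None when the key is missing. A tiny self-contained sketch with made-up data:

kwargs = {"model": "some-model"}  # hypothetical example data

# Both calls behave identically; the explicit None default is redundant.
assert kwargs.get("speculative_model", None) is None
assert kwargs.get("speculative_model") is None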
5 changes: 1 addition & 4 deletions tests/test_cache_block_hashing.py
@@ -66,8 +66,7 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,

hashes.append([])
prompts = [prefix + prompt for prompt in sample_prompts]
-seq_id = 0
-for prompt in prompts:
+for seq_id, prompt in enumerate(prompts):
hashes[-1].append([])
prompt_token_ids = tokenizer.encode(prompt)
seq = Sequence(seq_id,
@@ -83,8 +82,6 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,
for idx in range(num_blocks):
hashes[-1][-1].append(seq.hash_of_block(idx))

-seq_id += 1
-
# Check that hashes made with two prefixes with different first blocks are
# different everywhere.
for hash0, hash1 in zip(flatten_2d(hashes[0]), flatten_2d(hashes[1])):
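The loop rewrite above replaces a manually incremented seq_id with enumerate(), which yields (index, item) pairs directly. A small sketch of the equivalence, using stand-in prompts rather than the test's data:

prompts = ["prefix a", "prefix b", "prefix c"]  # hypothetical stand-ins

# Old style: keep a counter by hand.
manual = []
seq_id = 0
for prompt in prompts:
    manual.append((seq_id, prompt))
    seq_id += 1

# New style: enumerate() produces the same (seq_id, prompt) pairs.
assert manual == list(enumerate(prompts))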
4 changes: 2 additions & 2 deletions tests/test_logger.py
@@ -110,7 +110,7 @@ def test_an_error_is_raised_when_custom_logging_config_file_does_not_exist():
configuration occurs."""
with pytest.raises(RuntimeError) as ex_info:
_configure_vllm_root_logger()
-assert ex_info.type == RuntimeError
+assert ex_info.type is RuntimeError
assert "File does not exist" in str(ex_info)


@@ -151,7 +151,7 @@ def test_an_error_is_raised_when_custom_logging_config_is_unexpected_json(
logging_config_file.name):
with pytest.raises(ValueError) as ex_info:
_configure_vllm_root_logger()
-assert ex_info.type == ValueError
+assert ex_info.type is ValueError
assert "Invalid logging config. Expected Dict, got" in str(ex_info)


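The assertion changes above follow the lint guidance that types should be compared by identity: ex_info.type holds the exception class itself, so "is RuntimeError" is the exact check, whereas "==" compares by equality. A minimal pytest sketch (pytest assumed installed; the helper name is made up):

import pytest


def load_missing_config():
    raise RuntimeError("File does not exist")


def test_load_missing_config():
    with pytest.raises(RuntimeError) as ex_info:
        load_missing_config()
    # Identity check on the exception class, as in the updated tests above.
    assert ex_info.type is RuntimeError
    assert "File does not exist" in str(ex_info)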
7 changes: 1 addition & 6 deletions tests/worker/test_model_runner.py
@@ -235,11 +235,6 @@ def test_prepare_decode_cuda_graph(batch_size):
torch.allclose(input_tokens, input_positions)

# Verify Sampling
-expected_selected_token_indices = []
-selected_token_start_idx = 0
-for _ in context_lens:
-expected_selected_token_indices.append(selected_token_start_idx)
-selected_token_start_idx += 1
sampling_metadata = SamplingMetadata.prepare(
seq_group_metadata_list,
seq_lens,
@@ -248,7 +243,7 @@ def test_prepare_decode_cuda_graph(batch_size):
device=model_runner.device,
pin_memory=model_runner.pin_memory)
actual = sampling_metadata.selected_token_indices
-expected = torch.tensor(expected_selected_token_indices,
+expected = torch.arange(len(context_lens),
device=actual.device,
dtype=actual.dtype)
torch.testing.assert_close(actual, expected)
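The hunk above swaps a loop that appends 0, 1, 2, ... for torch.arange, which builds the same consecutive indices in one call. A short sketch (PyTorch assumed available, lengths made up):

import torch

context_lens = [7, 3, 5, 2]  # hypothetical per-sequence context lengths

# Old style: grow a Python list one index at a time.
expected_selected_token_indices = []
selected_token_start_idx = 0
for _ in context_lens:
    expected_selected_token_indices.append(selected_token_start_idx)
    selected_token_start_idx += 1
loop_version = torch.tensor(expected_selected_token_indices, dtype=torch.long)

# New style: one arange over the number of sequences gives the same tensor.
arange_version = torch.arange(len(context_lens), dtype=torch.long)

torch.testing.assert_close(loop_version, arange_version)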
2 changes: 1 addition & 1 deletion vllm/adapter_commons/utils.py
@@ -42,7 +42,7 @@ def list_adapters(registered_adapters: Dict[int, Any]) -> Dict[int, Any]:

def get_adapter(adapter_id: int,
registered_adapters: Dict[int, Any]) -> Optional[Any]:
-return registered_adapters.get(adapter_id, None)
+return registered_adapters.get(adapter_id)


## worker functions
7 changes: 3 additions & 4 deletions vllm/attention/backends/utils.py
@@ -23,10 +23,9 @@ def is_block_tables_empty(block_tables: Union[None, Dict]):
"""
if block_tables is None:
return True
-if isinstance(block_tables, dict) and all(
-value is None for value in block_tables.values()):
-return True
-return False
+
+return isinstance(block_tables, dict) and all(
+value is None for value in block_tables.values())


def compute_slot_mapping_start_idx(is_prompt: bool, query_len: int,
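This hunk, like the ones that follow in prefix_caching_block.py, block_manager_v2.py, tensorizer.py, draft_model_runner.py and metrics.py, applies the simplification of returning a boolean expression directly instead of branching to return True or False. A before/after sketch with a hypothetical helper, not the vLLM function itself:

from typing import Dict, Optional


def is_empty_verbose(tables: Optional[Dict]) -> bool:
    # Before: explicit True/False branches around a boolean condition.
    if tables is None:
        return True
    if isinstance(tables, dict) and all(v is None for v in tables.values()):
        return True
    return False


def is_empty_simplified(tables: Optional[Dict]) -> bool:
    # After: return the condition itself, as in the diff above.
    if tables is None:
        return True
    return isinstance(tables, dict) and all(v is None for v in tables.values())


for sample in (None, {}, {"a": None}, {"a": 1}):
    assert is_empty_verbose(sample) == is_empty_simplified(sample)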
4 changes: 1 addition & 3 deletions vllm/core/block/prefix_caching_block.py
@@ -406,9 +406,7 @@ def all_block_ids(self) -> FrozenSet[int]:

def is_block_cached(self, block: Block) -> bool:
assert block.content_hash is not None
-if block.content_hash in self._cached_blocks:
-return True
-return False
+return block.content_hash in self._cached_blocks

def promote_to_immutable_block(self, block: Block) -> BlockId:
"""Once a mutable block is full, it can be promoted to an immutable
4 changes: 1 addition & 3 deletions vllm/core/block_manager_v2.py
@@ -394,9 +394,7 @@ def can_swap_out(self, seq_group: SequenceGroup) -> bool:
"""
alloc_status = self._can_swap(seq_group, Device.CPU,
SequenceStatus.RUNNING)
-if alloc_status == AllocStatus.OK:
-return True
-return False
+return alloc_status == AllocStatus.OK

def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
"""Returns the block id mapping (from GPU to CPU) generated by
@@ -67,9 +67,9 @@ def __call__(self, input_ids: List[int],
instruction = self._guide.get_next_instruction(
state=self._fsm_state[seq_id])

-if type(instruction) == Generate:
+if isinstance(instruction, Generate):
allowed_tokens = instruction.tokens
-elif type(instruction) == Write:
+elif isinstance(instruction, Write):
# TODO: support fast forward tokens
allowed_tokens = [instruction.tokens[0]]
else:
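The two changes above replace type() comparisons with isinstance(), which is the idiomatic way to check an object's class and also accepts subclasses. A minimal sketch mirroring the structure of the code above, with hypothetical stand-in classes rather than the real Generate/Write instructions:

class Generate:
    def __init__(self, tokens):
        self.tokens = tokens


class Write:
    def __init__(self, tokens):
        self.tokens = tokens


def allowed_tokens(instruction):
    if isinstance(instruction, Generate):
        return instruction.tokens           # all candidate tokens
    elif isinstance(instruction, Write):
        return [instruction.tokens[0]]      # only the next forced token
    raise TypeError(f"Unsupported instruction type {type(instruction)}")


print(allowed_tokens(Generate([1, 2, 3])))  # [1, 2, 3]
print(allowed_tokens(Write([4, 5])))        # [4]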
6 changes: 3 additions & 3 deletions vllm/model_executor/layers/quantization/awq_marlin.py
@@ -110,9 +110,9 @@ def get_scaled_act_names(self) -> List[str]:
def is_awq_marlin_compatible(cls, quant_config: Dict[str, Any]):
# Extract data from quant config.
quant_method = quant_config.get("quant_method", "").lower()
num_bits = quant_config.get("bits", None)
group_size = quant_config.get("group_size", None)
has_zp = quant_config.get("zero_point", None)
num_bits = quant_config.get("bits")
group_size = quant_config.get("group_size")
has_zp = quant_config.get("zero_point")

if quant_method != "awq":
return False
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union

import torch
from pydantic import BaseModel
@@ -25,7 +25,7 @@ class CompressedTensorsConfig(QuantizationConfig):
def __init__(self,
target_scheme_map: Dict[str, Any],
ignore: List[str],
-quant_format: str,
+quant_format: Optional[str],
kv_cache_scheme: Optional[Dict[str, Any]] = None):

self.ignore = ignore
@@ -67,8 +67,8 @@ def get_quant_method(
@classmethod
def from_config(cls, config: Dict[str, Any]) -> "CompressedTensorsConfig":
target_scheme_map: Dict[str, Any] = dict()
ignore: List[str] = config.get("ignore", None)
quant_format: str = config.get("format", None)
ignore: List[str] = config.get("ignore", [])
quant_format: Union[str, None] = config.get("format")

# The quant_config has multiple config_groups, each containing
# an input_activations key with details about how the activations are
@@ -169,7 +169,8 @@ def _is_fp8_w8a8(self, weight_quant: BaseModel,
is_symmetric_activation = input_quant.symmetric
is_per_tensor_activation = (
input_quant.strategy == QuantizationStrategy.TENSOR)
-if not (is_symmetric_activation and is_per_tensor_activation):
+if not (is_symmetric_activation  # noqa: SIM103
+and is_per_tensor_activation):
return False

# All conditions satisfied.
@@ -191,7 +192,7 @@ def _is_fp8_w8a16(self, weight_quant: BaseModel,
is_per_tensor_or_channel_weight = (weight_quant.strategy in [
QuantizationStrategy.TENSOR, QuantizationStrategy.CHANNEL
])
-if not (is_symmetric_weight and is_static_weight
+if not (is_symmetric_weight and is_static_weight  # noqa: SIM103
and is_per_tensor_or_channel_weight):
return False

@@ -80,7 +80,7 @@ class QuantizationArgs(BaseModel):
)


-def is_activation_quantization_format(format: str) -> bool:
+def is_activation_quantization_format(format: Optional[str]) -> bool:
_ACTIVATION_QUANTIZATION_FORMATS = [
CompressionFormat.naive_quantized.value,
CompressionFormat.int_quantized.value,
8 changes: 4 additions & 4 deletions vllm/model_executor/layers/quantization/gptq_marlin.py
@@ -119,10 +119,10 @@ def get_scaled_act_names(self) -> List[str]:
def is_gptq_marlin_compatible(cls, quant_config: Dict[str, Any]):
# Extract data from quant config.
quant_method = quant_config.get("quant_method", "").lower()
num_bits = quant_config.get("bits", None)
group_size = quant_config.get("group_size", None)
sym = quant_config.get("sym", None)
desc_act = quant_config.get("desc_act", None)
num_bits = quant_config.get("bits")
group_size = quant_config.get("group_size")
sym = quant_config.get("sym")
desc_act = quant_config.get("desc_act")

if quant_method != "gptq":
return False
5 changes: 2 additions & 3 deletions vllm/model_executor/model_loader/tensorizer.py
@@ -401,9 +401,8 @@ def is_vllm_tensorized(tensorizer_config: "TensorizerConfig") -> bool:
"inferred as vLLM models, so setting vllm_tensorized=True is "
"only necessary for models serialized prior to this change.")
return True
if (".vllm_tensorized_marker" in deserializer):
return True
return False

return ".vllm_tensorized_marker" in deserializer


def serialize_vllm_model(
5 changes: 1 addition & 4 deletions vllm/spec_decode/draft_model_runner.py
@@ -241,10 +241,7 @@ def supports_gpu_multi_step(self, execute_model_req: ExecuteModelRequest):
return False

# TODO: Add soft-tuning prompt adapter support
-if self.prompt_adapter_config:
-return False
-
-return True
+return self.prompt_adapter_config

@torch.inference_mode()
def execute_model(
6 changes: 2 additions & 4 deletions vllm/spec_decode/metrics.py
@@ -102,10 +102,8 @@ def _should_collect_rejsample_metrics(self, now: float) -> bool:
if self._rank != 0:
return False

-if (now - self._last_metrics_collect_time <
-self._rejsample_metrics_collect_interval_s):
-return False
-return True
+return (now - self._last_metrics_collect_time <
+self._rejsample_metrics_collect_interval_s)

def _copy_rejsample_metrics_async(self) -> torch.cuda.Event:
"""Copy rejection/typical-acceptance sampling metrics
6 changes: 3 additions & 3 deletions vllm/triton_utils/libentry.py
@@ -35,9 +35,9 @@ def key(self, spec_args, dns_args, const_args):
dns_key = [
arg.dtype if hasattr(
arg, "data_ptr") else type(arg) if not isinstance(arg, int)
else "i32" if -(2**31) <= arg and arg <= 2**31 -
1 else "u64" if 2**63 <= arg and arg <= 2**64 - 1 else "i64"
for arg in dns_args
else "i32" if -(2**31) <= arg and arg <= 2**31 - 1 # noqa: SIM300
else "u64" if 2**63 <= arg and arg <= 2**64 - # noqa: SIM300
1 else "i64" for arg in dns_args
]
# const args passed by position
return tuple(spec_key + dns_key + const_args)
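The noqa markers added in this file and in compressed_tensors.py suppress a single lint rule on a single line; here SIM300 (a constant on the left-hand side of a comparison) is silenced rather than reshuffling an already dense expression. A tiny sketch of how such a suppression works (assuming the SIM rules are enabled in the project's ruff configuration):

value = 42

# Without the trailing comment, ruff would flag the constant-on-the-left comparison.
if 42 == value:  # noqa: SIM300
    print("matched")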
