Commit 439a0ea

DarkLight1337 and Akshat-Tripathi authored and committed
[CI/Build] Fix pre-commit errors (vllm-project#13696)
1 parent e4f5b9c · commit 439a0ea

File tree

6 files changed (+24 -17 lines changed)

benchmarks/benchmark_latency.py
Lines changed: 3 additions & 3 deletions

@@ -43,9 +43,9 @@ def main(args: argparse.Namespace):
     # the engine will automatically process the request in multiple batches.
     llm = LLM(**dataclasses.asdict(engine_args))
     assert llm.llm_engine.model_config.max_model_len >= (
-        args.input_len + args.output_len), (
-            "Please ensure that max_model_len is greater than"
-            " the sum of input_len and output_len.")
+        args.input_len +
+        args.output_len), ("Please ensure that max_model_len is greater than"
+                           " the sum of input_len and output_len.")
 
     sampling_params = SamplingParams(
         n=args.n,
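The change only re-wraps the assertion; the check itself is unchanged: the model's context window must cover the prompt plus the generated tokens. A small standalone illustration with made-up numbers (the names mirror the benchmark flags, the values are arbitrary):

```python
max_model_len = 2048   # model context window
input_len = 1536       # corresponds to --input-len
output_len = 256       # corresponds to --output-len

# Same constraint the benchmark enforces before building requests.
assert max_model_len >= input_len + output_len, (
    "Please ensure that max_model_len is greater than"
    " the sum of input_len and output_len.")
print("OK: prompt and generation fit in the context window")
```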

vllm/entrypoints/openai/serving_engine.py
Lines changed: 1 addition & 1 deletion

@@ -523,7 +523,7 @@ def _get_decoded_token(logprob: Logprob,
             return logprob.decoded_token
         return tokenizer.decode(token_id)
 
-    def _is_model_supported(self, model_name) -> bool:
+    def _is_model_supported(self, model_name: Optional[str]) -> bool:
         if not model_name:
             return True
         return self.models.is_base_model(model_name)
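The only change here is the missing `Optional[str]` annotation, which tells the type checker that a `None` or empty model name is expected and handled by the early return. A minimal sketch of the same pattern, with a hypothetical `KNOWN_MODELS` set standing in for the real model registry:

```python
from typing import Optional

# Hypothetical stand-in for the served-model registry.
KNOWN_MODELS = {"facebook/opt-125m"}


def is_model_supported(model_name: Optional[str]) -> bool:
    # A missing/empty name falls back to the default model, so it is accepted.
    if not model_name:
        return True
    return model_name in KNOWN_MODELS


print(is_model_supported(None))                 # True (default model)
print(is_model_supported("facebook/opt-125m"))  # True
print(is_model_supported("unknown-model"))      # False
```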

vllm/entrypoints/openai/serving_score.py
Lines changed: 6 additions & 1 deletion

@@ -358,7 +358,12 @@ async def do_rerank(
                 request.truncate_prompt_tokens,
             )
             return self.request_output_to_rerank_response(
-                final_res_batch, request_id, self._get_model_name(request.model), documents, top_n)
+                final_res_batch,
+                request_id,
+                self._get_model_name(request.model),
+                documents,
+                top_n,
+            )
         except asyncio.CancelledError:
             return self.create_error_response("Client disconnected")
         except ValueError as e:

vllm/model_executor/layers/mamba/mamba_mixer2.py
Lines changed: 5 additions & 8 deletions

@@ -134,7 +134,7 @@ def extra_groups_for_head_shards(ngroups: int, tp_size: int):
         return 0
 
     # for n_groups == 1, this is exactly tp_size - n_groups
-    return tp_size - ngroups
+    return tp_size - ngroups
 
 
 def mamba_v2_sharded_weight_loader(
@@ -168,12 +168,9 @@ def loader(param: torch.Tensor, loaded_weight: torch.Tensor) -> None:
         # - compute the rank into the loaded shard.
         # - if there is replication, different TP shards will
         #   take from the same rank.
-        if duplicate_groups:
-            # NOTE: currently we only support duplication
-            # in the case where num_groups == 1
-            rank = 0
-        else:
-            rank = tp_rank
+        # NOTE: currently we only support duplication
+        # in the case where num_groups == 1
+        rank = 0 if duplicate_groups else tp_rank
 
         # - leftmost boundary index into loaded weight.
         loaded_skip = rank * shard_size
@@ -247,7 +244,7 @@ def __init__(self,
         assert num_heads % self.tp_size == 0, \
             "Tensor parallel world size must divide num heads."
 
-
+
         assert (n_groups % self.tp_size) == 0 or n_groups == 1, \
             (
                 "If tensor parallel world size does not divide num_heads, "

vllm/utils.py
Lines changed: 4 additions & 2 deletions

@@ -1198,10 +1198,12 @@ def check_port(self, value):
         try:
             value = int(value)
         except ValueError:
-            raise argparse.ArgumentTypeError("Port must be an integer")
+            msg = "Port must be an integer"
+            raise argparse.ArgumentTypeError(msg) from None
 
         if not (1024 <= value <= 65535):
-            raise argparse.ArgumentTypeError(
-                "Port must be between 1024 and 65535")
+            raise argparse.ArgumentTypeError(
+                "Port must be between 1024 and 65535")
 
         return value
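Binding the message to a variable and raising `from None` follows common lint guidance: `from None` suppresses the implicit exception chaining, so the user sees only the friendly `ArgumentTypeError` rather than the internal `ValueError` traceback as well. A minimal, self-contained sketch of the same pattern (the `port` converter and parser wiring below are illustrative, not the vLLM code):

```python
import argparse


def port(value: str) -> int:
    try:
        value = int(value)
    except ValueError:
        msg = "Port must be an integer"
        # "from None" drops the implicit "During handling of the above
        # exception, another exception occurred" chain, hiding the
        # internal ValueError from the user-facing error.
        raise argparse.ArgumentTypeError(msg) from None
    if not (1024 <= value <= 65535):
        raise argparse.ArgumentTypeError(
            "Port must be between 1024 and 65535")
    return value


parser = argparse.ArgumentParser()
parser.add_argument("--port", type=port, default=8000)
print(parser.parse_args(["--port", "8080"]).port)  # 8080
```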

vllm/v1/worker/gpu_model_runner.py
Lines changed: 5 additions & 2 deletions

@@ -1319,13 +1319,16 @@ def profile_run(self) -> None:
                 generators={},
                 max_num_logprobs=None,
                 no_penalties=True,
-                prompt_token_ids=torch.ones_like(logits, dtype=torch.int64),
+                prompt_token_ids=torch.ones_like(logits,
+                                                 dtype=torch.int64),
                 frequency_penalties=dummy_tensors(0.1),
                 presence_penalties=dummy_tensors(0.1),
                 repetition_penalties=dummy_tensors(0.1),
                 output_token_ids=[[] for _ in range(num_reqs)],
                 min_tokens={},
-                logit_bias=[None for _ in range(num_reqs)])
+                logit_bias=[None for _ in range(num_reqs)],
+                allowed_token_ids_mask=None,
+            )
             sampler_output = self.model.sample(
                 logits=logits, sampling_metadata=dummy_metadata)
         else:
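Passing `allowed_token_ids_mask=None` explicitly is presumably required because the sampling-metadata container gained that field without a default, so every constructor call site must now supply it. A stripped-down illustration with a hypothetical dataclass (not the real `SamplingMetadata` definition):

```python
from dataclasses import dataclass
from typing import Optional

import torch


@dataclass
class DummySamplingMetadata:
    # Hypothetical subset of fields, for illustration only.
    no_penalties: bool
    logit_bias: list[Optional[dict[int, float]]]
    # New field with no default: omitting it at a call site raises TypeError.
    allowed_token_ids_mask: Optional[torch.Tensor]


meta = DummySamplingMetadata(
    no_penalties=True,
    logit_bias=[None, None],
    allowed_token_ids_mask=None,  # explicitly disabled during the profile run
)
print(meta)
```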
