
Commit 7cd9ea1

using list for typing
1 parent 87bf00a commit 7cd9ea1
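
The change follows PEP 585: on Python 3.9 and later the built-in container types (`list`, `dict`, `tuple`) can be used directly as generic annotations, so the `typing` aliases `List`, `Dict`, and `Tuple` become unnecessary, while `Optional` still has to be imported. Below is a minimal sketch of the pattern; the function names and data are invented for illustration and are not taken from the benchmark files.

# Illustrative sketch only -- assumes Python 3.9+, where PEP 585 built-in generics apply.
from typing import Optional


# Before this style of change, the annotations would have read
# List[Tuple[str, int]], Dict[str, int], and Optional[str],
# with List/Dict/Tuple imported from typing.
def count_tokens(pairs: list[tuple[str, int]]) -> dict[str, int]:
    """Sum the integer counts for each string key."""
    totals: dict[str, int] = {}
    for key, count in pairs:
        totals[key] = totals.get(key, 0) + count
    return totals


def first_with_prefix(items: list[str], prefix: str) -> Optional[str]:
    """Return the first item starting with prefix, or None if there is none."""
    for item in items:
        if item.startswith(prefix):
            return item
    return None


if __name__ == "__main__":
    print(count_tokens([("a", 1), ("b", 2), ("a", 3)]))  # {'a': 4, 'b': 2}
    print(first_with_prefix(["foo", "bar"], "b"))        # bar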

File tree

benchmarks/profiling/benchmark_latency.py
benchmarks/profiling/benchmark_throughput.py

2 files changed: +18 -18 lines changed

benchmarks/profiling/benchmark_latency.py

Lines changed: 2 additions & 2 deletions
@@ -7,7 +7,7 @@
 import time
 from contextlib import contextmanager, nullcontext
 from pathlib import Path
-from typing import List, Optional
+from typing import Optional

 import numpy as np
 import torch
@@ -88,7 +88,7 @@ def get_profiling_context(profile_result_dir: Optional[str] = None):
     dummy_prompt_token_ids = np.random.randint(10000,
                                                size=(args.batch_size,
                                                      args.input_len))
-    dummy_prompts: List[PromptType] = [{
+    dummy_prompts: list[PromptType] = [{
         "prompt_token_ids": batch
     } for batch in dummy_prompt_token_ids.tolist()]

benchmarks/profiling/benchmark_throughput.py

Lines changed: 16 additions & 16 deletions
@@ -9,7 +9,7 @@
 from contextlib import contextmanager, nullcontext
 from functools import cache
 from pathlib import Path
-from typing import Dict, List, Optional, Tuple
+from typing import Optional

 import torch
 import uvloop
@@ -75,12 +75,12 @@ def lora_path_on_disk(lora_path: str) -> str:
     return get_adapter_absolute_path(lora_path)


-lora_tokenizer_cache: Dict[int, AnyTokenizer] = {}
+lora_tokenizer_cache: dict[int, AnyTokenizer] = {}


 def get_random_lora_request(
         args: argparse.Namespace
-) -> Tuple[LoRARequest, Optional[AnyTokenizer]]:
+) -> tuple[LoRARequest, Optional[AnyTokenizer]]:
     global lora_tokenizer_cache
     lora_id = random.randint(1, args.max_loras)
     lora_request = LoRARequest(lora_name=str(lora_id),
@@ -92,7 +92,7 @@ def get_random_lora_request(


 def sample_requests(tokenizer: PreTrainedTokenizerBase,
-                    args: argparse.Namespace) -> List[SampleRequest]:
+                    args: argparse.Namespace) -> list[SampleRequest]:

     dataset_path: str = args.dataset
     num_requests: int = args.num_prompts
@@ -110,7 +110,7 @@ def sample_requests(tokenizer: PreTrainedTokenizerBase,
     random.shuffle(dataset)

     # Filter out sequences that are too long or too short
-    filtered_dataset: List[SampleRequest] = []
+    filtered_dataset: list[SampleRequest] = []
     for data in tqdm(dataset,
                      total=len(filtered_dataset),
                      desc="sampling requests"):
@@ -166,7 +166,7 @@ def sample_requests(tokenizer: PreTrainedTokenizerBase,


 def run_vllm(
-    requests: List[SampleRequest],
+    requests: list[SampleRequest],
     n: int,
     engine_args: EngineArgs,
 ) -> float:
@@ -222,8 +222,8 @@ def get_profiling_context(profile_dir: Optional[str] = None):
     llm = LLM(**dataclasses.asdict(engine_args))

     # Add the requests to the engine.
-    prompts: List[TextPrompt] = []
-    sampling_params: List[SamplingParams] = []
+    prompts: list[TextPrompt] = []
+    sampling_params: list[SamplingParams] = []
     for request in requests:
         prompts.append(
             TextPrompt(prompt=request.prompt,
@@ -236,7 +236,7 @@ def get_profiling_context(profile_dir: Optional[str] = None):
                 ignore_eos=True,
                 max_tokens=request.expected_output_len,
             ))
-    lora_requests: Optional[List[LoRARequest]] = None
+    lora_requests: Optional[list[LoRARequest]] = None
     if engine_args.enable_lora:
         lora_requests = [request.lora_request for request in requests]

@@ -274,7 +274,7 @@ def get_profiling_context(profile_dir: Optional[str] = None):


 async def run_vllm_async(
-    requests: List[SampleRequest],
+    requests: list[SampleRequest],
     n: int,
     engine_args: AsyncEngineArgs,
     disable_frontend_multiprocessing: bool = False,
@@ -285,9 +285,9 @@ async def run_vllm_async(
             engine_args, disable_frontend_multiprocessing) as llm:

         # Add the requests to the engine.
-        prompts: List[TextPrompt] = []
-        sampling_params: List[SamplingParams] = []
-        lora_requests: List[Optional[LoRARequest]] = []
+        prompts: list[TextPrompt] = []
+        sampling_params: list[SamplingParams] = []
+        lora_requests: list[Optional[LoRARequest]] = []
         for request in requests:
             prompts.append(
                 TextPrompt(prompt=request.prompt,
@@ -319,7 +319,7 @@ async def run_vllm_async(


 def run_hf(
-    requests: List[SampleRequest],
+    requests: list[SampleRequest],
     model: str,
     tokenizer: PreTrainedTokenizerBase,
     n: int,
@@ -335,7 +335,7 @@ def run_hf(

     pbar = tqdm(total=len(requests))
     start = time.perf_counter()
-    batch: List[str] = []
+    batch: list[str] = []
     max_prompt_len = 0
     max_output_len = 0
     for i in range(len(requests)):
@@ -377,7 +377,7 @@ def run_hf(


 def run_mii(
-    requests: List[SampleRequest],
+    requests: list[SampleRequest],
     model: str,
     tensor_parallel_size: int,
     output_len: int,
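
One follow-up worth noting: `Optional` is the only name these files still import from `typing`. On Python 3.10+ the same annotation can be written with PEP 604 union syntax, though this commit does not make that change, presumably to keep Python 3.9 compatibility. A tiny illustrative snippet:

# Equivalent annotations; the second spelling needs Python 3.10+ (PEP 604).
from typing import Optional

profile_dir: Optional[str] = None
# profile_dir: str | None = None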
