Commit 36b40b1
debug benchmark_serving
mchen644 committed Oct 23, 2024
1 parent 88419ff commit 36b40b1
Showing 4 changed files with 37 additions and 33 deletions.
10 changes: 5 additions & 5 deletions benchmarks/1_serving_benchmark.sh
@@ -25,12 +25,12 @@ result_dir="${pwd}/result"
 # scheduler_policy=(infer)
 # swap_policies=(partial)
 declare -a scheduler_swap_policies
-# scheduler_swap_policies[0]="tfittradeoff partial"
+scheduler_swap_policies[0]="tfittradeoff partial"
 # scheduler_swap_policies[1]="fcfs full"
 # scheduler_swap_policies[2]="las full"
 # scheduler_swap_policies[1]="tfittradeoff full"
 # scheduler_swap_policies[2]="sjf full"
-scheduler_swap_policies[2]="srjf full"
+# scheduler_swap_policies[2]="srjf full"
 # scheduler_swap_policies[3]="sjmlfq full"
 # scheduler_swap_policies[3]="infer partial"
 # scheduler_swap_policies[4]="inferpreempt full"
@@ -45,10 +45,10 @@ iter_theshold=15
 max_serving_time=1200
 # request_rates[0]=0.5
 # request_rates[1]=1.0
-request_rates[1]=5.0
+# request_rates[1]=5.0
 # request_rates[2]=10.0
 # request_rates[3]=10.0
-# request_rates[4]=20.0
+request_rates[4]=20.0
 # request_rates[5]=50.0
 # request_rates[5]=30.0
 # request_rates[5]=50.0
@@ -57,7 +57,7 @@ request_rates[1]=5.0
 # request_rates=(2.0)
 swap_out_partial_rates=(0.5)
 waiting_iter_base=(0.1)
-gpu_devices=0
+gpu_devices=1
 for i in {0..0}; do
     for waiting_iter in "${waiting_iter_base[@]}"; do
         for swap_out_partial_rate in "${swap_out_partial_rates[@]}"; do
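Net effect of this script change: the benchmark now runs the tfittradeoff scheduler with partial swapping (instead of srjf with full swapping), at a request rate of 20.0 instead of 5.0, on GPU device 1 instead of 0.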
2 changes: 1 addition & 1 deletion benchmarks/backend_request_func.py
@@ -234,7 +234,7 @@ async def async_request_openai_completions(
 
     async with aiohttp.ClientSession(timeout=AIOHTTP_TIMEOUT) as session:
         assert not request_func_input.use_beam_search
-        if policy in ["srjf", "sjf", "fcfs"]:
+        if policy in ["srjf", "sjf"]:
             if request_func_input.min_tokens == None:
                 raise ValueError(f"For policy: {policy}, should specify min_tokens")
             payload = {
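For context on the change above: after this commit, `min_tokens` is required only for the size-aware policies (`srjf`, `sjf`); `fcfs` no longer carries it. A minimal sketch of the guarded payload construction, assuming an OpenAI-completions-style request body; `build_payload` and `SIZE_AWARE_POLICIES` are hypothetical names, and any field beyond `min_tokens` is an assumption:

```python
# Hypothetical sketch mirroring the guard in the diff above; not the
# repository's actual helper.
SIZE_AWARE_POLICIES = {"srjf", "sjf"}

def build_payload(policy: str, model: str, prompt: str,
                  max_tokens: int, min_tokens=None) -> dict:
    payload = {
        "model": model,
        "prompt": prompt,
        "max_tokens": max_tokens,
        "stream": True,
    }
    if policy in SIZE_AWARE_POLICIES:
        # srjf/sjf schedule by expected job size, so the request must
        # declare how many tokens it is expected to produce.
        if min_tokens is None:
            raise ValueError(f"For policy: {policy}, should specify min_tokens")
        payload["min_tokens"] = min_tokens
    return payload
```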
10 changes: 7 additions & 3 deletions benchmarks/benchmark_serving.py
@@ -500,6 +500,7 @@ async def handle_requests():
                     pbar=pbar,)
                 )
             )
+
         else:
             tasks.append(
                 asyncio.create_task(
@@ -564,7 +565,7 @@ async def benchmark(
     # data_worker.append(data1)
 
     for i in range(num_workers):
-        if scheduler_policy in ["srjf", "sjf", "fcfs"]:
+        if scheduler_policy in ["srjf", "sjf"]:
             worker = multiprocessing.Process(target=process_requests, args=(backend, args, pbar, request_func))
         else:
             worker = multiprocessing.Process(target=process_requests, args=(backend, args, pbar, request_func))
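Note that both branches of this `if` currently spawn an identical worker; the conditional only becomes meaningful if the `srjf`/`sjf` branch later passes different arguments (for example, the min_tokens table) to `process_requests`.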
@@ -573,7 +574,10 @@
 
     async for request in get_request_duration(input_requests, request_rate, request_duration, scheduler_policy):
         prompt, prompt_len, output_len = request
-        min_tokens = copy.deepcopy(data[prompt])
+        if scheduler_policy in ["srjf", "sjf"]:
+            min_tokens = copy.deepcopy(data[prompt])
+        else:
+            min_tokens = None
         request_func_input = RequestFuncInput(
             model=model_id,
             prompt=prompt,
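The client side applies the same guard: a per-prompt `min_tokens` value is looked up only for `srjf`/`sjf`, since `data` stays `None` for every other policy (see the `main()` hunk below). A sketch under those assumptions; `lookup_min_tokens` is a hypothetical helper:

```python
import copy

def lookup_min_tokens(scheduler_policy: str, data, prompt: str):
    # data maps prompt -> expected output length; it is loaded only for
    # the size-aware policies and is None otherwise.
    if scheduler_policy in ("srjf", "sjf"):
        if data is None:
            raise ValueError("srjf/sjf require a prompt->min_tokens table")
        # deepcopy mirrors the diff: it keeps callers from mutating the
        # shared table entry.
        return copy.deepcopy(data[prompt])
    return None
```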
@@ -781,7 +785,7 @@ def main(args: argparse.Namespace):
     else:
         raise ValueError(f"Unknown dataset: {args.dataset_name}")
 
-    if args.scheduler_policy in ["srjf", "sjf", "fcfs"]:
+    if args.scheduler_policy in ["srjf", "sjf"]:
         data = None
         file_path = get_json_file()
         if file_path:
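The viewer truncates this hunk after `if file_path:`. A sketch of how the branch plausibly continues, assuming `get_json_file()` returns the path to a prompt-to-min_tokens JSON file; the loading logic past the shown lines is an assumption:

```python
import json

def load_min_tokens_table(scheduler_policy: str, file_path):
    # Only srjf/sjf consume the table; every other policy runs with data=None.
    if scheduler_policy not in ("srjf", "sjf"):
        return None
    data = None
    if file_path:
        with open(file_path) as f:
            data = json.load(f)  # assumed schema: {prompt: min_tokens}
    return data
```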
48 changes: 24 additions & 24 deletions benchmarks/result/analysis/result_analysis_1.ipynb

Large diffs are not rendered by default.
