Skip to content

Commit d45417b

Browse files
authored
fix ci issue distributed 4 gpu test (#20204)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
1 parent a29e62e commit d45417b

File tree

1 file changed: 18 additions (+18), 0 deletions (-0).

examples/offline_inference/data_parallel.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,18 @@ def parse_args():
6464
parser.add_argument(
6565
"--trust-remote-code", action="store_true", help="Trust remote code."
6666
)
67+
parser.add_argument(
68+
"--max-num-seqs",
69+
type=int,
70+
default=64,
71+
help=("Maximum number of sequences to be processed in a single iteration."),
72+
)
73+
parser.add_argument(
74+
"--gpu-memory-utilization",
75+
type=float,
76+
default=0.8,
77+
help=("Fraction of GPU memory vLLM is allowed to allocate (0.0, 1.0]."),
78+
)
6779
return parser.parse_args()
6880

6981

@@ -77,6 +89,8 @@ def main(
7789
GPUs_per_dp_rank,
7890
enforce_eager,
7991
trust_remote_code,
92+
max_num_seqs,
93+
gpu_memory_utilization,
8094
):
8195
os.environ["VLLM_DP_RANK"] = str(global_dp_rank)
8296
os.environ["VLLM_DP_RANK_LOCAL"] = str(local_dp_rank)
@@ -127,6 +141,8 @@ def start(rank):
127141
enforce_eager=enforce_eager,
128142
enable_expert_parallel=True,
129143
trust_remote_code=trust_remote_code,
144+
max_num_seqs=max_num_seqs,
145+
gpu_memory_utilization=gpu_memory_utilization,
130146
)
131147
outputs = llm.generate(prompts, sampling_params)
132148
# Print the outputs.
@@ -181,6 +197,8 @@ def start(rank):
181197
tp_size,
182198
args.enforce_eager,
183199
args.trust_remote_code,
200+
args.max_num_seqs,
201+
args.gpu_memory_utilization,
184202
),
185203
)
186204
proc.start()

Comments (0): this commit has no comments.