diff --git a/benchmark/config/countdown-template.yaml b/benchmark/config/countdown-template.yaml index 5f1c9801c5..96726c51e6 100644 --- a/benchmark/config/countdown-template.yaml +++ b/benchmark/config/countdown-template.yaml @@ -54,7 +54,7 @@ explorer: rollout_model: engine_num: 2 tensor_parallel_size: 1 - enforce_eager: true + enforce_eager: false enable_prefix_caching: false enable_chunked_prefill: false gpu_memory_utilization: 0.9 diff --git a/docs/sphinx_doc/source/tutorial/example_step_wise.md b/docs/sphinx_doc/source/tutorial/example_step_wise.md index c9703c905d..8463b3ad77 100644 --- a/docs/sphinx_doc/source/tutorial/example_step_wise.md +++ b/docs/sphinx_doc/source/tutorial/example_step_wise.md @@ -140,7 +140,7 @@ explorer: engine_num: 2 tensor_parallel_size: 2 enable_prefix_caching: false - enforce_eager: true + enforce_eager: false dtype: bfloat16 seed: 42 gpu_memory_utilization: 0.7 diff --git a/docs/sphinx_doc/source_zh/tutorial/example_step_wise.md b/docs/sphinx_doc/source_zh/tutorial/example_step_wise.md index 2909add310..a796d5d296 100644 --- a/docs/sphinx_doc/source_zh/tutorial/example_step_wise.md +++ b/docs/sphinx_doc/source_zh/tutorial/example_step_wise.md @@ -135,7 +135,7 @@ explorer: engine_num: 2 tensor_parallel_size: 2 enable_prefix_caching: false - enforce_eager: true + enforce_eager: false dtype: bfloat16 seed: 42 gpu_memory_utilization: 0.7 diff --git a/examples/agentscope_react/gsm8k.yaml b/examples/agentscope_react/gsm8k.yaml index ecc1477cf5..c1b79f7016 100644 --- a/examples/agentscope_react/gsm8k.yaml +++ b/examples/agentscope_react/gsm8k.yaml @@ -43,7 +43,7 @@ explorer: engine_num: 4 tensor_parallel_size: 1 enable_prefix_caching: false - enforce_eager: true + enforce_eager: false enable_openai_api: true enable_history: true enable_auto_tool_choice: true diff --git a/examples/agentscope_tool_react/agentscopev0_tool_react_dapo.yaml b/examples/agentscope_tool_react/agentscopev0_tool_react_dapo.yaml index 9463de670b..e03664557e 100644 
--- a/examples/agentscope_tool_react/agentscopev0_tool_react_dapo.yaml +++ b/examples/agentscope_tool_react/agentscopev0_tool_react_dapo.yaml @@ -44,7 +44,7 @@ explorer: engine_num: 4 tensor_parallel_size: 1 enable_prefix_caching: false - enforce_eager: true + enforce_eager: false enable_openai_api: true enable_history: true dtype: bfloat16 diff --git a/examples/agentscope_tool_react/agentscopev0_tool_react_gsm8k.yaml b/examples/agentscope_tool_react/agentscopev0_tool_react_gsm8k.yaml index e5c3ed1d71..4715ee9f58 100644 --- a/examples/agentscope_tool_react/agentscopev0_tool_react_gsm8k.yaml +++ b/examples/agentscope_tool_react/agentscopev0_tool_react_gsm8k.yaml @@ -44,7 +44,7 @@ explorer: engine_num: 4 tensor_parallel_size: 1 enable_prefix_caching: false - enforce_eager: true + enforce_eager: false enable_openai_api: true enable_history: true dtype: bfloat16 diff --git a/examples/agentscope_tool_react/agentscopev1_tool_react_dapo.yaml b/examples/agentscope_tool_react/agentscopev1_tool_react_dapo.yaml index 0fe9343077..4dd3ee76c8 100644 --- a/examples/agentscope_tool_react/agentscopev1_tool_react_dapo.yaml +++ b/examples/agentscope_tool_react/agentscopev1_tool_react_dapo.yaml @@ -44,7 +44,7 @@ explorer: engine_num: 4 tensor_parallel_size: 1 enable_prefix_caching: false - enforce_eager: true + enforce_eager: false enable_openai_api: true enable_history: true dtype: bfloat16 diff --git a/examples/agentscope_websearch/agentscopev1_websearch_agent.yaml b/examples/agentscope_websearch/agentscopev1_websearch_agent.yaml index 5002a26b4a..a6ad09cef8 100644 --- a/examples/agentscope_websearch/agentscopev1_websearch_agent.yaml +++ b/examples/agentscope_websearch/agentscopev1_websearch_agent.yaml @@ -68,7 +68,7 @@ explorer: engine_num: 4 tensor_parallel_size: 1 enable_prefix_caching: false - enforce_eager: true + enforce_eager: false dtype: bfloat16 seed: 42 gpu_memory_utilization: 0.7 diff --git a/trinity/common/config.py b/trinity/common/config.py index 
dd328acd55..c722959b96 100644 --- a/trinity/common/config.py +++ b/trinity/common/config.py @@ -447,7 +447,7 @@ class InferenceModelConfig: engine_num: int = 1 tensor_parallel_size: int = 1 use_v1: bool = True - enforce_eager: bool = True + enforce_eager: bool = False enable_prefix_caching: bool = False enable_chunked_prefill: bool = False gpu_memory_utilization: float = 0.9 diff --git a/trinity/common/models/vllm_model.py b/trinity/common/models/vllm_model.py index 11df1263bd..0133586f7c 100644 --- a/trinity/common/models/vllm_model.py +++ b/trinity/common/models/vllm_model.py @@ -53,6 +53,13 @@ def __init__( os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0" if get_vllm_version() >= parse_version("0.11.0"): os.environ["VLLM_ALLREDUCE_USE_SYMM_MEM"] = "0" + if not config.enforce_eager: + # To avoid torch compile conflicts when multiple models are started simultaneously. + # Remove this when the following PR is released: + # https://github.com/vllm-project/vllm/pull/27616 + os.environ["VLLM_CACHE_ROOT"] = os.path.expanduser( + f"~/.cache/vllm/{config.bundle_indices}" + ) self.default_sampling_params = vllm.SamplingParams( n=1, temperature=0.0,