@@ -557,8 +557,17 @@ def _check_ray_cgraph_installation(self):
557557 def _compiled_ray_dag (self , enable_asyncio : bool ):
558558 assert self .parallel_config .use_ray
559559 self ._check_ray_cgraph_installation ()
560+ # Raise the default value of "RAY_CGRAPH_get_timeout" to 300 seconds
561+ # (it is 10 seconds by default). This Ray environment variable controls
562+ # the timeout for getting the result of a compiled graph execution,
563+ # i.e., the distributed execution that, in the case of vLLM, includes
564+ # model forward runs and intermediate tensor communication.
565+ # Note: this env var must be set before importing ray.dag; otherwise
566+ # it will not take effect.
567+ os .environ .setdefault ("RAY_CGRAPH_get_timeout" , "300" ) # noqa: SIM112
560568 from ray .dag import InputNode , MultiOutputNode
561-
569+ logger .info ("RAY_CGRAPH_get_timeout is set to %s" ,
570+ os .environ ["RAY_CGRAPH_get_timeout" ]) # noqa: SIM112
562571 logger .info ("VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE = %s" ,
563572 envs .VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE )
564573 logger .info ("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM = %s" ,
@@ -570,15 +579,6 @@ def _compiled_ray_dag(self, enable_asyncio: bool):
570579 "Invalid value for VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE: "
571580 f"{ channel_type } . Valid values are: 'auto', 'nccl', or 'shm'." )
572581
573- # Enlarge the default value of "RAY_CGRAPH_get_timeout" to 300 seconds
574- # (it is 10 seconds by default). This is a Ray environment variable to
575- # control the timeout of getting result from a compiled graph execution,
576- # i.e., the distributed execution that includes model forward runs and
577- # intermediate tensor communications, in the case of vllm.
578- os .environ .setdefault ("RAY_CGRAPH_get_timeout" , "300" ) # noqa: SIM112
579- logger .info ("RAY_CGRAPH_get_timeout is set to %s" ,
580- os .environ ["RAY_CGRAPH_get_timeout" ]) # noqa: SIM112
581-
582582 with InputNode () as input_data :
583583 # Example DAG: PP=2, TP=4
584584 #
0 commit comments