diff --git a/python/sglang/lang/interpreter.py b/python/sglang/lang/interpreter.py index 44ea17f666e..55a20336bc7 100644 --- a/python/sglang/lang/interpreter.py +++ b/python/sglang/lang/interpreter.py @@ -54,7 +54,14 @@ def run_internal(state, program, func_args, func_kwargs, sync): def run_program( - program, backend, func_args, func_kwargs, default_sampling_para, stream, sync=False + program, + backend, + func_args, + func_kwargs, + default_sampling_para, + stream, + sync=False, + use_thread=True, ): if hasattr(backend, "endpoint"): backend = backend.endpoint @@ -67,6 +74,7 @@ def run_program( chat_template=None, stream=stream, num_api_spec_tokens=program.num_api_spec_tokens, + use_thread=use_thread, ) state = ProgramState(stream_executor) diff --git a/python/sglang/lang/ir.py b/python/sglang/lang/ir.py index 8164478ed39..d3c010108e3 100644 --- a/python/sglang/lang/ir.py +++ b/python/sglang/lang/ir.py @@ -168,6 +168,7 @@ def run( return_text_in_logprobs: Optional[bool] = None, stream: bool = False, backend=None, + use_thread: bool = True, **kwargs, ): from sglang.lang.interpreter import run_program @@ -195,7 +196,15 @@ def run( return_text_in_logprobs=return_text_in_logprobs, ) backend = backend or global_config.default_backend - return run_program(self, backend, args, kwargs, default_sampling_para, stream) + return run_program( + self, + backend, + args, + kwargs, + default_sampling_para, + stream, + use_thread=use_thread, + ) def run_batch( self,