@@ -11,9 +11,7 @@
 from llama_stack_client.types.agent_create_params import AgentConfig
 from llama_stack_client.types.agents.turn import CompletionMessage, Turn
 from llama_stack_client.types.agents.turn_create_params import Document, Toolgroup
-from llama_stack_client.types.agents.turn_create_response import (
-    AgentTurnResponseStreamChunk,
-)
+from llama_stack_client.types.agents.turn_create_response import AgentTurnResponseStreamChunk
 from llama_stack_client.types.shared.tool_call import ToolCall
 
 from .client_tool import ClientTool
@@ -143,7 +141,6 @@ def _create_turn_streaming(
         documents: Optional[List[Document]] = None,
     ) -> Iterator[AgentTurnResponseStreamChunk]:
         n_iter = 0
-        max_iter = self.agent_config.get("max_infer_iters", DEFAULT_MAX_ITER)
 
         # 1. create an agent turn
         turn_response = self.client.agents.turn.create(
@@ -170,12 +167,18 @@ def _create_turn_streaming(
                     yield chunk
                 else:
                     is_turn_complete = False
+                    # End of turn is reached, do not resume even if there's a tool call
+                    if chunk.event.payload.turn.output_message.stop_reason in {"end_of_turn"}:
+                        yield chunk
+                        break
+
                     turn_id = self._get_turn_id(chunk)
                     if n_iter == 0:
                         yield chunk
 
                     # run the tools
                     tool_response_message = self._run_tool(tool_calls)
+
                     # pass it to next iteration
                     turn_response = self.client.agents.turn.resume(
                         agent_id=self.agent_id,
@@ -185,7 +188,3 @@ def _create_turn_streaming(
                         stream=True,
                     )
                     n_iter += 1
-                    break
-
-            if n_iter >= max_iter:
-                raise Exception(f"Turn did not complete in {max_iter} iterations")
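With the client-side max_iter guard removed above, the per-turn iteration cap presumably moves to configuration: AgentConfig (imported at the top of this diff) carries a max_infer_iters field. A minimal sketch under that assumption; the model name and instructions are illustrative placeholders, not values from this diff.

from llama_stack_client.types.agent_create_params import AgentConfig

# Sketch assuming the inference-iteration cap is now supplied via AgentConfig
# rather than the removed client-side loop guard. Model and instructions are
# placeholders only.
agent_config = AgentConfig(
    model="meta-llama/Llama-3.1-8B-Instruct",
    instructions="You are a helpful assistant.",
    max_infer_iters=10,  # assumed cap on inference/tool-call round trips per turn
)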
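And a minimal sketch of how a caller observes the new early-exit behavior: when a turn completes with stop_reason "end_of_turn", _create_turn_streaming now yields the final chunk and breaks instead of resuming with tool responses. Here agent and session_id are assumed to exist already, and create_turn(..., stream=True) is assumed to delegate to _create_turn_streaming.

# Assumes `agent` is an Agent built from a config like the one above and
# `session_id` names an existing session; both are illustrative.
for chunk in agent.create_turn(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    session_id=session_id,
    stream=True,
):
    payload = chunk.event.payload
    if payload.event_type == "turn_complete":
        # After this change, a turn that stops with "end_of_turn" surfaces
        # here directly, even if its output message still carries tool calls.
        print(payload.turn.output_message.content)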