55import json
66import re
77import logging
8- from typing import Any , AsyncIterator , Iterator
8+ from typing import Annotated , Any , AsyncIterator , Iterator
99
1010from llama_stack_client import APIConnectionError
1111from llama_stack_client .lib .agents .agent import AsyncAgent # type: ignore
2020from fastapi .responses import StreamingResponse
2121
2222from auth import get_auth_dependency
23+ from auth .interface import AuthTuple
2324from client import AsyncLlamaStackClientHolder
2425from configuration import configuration
2526import metrics
2627from models .requests import QueryRequest
2728from utils .endpoints import check_configuration_loaded , get_system_prompt
28- from utils .common import retrieve_user_id
2929from utils .mcp_headers import mcp_headers_dependency , handle_mcp_headers_with_toolgroups
3030from utils .suid import get_suid
3131from utils .types import GraniteToolParser
@@ -415,7 +415,7 @@ def _handle_heartbeat_event(chunk_id: int) -> Iterator[str]:
415415async def streaming_query_endpoint_handler (
416416 _request : Request ,
417417 query_request : QueryRequest ,
418- auth : Any = Depends (auth_dependency ),
418+ auth : Annotated [ AuthTuple , Depends (auth_dependency )] ,
419419 mcp_headers : dict [str , dict [str , str ]] = Depends (mcp_headers_dependency ),
420420) -> StreamingResponse :
421421 """Handle request to the /streaming_query endpoint."""
@@ -424,7 +424,7 @@ async def streaming_query_endpoint_handler(
424424 llama_stack_config = configuration .llama_stack_configuration
425425 logger .info ("LLama stack config: %s" , llama_stack_config )
426426
427- _user_id , _user_name , token = auth
427+ user_id , _user_name , token = auth
428428
429429 try :
430430 # try to get Llama Stack client
@@ -463,7 +463,7 @@ async def response_generator(turn_response: Any) -> AsyncIterator[str]:
463463 logger .debug ("Transcript collection is disabled in the configuration" )
464464 else :
465465 store_transcript (
466- user_id = retrieve_user_id ( auth ) ,
466+ user_id = user_id ,
467467 conversation_id = conversation_id ,
468468 query_is_valid = True , # TODO(lucasagomes): implement as part of query validation
469469 query = query_request .query ,
0 commit comments