55import json
66import re
77import logging
8- from typing import Any , AsyncIterator , Iterator
8+ from typing import Annotated , Any , AsyncIterator , Iterator
99
1010from llama_stack_client import APIConnectionError
1111from llama_stack_client .lib .agents .agent import AsyncAgent # type: ignore
2020from fastapi .responses import StreamingResponse
2121
2222from auth import get_auth_dependency
23+ from auth .interface import AuthTuple
2324from client import AsyncLlamaStackClientHolder
2425from configuration import configuration
2526import metrics
2627from models .requests import QueryRequest
2728from utils .endpoints import check_configuration_loaded , get_system_prompt
28- from utils .common import retrieve_user_id
2929from utils .mcp_headers import mcp_headers_dependency , handle_mcp_headers_with_toolgroups
3030from utils .suid import get_suid
3131from utils .types import GraniteToolParser
@@ -431,7 +431,7 @@ def _handle_heartbeat_event(chunk_id: int) -> Iterator[str]:
431431async def streaming_query_endpoint_handler (
432432 _request : Request ,
433433 query_request : QueryRequest ,
434- auth : Any = Depends (auth_dependency ),
434+ auth : Annotated [ AuthTuple , Depends (auth_dependency )] ,
435435 mcp_headers : dict [str , dict [str , str ]] = Depends (mcp_headers_dependency ),
436436) -> StreamingResponse :
437437 """Handle request to the /streaming_query endpoint."""
@@ -440,7 +440,7 @@ async def streaming_query_endpoint_handler(
440440 llama_stack_config = configuration .llama_stack_configuration
441441 logger .info ("LLama stack config: %s" , llama_stack_config )
442442
443- _user_id , _user_name , token = auth
443+ user_id , _user_name , token = auth
444444
445445 try :
446446 # try to get Llama Stack client
@@ -483,7 +483,7 @@ async def response_generator(turn_response: Any) -> AsyncIterator[str]:
483483 logger .debug ("Transcript collection is disabled in the configuration" )
484484 else :
485485 store_transcript (
486- user_id = retrieve_user_id ( auth ) ,
486+ user_id = user_id ,
487487 conversation_id = conversation_id ,
488488 query_is_valid = True , # TODO(lucasagomes): implement as part of query validation
489489 query = query_request .query ,
0 commit comments