Added rag_chunks to the streaming_query end event and stored transcript

bsatapat-jpg · bsatapat-jpg · commit a417236ef16b · 2025-09-25T16:10:50.000+05:30
diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py
@@ -29,7 +29,7 @@
 from metrics.utils import update_llm_token_count_from_turn
 from models.config import Action
 from models.requests import QueryRequest
-from models.responses import UnauthorizedResponse, ForbiddenResponse
+from models.responses import UnauthorizedResponse, ForbiddenResponse, RAGChunk, ReferencedDocument
 from models.database.conversations import UserConversation
 from utils.endpoints import check_configuration_loaded, get_agent, get_system_prompt
 from utils.mcp_headers import mcp_headers_dependency, handle_mcp_headers_with_toolgroups
@@ -135,7 +135,7 @@ def stream_start_event(conversation_id: str) -> str:
     )
 
 
-def stream_end_event(metadata_map: dict) -> str:
+def stream_end_event(metadata_map: dict, summary: TurnSummary) -> str:
     """
     Yield the end of the data stream.
 
@@ -151,20 +151,44 @@ def stream_end_event(metadata_map: dict) -> str:
         str: A Server-Sent Events (SSE) formatted string
         representing the end of the data stream.
     """
+    # Process RAG chunks
+    rag_chunks = [
+        {
+            "content": chunk.content,
+            "source": chunk.source,
+            "score": chunk.score
+        }
+        for chunk in summary.rag_chunks
+    ]
+
+    # Extract referenced documents from RAG chunks
+    referenced_docs = []
+    doc_sources = set()
+    for chunk in summary.rag_chunks:
+        if chunk.source and chunk.source not in doc_sources:
+            doc_sources.add(chunk.source)
+            referenced_docs.append({
+                "doc_url": chunk.source if chunk.source.startswith("http") else None,
+                "doc_title": chunk.source.split("/")[-1] if chunk.source else None,
+            })
+
+    # Add metadata_map documents whose URL was not already seen as a RAG chunk source
+    for v in filter(
+        lambda v: ("docs_url" in v) and ("title" in v),
+        metadata_map.values(),
+    ):
+        if v["docs_url"] not in doc_sources:
+            referenced_docs.append({
+                "doc_url": v["docs_url"],
+                "doc_title": v["title"],
+            })
+
     return format_stream_data(
         {
             "event": "end",
             "data": {
-                "referenced_documents": [
-                    {
-                        "doc_url": v["docs_url"],
-                        "doc_title": v["title"],
-                    }
-                    for v in filter(
-                        lambda v: ("docs_url" in v) and ("title" in v),
-                        metadata_map.values(),
-                    )
-                ],
+                "rag_chunks": rag_chunks,
+                "referenced_documents": referenced_docs,
                 "truncated": None,  # TODO(jboos): implement truncated
                 "input_tokens": 0,  # TODO(jboos): implement input tokens
                 "output_tokens": 0,  # TODO(jboos): implement output tokens
@@ -680,11 +704,20 @@ async def response_generator(
                     chunk_id += 1
                     yield event
 
-            yield stream_end_event(metadata_map)
+            yield stream_end_event(metadata_map, summary)
 
             if not is_transcripts_enabled():
                 logger.debug("Transcript collection is disabled in the configuration")
             else:
+                # Convert RAG chunks to serializable format for store_transcript
+                rag_chunks_for_transcript = [
+                    {
+                        "content": chunk.content,
+                        "source": chunk.source,
+                        "score": chunk.score
+                    }
+                    for chunk in summary.rag_chunks
+                ]
                 store_transcript(
                     user_id=user_id,
                     conversation_id=conversation_id,
@@ -694,7 +727,7 @@ async def response_generator(
                     query=query_request.query,
                     query_request=query_request,
                     summary=summary,
-                    rag_chunks=[],  # TODO(lucasagomes): implement rag_chunks
+                    rag_chunks=rag_chunks_for_transcript,
                     truncated=False,  # TODO(lucasagomes): implement truncation as part
                     # of quota work
                     attachments=query_request.attachments or [],