Commit a5ac1d5

xuanyang15 authored and copybara-github committed
feat: Add progressive SSE streaming feature
Co-authored-by: Xuan Yang <xygoogle@google.com>
PiperOrigin-RevId: 833483804
1 parent 0ec0195 commit a5ac1d5

File tree

4 files changed, +510 -3 lines changed

src/google/adk/features/_feature_registry.py

Lines changed: 7 additions & 3 deletions
@@ -24,8 +24,9 @@
 class FeatureName(str, Enum):
   """Feature names."""

-  JSON_SCHEMA_FOR_FUNC_DECL = "JSON_SCHEMA_FOR_FUNC_DECL"
   COMPUTER_USE = "COMPUTER_USE"
+  JSON_SCHEMA_FOR_FUNC_DECL = "JSON_SCHEMA_FOR_FUNC_DECL"
+  PROGRESSIVE_SSE_STREAMING = "PROGRESSIVE_SSE_STREAMING"


 class FeatureStage(Enum):
@@ -58,11 +59,14 @@ class FeatureConfig:

 # Central registry: FeatureName -> FeatureConfig
 _FEATURE_REGISTRY: dict[FeatureName, FeatureConfig] = {
+    FeatureName.COMPUTER_USE: FeatureConfig(
+        FeatureStage.EXPERIMENTAL, default_on=True
+    ),
     FeatureName.JSON_SCHEMA_FOR_FUNC_DECL: FeatureConfig(
         FeatureStage.WIP, default_on=False
     ),
-    FeatureName.COMPUTER_USE: FeatureConfig(
-        FeatureStage.EXPERIMENTAL, default_on=True
+    FeatureName.PROGRESSIVE_SSE_STREAMING: FeatureConfig(
+        FeatureStage.WIP, default_on=False
     ),
 }
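Note: the two files below import is_feature_enabled from ..features, so the registry above is the lookup table behind that check. A minimal sketch of the lookup (the sketch function name is hypothetical, and the real helper may also consult runtime or environment overrides not shown in this commit):

# Hypothetical sketch of the lookup backing is_feature_enabled();
# the real ADK helper may also honor per-run or environment overrides.
def is_feature_enabled_sketch(name: FeatureName) -> bool:
  config = _FEATURE_REGISTRY.get(name)
  # WIP features such as PROGRESSIVE_SSE_STREAMING ship with
  # default_on=False, so they stay dark until explicitly enabled;
  # unknown names are treated as disabled.
  return config.default_on if config is not None else False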

src/google/adk/flows/llm_flows/base_llm_flow.py

Lines changed: 12 additions & 0 deletions
@@ -38,6 +38,8 @@
 from ...agents.run_config import StreamingMode
 from ...agents.transcription_entry import TranscriptionEntry
 from ...events.event import Event
+from ...features import FeatureName
+from ...features import is_feature_enabled
 from ...models.base_llm_connection import BaseLlmConnection
 from ...models.llm_request import LlmRequest
 from ...models.llm_response import LlmResponse
@@ -525,6 +527,16 @@ async def _postprocess_async(

     # Handles function calls.
     if model_response_event.get_function_calls():
+
+      if is_feature_enabled(FeatureName.PROGRESSIVE_SSE_STREAMING):
+        # In progressive SSE streaming mode stage 1, we skip partial FC events
+        # Only execute FCs in the final aggregated event (partial=False)
+        if (
+            invocation_context.run_config.streaming_mode == StreamingMode.SSE
+            and model_response_event.partial
+        ):
+          return
+
       async with Aclosing(
           self._postprocess_handle_function_calls_async(
               invocation_context, model_response_event, llm_request
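With this guard, partial SSE chunks that carry function calls are dropped while progressive streaming is on; tools only run on the final aggregated event (partial=False). A standalone sketch of that decision, using a simplified StreamingMode and a hypothetical helper name:

from enum import Enum

class StreamingMode(Enum):
  # Simplified stand-in for ...agents.run_config.StreamingMode
  NONE = 'none'
  SSE = 'sse'

def should_execute_function_calls(
    streaming_mode: StreamingMode, partial: bool
) -> bool:
  # Mirrors the new guard: skip partial SSE chunks, execute otherwise.
  return not (streaming_mode == StreamingMode.SSE and partial)

assert not should_execute_function_calls(StreamingMode.SSE, partial=True)
assert should_execute_function_calls(StreamingMode.SSE, partial=False)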

src/google/adk/utils/streaming_utils.py

Lines changed: 92 additions & 0 deletions
@@ -19,6 +19,8 @@

 from google.genai import types

+from ..features import FeatureName
+from ..features import is_feature_enabled
 from ..models.llm_response import LlmResponse


@@ -35,6 +37,30 @@ def __init__(self):
     self._usage_metadata = None
     self._response = None

+    # For progressive SSE streaming mode: accumulate parts in order
+    self._parts_sequence: list[types.Part] = []
+    self._current_text_buffer: str = ''
+    self._current_text_is_thought: Optional[bool] = None
+    self._finish_reason: Optional[types.FinishReason] = None
+
+  def _flush_text_buffer_to_sequence(self):
+    """Flush current text buffer to parts sequence.
+
+    This helper is used in progressive SSE mode to maintain part ordering.
+    It only merges consecutive text parts of the same type (thought or regular).
+    """
+    if self._current_text_buffer:
+      if self._current_text_is_thought:
+        self._parts_sequence.append(
+            types.Part(text=self._current_text_buffer, thought=True)
+        )
+      else:
+        self._parts_sequence.append(
+            types.Part.from_text(text=self._current_text_buffer)
+        )
+      self._current_text_buffer = ''
+      self._current_text_is_thought = None
+
   async def process_response(
       self, response: types.GenerateContentResponse
   ) -> AsyncGenerator[LlmResponse, None]:
@@ -51,6 +77,42 @@ async def process_response(
     self._response = response
     llm_response = LlmResponse.create(response)
     self._usage_metadata = llm_response.usage_metadata
+
+    # ========== Progressive SSE Streaming (new feature) ==========
+    # Save finish_reason for final aggregation
+    if llm_response.finish_reason:
+      self._finish_reason = llm_response.finish_reason
+
+    if is_feature_enabled(FeatureName.PROGRESSIVE_SSE_STREAMING):
+      # Accumulate parts while preserving their order
+      # Only merge consecutive text parts of the same type (thought or regular)
+      if llm_response.content and llm_response.content.parts:
+        for part in llm_response.content.parts:
+          if part.text:
+            # Check if we need to flush the current buffer first
+            # (when text type changes from thought to regular or vice versa)
+            if (
+                self._current_text_buffer
+                and part.thought != self._current_text_is_thought
+            ):
+              self._flush_text_buffer_to_sequence()
+
+            # Accumulate text to buffer
+            if not self._current_text_buffer:
+              self._current_text_is_thought = part.thought
+            self._current_text_buffer += part.text
+          else:
+            # Non-text part (function_call, bytes, etc.)
+            # Flush any buffered text first, then add the non-text part
+            self._flush_text_buffer_to_sequence()
+            self._parts_sequence.append(part)

+      # Mark ALL intermediate chunks as partial
+      llm_response.partial = True
+      yield llm_response
+      return
+
+    # ========== Non-Progressive SSE Streaming (old behavior) ==========
     if (
         llm_response.content
         and llm_response.content.parts
@@ -89,6 +151,36 @@ def close(self) -> Optional[LlmResponse]:
     Returns:
       The aggregated LlmResponse.
     """
+    # ========== Progressive SSE Streaming (new feature) ==========
+    if is_feature_enabled(FeatureName.PROGRESSIVE_SSE_STREAMING):
+      # Always generate final aggregated response in progressive mode
+      if self._response and self._response.candidates:
+        # Flush any remaining text buffer to complete the sequence
+        self._flush_text_buffer_to_sequence()
+
+        # Use the parts sequence which preserves original ordering
+        final_parts = self._parts_sequence
+
+        if final_parts:
+          candidate = self._response.candidates[0]
+          finish_reason = self._finish_reason or candidate.finish_reason
+
+          return LlmResponse(
+              content=types.ModelContent(parts=final_parts),
+              error_code=None
+              if finish_reason == types.FinishReason.STOP
+              else finish_reason,
+              error_message=None
+              if finish_reason == types.FinishReason.STOP
+              else candidate.finish_message,
+              usage_metadata=self._usage_metadata,
+              finish_reason=finish_reason,
+              partial=False,
+          )
+
+      return None
+
+    # ========== Non-Progressive SSE Streaming (old behavior) ==========
     if (
         (self._text or self._thought_text)
         and self._response
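The heart of the new path is the merge policy: consecutive text parts with the same thought flag are concatenated, while any non-text part (function call, inline bytes) flushes the buffer so the original part ordering survives aggregation. A dependency-free sketch of that policy on simplified (kind, value, thought) tuples (names hypothetical; the real code operates on google.genai types.Part):

def merge_parts(chunks):
  # chunks: list of ('text', str, thought_flag) or (other_kind, payload, None)
  merged, buf, buf_thought = [], '', None
  for kind, value, thought in chunks:
    if kind == 'text':
      if buf and thought != buf_thought:
        merged.append(('text', buf, buf_thought))  # thought flag changed
        buf = ''
      if not buf:
        buf_thought = thought
      buf += value
    else:
      if buf:  # flush buffered text before a non-text part
        merged.append(('text', buf, buf_thought))
        buf, buf_thought = '', None
      merged.append((kind, value, None))
  if buf:  # final flush, as close() does via _flush_text_buffer_to_sequence()
    merged.append(('text', buf, buf_thought))
  return merged

# Two thought chunks merge; the function call keeps its position:
assert merge_parts([
    ('text', 'plan ', True),
    ('text', 'steps', True),
    ('function_call', 'get_weather', None),
    ('text', 'answer', False),
]) == [
    ('text', 'plan steps', True),
    ('function_call', 'get_weather', None),
    ('text', 'answer', False),
]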
