Filter empty text parts when streaming (#8736)

dlarocque · web-flow · commit 554c7bdc12cf · 2025-02-12T12:43:34.000-05:00
* Filter empty text parts when streaming

* Add changeset

* Remove unused filterEmptyTextParts()

* Move logic into a function

* final

* don't throw if mock response file isn't found

* update responses version to 6

* Only ignore empty text parts in the aggregated response.

* review fixes

* Throw in `aggregateResponses` if `newPart` has no properties
diff --git a/.changeset/seven-oranges-care.md b/.changeset/seven-oranges-care.md
@@ -0,0 +1,5 @@
+---
+'@firebase/vertexai': patch
+---
+
+Filter out empty text parts from streaming responses.
diff --git a/packages/vertexai/src/requests/stream-reader.test.ts b/packages/vertexai/src/requests/stream-reader.test.ts
@@ -33,8 +33,10 @@ import {
   GenerateContentResponse,
   HarmCategory,
   HarmProbability,
-  SafetyRating
+  SafetyRating,
+  VertexAIErrorCode
 } from '../types';
+import { VertexAIError } from '../errors';
 
 use(sinonChai);
 
@@ -220,6 +222,23 @@ describe('processStream', () => {
     }
     expect(foundCitationMetadata).to.be.true;
   });
+  it('removes empty text parts', async () => {
+    const fakeResponse = getMockResponseStreaming(
+      'streaming-success-empty-text-part.txt'
+    );
+    const result = processStream(fakeResponse as Response);
+    const aggregatedResponse = await result.response;
+    expect(aggregatedResponse.text()).to.equal('1');
+    expect(aggregatedResponse.candidates?.length).to.equal(1);
+    expect(aggregatedResponse.candidates?.[0].content.parts.length).to.equal(1);
+
+    // The chunk with the empty text part will still go through the stream
+    let numChunks = 0;
+    for await (const _ of result.stream) {
+      numChunks++;
+    }
+    expect(numChunks).to.equal(2);
+  });
 });
 
 describe('aggregateResponses', () => {
@@ -403,4 +422,49 @@ describe('aggregateResponses', () => {
       ).to.equal(150);
     });
   });
+
+  it('throws if a part has no properties', () => {
+    const responsesToAggregate: GenerateContentResponse[] = [
+      {
+        candidates: [
+          {
+            index: 0,
+            content: {
+              role: 'user',
+              parts: [{} as any] // Empty
+            },
+            finishReason: FinishReason.STOP,
+            finishMessage: 'something',
+            safetyRatings: [
+              {
+                category: HarmCategory.HARM_CATEGORY_HARASSMENT,
+                probability: HarmProbability.NEGLIGIBLE
+              } as SafetyRating
+            ]
+          }
+        ],
+        promptFeedback: {
+          blockReason: BlockReason.SAFETY,
+          safetyRatings: [
+            {
+              category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+              probability: HarmProbability.LOW
+            } as SafetyRating
+          ]
+        }
+      }
+    ];
+
+    try {
+      aggregateResponses(responsesToAggregate);
+    } catch (e) {
+      expect((e as VertexAIError).code).includes(
+        VertexAIErrorCode.INVALID_CONTENT
+      );
+      expect((e as VertexAIError).message).to.include(
+        'Part should have at least one property, but there are none. This is likely caused ' +
+          'by a malformed response from the backend.'
+      );
+    }
+  });
 });
diff --git a/packages/vertexai/src/requests/stream-reader.ts b/packages/vertexai/src/requests/stream-reader.ts
@@ -62,6 +62,7 @@ async function getResponsePromise(
       );
       return enhancedResponse;
     }
+
     allResponses.push(value);
   }
 }
@@ -184,14 +185,24 @@ export function aggregateResponses(
           }
           const newPart: Partial<Part> = {};
           for (const part of candidate.content.parts) {
-            if (part.text) {
+            if (part.text !== undefined) {
+              // The backend can send empty text parts. If these are sent back
+              // (e.g. in chat history), the backend will respond with an error.
+              // To prevent this, ignore empty text parts.
+              if (part.text === '') {
+                continue;
+              }
               newPart.text = part.text;
             }
             if (part.functionCall) {
               newPart.functionCall = part.functionCall;
             }
             if (Object.keys(newPart).length === 0) {
-              newPart.text = '';
+              throw new VertexAIError(
+                VertexAIErrorCode.INVALID_CONTENT,
+                'Part should have at least one property, but there are none. This is likely caused ' +
+                  'by a malformed response from the backend.'
+              );
             }
             aggregatedResponse.candidates[i].content.parts.push(
               newPart as Part
diff --git a/scripts/update_vertexai_responses.sh b/scripts/update_vertexai_responses.sh
@@ -17,7 +17,7 @@
 # This script replaces mock response files for Vertex AI unit tests with a fresh
 # clone of the shared repository of Vertex AI test data.
 
-RESPONSES_VERSION='v5.*' # The major version of mock responses to use
+RESPONSES_VERSION='v6.*' # The major version of mock responses to use
 REPO_NAME="vertexai-sdk-test-data"
 REPO_LINK="https://github.com/FirebaseExtended/$REPO_NAME.git"
 

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,5 @@`
	`1`	`+ ---`
	`2`	`+ '@firebase/vertexai': patch`
	`3`	`+ ---`
	`4`	`+`
	`5`	`+ Filter out empty text parts from streaming responses.`
Original file line number	Diff line number	Diff line change
`@@ -62,6 +62,7 @@ async function getResponsePromise(`
`62`	`62`	`);`
`63`	`63`	`return enhancedResponse;`
`64`	`64`	`}`
	`65`	`+`
`65`	`66`	`allResponses.push(value);`
`66`	`67`	`}`
`67`	`68`	`}`
`@@ -184,14 +185,24 @@ export function aggregateResponses(`
`184`	`185`	`}`
`185`	`186`	`const newPart: Partial<Part> = {};`
`186`	`187`	`for (const part of candidate.content.parts) {`
`187`		`- if (part.text) {`
	`188`	`+ if (part.text !== undefined) {`
	`189`	`+ // The backend can send empty text parts. If these are sent back`
	`190`	`+ // (e.g. in chat history), the backend will respond with an error.`
	`191`	`+ // To prevent this, ignore empty text parts.`
	`192`	`+ if (part.text === '') {`
	`193`	`+ continue;`
	`194`	`+ }`
`188`	`195`	`newPart.text = part.text;`
`189`	`196`	`}`
`190`	`197`	`if (part.functionCall) {`
`191`	`198`	`newPart.functionCall = part.functionCall;`
`192`	`199`	`}`
`193`	`200`	`if (Object.keys(newPart).length === 0) {`
`194`		`- newPart.text = '';`
	`201`	`+ throw new VertexAIError(`
	`202`	`+ VertexAIErrorCode.INVALID_CONTENT,`
	`203`	`+ 'Part should have at least one property, but there are none. This is likely caused ' +`
	`204`	`+ 'by a malformed response from the backend.'`
	`205`	`+ );`
`195`	`206`	`}`
`196`	`207`	`aggregatedResponse.candidates[i].content.parts.push(`
`197`	`208`	`newPart as Part`