Lightning-AI · aniketmaurya · Nov 11, 2024 · Nov 11, 2024 · Nov 11, 2024 · Nov 11, 2024
@@ -365,7 +365,7 @@ async def chat_completion(self, request: ChatCompletionRequest, background_tasks
         if request.stream:
             return StreamingResponse(
                 self.streaming_completion(request, responses),
-                media_type="application/x-ndjson",
+                media_type="text/event-stream",
                 background=background_tasks,
             )
 
@@ -394,9 +394,9 @@ async def streaming_completion(self, request: ChatCompletionRequest, pipe_respon
 
             # Only use the last item from encode_response
             usage_info = sum(usage_infos)
-            chunk = ChatCompletionChunk(model=model, choices=choices, usage=None).json()
+            chunk = ChatCompletionChunk(model=model, choices=choices, usage=None)
             logger.debug(chunk)
-            yield f"data: {chunk}\n\n"
+            yield f"data: {json.dumps(chunk.model_dump())}\n\n"
 
         choices = [
             ChatCompletionStreamingChoice(
@@ -410,8 +410,8 @@ async def streaming_completion(self, request: ChatCompletionRequest, pipe_respon
             model=model,
             choices=choices,
             usage=usage_info,
-        ).json()
-        yield f"data: {last_chunk}\n\n"
+        )
+        yield f"data: {json.dumps(last_chunk.model_dump())}\n\n"
         yield "data: [DONE]\n\n"
 
     async def non_streaming_completion(self, request: ChatCompletionRequest, generator_list: List[AsyncGenerator]):