xorbitsai · ChengjieLi28 · Mar 7, 2024 · Feb 29, 2024 · Mar 1, 2024 · Mar 1, 2024
diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py
@@ -1256,6 +1256,7 @@ async def stream_results():
                         self.handle_request_limit_error(re)
                     async for item in iterator:
                         yield item
+                    yield "[DONE]"
                 except Exception as ex:
                     logger.exception("Chat completion stream got an error: %s", ex)
                     await self._report_error_event(model_uid, str(ex))

diff --git a/xinference/client/common.py b/xinference/client/common.py
@@ -43,6 +43,8 @@ def streaming_response_iterator(
         line = line.strip()
         if line.startswith(b"data:"):
             json_str = line[len(b"data:") :].strip()
+            if json_str == b"[DONE]":
+                continue
             data = json.loads(json_str.decode("utf-8"))
             error = data.get("error")
             if error is not None:

diff --git a/xinference/client/tests/test_client.py b/xinference/client/tests/test_client.py
@@ -274,7 +274,9 @@ def _check_stream():
             generate_config={"stream": True, "max_tokens": 5},
         )
         for chunk in streaming_response:
-            assert "content" or "role" in chunk["choices"][0]["delta"]
+            assert ("content" in chunk["choices"][0]["delta"]) or (
+                "role" in chunk["choices"][0]["delta"]
+            )
 
     _check_stream()