From cff63ebed0402a481518db54be18f3c000dddeed Mon Sep 17 00:00:00 2001
From: "Zhang, Tianrong"
Date: Thu, 29 Feb 2024 05:54:00 -0500
Subject: [PATCH 1/3] add [DONE] to the end of stream generation for better compatibility

---
 xinference/api/restful_api.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/xinference/api/restful_api.py b/xinference/api/restful_api.py
index 05f4e02707..8bda031c1c 100644
--- a/xinference/api/restful_api.py
+++ b/xinference/api/restful_api.py
@@ -1256,6 +1256,7 @@ async def stream_results():
                     self.handle_request_limit_error(re)
                 async for item in iterator:
                     yield item
+                yield "[DONE]"
             except Exception as ex:
                 logger.exception("Chat completion stream got an error: %s", ex)
                 await self._report_error_event(model_uid, str(ex))

From ac4fe4e8e74f0b02ce57bf3338c25777cbf34c7b Mon Sep 17 00:00:00 2001
From: "Zhang, Tianrong"
Date: Fri, 1 Mar 2024 01:22:32 -0500
Subject: [PATCH 2/3] handle [DONE] according to OpenAI convention

---
 xinference/client/common.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/xinference/client/common.py b/xinference/client/common.py
index f58dfad108..936574fee6 100644
--- a/xinference/client/common.py
+++ b/xinference/client/common.py
@@ -43,6 +43,8 @@ def streaming_response_iterator(
         line = line.strip()
         if line.startswith(b"data:"):
             json_str = line[len(b"data:") :].strip()
+            if json_str == b"[DONE]":
+                continue
             data = json.loads(json_str.decode("utf-8"))
             error = data.get("error")
             if error is not None:

From e99706098a36d1cf63f3658d88430a2ec3881c97 Mon Sep 17 00:00:00 2001
From: "Zhang, Tianrong"
Date: Fri, 1 Mar 2024 01:24:02 -0500
Subject: [PATCH 3/3] fix assert statement that always evaluates to true

---
 xinference/client/tests/test_client.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/xinference/client/tests/test_client.py b/xinference/client/tests/test_client.py
index 65ec3651bc..8c7a86f6f0 100644
--- a/xinference/client/tests/test_client.py
+++ b/xinference/client/tests/test_client.py
@@ -274,7 +274,9 @@ def _check_stream():
             generate_config={"stream": True, "max_tokens": 5},
         )
         for chunk in streaming_response:
-            assert "content" or "role" in chunk["choices"][0]["delta"]
+            assert ("content" in chunk["choices"][0]["delta"]) or (
+                "role" in chunk["choices"][0]["delta"]
+            )

     _check_stream()
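
Note: taken together, the three patches align xinference's SSE streaming with the OpenAI convention: the server now terminates each stream with a "data: [DONE]" event, the bundled client skips that sentinel instead of trying to JSON-decode it, and the test's membership check is corrected so it can actually fail. For context, a minimal consumer loop in the same spirit as the patched streaming_response_iterator is sketched below; the endpoint URL, port, and model name are placeholders for illustration, not part of the patch.

# Illustrative sketch only: consume an OpenAI-style chat completion stream
# and stop on the "[DONE]" sentinel the server now emits at end of stream.
# The URL and payload below are assumed placeholders, not taken from the patch.
import json
import requests

resp = requests.post(
    "http://localhost:9997/v1/chat/completions",  # placeholder endpoint
    json={
        "model": "my-model",  # placeholder model name
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": True,
    },
    stream=True,
)
for raw in resp.iter_lines():
    line = raw.strip()
    if not line.startswith(b"data:"):
        continue  # skip blank keep-alive lines and SSE comments
    payload = line[len(b"data:"):].strip()
    if payload == b"[DONE]":
        break  # end-of-stream sentinel, nothing to decode
    chunk = json.loads(payload.decode("utf-8"))
    delta = chunk["choices"][0]["delta"]
    print(delta.get("content") or "", end="", flush=True)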