Skip to content

Commit

Permalink
Fix broken AsyncInferenceClient on [DONE] signal (#2458)
Browse files Browse the repository at this point in the history
* fix trailing newlines

* fix cassettes

* quality
  • Loading branch information
Wauplin committed Aug 19, 2024
1 parent f2fbf5b commit b2261a1
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 43 deletions.
6 changes: 3 additions & 3 deletions src/huggingface_hub/inference/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def _format_text_generation_stream_output(
if not byte_payload.startswith(b"data:"):
return None # empty line

if byte_payload == b"data: [DONE]":
if byte_payload.strip() == b"data: [DONE]":
raise StopIteration("[DONE] signal received.")

# Decode payload
Expand Down Expand Up @@ -344,7 +344,7 @@ def _format_chat_completion_stream_output(
if not byte_payload.startswith(b"data:"):
return None # empty line

if byte_payload == b"data: [DONE]":
if byte_payload.strip() == b"data: [DONE]":
raise StopIteration("[DONE] signal received.")

# Decode payload
Expand All @@ -355,7 +355,7 @@ def _format_chat_completion_stream_output(

async def _async_yield_from(client: "ClientSession", response: "ClientResponse") -> AsyncIterable[bytes]:
async for byte_payload in response.content:
yield byte_payload
yield byte_payload.strip()
await client.close()


Expand Down
53 changes: 22 additions & 31 deletions tests/cassettes/test_async_chat_completion_with_stream.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,80 +3,71 @@ interactions:
body: null
headers:
user-agent:
- unknown/None; hf_hub/0.22.0.dev0; python/3.10.12; torch/2.2.0; tensorflow/2.15.0.post1;
- unknown/None; hf_hub/0.25.0.dev0; python/3.10.12; torch/2.3.1; tensorflow/2.17.0;
fastcore/1.5.23
method: POST
uri: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta/v1/chat/completions
response:
body:
string: 'data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"Deep"},"logprobs":null,"finish_reason":null}]}
string: 'data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"Deep"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
Learning"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
learning"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
is"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
a"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
sub"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"field"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"field"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
of"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
Machine"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
machine"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
Learning"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
learning"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{"role":"assistant","content":"
that"},"logprobs":null,"finish_reason":null}]}
data:{"id":"","object":"text_completion","created":1724071633,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"2.0.4-sha-f426a33","choices":[{"index":0,"delta":{"role":"assistant","content":"
that"},"logprobs":null,"finish_reason":"length"}]}
data:{"id":"","object":"text_completion","created":1710439508,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-sha-e6bb3ff","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"length"}]}
'
headers:
Access-Control-Allow-Credentials:
- 'true'
Access-Control-Allow-Origin:
- '*'
Cache-Control:
- no-cache
Connection:
- keep-alive
Content-Length:
- '2526'
Content-Type:
- text/event-stream
Date:
- Thu, 14 Mar 2024 18:05:08 GMT
Transfer-Encoding:
- chunked
- Mon, 19 Aug 2024 13:01:29 GMT
Vary:
- origin, Origin, Access-Control-Request-Method, Access-Control-Request-Headers
x-accel-buffering:
- 'no'
x-compute-characters:
- '103'
- Origin, Access-Control-Request-Method, Access-Control-Request-Headers
x-compute-type:
- 1-a10-g
- cache
x-request-id:
- idvh81inTm9FBUT-za5t7
- w9oS4KSPCoEAOi6QV7-cX
x-sha:
- b70e0c9a2d9e14bd1e812d3c398e5f313e93b473
status:
code: 200
message: OK
url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta/v1/chat/completions
version: 1
8 changes: 3 additions & 5 deletions tests/test_inference_async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,18 +212,16 @@ async def test_async_chat_completion_with_stream() -> None:

all_items = [item async for item in output]
generated_text = ""
for item in all_items[:-1]: # all but last item
for item in all_items:
assert isinstance(item, ChatCompletionStreamOutput)
assert len(item.choices) == 1
generated_text += item.choices[0].delta.content
last_item = all_items[-1]

assert generated_text == "Deep Learning is a subfield of Machine Learning that"
assert generated_text == "Deep learning is a subfield of machine learning that"

# Last item has a finish reason but no role/content delta
# Last item has a finish reason
assert last_item.choices[0].finish_reason == "length"
assert last_item.choices[0].delta.role is None
assert last_item.choices[0].delta.content is None


@pytest.mark.vcr
Expand Down
10 changes: 6 additions & 4 deletions tests/test_inference_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -974,13 +974,14 @@ def test_chat_completion_base_url_works_with_v1(base_url: str):
assert post_mock.call_args_list[0].kwargs["model"] == "http://0.0.0.0:8080/v1/chat/completions"


def test_stream_text_generation_response():
@pytest.mark.parametrize("stop_signal", [b"data: [DONE]", b"data: [DONE]\n", b"data: [DONE] "])
def test_stream_text_generation_response(stop_signal: bytes):
data = [
b'data: {"index":1,"token":{"id":4560,"text":" trying","logprob":-2.078125,"special":false},"generated_text":null,"details":null}',
b"", # Empty line is skipped
b"\n", # Newline is skipped
b'data: {"index":2,"token":{"id":311,"text":" to","logprob":-0.026245117,"special":false},"generated_text":" trying to","details":null}',
b"data: [DONE]", # Stop signal
stop_signal, # Stop signal
# Won't parse after
b'data: {"index":2,"token":{"id":311,"text":" to","logprob":-0.026245117,"special":false},"generated_text":" trying to","details":null}',
]
Expand All @@ -989,13 +990,14 @@ def test_stream_text_generation_response():
assert output == [" trying", " to"]


def test_stream_chat_completion_response():
@pytest.mark.parametrize("stop_signal", [b"data: [DONE]", b"data: [DONE]\n", b"data: [DONE] "])
def test_stream_chat_completion_response(stop_signal: bytes):
data = [
b'data: {"object":"chat.completion.chunk","id":"","created":1721737661,"model":"","system_fingerprint":"2.1.2-dev0-sha-5fca30e","choices":[{"index":0,"delta":{"role":"assistant","content":"Both"},"logprobs":null,"finish_reason":null}]}',
b"", # Empty line is skipped
b"\n", # Newline is skipped
b'data: {"object":"chat.completion.chunk","id":"","created":1721737661,"model":"","system_fingerprint":"2.1.2-dev0-sha-5fca30e","choices":[{"index":0,"delta":{"role":"assistant","content":" Rust"},"logprobs":null,"finish_reason":null}]}',
b"data: [DONE]", # Stop signal
stop_signal, # Stop signal
# Won't parse after
b'data: {"index":2,"token":{"id":311,"text":" to","logprob":-0.026245117,"special":false},"generated_text":" trying to","details":null}',
]
Expand Down

0 comments on commit b2261a1

Please sign in to comment.