Commit
fix llama.cpp streaming bug
mobius committed Dec 26, 2023
1 parent 1e4199c commit 7e2edc8
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions uniteai/llm_server.py
@@ -201,14 +201,14 @@ def f(input_ids: torch.LongTensor,
 
     stream = model(
         request.text,
-        max_tokens=128,
+        max_tokens=200,
         stream=True,
         echo=False,  # don't echo the prompt back as part of the output
         stopping_criteria=stopping_criteria,
     )
 
     for output in stream:
-        if output['choices'][0]['finish_reason'] == 'stop':
+        if output['choices'][0]['finish_reason'] in {'stop', 'length'}:
             streamer.put(None)
         else:
             streamer.put(output['choices'][0]['text'])
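
For context: in llama-cpp-python's streaming API, `finish_reason` is `None` on intermediate chunks, `'stop'` when a stop token fires, and `'length'` when `max_tokens` is exhausted. The old check only handled `'stop'`, so a stream cut off by the token limit never pushed the `None` sentinel and the consumer hung. A minimal standalone sketch of the corrected loop (the model path and prompt are placeholders; `streamer` and `request` from the patched file are replaced here with a plain print loop):

```python
# Minimal sketch, assuming the llama-cpp-python API: streaming calls
# yield dicts shaped like {'choices': [{'text': ..., 'finish_reason': ...}]}.
from llama_cpp import Llama

llm = Llama(model_path="model.gguf")  # placeholder path

stream = llm(
    "Write a haiku about text editors.",
    max_tokens=200,
    stream=True,
    echo=False,  # don't echo the prompt back as part of the output
)

for output in stream:
    choice = output['choices'][0]
    # Both terminal values must end the stream: 'stop' for a stop token,
    # 'length' for hitting max_tokens. Checking only 'stop' leaks the
    # 'length' case and the loop's consumer waits forever.
    if choice['finish_reason'] in {'stop', 'length'}:
        break
    print(choice['text'], end='', flush=True)
```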
