
Commit 31fa7dd
small changes
mobius committed Jan 11, 2024
1 parent 7e2edc8
Showing 2 changed files with 10 additions and 5 deletions.
.gitignore (6 changes: 4 additions & 2 deletions)
@@ -16,8 +16,10 @@ node_modules/
 
 
 # Misc
-test.md
-test.txt
+/test*.md
+/test*.txt
+/test*.py
+/sandbox
 *.log
 debug_transcription.wav
 t12_outputs
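Note: the leading / added to these patterns anchors them to the repository root, so files matching test*.md, test*.txt, or test*.py are only ignored at the top level, not in subdirectories.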
uniteai/llm_server.py (9 changes: 6 additions & 3 deletions)
@@ -49,7 +49,7 @@
 import llama_cpp
 import queue
 import time
-
+import traceback
 
 device = 'cuda' if torch.cuda.is_available() else 'cpu'  # note: this isn't relevant to GGUF models
 
@@ -78,7 +78,8 @@ def load_model(args):
     from llama_cpp import Llama
     model = Llama(
         model_path=name_or_path,
-        verbose=False
+        verbose=False,
+        n_ctx=2048,
     )
     tokenizer = None
     return tokenizer, model
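For context, n_ctx sets the context window (in tokens) that llama.cpp allocates; without it, llama-cpp-python falls back to a much smaller default (512 in versions from this period), and prompts longer than the window get truncated or rejected. A minimal sketch of the resulting call, assuming a hypothetical local GGUF file at ./model.gguf:

```python
# Minimal sketch: load a GGUF model with a 2048-token context window.
# Assumes llama-cpp-python is installed; the model path is hypothetical.
from llama_cpp import Llama

model = Llama(
    model_path='./model.gguf',  # hypothetical path to a GGUF model file
    verbose=False,              # silence llama.cpp's banner and per-call logs
    n_ctx=2048,                 # context window in tokens (prompt + generated)
)
```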
@@ -201,7 +202,8 @@ def f(input_ids: torch.LongTensor,
 
     stream = model(
         request.text,
-        max_tokens=200,
+        # max_tokens=200,
+        max_tokens=999999999,
         stream=True,
         echo=False,  # echo the prompt back as output
         stopping_criteria=stopping_criteria,
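Setting max_tokens this high effectively removes the per-request cap, so generation ends on an EOS token, the stopping_criteria passed above, or exhaustion of the n_ctx window. A minimal sketch of consuming such a stream, reusing the model from the previous sketch with a made-up prompt:

```python
# Minimal sketch: stream a completion from llama-cpp-python.
# 'model' is the Llama instance from the sketch above; the prompt is made up.
stream = model(
    'Write a haiku about text editors.',
    max_tokens=999999999,  # effectively uncapped; EOS or the context window ends generation
    stream=True,
    echo=False,            # don't echo the prompt back as output
)
for chunk in stream:
    # each chunk is a completion fragment in OpenAI-style shape
    print(chunk['choices'][0]['text'], end='', flush=True)
```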
@@ -250,6 +252,7 @@ def f(input_ids: torch.LongTensor,
     try:
         model.generate(**generation_kwargs)  # blocks
     except RuntimeError as e:
+        traceback.print_exc()
         streamer.on_finalized_text(f'\n<LLM SERVER ERROR: {e}>', stream_end=True)
     print('DONE GENERATING')
 
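This pairs with the import traceback added above: the client only ever sees str(e) via the streamer, so printing the full stack trace server-side preserves the debugging context. A small illustration of the difference, with a made-up error:

```python
# Small illustration: str(e) vs. the full traceback.
import traceback

try:
    raise RuntimeError('CUDA out of memory')  # stand-in for a failing model.generate()
except RuntimeError as e:
    traceback.print_exc()  # full stack trace to stderr, kept in the server log
    print(f'\n<LLM SERVER ERROR: {e}>')  # the short message the client receives
```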
