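"""safespace.py -- private and local AI counseling in the terminal.

Downloads a GGUF counseling model on first run, then holds a single
llama.cpp chat session, warning the user as the context window fills up.
"""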
__version__ = '1.0.0+mps'  # platform specific label

print('Loading...')

import os, sys, requests, readline  # readline enables line editing in input prompts
from datetime import datetime as dt
from rich.progress import track
from rich.console import Console

console = Console()
os.system('cls' if os.name == 'nt' else 'clear')

default_model = "https://huggingface.co/danlou/safespace-7b-gguf/resolve/main/safespace-1.0-7b-q4_0.gguf"
ctx_size = 4096

def sys_print(text, color='bright_cyan'):
    console.print(f"[{color}][bold]>[/bold] {text}[/{color}]", highlight=False)

def user_input(max_len=2048):
    console.print('[bright_white][bold]>[/bold] ', end='')
    input_str = console.input().strip()
    if input_str in {'q', 'quit', 'exit', 'close', 'bye'}:
        os.system('cls' if os.name == 'nt' else 'clear')
        sys.exit()
    elif len(input_str) == 0:
        input_str = '(no answer)'
    elif len(input_str) > max_len:  # truncate if too long
        input_str = input_str[:max_len] + '...'
    input_str = input_str.replace('ASSISTANT:', 'ASSISTANT')  # avoid injections via role tags
    return input_str

def download_model(url, models_dir='safespace_models/', block_sz=8192):
    os.makedirs(models_dir, exist_ok=True)  # needed when called directly (e.g., --force-download on a fresh install)
    response = requests.get(url, stream=True)
    response.raise_for_status()
    model_sz = int(response.headers.get('content-length', 0))
    destination = os.path.join(models_dir, url.split('/')[-1])
    with open(destination, 'wb') as out_file:
        for chunk in track(response.iter_content(block_sz),
                           description=f"{url.split('/')[-1]} ({round(model_sz / (1024 ** 3), 2)} GB)",
                           total=model_sz // block_sz):
            out_file.write(chunk)

def get_model_path(models_dir='safespace_models/', ext='gguf'):  # models_dir in the same path as binary
    os.makedirs(models_dir, exist_ok=True)
    fns = [os.path.join(models_dir, f) for f in os.listdir(models_dir) if f.endswith(f'.{ext}')]
    if len(fns) == 0:
        sys_print("No model detected, let's download the default (only needed once).")
        download_model(default_model, models_dir)
        return os.path.join(models_dir, default_model.split('/')[-1])
    return max(fns, key=os.path.getmtime)  # use most recent if several found

pct_warned = {50: False, 10: False}  # one-time warnings at 50% and 10% of context remaining

def warn_session_length(total_tokens):
    remaining = ((ctx_size - total_tokens) / ctx_size) * 100
    if remaining <= 1.0:  # check the terminal condition first so the warnings cannot shadow it
        sys_print('[u]Note[/u]: This session has become too large. Please start again to continue.')
        sys.exit()
    elif remaining <= 10 and pct_warned[10] is False:
        sys_print('[u]Note[/u]: We are close to the end of our session.')
        pct_warned[10] = True
    elif remaining <= 50 and pct_warned[50] is False:
        sys_print('[u]Note[/u]: We are half-way through our session.')
        pct_warned[50] = True

if __name__ == '__main__':
    console.print(f'[bold][bright_cyan]safespace[/bright_cyan][/bold] (v{__version__}) - Private and local AI counseling.', highlight=False)
    console.print('Visit https://github.com/danlou/safespace for more details.\n')

    if '--force-download' in sys.argv[1:]:
        download_model(default_model)
    model_path = get_model_path()

    with console.status("[bright_cyan]Loading...", spinner='dots', spinner_style='bold bright_cyan'):
        from llama_cpp import Llama  # deferred import: slow to load, and only needed once a model is available
        llm = Llama(model_path, n_ctx=ctx_size, verbose=False,
                    use_mlock=True, use_mmap=False)  # ~5GB of RAM with default model (depends on platform)

    sys_msg = "This app is called safespace, and you are a Rogerian counselor."
    sys_msg += " You facilitate an environment in which the user can quickly bring about positive change."
    sys_msg += " You do not remember past conversations, and pay close attention to every detail of the current conversation."
    sys_msg += f" Time/Date: {dt.now().strftime('%I:%M %p / %d %B %Y')}. There is no internet access."

    sys_print("[u]Ready[/u]. Tell me what troubles you ('q' to exit).")
    starter = user_input()
    prompt = f"{sys_msg} USER: {starter} ASSISTANT: Hi! What would you like to talk about? USER: {starter} ASSISTANT:"

    while True:  # exits when context size exceeds 99% capacity (see warn_session_length)
        with console.status("[bright_cyan]Thinking...", spinner='dots', spinner_style='bold bright_cyan'):
            output = llm(prompt, max_tokens=ctx_size, temperature=0.7, echo=True)
        model_response = output['choices'][0]['text'].split('ASSISTANT: ')[-1].strip()
        sys_print(model_response)
        warn_session_length(output['usage']['total_tokens'])
        reply = user_input()
        # echo=True returns prompt + completion, so the full transcript is re-fed each turn
        prompt = f"{output['choices'][0]['text']} USER: {reply} ASSISTANT:"
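
# Usage (assumes the llama-cpp-python, rich, and requests packages are installed):
#   python safespace.py                    # uses (or downloads) a model in safespace_models/
#   python safespace.py --force-download   # re-download the default model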