forked from su77ungr/CASALIOY
-
Notifications
You must be signed in to change notification settings - Fork 0
/
example.env
45 lines (40 loc) · 1.28 KB
/
example.env
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# Generic
# NOTE(review): the exact meaning and units of the MODEL_* keys below are
# defined by the code that loads this file — confirm there before tuning.
MODEL_N_CTX=2048
MODEL_MAX_TOKENS=1024
MODEL_STOP=[STOP]
MODEL_TEMP=0.5
# Repeat penalty: helps prevent the model from generating repetitive or
# monotonous text. A higher value (e.g., 1.5) will penalize repetitions more
# strongly, while a lower value (e.g., 0.9) will be more lenient. The default
# value is 1.1.
MODEL_REPEAT_PENALTY=1.0
MODEL_TOP_K=40
MODEL_TOP_P=0.95
MODEL_NO_REPEAT_NGRAM_SIZE=6
MODEL_N_PREDICT=10000
# Disabled alternative value for MODEL_N_PREDICT (uncomment to use it instead
# of the line above).
# NOTE(review): -1 presumably means "no limit" — confirm against the loader.
#MODEL_N_PREDICT=-1
MODEL_VERBOSE=true
MODEL_N_THREADS=16
MODEL_N_BATCH=1000
N_GPU_LAYERS=4
USE_MLOCK=false
# keep model loaded in memory
# NOTE(review): this wording matches what llama.cpp documents for mlock —
# confirm whether the comment above is meant for USE_MLOCK rather than for
# MODEL_USE_MMAP below.
MODEL_USE_MMAP=false
# Allowed values: LlamaCpp or HF
TEXT_EMBEDDINGS_MODEL_TYPE=HF
TEXT_EMBEDDINGS_MODEL=sentence-transformers/all-MiniLM-L6-v2
# Ingestion
PERSIST_DIRECTORY=db
COLLECTION_NAME=test
DOCUMENTS_DIRECTORY=source_documents
# all-MiniLM-L6-v2 truncates its input at a length of 256, so the chunk size
# is set to that limit.
# NOTE(review): confirm INGEST_CHUNK_SIZE is measured in the same unit as the
# model's limit.
INGEST_CHUNK_SIZE=256
INGEST_CHUNK_OVERLAP=50
INGEST_N_THREADS=8
# Generation
# Allowed values: GPT4All or LlamaCpp
MODEL_TYPE=LlamaCpp
MODEL_PATH=models/WizardLM-7B-uncensored.ggmlv3.q6_K.bin
# Allowed values: stuff, refine, betterstuff or betterrefine.
# NOTE(review): map_reduce and map-rerank have also been listed for this key,
# set apart from the four values above — confirm whether the loader accepts
# them.
CHAIN_TYPE=betterrefine
# Document retrieval
# How many documents to retrieve from the db
N_RETRIEVE_DOCUMENTS=100
# How many documents to forward to the LLM, chosen among those retrieved
N_FORWARD_DOCUMENTS=5