Add 1.6B, fix some other models (#82)
* update known models list

* add required model memory and auto torch dtype for huggingface models

* remove system prompt from refact chat scratchpad

* Refact/1.6B-fim

* fix model regex

* Refact/1.6B name

* clamp context size to a safe default

---------

Co-authored-by: mitya <dimitry.ageev@gmail.com>
olegklimov and mitya52 authored Aug 31, 2023
1 parent 12b4c1b commit 79e152d
Showing 4 changed files with 26 additions and 32 deletions.
16 changes: 4 additions & 12 deletions known_models_db/refact_known_models/huggingface.py
@@ -40,9 +40,7 @@
         "model_path": "TheBloke/WizardLM-7B-V1.0-Uncensored-GPTQ",
         "diff_scratchpad_class": None,
         "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceWizard",
-        "model_class_kwargs": {
-            "model_basename": "wizardlm-7b-v1.0-uncensored-GPTQ-4bit-128g.no-act.order",
-        },
+        "model_class_kwargs": {},
         "required_memory_mb": 8000,
         "filter_caps": ["wizardlm"],
     },
@@ -51,9 +49,7 @@
         "model_path": "TheBloke/WizardLM-13B-V1.1-GPTQ",
         "diff_scratchpad_class": None,
         "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceWizard",
-        "model_class_kwargs": {
-            "model_basename": "wizardlm-13b-v1.1-GPTQ-4bit-128g.no-act.order",
-        },
+        "model_class_kwargs": {},
         "required_memory_mb": 14000,
         "filter_caps": ["wizardlm"],
     },
@@ -62,9 +58,7 @@
         "model_path": "TheBloke/Llama-2-7b-Chat-GPTQ",
         "diff_scratchpad_class": None,
         "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceLlama2",
-        "model_class_kwargs": {
-            "model_basename": "gptq_model-4bit-128g",
-        },
+        "model_class_kwargs": {},
         "required_memory_mb": 8000,
         "filter_caps": ["llama2"],
     },
@@ -73,9 +67,7 @@
         "model_path": "TheBloke/Llama-2-13B-chat-GPTQ",
         "diff_scratchpad_class": None,
         "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceLlama2",
-        "model_class_kwargs": {
-            "model_basename": "gptq_model-4bit-128g",
-        },
+        "model_class_kwargs": {},
         "required_memory_mb": 14000,
         "filter_caps": ["llama2"],
     },
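Note on the four GPTQ entries above: model_class_kwargs is now empty, so no model_basename reaches the loader and AutoGPTQ is left to locate the quantized checkpoint on its own. A minimal sketch of the difference, using the upstream auto_gptq class rather than this repo's CustomAutoGPTQForCausalLM wrapper, and assuming the hub repo carries a quantize_config.json that names the checkpoint file:

from auto_gptq import AutoGPTQForCausalLM

# Before: the basename had to match the checkpoint file exactly, e.g.
#   from_quantized(..., model_basename="gptq_model-4bit-128g")
# After: with model_class_kwargs == {}, no basename is passed and
# auto_gptq resolves the checkpoint file from the repo metadata.
model = AutoGPTQForCausalLM.from_quantized(
    "TheBloke/Llama-2-7b-Chat-GPTQ",   # model_path from the table above
    device="cuda:0",
    trust_remote_code=True,
)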
20 changes: 10 additions & 10 deletions known_models_db/refact_known_models/refact.py
@@ -1,4 +1,14 @@
 refact_mini_db = {
+    "Refact/1.6B": {
+        "backend": "transformers",
+        "model_path": "smallcloudai/Refact-1_6B-fim",
+        "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface",
+        "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceRefact",
+        "model_class_kwargs": {},
+        "required_memory_mb": 6000,
+        "filter_caps": ["Refact", "completion"],
+    },
+
     "CONTRASTcode/medium/multi": {
         "model_path_type": "huggingface",
         "model_path": "smallcloudai/codify_medium_multi",
@@ -20,14 +30,4 @@
         "required_memory_mb": 8500,
         "filter_caps": ["CONTRASTcode", "completion", "finetune"],
     },
-
-    "Refact/2b": {
-        "backend": "transformers",
-        "model_path": "smallcloudai/Refact-2b",
-        "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface",
-        "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceRefact",
-        "model_class_kwargs": {},
-        "filter_caps": ["Refact", "completion"],
-        "hidden": True,  # only for debugging because model is still training
-    },
 }
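The swap above registers the released Refact/1.6B (smallcloudai/Refact-1_6B-fim) with both completion and chat scratchpads and a 6000 MB memory requirement, and drops the hidden Refact/2b debug entry that pointed at a still-training model. A minimal sketch of reading the entry back, assuming the import path mirrors the file location:

from known_models_db.refact_known_models.refact import refact_mini_db

entry = refact_mini_db["Refact/1.6B"]
assert entry["backend"] == "transformers"
print(entry["model_path"])           # smallcloudai/Refact-1_6B-fim
print(entry["required_memory_mb"])   # 6000, presumably used for GPU capacity checks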
12 changes: 5 additions & 7 deletions refact_scratchpads/scratchpad_hf.py
@@ -451,19 +451,17 @@ class ScratchpadHuggingfaceRefact(ScratchpadChatBase):

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self._esc_token = self._encode_one_token("<empty_output>")
+        self._esc = "<empty_output>"

     def _prompt(self) -> str:
-        esc = self._tokenizer.decode(self._esc_token)
-        system_prompt = "You are a chat bot"
-        text = f"{esc}SYSTEM {system_prompt}\n"
+        text = ""
         for message in self._messages:
             if message["content"] == "":
                 continue
             if message["role"] == "user":
-                text += f"{esc}USER "
+                text += f"{self._esc}USER "
             else:
-                text += f"{esc}ASSISTANT "
+                text += f"{self._esc}ASSISTANT "
             text += message["content"] + "\n"
-        text += f"{esc}ASSISTANT "
+        text += f"{self._esc}ASSISTANT "
         return text
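With the hard-coded "You are a chat bot" SYSTEM line gone, and the <empty_output> separator kept as a plain string instead of being decoded from a token on every call, the prompt reduces to alternating USER/ASSISTANT turns. A hypothetical trace of the new _prompt() for a two-message history:

esc = "<empty_output>"
messages = [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "hello"},
]

text = ""
for m in messages:
    if m["content"] == "":
        continue                      # empty messages are skipped
    role = "USER" if m["role"] == "user" else "ASSISTANT"
    text += f"{esc}{role} " + m["content"] + "\n"
text += f"{esc}ASSISTANT "            # generation continues from here

# text == '<empty_output>USER hi\n<empty_output>ASSISTANT hello\n<empty_output>ASSISTANT '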
10 changes: 7 additions & 3 deletions self_hosting_machinery/inference/inference_hf.py
@@ -128,8 +128,9 @@ def __init__(self,

         if model_dict["backend"] == "transformers":
             self._model = AutoModelForCausalLM.from_pretrained(
-                self._model_dict["model_path"], cache_dir=env.DIR_WEIGHTS, device_map="auto",
-                trust_remote_code=True, **self._model_dict["model_class_kwargs"])
+                self._model_dict["model_path"], cache_dir=env.DIR_WEIGHTS,
+                device_map="auto", torch_dtype="auto", trust_remote_code=True,
+                **self._model_dict["model_class_kwargs"])
         elif model_dict["backend"] == "autogptq":
             self._model = CustomAutoGPTQForCausalLM.from_quantized(
                 self._model_dict["model_path"], cache_dir=env.DIR_WEIGHTS, device=self._device,
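torch_dtype="auto" makes transformers honor the dtype recorded in the checkpoint's config instead of up-casting everything to float32, which roughly halves memory for fp16/bf16 checkpoints and lines up with the new required_memory_mb figures. A minimal sketch, assuming a checkpoint saved in bfloat16:

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "smallcloudai/Refact-1_6B-fim",
    torch_dtype="auto",       # follow config.torch_dtype rather than the fp32 default
    device_map="auto",
    trust_remote_code=True,
)
print(model.dtype)            # torch.bfloat16 if that is what the weights carry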
@@ -154,7 +155,10 @@ def logger(*args):
             Scratchpad = ScratchpadHuggingfaceCompletion

         scratchpad = Scratchpad(tokenizer=self._tokenizer, logger=logger, **request)
-        p = scratchpad.prompt(self._tokenizer.max_len_single_sentence)
+        T = self._tokenizer.max_len_single_sentence
+        if not isinstance(T, int) or T <= 0 or T > 4096:
+            T = 2048
+        p = scratchpad.prompt(T)
         if len(p) == 0:
             raise RuntimeError("empty tokens prompt")

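The guard above exists because max_len_single_sentence is derived from the tokenizer's model_max_length, and tokenizers whose config omits that field report a huge sentinel value (on the order of 1e30), which would blow up the prompt budget. A small sketch of the failure mode, with hypothetical numbers:

VERY_LARGE = int(1e30)        # what HF tokenizers report when model_max_length is unset
T = VERY_LARGE - 2            # max_len_single_sentence subtracts the special tokens

if not isinstance(T, int) or T <= 0 or T > 4096:
    T = 2048                  # fall back to a sane prompt budget
assert T == 2048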
