From 79e152dad29770fcde2d61402ab6d3de417cedf8 Mon Sep 17 00:00:00 2001
From: Oleg Klimov
Date: Thu, 31 Aug 2023 12:10:48 +0200
Subject: [PATCH] Add 1.6b, fix some other models (#82)

* update known models list
* add required model memory and auto torch dtype for huggingface models
* remove system prompt from refact chat scratchpad
* Refact/1.6B-fim
* fix model regex
* Refact/1.6B name
* context size

---------

Co-authored-by: mitya
---
 .../refact_known_models/huggingface.py        | 16 ++++------------
 known_models_db/refact_known_models/refact.py | 20 ++++++++++----------
 refact_scratchpads/scratchpad_hf.py           | 12 +++++-------
 .../inference/inference_hf.py                 | 10 +++++++---
 4 files changed, 26 insertions(+), 32 deletions(-)

diff --git a/known_models_db/refact_known_models/huggingface.py b/known_models_db/refact_known_models/huggingface.py
index 1bbd6912..3fa5e90e 100644
--- a/known_models_db/refact_known_models/huggingface.py
+++ b/known_models_db/refact_known_models/huggingface.py
@@ -40,9 +40,7 @@
         "model_path": "TheBloke/WizardLM-7B-V1.0-Uncensored-GPTQ",
         "diff_scratchpad_class": None,
         "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceWizard",
-        "model_class_kwargs": {
-            "model_basename": "wizardlm-7b-v1.0-uncensored-GPTQ-4bit-128g.no-act.order",
-        },
+        "model_class_kwargs": {},
         "required_memory_mb": 8000,
         "filter_caps": ["wizardlm"],
     },
@@ -51,9 +49,7 @@
         "model_path": "TheBloke/WizardLM-13B-V1.1-GPTQ",
         "diff_scratchpad_class": None,
         "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceWizard",
-        "model_class_kwargs": {
-            "model_basename": "wizardlm-13b-v1.1-GPTQ-4bit-128g.no-act.order",
-        },
+        "model_class_kwargs": {},
         "required_memory_mb": 14000,
         "filter_caps": ["wizardlm"],
     },
@@ -62,9 +58,7 @@
         "model_path": "TheBloke/Llama-2-7b-Chat-GPTQ",
         "diff_scratchpad_class": None,
         "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceLlama2",
-        "model_class_kwargs": {
-            "model_basename": "gptq_model-4bit-128g",
-        },
+        "model_class_kwargs": {},
         "required_memory_mb": 8000,
         "filter_caps": ["llama2"],
     },
@@ -73,9 +67,7 @@
         "model_path": "TheBloke/Llama-2-13B-chat-GPTQ",
         "diff_scratchpad_class": None,
         "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceLlama2",
-        "model_class_kwargs": {
-            "model_basename": "gptq_model-4bit-128g",
-        },
+        "model_class_kwargs": {},
         "required_memory_mb": 14000,
         "filter_caps": ["llama2"],
     },
diff --git a/known_models_db/refact_known_models/refact.py b/known_models_db/refact_known_models/refact.py
index 32cbc7e3..35bd2a12 100644
--- a/known_models_db/refact_known_models/refact.py
+++ b/known_models_db/refact_known_models/refact.py
@@ -1,4 +1,14 @@
 refact_mini_db = {
+    "Refact/1.6B": {
+        "backend": "transformers",
+        "model_path": "smallcloudai/Refact-1_6B-fim",
+        "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface",
+        "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceRefact",
+        "model_class_kwargs": {},
+        "required_memory_mb": 6000,
+        "filter_caps": ["Refact", "completion"],
+    },
+
     "CONTRASTcode/medium/multi": {
         "model_path_type": "huggingface",
         "model_path": "smallcloudai/codify_medium_multi",
@@ -20,14 +30,4 @@
         "required_memory_mb": 8500,
         "filter_caps": ["CONTRASTcode", "completion", "finetune"],
     },
-
-    "Refact/2b": {
-        "backend": "transformers",
-        "model_path": "smallcloudai/Refact-2b",
-        "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface",
-        "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceRefact",
-        "model_class_kwargs": {},
-        "filter_caps": ["Refact", "completion"],
-        "hidden": True,  # only for debugging because model is still training
-    },
 }
diff --git a/refact_scratchpads/scratchpad_hf.py b/refact_scratchpads/scratchpad_hf.py
index 085660e2..470565b4 100644
--- a/refact_scratchpads/scratchpad_hf.py
+++ b/refact_scratchpads/scratchpad_hf.py
@@ -451,19 +451,17 @@ class ScratchpadHuggingfaceRefact(ScratchpadChatBase):

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self._esc_token = self._encode_one_token("<empty_output>")
+        self._esc = "<empty_output>"

     def _prompt(self) -> str:
-        esc = self._tokenizer.decode(self._esc_token)
-        system_prompt = "You are a chat bot"
-        text = f"{esc}SYSTEM {system_prompt}\n"
+        text = ""
         for message in self._messages:
             if message["content"] == "":
                 continue
             if message["role"] == "user":
-                text += f"{esc}USER "
+                text += f"{self._esc}USER "
             else:
-                text += f"{esc}ASSISTANT "
+                text += f"{self._esc}ASSISTANT "
             text += message["content"] + "\n"
-        text += f"{esc}ASSISTANT "
+        text += f"{self._esc}ASSISTANT "
         return text
diff --git a/self_hosting_machinery/inference/inference_hf.py b/self_hosting_machinery/inference/inference_hf.py
index 25502709..d11ffca2 100644
--- a/self_hosting_machinery/inference/inference_hf.py
+++ b/self_hosting_machinery/inference/inference_hf.py
@@ -128,8 +128,9 @@ def __init__(self,

         if model_dict["backend"] == "transformers":
             self._model = AutoModelForCausalLM.from_pretrained(
-                self._model_dict["model_path"], cache_dir=env.DIR_WEIGHTS, device_map="auto",
-                trust_remote_code=True, **self._model_dict["model_class_kwargs"])
+                self._model_dict["model_path"], cache_dir=env.DIR_WEIGHTS,
+                device_map="auto", torch_dtype="auto", trust_remote_code=True,
+                **self._model_dict["model_class_kwargs"])
         elif model_dict["backend"] == "autogptq":
             self._model = CustomAutoGPTQForCausalLM.from_quantized(
                 self._model_dict["model_path"], cache_dir=env.DIR_WEIGHTS, device=self._device,
@@ -154,7 +155,10 @@ def logger(*args):
             Scratchpad = ScratchpadHuggingfaceCompletion

         scratchpad = Scratchpad(tokenizer=self._tokenizer, logger=logger, **request)
-        p = scratchpad.prompt(self._tokenizer.max_len_single_sentence)
+        T = self._tokenizer.max_len_single_sentence
+        if not isinstance(T, int) or T <= 0 or T > 4096:
+            T = 2048
+        p = scratchpad.prompt(T)
         if len(p) == 0:
             raise RuntimeError("empty tokens prompt")