diff --git a/known_models_db/refact_known_models/huggingface.py b/known_models_db/refact_known_models/huggingface.py index 3fa5e90e..9aa55925 100644 --- a/known_models_db/refact_known_models/huggingface.py +++ b/known_models_db/refact_known_models/huggingface.py @@ -2,7 +2,7 @@ "starcoder/15b/base": { "backend": "autogptq", "model_path": "TheBloke/starcoder-GPTQ", - "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface", + "diff_scratchpad_class": "refact_scratchpads:ScratchpadPSM", "chat_scratchpad_class": None, "model_class_kwargs": {}, "required_memory_mb": 18000, @@ -11,7 +11,7 @@ "starcoder/15b/plus": { "backend": "autogptq", "model_path": "TheBloke/starcoderplus-GPTQ", - "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface", + "diff_scratchpad_class": "refact_scratchpads:ScratchpadPSM", "chat_scratchpad_class": None, "model_class_kwargs": {}, "required_memory_mb": 18000, @@ -29,7 +29,7 @@ "wizardcoder/15b": { "backend": "autogptq", "model_path": "TheBloke/WizardCoder-15B-1.0-GPTQ", - "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface", + "diff_scratchpad_class": "refact_scratchpads:ScratchpadPSM", "chat_scratchpad_class": None, "model_class_kwargs": {}, "required_memory_mb": 18000, diff --git a/known_models_db/refact_known_models/refact.py b/known_models_db/refact_known_models/refact.py index cae8bb4d..336386b9 100644 --- a/known_models_db/refact_known_models/refact.py +++ b/known_models_db/refact_known_models/refact.py @@ -2,11 +2,11 @@ "Refact/1.6B": { "backend": "transformers", "model_path": "smallcloudai/Refact-1_6B-fim", - "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface", + "diff_scratchpad_class": "refact_scratchpads:ScratchpadSPM", "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceRefact", "model_class_kwargs": {}, "required_memory_mb": 6000, - "filter_caps": ["Refact", "completion", "finetune"], + "filter_caps": ["Refact", "completion"], }, "CONTRASTcode/medium/multi": { diff --git a/refact_scratchpads/__init__.py b/refact_scratchpads/__init__.py index 4db0e151..43018657 100644 --- a/refact_scratchpads/__init__.py +++ b/refact_scratchpads/__init__.py @@ -4,7 +4,8 @@ from refact_scratchpads.scratchpad_hf import ScratchpadHuggingfaceBase from refact_scratchpads.scratchpad_hf import ScratchpadHuggingfaceCompletion -from refact_scratchpads.scratchpad_hf import ScratchpadHuggingface +from refact_scratchpads.scratchpad_hf import ScratchpadSPM +from refact_scratchpads.scratchpad_hf import ScratchpadPSM from refact_scratchpads.scratchpad_hf import ScratchpadCodeLlama from refact_scratchpads.scratchpad_hf import ScratchpadHuggingfaceStarChat from refact_scratchpads.scratchpad_hf import ScratchpadHuggingfaceWizard diff --git a/refact_scratchpads/scratchpad_hf.py b/refact_scratchpads/scratchpad_hf.py index 470565b4..f7eb55cb 100644 --- a/refact_scratchpads/scratchpad_hf.py +++ b/refact_scratchpads/scratchpad_hf.py @@ -1,7 +1,5 @@ import torch as th import time -import json -import os import termcolor from refact_scratchpads.scratchpad_utils import trim_context_infill @@ -33,7 +31,6 @@ def __init__( **unused ): self._tokenizer = tokenizer - self._tokenizer_skip_first = bool(tokenizer.encode("")) # XXX: replace with add_special_tokens=False ? 
self._max_tokens = max_tokens self._logger = logger self._created = created @@ -96,14 +93,12 @@ def after_token_selection(self, m, chosen_token: th.Tensor, **unused) -> Dict[st return dict() def _encode_one_token(self, text: str) -> int: - tokens = self._tokenizer.encode(text) - if self._tokenizer_skip_first: - tokens = tokens[1:] + tokens = self._tokenizer.encode(text, add_special_tokens=False) if len(tokens) != 1: raise ValueError(f"Must be single token, have {tokens} for '{text}'") return tokens[0] - def encode_without_special_tokens(self, txt: str) -> List[int]: + def _encode_without_special_tokens(self, txt: str) -> List[int]: if hasattr(self._tokenizer, "tokenizer_copy_but_does_not_encode_special_tokens"): t = self._tokenizer.tokenizer_copy_but_does_not_encode_special_tokens else: @@ -141,7 +136,7 @@ def completion(self, final: bool): return {"text": self._tokenizer.decode(self._completion)} -class ScratchpadHuggingface(ScratchpadHuggingfaceBase): +class ScratchpadFIM(ScratchpadHuggingfaceBase): def __init__( self, @@ -149,7 +144,6 @@ def __init__( cursor_file: str, cursor0: int, cursor1: int, - ignore_special_tokens: bool = True, **kwargs ): super().__init__(**kwargs) @@ -158,7 +152,6 @@ def __init__( self._cursor_file = cursor_file self._cursor = cursor0 - self._ignore_special_tokens = ignore_special_tokens self._code = sources[cursor_file] self._prefix: Optional[str] = None @@ -171,6 +164,9 @@ def __init__( self._fim_suffix = self._encode_one_token("") self._fim_middle = self._encode_one_token("") + def _prompt_format(self, prefix_tokens, suffix_tokens): + raise NotImplementedError() + def prompt(self, T: int): self._prefix = self._code[:self._cursor] # Why we need to cut the line right of the cursor? @@ -182,34 +178,23 @@ def prompt(self, T: int): # ^^ but we stop here because we need single line completion # => we have two closing parenthesis. 
# self._suffix = "".join(self._code[self._cursor:].splitlines(keepends=True)[1:]) - self._suffix = self._code[self._cursor:] + self._suffix = self._code[self._cursor:].lstrip(" \t") self._suffix_line0cut = "".join(self._code[self._cursor:].splitlines(keepends=True)[1:]) self._completion.clear() prefix_cut, suffix_cut = trim_context_infill( self._prefix, self._suffix, EncodingWrapper(self._tokenizer), T - self._max_tokens ) + prefix_cut_tokens = self._encode_without_special_tokens(prefix_cut) + suffix_cut_tokens = self._encode_without_special_tokens(suffix_cut) self.debuglog( - f"ScratchpadHuggingfaceFIM prompt prefix {len(prefix_cut)} chars, " - f"suffix {len(suffix_cut)} chars, T={T} max_tokens={self._max_tokens}" + "ScratchpadFIM prompt prefix %d chars -> %d tokens, suffix %d chars -> %d tokens, T=%d max_new_tokens=%d" % + (len(prefix_cut), len(prefix_cut_tokens), len(suffix_cut), len(suffix_cut_tokens), T, self._max_tokens) ) - if self._ignore_special_tokens: - prefix_cut_tokens = self.encode_without_special_tokens(prefix_cut) - suffix_cut_tokens = self.encode_without_special_tokens(suffix_cut) - else: - prefix_cut_tokens = self._tokenizer.encode(prefix_cut) - suffix_cut_tokens = self._tokenizer.encode(suffix_cut) - - prompt: List[int] = [ - self._fim_prefix, - *prefix_cut_tokens, - self._fim_suffix, - *suffix_cut_tokens, - self._fim_middle, - ] - # self.debuglog("-"*40) - # self.debuglog(self._tokenizer.decode(prompt)) - # self.debuglog("-"*40) + prompt: List[int] = self._prompt_format(prefix_cut_tokens, suffix_cut_tokens) + self.debuglog("-"*40) + self.debuglog(self._tokenizer.decode(prompt)) + self.debuglog("-"*40) return prompt def completion(self, final: bool): @@ -224,88 +209,28 @@ def completion(self, final: bool): return {self._cursor_file: self._prefix + completion + self._suffix_line0cut} -class ScratchpadRefactFIM(ScratchpadHuggingfaceBase): - - def __init__( - self, - sources: Dict[str, str], - cursor_file: str, - cursor0: int, - cursor1: int, - ignore_special_tokens: bool = True, - **kwargs - ): - super().__init__(**kwargs) - - assert cursor0 == cursor1 - - self._cursor_file = cursor_file - self._cursor = cursor0 - self._ignore_special_tokens = ignore_special_tokens - self._code = sources[cursor_file] - - self._prefix: Optional[str] = None - self._suffix: Optional[str] = None - self._suffix_line0cut: Optional[str] = None - self._completion = [] - - self._tokens_produced = 0 - self._fim_prefix = self._encode_one_token("") - self._fim_suffix = self._encode_one_token("") - self._fim_middle = self._encode_one_token("") - - def prompt(self, T: int): - self._prefix = self._code[:self._cursor] - # Why we need to cut the line right of the cursor? - # Example 1: - # function_call(param1, GENERATED_TONENS) - # => everything works right - # Example 2: - # function_call(param1, GENERATED_TONENS)\nMORE_TOKENS\nSOME_OTHER_CALL(OTHER_PARAM) - # ^^ but we stop here because we need single line completion - # => we have two closing parenthesis. 
- # self._suffix = "".join(self._code[self._cursor:].splitlines(keepends=True)[1:]) - self._suffix = self._code[self._cursor:] - self._suffix_line0cut = "".join(self._code[self._cursor:].splitlines(keepends=True)[1:]) - self._completion.clear() - - prefix_cut, suffix_cut = trim_context_infill( - self._prefix, self._suffix, EncodingWrapper(self._tokenizer), T - self._max_tokens - ) - self.debuglog( - f"ScratchpadRefactFIM prompt prefix {len(prefix_cut)} chars, " - f"suffix {len(suffix_cut)} chars, T={T} max_tokens={self._max_tokens}" - ) - if self._ignore_special_tokens: - prefix_cut_tokens = self.encode_without_special_tokens(prefix_cut) - suffix_cut_tokens = self.encode_without_special_tokens(suffix_cut) - else: - prefix_cut_tokens = self._tokenizer.encode(prefix_cut) - suffix_cut_tokens = self._tokenizer.encode(suffix_cut) +class ScratchpadSPM(ScratchpadFIM): - prompt: List[int] = [ + def _prompt_format(self, prefix_tokens, suffix_tokens): + return [ self._fim_suffix, - *suffix_cut_tokens, + *suffix_tokens, self._fim_prefix, - *prefix_cut_tokens, + *prefix_tokens, self._fim_middle, ] - # self.debuglog("-"*40) - # self.debuglog(self._tokenizer.decode(prompt)) - # self.debuglog("-"*40) - return prompt - def completion(self, final: bool): - assert self._prefix is not None - assert self._suffix is not None - completion = self._tokenizer.decode(self._completion) - if self.finish_reason == "eot": - # Correct stop - return {self._cursor_file: self._prefix + completion + self._suffix} - else: - # "stop-lf" or "length" or not stopped yet (empty reason), it's better to remove first line remainder - return {self._cursor_file: self._prefix + completion + self._suffix_line0cut} +class ScratchpadPSM(ScratchpadFIM): + + def _prompt_format(self, prefix_tokens, suffix_tokens): + return [ + self._fim_prefix, + *prefix_tokens, + self._fim_suffix, + *suffix_tokens, + self._fim_middle, + ] class ScratchpadCodeLlama(ScratchpadHuggingfaceBase): @@ -454,14 +379,21 @@ def __init__(self, *args, **kwargs): self._esc = "" def _prompt(self) -> str: - text = "" + if len(self._messages) <= 2: + text = self._esc + ("SYSTEM You are a programming assistant. 
" + "If you don't understand the question, just say: " + "I don't understand the question.\n") + else: + # We are ignoring the `system prompt` here 'cause the model + # haven't seen more than two messages with a `system prompt` while training + # Going to fix this later with the next iteration + text = "" for message in self._messages: if message["content"] == "": continue if message["role"] == "user": - text += f"{self._esc}USER " + text += self._esc + "USER " + message["content"].strip() + "\n" else: - text += f"{self._esc}ASSISTANT " - text += message["content"] + "\n" - text += f"{self._esc}ASSISTANT " + text += self._esc + "ASSISTANT " + message["content"].strip() + "\n" + text += self._esc + "ASSISTANT" return text diff --git a/refact_scratchpads_no_gpu/stream_results.py b/refact_scratchpads_no_gpu/stream_results.py index e43db73d..0d1d9b4f 100644 --- a/refact_scratchpads_no_gpu/stream_results.py +++ b/refact_scratchpads_no_gpu/stream_results.py @@ -1,7 +1,18 @@ -import os, sys, json, re, time, datetime, termcolor, multiprocessing, copy, queue +import os +import json +import re +import time +import datetime +import termcolor +import multiprocessing +import copy +import queue import requests -from typing import Dict, Any, List, Optional, Set +import setproctitle import logging + +from typing import Dict, Any, List, Optional, Set + logger = logging.getLogger("INFSERVER") @@ -241,7 +252,6 @@ def check_cancelled(self): def _upload_results_loop(upload_q: multiprocessing.Queue, cancelled_q: multiprocessing.Queue): - import setproctitle setproctitle.setproctitle("upload_results_loop") req_session = infserver_session() exit_flag = False diff --git a/self_hosting_machinery/inference/inference_base.py b/self_hosting_machinery/inference/inference_base.py index d8bf3adc..832ee3c0 100644 --- a/self_hosting_machinery/inference/inference_base.py +++ b/self_hosting_machinery/inference/inference_base.py @@ -9,6 +9,8 @@ def modload(import_str): import_mod, import_class = import_str.rsplit(":", 1) model = importlib.import_module(import_mod) Class = getattr(model, import_class, None) + if Class is None: + raise ValueError("cannot find \"%s\"" % import_str) return Class diff --git a/self_hosting_machinery/inference/inference_hf.py b/self_hosting_machinery/inference/inference_hf.py index d11ffca2..df9c9608 100644 --- a/self_hosting_machinery/inference/inference_hf.py +++ b/self_hosting_machinery/inference/inference_hf.py @@ -3,6 +3,7 @@ import time import torch import traceback +import termcolor from auto_gptq import AutoGPTQForCausalLM from transformers import AutoModelForCausalLM @@ -41,15 +42,26 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwa return False -class StopTokenStoppingCriteria(StoppingCriteria): +class FeedScratchoadCriteria(StoppingCriteria): - def __init__(self, scratchpad: ScratchpadHuggingfaceBase): + def __init__(self, tokenizer, t0: float, scratchpad: ScratchpadHuggingfaceBase): StoppingCriteria.__init__(self) + self.tokenizer = tokenizer self.scratchpad = scratchpad + self.t0 = t0 def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: - last_tokens = input_ids[0][-1] - self.scratchpad.after_token_selection(None, last_tokens) + token = input_ids[0][-1] + if DEBUG: + def _format(t: str, color: str): + return "\"%s\"" % termcolor.colored(t.replace("\n", "\\n").replace("\r", "\\r"), color) + text = _format(self.tokenizer.decode([token.item()]), "green") + text = text.ljust(40) + # for tok, logprob in 
sorted(logprobs.items(), key=lambda x: -x[-1]): + # text += " %i %s" % (tok, _format(self.tokenizer.decode([tok]), "yellow")) + # text += " %0.2f%%" % (100 * math.exp(logprob)) + logging.getLogger("MODEL").info("%6.1fms %s" % (1000 * (time.time() - self.t0), text)) + self.scratchpad.after_token_selection(None, token) return bool(self.scratchpad.finish_reason) @@ -159,6 +171,7 @@ def logger(*args): if not isinstance(T, int) or T <= 0 or T > 4096: T = 2048 p = scratchpad.prompt(T) + logger("prompt %i tokens, max_new_tokens %i" % (len(p), request["max_tokens"])) if len(p) == 0: raise RuntimeError("empty tokens prompt") @@ -166,6 +179,7 @@ def logger(*args): return scratchpad, tokens_prompt def infer(self, request: Dict[str, Any], upload_proxy: UploadProxy, upload_proxy_args: Dict): + t0 = time.time() request_id = request["id"] try: scratchpad, tokens_prompt = self._prepare_scratchpad(request) @@ -176,7 +190,7 @@ def infer(self, request: Dict[str, Any], upload_proxy: UploadProxy, upload_proxy with torch.inference_mode(): stopping_criteria = StoppingCriteriaList([ CancellationStoppingCriteria(scratchpad, request_id, upload_proxy), - StopTokenStoppingCriteria(scratchpad), + FeedScratchoadCriteria(self._tokenizer, t0, scratchpad), ]) streamer = SMCStream(self._tokenizer, request_id, upload_proxy, upload_proxy_args, scratchpad) generation_kwargs = dict(input_ids=tokens_prompt.view(1, *tokens_prompt.shape), @@ -184,7 +198,10 @@ def infer(self, request: Dict[str, Any], upload_proxy: UploadProxy, upload_proxy max_new_tokens=request["max_tokens"], stopping_criteria=stopping_criteria, return_dict_in_generate=True, - output_scores=True) + output_scores=True, + top_p=request.get('top_p', 1.0), + temperature=request.get('temperature', 0.2)) + self._model.generate(**generation_kwargs) if not scratchpad.finish_reason: scratchpad.finish_reason = "maxlen" diff --git a/self_hosting_machinery/scripts/best_lora.py b/self_hosting_machinery/scripts/best_lora.py index dfc876fb..88e90338 100644 --- a/self_hosting_machinery/scripts/best_lora.py +++ b/self_hosting_machinery/scripts/best_lora.py @@ -68,4 +68,10 @@ def find_best_lora(model_name: str) -> Dict[str, str]: if __name__ == "__main__": - print(find_best_lora(default_finetune_model)) + from argparse import ArgumentParser + + parser = ArgumentParser() + parser.add_argument("--model", type=str, default=default_finetune_model) + args = parser.parse_args() + + print(find_best_lora(args.model)) diff --git a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py index 93f7810d..2dc1702a 100644 --- a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py +++ b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py @@ -35,7 +35,7 @@ def chat_limit_messages(messages: List[Dict[str, str]]): raise HTTPException(status_code=400, detail="No messages") while len(messages) > 10: del messages[0:2] # user, assistant - while sum([len(m["content"] + m["role"]) for m in messages]) > 4000: + while sum([len(m["content"] + m["role"]) for m in messages]) > 8000: del messages[0:2] # user, assistant return messages diff --git a/self_hosting_machinery/webgui/static/tab-finetune.html b/self_hosting_machinery/webgui/static/tab-finetune.html index c91ddedb..7732bf31 100644 --- a/self_hosting_machinery/webgui/static/tab-finetune.html +++ b/self_hosting_machinery/webgui/static/tab-finetune.html @@ -1,12 +1,12 @@
-                <div class="pane">
+                <div class="pane select-model-pane">
                     Select Model

-                <div class="pane">
+                <div class="pane use-model-pane">
                     Use Finetuned Model

@@ -196,7 +196,7 @@
-                        Max 1, Max 100
+                        Min 1, Max 100
diff --git a/self_hosting_machinery/webgui/static/tab-finetune.js b/self_hosting_machinery/webgui/static/tab-finetune.js index ff243dd5..f7091c44 100644 --- a/self_hosting_machinery/webgui/static/tab-finetune.js +++ b/self_hosting_machinery/webgui/static/tab-finetune.js @@ -22,6 +22,9 @@ let fine_tuning_button = null; let fine_tuning_settings = null; // let checkpoint_name = "best"; // let selected_model = ""; // we don't have model choice, empty for now +let use_model_pane = null; +let select_model_pane = null; + function finetine_status() { fetch("tab-finetune-get") @@ -593,12 +596,18 @@ function finetune_status() { if(!downloaded_stats.finetune_working_now) { switch(downloaded_stats.finetune_filter_stats.status) { case 'starting': - fine_tuning_pane.classList.add('pane-disabled'); + fine_tuning_pane.classList.add('pane-disabled'); + use_model_pane.classList.add('pane-disabled'); + select_model_pane.classList.add('pane-disabled'); + console.log('..starting'); break; case 'filtering': fine_filter_settings.disabled = true; progress_container.classList.remove('d-none') eta_state.innerHTML = 'ETA: ' + downloaded_stats.finetune_filter_stats.eta_minutes + ' minute(s)'; + use_model_pane.classList.remove('pane-disabled'); + select_model_pane.classList.remove('pane-disabled'); + console.log('..filtering'); break; case 'failed': document.querySelector('.ftf-error').classList.remove('d-none'); @@ -607,6 +616,8 @@ function finetune_status() { progress_container.classList.add('d-none'); ftf_bar.style.width = "0%"; fine_filter_settings.disabled = false; + use_model_pane.classList.remove('pane-disabled'); + select_model_pane.classList.remove('pane-disabled'); break; case 'finished': progress_container.classList.add('d-none'); @@ -616,6 +627,8 @@ function finetune_status() { fine_filter_button.disabled = false; fine_tuning_button.disabled = false; fine_tuning_pane.classList.remove('pane-disabled'); + use_model_pane.classList.remove('pane-disabled'); + select_model_pane.classList.remove('pane-disabled'); break; } } @@ -628,6 +641,8 @@ function finetune_status() { // filter working if(downloaded_stats.filter_working_now && !downloaded_stats.finetune_working_now) { fine_tuning_pane.classList.add('pane-disabled'); + use_model_pane.classList.add('pane-disabled'); + select_model_pane.classList.add('pane-disabled'); if(!fine_filter_button.querySelector('.spinner-border')) { fine_filter_button.innerHTML = ` Stop filtering`; } @@ -635,7 +650,7 @@ function finetune_status() { } // both not working if(!downloaded_stats.filter_working_now && !downloaded_stats.finetune_working_now) { - if(tab_files_data && !tab_files_data.hasOwnProperty('scan_finished') || !tab_files_data.scan_finished) { + if(tab_files_data && !tab_files_data.hasOwnProperty('scan_finished') && !tab_files_data.scan_finished) { return; } fine_filter_button.innerHTML = ` Run filter`; @@ -644,6 +659,8 @@ function finetune_status() { fine_filter_button.disabled = false; fine_tuning_button.disabled = false; fine_tuning_settings.disabled = false; + use_model_pane.classList.remove('pane-disabled'); + select_model_pane.classList.remove('pane-disabled'); } } @@ -805,6 +822,9 @@ export async function init() { fine_tuning_button.disabled = true; fine_tuning_settings = document.querySelector('.tab-finetune-fine-settings'); + use_model_pane = document.querySelector('.use-model-pane'); + select_model_pane = document.querySelector('.select-model-pane'); + const log_container = document.querySelector('.log-container'); function handle_auto_scroll() { if 
(log_container.scrollHeight - log_container.scrollTop === log_container.clientHeight) { diff --git a/self_hosting_machinery/webgui/static/tab-upload.js b/self_hosting_machinery/webgui/static/tab-upload.js index 72063bf7..6102a178 100644 --- a/self_hosting_machinery/webgui/static/tab-upload.js +++ b/self_hosting_machinery/webgui/static/tab-upload.js @@ -317,7 +317,7 @@ function save_filter_setup() { } let success_toast_div = document.querySelector('.upload-tab-scan-success-toast'); const success_toast = bootstrap.Toast.getOrCreateInstance(success_toast_div); - if(!show_info_toast) { + if(!show_info_toast && checked_types.length > 0) { show_info_toast = true; document.querySelector('.upload-tab-scan-success-toast .toast-body').innerHTML = 'Source files are ready, you can start GPU filtering now.'; setTimeout(function() { diff --git a/self_hosting_machinery/webgui/tab_finetune.py b/self_hosting_machinery/webgui/tab_finetune.py index c9ab357e..5b4ab7e0 100644 --- a/self_hosting_machinery/webgui/tab_finetune.py +++ b/self_hosting_machinery/webgui/tab_finetune.py @@ -59,7 +59,7 @@ class TabFinetuneConfig(BaseModel): class TabFinetuneActivate(BaseModel): model: str - lora_mode: str = Query(default="default", regex="off|latest-best|specific") + lora_mode: str = Query(default="off", regex="off|latest-best|specific") specific_lora_run_id: str = Query(default="") specific_checkpoint: str = Query(default="") @@ -238,6 +238,8 @@ async def _tab_finetune_remove(self, run_id: str): return JSONResponse("OK") async def _tab_finetune_activate(self, activate: TabFinetuneActivate): + active_loras = get_active_loras() + active_loras[activate.model] = activate.dict() with open(env.CONFIG_ACTIVE_LORA, "w") as f: - f.write(activate.json(indent=4)) + json.dump(active_loras, f, indent=4) return JSONResponse("OK") diff --git a/setup.py b/setup.py index 9588b496..9e6cbf6c 100644 --- a/setup.py +++ b/setup.py @@ -33,21 +33,22 @@ class PyPackage: requires=["termcolor", "torch"], requires_packages=["refact_encoding", "code_contrast", "refact_scratchpads_no_gpu"]), "refact_scratchpads_no_gpu": PyPackage( - requires=["termcolor", "aiohttp", "tiktoken", "openai", "ujson"]), + requires=["termcolor", "aiohttp", "tiktoken", "openai", "ujson", "setproctitle"]), "refact_data_pipeline": PyPackage( requires=["numpy", "tokenizers", "torch", "requests", "cloudpickle", "tqdm", "dataclasses_json", "termcolor", 'more_itertools', "cdifflib", "ujson", "zstandard", "scipy", "deepspeed==0.9.2", "peft", "einops", "mpi4py", "matplotlib", "transformers", - "triton==2.0.0.post1", "torchinfo", "giturlparse", "jsonlines"], + "triton==2.0.0.post1", "torchinfo", "giturlparse", "jsonlines", + "binpacking", "filelock", "tables"], requires_packages=["refact_encoding", "code_contrast", "self_hosting_machinery", "refact_models"], data=["git_command.exp"], ), "self_hosting_machinery": PyPackage( requires=["aiohttp", "cryptography", "fastapi==0.100.0", "giturlparse", "pydantic==1.10.10", - "starlette==0.27.0", "uvicorn", "uvloop", "python-multipart", "auto-gptq==0.3.2", - "torch", "transformers", "termcolor"], + "starlette==0.27.0", "uvicorn", "uvloop", "python-multipart", "auto-gptq==0.4.2", + "torch", "transformers", "termcolor", "accelerate", "bitsandbytes"], requires_packages=["refact_scratchpads", "refact_scratchpads_no_gpu", "refact_models", "known_models_db", "refact_data_pipeline"], data=["webgui/static/*", "webgui/static/js/*", "watchdog/watchdog.d/*"]),
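
The core of this patch is splitting the old ScratchpadHuggingface fill-in-the-middle scratchpad into two prompt orders: ScratchpadPSM (prefix-suffix-middle, now used for the StarCoder and WizardCoder models) and ScratchpadSPM (suffix-prefix-middle, now used for Refact/1.6B). A minimal sketch of the difference follows; it assumes StarCoder-style FIM special tokens, since the literal token strings do not survive in the diff text above, so "<fim_prefix>", "<fim_suffix>" and "<fim_middle>" here are assumptions rather than quotes of the repository code.

    # Illustration only: how the PSM and SPM scratchpads lay out a FIM prompt.
    # Token names below are assumed (StarCoder-style); the real classes resolve
    # them with self._encode_one_token(...) and operate on token ids, not strings.
    FIM_PREFIX, FIM_SUFFIX, FIM_MIDDLE = "<fim_prefix>", "<fim_suffix>", "<fim_middle>"

    def psm_prompt(prefix: str, suffix: str) -> str:
        # ScratchpadPSM: prefix, then suffix, then ask the model for the middle
        return f"{FIM_PREFIX}{prefix}{FIM_SUFFIX}{suffix}{FIM_MIDDLE}"

    def spm_prompt(prefix: str, suffix: str) -> str:
        # ScratchpadSPM: suffix first, then prefix, then ask the model for the middle
        return f"{FIM_SUFFIX}{suffix}{FIM_PREFIX}{prefix}{FIM_MIDDLE}"

    if __name__ == "__main__":
        prefix = "def add(a, b):\n    return "
        suffix = "\n\nprint(add(1, 2))\n"
        print(repr(psm_prompt(prefix, suffix)))
        print(repr(spm_prompt(prefix, suffix)))

The other behavioural change worth a note is in _tab_finetune_activate: the active-lora config file now holds one entry per model instead of a single record, so activating a lora for one model no longer overwrites the choice made for another. A rough sketch of that merge is below; the read is inlined here instead of calling the repository's get_active_loras helper, and the missing-file fallback is an assumption.

    import json

    def activate_lora(config_path: str, model: str, activate: dict) -> None:
        # Load the existing per-model mapping, update only this model's entry,
        # and write the whole mapping back, mirroring the json.dump(..., indent=4)
        # call in tab_finetune.py.
        try:
            with open(config_path) as f:
                active_loras = json.load(f)
        except FileNotFoundError:
            active_loras = {}  # assumption: start empty if no config exists yet
        active_loras[model] = activate
        with open(config_path, "w") as f:
            json.dump(active_loras, f, indent=4)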