Finetune and models fixes (#85)
* model finetune: activate new format

* typos, msgs, panel block

* lora mode off by default, best lora script args

* add missing requirements

* fix weird refact1.6 chat behaviour

* add system prompt to help handle out-of-distribution answers

* added top3-like logs (no top3 tokens though)

* add missing import

* 1.6b chat prompt, strip() in messages

* ScratchpadHuggingface -> ScratchpadFIM, remove copy-paste

* fim spad refactoring

* tokenizer for one token refactoring

* temporary disable finetune property for refact model

* ignoring the `system prompt` in some cases for refact1.6b model

* fix required modules

---------

Co-authored-by: oxyplay <max@oxyplay.com>
Co-authored-by: JegernOUTT <sergey.vakhreev@gmail.com>
Co-authored-by: Oleg Klimov <omgtech@gmail.com>
4 people authored Sep 2, 2023
1 parent 228cb97 commit 7431a01
Showing 14 changed files with 130 additions and 139 deletions.
6 changes: 3 additions & 3 deletions known_models_db/refact_known_models/huggingface.py
@@ -2,7 +2,7 @@
     "starcoder/15b/base": {
         "backend": "autogptq",
         "model_path": "TheBloke/starcoder-GPTQ",
-        "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface",
+        "diff_scratchpad_class": "refact_scratchpads:ScratchpadPSM",
         "chat_scratchpad_class": None,
         "model_class_kwargs": {},
         "required_memory_mb": 18000,
@@ -11,7 +11,7 @@
     "starcoder/15b/plus": {
         "backend": "autogptq",
         "model_path": "TheBloke/starcoderplus-GPTQ",
-        "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface",
+        "diff_scratchpad_class": "refact_scratchpads:ScratchpadPSM",
         "chat_scratchpad_class": None,
         "model_class_kwargs": {},
         "required_memory_mb": 18000,
@@ -29,7 +29,7 @@
     "wizardcoder/15b": {
         "backend": "autogptq",
        "model_path": "TheBloke/WizardCoder-15B-1.0-GPTQ",
-        "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface",
+        "diff_scratchpad_class": "refact_scratchpads:ScratchpadPSM",
         "chat_scratchpad_class": None,
         "model_class_kwargs": {},
         "required_memory_mb": 18000,
4 changes: 2 additions & 2 deletions known_models_db/refact_known_models/refact.py
@@ -2,11 +2,11 @@
     "Refact/1.6B": {
         "backend": "transformers",
         "model_path": "smallcloudai/Refact-1_6B-fim",
-        "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface",
+        "diff_scratchpad_class": "refact_scratchpads:ScratchpadSPM",
         "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceRefact",
         "model_class_kwargs": {},
         "required_memory_mb": 6000,
-        "filter_caps": ["Refact", "completion", "finetune"],
+        "filter_caps": ["Refact", "completion"],
     },

     "CONTRASTcode/medium/multi": {
3 changes: 2 additions & 1 deletion refact_scratchpads/__init__.py
@@ -4,7 +4,8 @@

 from refact_scratchpads.scratchpad_hf import ScratchpadHuggingfaceBase
 from refact_scratchpads.scratchpad_hf import ScratchpadHuggingfaceCompletion
-from refact_scratchpads.scratchpad_hf import ScratchpadHuggingface
+from refact_scratchpads.scratchpad_hf import ScratchpadSPM
+from refact_scratchpads.scratchpad_hf import ScratchpadPSM
 from refact_scratchpads.scratchpad_hf import ScratchpadCodeLlama
 from refact_scratchpads.scratchpad_hf import ScratchpadHuggingfaceStarChat
 from refact_scratchpads.scratchpad_hf import ScratchpadHuggingfaceWizard
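Note: the two newly exported scratchpads differ only in the order of the fill-in-the-middle sections; the classes themselves appear in the refact_scratchpads/scratchpad_hf.py diff below. A minimal sketch of the two prompt layouts (the sample code and string rendering are illustrative assumptions; the real scratchpads assemble lists of token ids via _prompt_format()):

# Illustrative sketch only: PSM vs SPM prompt order, rendered as strings.
prefix = "def add(a, b):\n    return "  # hypothetical text left of the cursor
suffix = "\n\nprint(add(1, 2))\n"       # hypothetical text right of the cursor

# PSM (prefix-suffix-middle), now used by the StarCoder/WizardCoder entries:
psm = "<fim_prefix>" + prefix + "<fim_suffix>" + suffix + "<fim_middle>"

# SPM (suffix-prefix-middle), now used by the Refact/1.6B entry:
spm = "<fim_suffix>" + suffix + "<fim_prefix>" + prefix + "<fim_middle>"

# In both layouts the model generates the missing middle (here "a + b")
# after the <fim_middle> sentinel.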
152 changes: 42 additions & 110 deletions refact_scratchpads/scratchpad_hf.py
@@ -1,7 +1,5 @@
 import torch as th
 import time
-import json
-import os
 import termcolor

 from refact_scratchpads.scratchpad_utils import trim_context_infill
@@ -33,7 +31,6 @@ def __init__(
             **unused
     ):
         self._tokenizer = tokenizer
-        self._tokenizer_skip_first = bool(tokenizer.encode(""))  # XXX: replace with add_special_tokens=False ?
         self._max_tokens = max_tokens
         self._logger = logger
         self._created = created
@@ -96,14 +93,12 @@ def after_token_selection(self, m, chosen_token: th.Tensor, **unused) -> Dict[str, Any]:
         return dict()

     def _encode_one_token(self, text: str) -> int:
-        tokens = self._tokenizer.encode(text)
-        if self._tokenizer_skip_first:
-            tokens = tokens[1:]
+        tokens = self._tokenizer.encode(text, add_special_tokens=False)
         if len(tokens) != 1:
             raise ValueError(f"Must be single token, have {tokens} for '{text}'")
         return tokens[0]

-    def encode_without_special_tokens(self, txt: str) -> List[int]:
+    def _encode_without_special_tokens(self, txt: str) -> List[int]:
         if hasattr(self._tokenizer, "tokenizer_copy_but_does_not_encode_special_tokens"):
             t = self._tokenizer.tokenizer_copy_but_does_not_encode_special_tokens
         else:
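The reworked _encode_one_token() above relies on the tokenizer mapping each FIM sentinel to exactly one id once special-token insertion is disabled. A hedged sketch of that invariant (the model path is taken from this commit's refact.py; any FIM-trained tokenizer would do):

# Sketch, assuming a HuggingFace tokenizer with FIM sentinels in its vocab.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("smallcloudai/Refact-1_6B-fim")

# add_special_tokens=False stops the tokenizer from prepending BOS-like
# tokens, so a sentinel such as "<fim_prefix>" must map to a single id.
tokens = tokenizer.encode("<fim_prefix>", add_special_tokens=False)
assert len(tokens) == 1, f"expected a single token, got {tokens}"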
@@ -141,15 +136,14 @@ def completion(self, final: bool):
         return {"text": self._tokenizer.decode(self._completion)}


-class ScratchpadHuggingface(ScratchpadHuggingfaceBase):
+class ScratchpadFIM(ScratchpadHuggingfaceBase):

     def __init__(
             self,
             sources: Dict[str, str],
             cursor_file: str,
             cursor0: int,
             cursor1: int,
-            ignore_special_tokens: bool = True,
             **kwargs
     ):
         super().__init__(**kwargs)
@@ -158,7 +152,6 @@ def __init__(

         self._cursor_file = cursor_file
         self._cursor = cursor0
-        self._ignore_special_tokens = ignore_special_tokens
         self._code = sources[cursor_file]

         self._prefix: Optional[str] = None
@@ -171,6 +164,9 @@ def __init__(
         self._fim_suffix = self._encode_one_token("<fim_suffix>")
         self._fim_middle = self._encode_one_token("<fim_middle>")

+    def _prompt_format(self, prefix_tokens, suffix_tokens):
+        raise NotImplementedError()
+
     def prompt(self, T: int):
         self._prefix = self._code[:self._cursor]
         # Why we need to cut the line right of the cursor?
@@ -182,34 +178,23 @@ def prompt(self, T: int):
         #    ^^ but we stop here because we need single line completion
         #    => we have two closing parenthesis.
         # self._suffix = "".join(self._code[self._cursor:].splitlines(keepends=True)[1:])
-        self._suffix = self._code[self._cursor:]
+        self._suffix = self._code[self._cursor:].lstrip(" \t")
         self._suffix_line0cut = "".join(self._code[self._cursor:].splitlines(keepends=True)[1:])
         self._completion.clear()

         prefix_cut, suffix_cut = trim_context_infill(
             self._prefix, self._suffix, EncodingWrapper(self._tokenizer), T - self._max_tokens
         )
+        prefix_cut_tokens = self._encode_without_special_tokens(prefix_cut)
+        suffix_cut_tokens = self._encode_without_special_tokens(suffix_cut)
         self.debuglog(
-            f"ScratchpadHuggingfaceFIM prompt prefix {len(prefix_cut)} chars, "
-            f"suffix {len(suffix_cut)} chars, T={T} max_tokens={self._max_tokens}"
+            "ScratchpadFIM prompt prefix %d chars -> %d tokens, suffix %d chars -> %d tokens, T=%d max_new_tokens=%d" %
+            (len(prefix_cut), len(prefix_cut_tokens), len(suffix_cut), len(suffix_cut_tokens), T, self._max_tokens)
         )
-        if self._ignore_special_tokens:
-            prefix_cut_tokens = self.encode_without_special_tokens(prefix_cut)
-            suffix_cut_tokens = self.encode_without_special_tokens(suffix_cut)
-        else:
-            prefix_cut_tokens = self._tokenizer.encode(prefix_cut)
-            suffix_cut_tokens = self._tokenizer.encode(suffix_cut)

-        prompt: List[int] = [
-            self._fim_prefix,
-            *prefix_cut_tokens,
-            self._fim_suffix,
-            *suffix_cut_tokens,
-            self._fim_middle,
-        ]
-        # self.debuglog("-"*40)
-        # self.debuglog(self._tokenizer.decode(prompt))
-        # self.debuglog("-"*40)
+        prompt: List[int] = self._prompt_format(prefix_cut_tokens, suffix_cut_tokens)
+        self.debuglog("-"*40)
+        self.debuglog(self._tokenizer.decode(prompt))
+        self.debuglog("-"*40)
         return prompt

     def completion(self, final: bool):
@@ -224,88 +209,28 @@
         return {self._cursor_file: self._prefix + completion + self._suffix_line0cut}


-class ScratchpadRefactFIM(ScratchpadHuggingfaceBase):
-
-    def __init__(
-            self,
-            sources: Dict[str, str],
-            cursor_file: str,
-            cursor0: int,
-            cursor1: int,
-            ignore_special_tokens: bool = True,
-            **kwargs
-    ):
-        super().__init__(**kwargs)
-
-        assert cursor0 == cursor1
-
-        self._cursor_file = cursor_file
-        self._cursor = cursor0
-        self._ignore_special_tokens = ignore_special_tokens
-        self._code = sources[cursor_file]
-
-        self._prefix: Optional[str] = None
-        self._suffix: Optional[str] = None
-        self._suffix_line0cut: Optional[str] = None
-        self._completion = []
-
-        self._tokens_produced = 0
-        self._fim_prefix = self._encode_one_token("<fim_prefix>")
-        self._fim_suffix = self._encode_one_token("<fim_suffix>")
-        self._fim_middle = self._encode_one_token("<fim_middle>")
-
-    def prompt(self, T: int):
-        self._prefix = self._code[:self._cursor]
-        # Why we need to cut the line right of the cursor?
-        # Example 1:
-        #    function_call(param1, GENERATED_TONENS<EOF>)
-        #    => everything works right
-        # Example 2:
-        #    function_call(param1, GENERATED_TONENS)\nMORE_TOKENS\nSOME_OTHER_CALL(OTHER_PARAM<EOF>)
-        #    ^^ but we stop here because we need single line completion
-        #    => we have two closing parenthesis.
-        # self._suffix = "".join(self._code[self._cursor:].splitlines(keepends=True)[1:])
-        self._suffix = self._code[self._cursor:]
-        self._suffix_line0cut = "".join(self._code[self._cursor:].splitlines(keepends=True)[1:])
-        self._completion.clear()
-
-        prefix_cut, suffix_cut = trim_context_infill(
-            self._prefix, self._suffix, EncodingWrapper(self._tokenizer), T - self._max_tokens
-        )
-        self.debuglog(
-            f"ScratchpadRefactFIM prompt prefix {len(prefix_cut)} chars, "
-            f"suffix {len(suffix_cut)} chars, T={T} max_tokens={self._max_tokens}"
-        )
-        if self._ignore_special_tokens:
-            prefix_cut_tokens = self.encode_without_special_tokens(prefix_cut)
-            suffix_cut_tokens = self.encode_without_special_tokens(suffix_cut)
-        else:
-            prefix_cut_tokens = self._tokenizer.encode(prefix_cut)
-            suffix_cut_tokens = self._tokenizer.encode(suffix_cut)
+class ScratchpadSPM(ScratchpadFIM):

-        prompt: List[int] = [
+    def _prompt_format(self, prefix_tokens, suffix_tokens):
+        return [
             self._fim_suffix,
-            *suffix_cut_tokens,
+            *suffix_tokens,
             self._fim_prefix,
-            *prefix_cut_tokens,
+            *prefix_tokens,
             self._fim_middle,
         ]
-        # self.debuglog("-"*40)
-        # self.debuglog(self._tokenizer.decode(prompt))
-        # self.debuglog("-"*40)
-        return prompt

-    def completion(self, final: bool):
-        assert self._prefix is not None
-        assert self._suffix is not None
-        completion = self._tokenizer.decode(self._completion)
-        if self.finish_reason == "eot":
-            # Correct stop
-            return {self._cursor_file: self._prefix + completion + self._suffix}
-        else:
-            # "stop-lf" or "length" or not stopped yet (empty reason), it's better to remove first line remainder
-            return {self._cursor_file: self._prefix + completion + self._suffix_line0cut}
+
+class ScratchpadPSM(ScratchpadFIM):
+
+    def _prompt_format(self, prefix_tokens, suffix_tokens):
+        return [
+            self._fim_prefix,
+            *prefix_tokens,
+            self._fim_suffix,
+            *suffix_tokens,
+            self._fim_middle,
+        ]


 class ScratchpadCodeLlama(ScratchpadHuggingfaceBase):
@@ -454,14 +379,21 @@ def __init__(self, *args, **kwargs):
         self._esc = "<empty_output>"

     def _prompt(self) -> str:
-        text = ""
+        if len(self._messages) <= 2:
+            text = self._esc + ("SYSTEM You are a programming assistant. "
+                                "If you don't understand the question, just say: "
+                                "I don't understand the question.\n")
+        else:
+            # We are ignoring the `system prompt` here 'cause the model
+            # hasn't seen more than two messages with a `system prompt` while training
+            # Going to fix this later with the next iteration
+            text = ""
         for message in self._messages:
             if message["content"] == "":
                 continue
             if message["role"] == "user":
-                text += f"{self._esc}USER "
+                text += self._esc + "USER " + message["content"].strip() + "\n"
             else:
-                text += f"{self._esc}ASSISTANT "
-            text += message["content"] + "\n"
-        text += f"{self._esc}ASSISTANT "
+                text += self._esc + "ASSISTANT " + message["content"].strip() + "\n"
+        text += self._esc + "ASSISTANT"
         return text
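To make the new chat format concrete: for a fresh conversation (a single user message plus the pending assistant turn), _prompt() now renders text like the sketch below; the user message is made up for illustration.

# Hypothetical rendering of the Refact-1.6B chat prompt after this change.
esc = "<empty_output>"
prompt = (
    esc + "SYSTEM You are a programming assistant. "
          "If you don't understand the question, just say: "
          "I don't understand the question.\n"
    + esc + "USER how do I reverse a list in python?\n"
    + esc + "ASSISTANT"
)
# Generation continues right after the trailing "ASSISTANT" marker.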
16 changes: 13 additions & 3 deletions refact_scratchpads_no_gpu/stream_results.py
@@ -1,7 +1,18 @@
-import os, sys, json, re, time, datetime, termcolor, multiprocessing, copy, queue
+import os
+import json
+import re
+import time
+import datetime
+import termcolor
+import multiprocessing
+import copy
+import queue
 import requests
-from typing import Dict, Any, List, Optional, Set
+import setproctitle
 import logging

+from typing import Dict, Any, List, Optional, Set
+
 logger = logging.getLogger("INFSERVER")

@@ -241,7 +252,6 @@ def check_cancelled(self):


 def _upload_results_loop(upload_q: multiprocessing.Queue, cancelled_q: multiprocessing.Queue):
-    import setproctitle
     setproctitle.setproctitle("upload_results_loop")
     req_session = infserver_session()
     exit_flag = False
2 changes: 2 additions & 0 deletions self_hosting_machinery/inference/inference_base.py
@@ -9,6 +9,8 @@ def modload(import_str):
     import_mod, import_class = import_str.rsplit(":", 1)
     model = importlib.import_module(import_mod)
     Class = getattr(model, import_class, None)
+    if Class is None:
+        raise ValueError("cannot find \"%s\"" % import_str)
     return Class

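The added None check turns a misspelled scratchpad path into an immediate error instead of a confusing failure later. A small usage sketch of modload() with the "module:Class" strings from the known-models db (the import path is assumed from this file's location in the diff):

# Sketch: resolving scratchpad classes the way the inference code does.
from self_hosting_machinery.inference.inference_base import modload

ScratchpadPSM = modload("refact_scratchpads:ScratchpadPSM")  # returns the class

# A typo now raises ValueError('cannot find "refact_scratchpads:Nope"')
# instead of silently returning None.
modload("refact_scratchpads:Nope")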
[Diffs for the remaining 8 changed files are not shown.]