diff --git a/known_models_db/refact_known_models/huggingface.py b/known_models_db/refact_known_models/huggingface.py index 3fa5e90e..9aa55925 100644 --- a/known_models_db/refact_known_models/huggingface.py +++ b/known_models_db/refact_known_models/huggingface.py @@ -2,7 +2,7 @@ "starcoder/15b/base": { "backend": "autogptq", "model_path": "TheBloke/starcoder-GPTQ", - "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface", + "diff_scratchpad_class": "refact_scratchpads:ScratchpadPSM", "chat_scratchpad_class": None, "model_class_kwargs": {}, "required_memory_mb": 18000, @@ -11,7 +11,7 @@ "starcoder/15b/plus": { "backend": "autogptq", "model_path": "TheBloke/starcoderplus-GPTQ", - "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface", + "diff_scratchpad_class": "refact_scratchpads:ScratchpadPSM", "chat_scratchpad_class": None, "model_class_kwargs": {}, "required_memory_mb": 18000, @@ -29,7 +29,7 @@ "wizardcoder/15b": { "backend": "autogptq", "model_path": "TheBloke/WizardCoder-15B-1.0-GPTQ", - "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface", + "diff_scratchpad_class": "refact_scratchpads:ScratchpadPSM", "chat_scratchpad_class": None, "model_class_kwargs": {}, "required_memory_mb": 18000, diff --git a/known_models_db/refact_known_models/refact.py b/known_models_db/refact_known_models/refact.py index cae8bb4d..336386b9 100644 --- a/known_models_db/refact_known_models/refact.py +++ b/known_models_db/refact_known_models/refact.py @@ -2,11 +2,11 @@ "Refact/1.6B": { "backend": "transformers", "model_path": "smallcloudai/Refact-1_6B-fim", - "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface", + "diff_scratchpad_class": "refact_scratchpads:ScratchpadSPM", "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceRefact", "model_class_kwargs": {}, "required_memory_mb": 6000, - "filter_caps": ["Refact", "completion", "finetune"], + "filter_caps": ["Refact", "completion"], }, "CONTRASTcode/medium/multi": { diff --git a/refact_scratchpads/__init__.py b/refact_scratchpads/__init__.py index 4db0e151..43018657 100644 --- a/refact_scratchpads/__init__.py +++ b/refact_scratchpads/__init__.py @@ -4,7 +4,8 @@ from refact_scratchpads.scratchpad_hf import ScratchpadHuggingfaceBase from refact_scratchpads.scratchpad_hf import ScratchpadHuggingfaceCompletion -from refact_scratchpads.scratchpad_hf import ScratchpadHuggingface +from refact_scratchpads.scratchpad_hf import ScratchpadSPM +from refact_scratchpads.scratchpad_hf import ScratchpadPSM from refact_scratchpads.scratchpad_hf import ScratchpadCodeLlama from refact_scratchpads.scratchpad_hf import ScratchpadHuggingfaceStarChat from refact_scratchpads.scratchpad_hf import ScratchpadHuggingfaceWizard diff --git a/refact_scratchpads/scratchpad_hf.py b/refact_scratchpads/scratchpad_hf.py index 470565b4..f7eb55cb 100644 --- a/refact_scratchpads/scratchpad_hf.py +++ b/refact_scratchpads/scratchpad_hf.py @@ -1,7 +1,5 @@ import torch as th import time -import json -import os import termcolor from refact_scratchpads.scratchpad_utils import trim_context_infill @@ -33,7 +31,6 @@ def __init__( **unused ): self._tokenizer = tokenizer - self._tokenizer_skip_first = bool(tokenizer.encode("")) # XXX: replace with add_special_tokens=False ? 
self._max_tokens = max_tokens self._logger = logger self._created = created @@ -96,14 +93,12 @@ def after_token_selection(self, m, chosen_token: th.Tensor, **unused) -> Dict[st return dict() def _encode_one_token(self, text: str) -> int: - tokens = self._tokenizer.encode(text) - if self._tokenizer_skip_first: - tokens = tokens[1:] + tokens = self._tokenizer.encode(text, add_special_tokens=False) if len(tokens) != 1: raise ValueError(f"Must be single token, have {tokens} for '{text}'") return tokens[0] - def encode_without_special_tokens(self, txt: str) -> List[int]: + def _encode_without_special_tokens(self, txt: str) -> List[int]: if hasattr(self._tokenizer, "tokenizer_copy_but_does_not_encode_special_tokens"): t = self._tokenizer.tokenizer_copy_but_does_not_encode_special_tokens else: @@ -141,7 +136,7 @@ def completion(self, final: bool): return {"text": self._tokenizer.decode(self._completion)} -class ScratchpadHuggingface(ScratchpadHuggingfaceBase): +class ScratchpadFIM(ScratchpadHuggingfaceBase): def __init__( self, @@ -149,7 +144,6 @@ def __init__( cursor_file: str, cursor0: int, cursor1: int, - ignore_special_tokens: bool = True, **kwargs ): super().__init__(**kwargs) @@ -158,7 +152,6 @@ def __init__( self._cursor_file = cursor_file self._cursor = cursor0 - self._ignore_special_tokens = ignore_special_tokens self._code = sources[cursor_file] self._prefix: Optional[str] = None @@ -171,6 +164,9 @@ def __init__( self._fim_suffix = self._encode_one_token("") self._fim_middle = self._encode_one_token("") + def _prompt_format(self, prefix_tokens, suffix_tokens): + raise NotImplementedError() + def prompt(self, T: int): self._prefix = self._code[:self._cursor] # Why we need to cut the line right of the cursor? @@ -182,34 +178,23 @@ def prompt(self, T: int): # ^^ but we stop here because we need single line completion # => we have two closing parenthesis. 
# self._suffix = "".join(self._code[self._cursor:].splitlines(keepends=True)[1:]) - self._suffix = self._code[self._cursor:] + self._suffix = self._code[self._cursor:].lstrip(" \t") self._suffix_line0cut = "".join(self._code[self._cursor:].splitlines(keepends=True)[1:]) self._completion.clear() prefix_cut, suffix_cut = trim_context_infill( self._prefix, self._suffix, EncodingWrapper(self._tokenizer), T - self._max_tokens ) + prefix_cut_tokens = self._encode_without_special_tokens(prefix_cut) + suffix_cut_tokens = self._encode_without_special_tokens(suffix_cut) self.debuglog( - f"ScratchpadHuggingfaceFIM prompt prefix {len(prefix_cut)} chars, " - f"suffix {len(suffix_cut)} chars, T={T} max_tokens={self._max_tokens}" + "ScratchpadFIM prompt prefix %d chars -> %d tokens, suffix %d chars -> %d tokens, T=%d max_new_tokens=%d" % + (len(prefix_cut), len(prefix_cut_tokens), len(suffix_cut), len(suffix_cut_tokens), T, self._max_tokens) ) - if self._ignore_special_tokens: - prefix_cut_tokens = self.encode_without_special_tokens(prefix_cut) - suffix_cut_tokens = self.encode_without_special_tokens(suffix_cut) - else: - prefix_cut_tokens = self._tokenizer.encode(prefix_cut) - suffix_cut_tokens = self._tokenizer.encode(suffix_cut) - - prompt: List[int] = [ - self._fim_prefix, - *prefix_cut_tokens, - self._fim_suffix, - *suffix_cut_tokens, - self._fim_middle, - ] - # self.debuglog("-"*40) - # self.debuglog(self._tokenizer.decode(prompt)) - # self.debuglog("-"*40) + prompt: List[int] = self._prompt_format(prefix_cut_tokens, suffix_cut_tokens) + self.debuglog("-"*40) + self.debuglog(self._tokenizer.decode(prompt)) + self.debuglog("-"*40) return prompt def completion(self, final: bool): @@ -224,88 +209,28 @@ def completion(self, final: bool): return {self._cursor_file: self._prefix + completion + self._suffix_line0cut} -class ScratchpadRefactFIM(ScratchpadHuggingfaceBase): - - def __init__( - self, - sources: Dict[str, str], - cursor_file: str, - cursor0: int, - cursor1: int, - ignore_special_tokens: bool = True, - **kwargs - ): - super().__init__(**kwargs) - - assert cursor0 == cursor1 - - self._cursor_file = cursor_file - self._cursor = cursor0 - self._ignore_special_tokens = ignore_special_tokens - self._code = sources[cursor_file] - - self._prefix: Optional[str] = None - self._suffix: Optional[str] = None - self._suffix_line0cut: Optional[str] = None - self._completion = [] - - self._tokens_produced = 0 - self._fim_prefix = self._encode_one_token("") - self._fim_suffix = self._encode_one_token("") - self._fim_middle = self._encode_one_token("") - - def prompt(self, T: int): - self._prefix = self._code[:self._cursor] - # Why we need to cut the line right of the cursor? - # Example 1: - # function_call(param1, GENERATED_TONENS) - # => everything works right - # Example 2: - # function_call(param1, GENERATED_TONENS)\nMORE_TOKENS\nSOME_OTHER_CALL(OTHER_PARAM) - # ^^ but we stop here because we need single line completion - # => we have two closing parenthesis. 
- # self._suffix = "".join(self._code[self._cursor:].splitlines(keepends=True)[1:]) - self._suffix = self._code[self._cursor:] - self._suffix_line0cut = "".join(self._code[self._cursor:].splitlines(keepends=True)[1:]) - self._completion.clear() - - prefix_cut, suffix_cut = trim_context_infill( - self._prefix, self._suffix, EncodingWrapper(self._tokenizer), T - self._max_tokens - ) - self.debuglog( - f"ScratchpadRefactFIM prompt prefix {len(prefix_cut)} chars, " - f"suffix {len(suffix_cut)} chars, T={T} max_tokens={self._max_tokens}" - ) - if self._ignore_special_tokens: - prefix_cut_tokens = self.encode_without_special_tokens(prefix_cut) - suffix_cut_tokens = self.encode_without_special_tokens(suffix_cut) - else: - prefix_cut_tokens = self._tokenizer.encode(prefix_cut) - suffix_cut_tokens = self._tokenizer.encode(suffix_cut) +class ScratchpadSPM(ScratchpadFIM): - prompt: List[int] = [ + def _prompt_format(self, prefix_tokens, suffix_tokens): + return [ self._fim_suffix, - *suffix_cut_tokens, + *suffix_tokens, self._fim_prefix, - *prefix_cut_tokens, + *prefix_tokens, self._fim_middle, ] - # self.debuglog("-"*40) - # self.debuglog(self._tokenizer.decode(prompt)) - # self.debuglog("-"*40) - return prompt - def completion(self, final: bool): - assert self._prefix is not None - assert self._suffix is not None - completion = self._tokenizer.decode(self._completion) - if self.finish_reason == "eot": - # Correct stop - return {self._cursor_file: self._prefix + completion + self._suffix} - else: - # "stop-lf" or "length" or not stopped yet (empty reason), it's better to remove first line remainder - return {self._cursor_file: self._prefix + completion + self._suffix_line0cut} +class ScratchpadPSM(ScratchpadFIM): + + def _prompt_format(self, prefix_tokens, suffix_tokens): + return [ + self._fim_prefix, + *prefix_tokens, + self._fim_suffix, + *suffix_tokens, + self._fim_middle, + ] class ScratchpadCodeLlama(ScratchpadHuggingfaceBase): @@ -454,14 +379,21 @@ def __init__(self, *args, **kwargs): self._esc = "" def _prompt(self) -> str: - text = "" + if len(self._messages) <= 2: + text = self._esc + ("SYSTEM You are a programming assistant. 
" + "If you don't understand the question, just say: " + "I don't understand the question.\n") + else: + # We are ignoring the `system prompt` here 'cause the model + # haven't seen more than two messages with a `system prompt` while training + # Going to fix this later with the next iteration + text = "" for message in self._messages: if message["content"] == "": continue if message["role"] == "user": - text += f"{self._esc}USER " + text += self._esc + "USER " + message["content"].strip() + "\n" else: - text += f"{self._esc}ASSISTANT " - text += message["content"] + "\n" - text += f"{self._esc}ASSISTANT " + text += self._esc + "ASSISTANT " + message["content"].strip() + "\n" + text += self._esc + "ASSISTANT" return text diff --git a/refact_scratchpads_no_gpu/stream_results.py b/refact_scratchpads_no_gpu/stream_results.py index e43db73d..0d1d9b4f 100644 --- a/refact_scratchpads_no_gpu/stream_results.py +++ b/refact_scratchpads_no_gpu/stream_results.py @@ -1,7 +1,18 @@ -import os, sys, json, re, time, datetime, termcolor, multiprocessing, copy, queue +import os +import json +import re +import time +import datetime +import termcolor +import multiprocessing +import copy +import queue import requests -from typing import Dict, Any, List, Optional, Set +import setproctitle import logging + +from typing import Dict, Any, List, Optional, Set + logger = logging.getLogger("INFSERVER") @@ -241,7 +252,6 @@ def check_cancelled(self): def _upload_results_loop(upload_q: multiprocessing.Queue, cancelled_q: multiprocessing.Queue): - import setproctitle setproctitle.setproctitle("upload_results_loop") req_session = infserver_session() exit_flag = False diff --git a/self_hosting_machinery/inference/inference_base.py b/self_hosting_machinery/inference/inference_base.py index d8bf3adc..832ee3c0 100644 --- a/self_hosting_machinery/inference/inference_base.py +++ b/self_hosting_machinery/inference/inference_base.py @@ -9,6 +9,8 @@ def modload(import_str): import_mod, import_class = import_str.rsplit(":", 1) model = importlib.import_module(import_mod) Class = getattr(model, import_class, None) + if Class is None: + raise ValueError("cannot find \"%s\"" % import_str) return Class diff --git a/self_hosting_machinery/inference/inference_hf.py b/self_hosting_machinery/inference/inference_hf.py index d11ffca2..df9c9608 100644 --- a/self_hosting_machinery/inference/inference_hf.py +++ b/self_hosting_machinery/inference/inference_hf.py @@ -3,6 +3,7 @@ import time import torch import traceback +import termcolor from auto_gptq import AutoGPTQForCausalLM from transformers import AutoModelForCausalLM @@ -41,15 +42,26 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwa return False -class StopTokenStoppingCriteria(StoppingCriteria): +class FeedScratchoadCriteria(StoppingCriteria): - def __init__(self, scratchpad: ScratchpadHuggingfaceBase): + def __init__(self, tokenizer, t0: float, scratchpad: ScratchpadHuggingfaceBase): StoppingCriteria.__init__(self) + self.tokenizer = tokenizer self.scratchpad = scratchpad + self.t0 = t0 def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool: - last_tokens = input_ids[0][-1] - self.scratchpad.after_token_selection(None, last_tokens) + token = input_ids[0][-1] + if DEBUG: + def _format(t: str, color: str): + return "\"%s\"" % termcolor.colored(t.replace("\n", "\\n").replace("\r", "\\r"), color) + text = _format(self.tokenizer.decode([token.item()]), "green") + text = text.ljust(40) + # for tok, logprob in 
sorted(logprobs.items(), key=lambda x: -x[-1]): + # text += " %i %s" % (tok, _format(self.tokenizer.decode([tok]), "yellow")) + # text += " %0.2f%%" % (100 * math.exp(logprob)) + logging.getLogger("MODEL").info("%6.1fms %s" % (1000 * (time.time() - self.t0), text)) + self.scratchpad.after_token_selection(None, token) return bool(self.scratchpad.finish_reason) @@ -159,6 +171,7 @@ def logger(*args): if not isinstance(T, int) or T <= 0 or T > 4096: T = 2048 p = scratchpad.prompt(T) + logger("prompt %i tokens, max_new_tokens %i" % (len(p), request["max_tokens"])) if len(p) == 0: raise RuntimeError("empty tokens prompt") @@ -166,6 +179,7 @@ def logger(*args): return scratchpad, tokens_prompt def infer(self, request: Dict[str, Any], upload_proxy: UploadProxy, upload_proxy_args: Dict): + t0 = time.time() request_id = request["id"] try: scratchpad, tokens_prompt = self._prepare_scratchpad(request) @@ -176,7 +190,7 @@ def infer(self, request: Dict[str, Any], upload_proxy: UploadProxy, upload_proxy with torch.inference_mode(): stopping_criteria = StoppingCriteriaList([ CancellationStoppingCriteria(scratchpad, request_id, upload_proxy), - StopTokenStoppingCriteria(scratchpad), + FeedScratchoadCriteria(self._tokenizer, t0, scratchpad), ]) streamer = SMCStream(self._tokenizer, request_id, upload_proxy, upload_proxy_args, scratchpad) generation_kwargs = dict(input_ids=tokens_prompt.view(1, *tokens_prompt.shape), @@ -184,7 +198,10 @@ def infer(self, request: Dict[str, Any], upload_proxy: UploadProxy, upload_proxy max_new_tokens=request["max_tokens"], stopping_criteria=stopping_criteria, return_dict_in_generate=True, - output_scores=True) + output_scores=True, + top_p=request.get('top_p', 1.0), + temperature=request.get('temperature', 0.2)) + self._model.generate(**generation_kwargs) if not scratchpad.finish_reason: scratchpad.finish_reason = "maxlen" diff --git a/self_hosting_machinery/scripts/best_lora.py b/self_hosting_machinery/scripts/best_lora.py index dfc876fb..88e90338 100644 --- a/self_hosting_machinery/scripts/best_lora.py +++ b/self_hosting_machinery/scripts/best_lora.py @@ -68,4 +68,10 @@ def find_best_lora(model_name: str) -> Dict[str, str]: if __name__ == "__main__": - print(find_best_lora(default_finetune_model)) + from argparse import ArgumentParser + + parser = ArgumentParser() + parser.add_argument("--model", type=str, default=default_finetune_model) + args = parser.parse_args() + + print(find_best_lora(args.model)) diff --git a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py index 93f7810d..2dc1702a 100644 --- a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py +++ b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py @@ -35,7 +35,7 @@ def chat_limit_messages(messages: List[Dict[str, str]]): raise HTTPException(status_code=400, detail="No messages") while len(messages) > 10: del messages[0:2] # user, assistant - while sum([len(m["content"] + m["role"]) for m in messages]) > 4000: + while sum([len(m["content"] + m["role"]) for m in messages]) > 8000: del messages[0:2] # user, assistant return messages diff --git a/self_hosting_machinery/webgui/static/tab-finetune.html b/self_hosting_machinery/webgui/static/tab-finetune.html index c91ddedb..7732bf31 100644 --- a/self_hosting_machinery/webgui/static/tab-finetune.html +++ b/self_hosting_machinery/webgui/static/tab-finetune.html @@ -1,12 +1,12 @@
-                <div class="pane">
+                <div class="pane select-model-pane">
                     Select Model

-                <div class="pane">
+                <div class="pane use-model-pane">
                     Use Finetuned Model

@@ -196,7 +196,7 @@
-                        Max 1, Max 100
+                        Min 1, Max 100
diff --git a/self_hosting_machinery/webgui/static/tab-finetune.js b/self_hosting_machinery/webgui/static/tab-finetune.js index ff243dd5..f7091c44 100644 --- a/self_hosting_machinery/webgui/static/tab-finetune.js +++ b/self_hosting_machinery/webgui/static/tab-finetune.js @@ -22,6 +22,9 @@ let fine_tuning_button = null; let fine_tuning_settings = null; // let checkpoint_name = "best"; // let selected_model = ""; // we don't have model choice, empty for now +let use_model_pane = null; +let select_model_pane = null; + function finetine_status() { fetch("tab-finetune-get") @@ -593,12 +596,18 @@ function finetune_status() { if(!downloaded_stats.finetune_working_now) { switch(downloaded_stats.finetune_filter_stats.status) { case 'starting': - fine_tuning_pane.classList.add('pane-disabled'); + fine_tuning_pane.classList.add('pane-disabled'); + use_model_pane.classList.add('pane-disabled'); + select_model_pane.classList.add('pane-disabled'); + console.log('..starting'); break; case 'filtering': fine_filter_settings.disabled = true; progress_container.classList.remove('d-none') eta_state.innerHTML = 'ETA: ' + downloaded_stats.finetune_filter_stats.eta_minutes + ' minute(s)'; + use_model_pane.classList.remove('pane-disabled'); + select_model_pane.classList.remove('pane-disabled'); + console.log('..filtering'); break; case 'failed': document.querySelector('.ftf-error').classList.remove('d-none'); @@ -607,6 +616,8 @@ function finetune_status() { progress_container.classList.add('d-none'); ftf_bar.style.width = "0%"; fine_filter_settings.disabled = false; + use_model_pane.classList.remove('pane-disabled'); + select_model_pane.classList.remove('pane-disabled'); break; case 'finished': progress_container.classList.add('d-none'); @@ -616,6 +627,8 @@ function finetune_status() { fine_filter_button.disabled = false; fine_tuning_button.disabled = false; fine_tuning_pane.classList.remove('pane-disabled'); + use_model_pane.classList.remove('pane-disabled'); + select_model_pane.classList.remove('pane-disabled'); break; } } @@ -628,6 +641,8 @@ function finetune_status() { // filter working if(downloaded_stats.filter_working_now && !downloaded_stats.finetune_working_now) { fine_tuning_pane.classList.add('pane-disabled'); + use_model_pane.classList.add('pane-disabled'); + select_model_pane.classList.add('pane-disabled'); if(!fine_filter_button.querySelector('.spinner-border')) { fine_filter_button.innerHTML = ` Stop filtering`; } @@ -635,7 +650,7 @@ function finetune_status() { } // both not working if(!downloaded_stats.filter_working_now && !downloaded_stats.finetune_working_now) { - if(tab_files_data && !tab_files_data.hasOwnProperty('scan_finished') || !tab_files_data.scan_finished) { + if(tab_files_data && !tab_files_data.hasOwnProperty('scan_finished') && !tab_files_data.scan_finished) { return; } fine_filter_button.innerHTML = ` Run filter`; @@ -644,6 +659,8 @@ function finetune_status() { fine_filter_button.disabled = false; fine_tuning_button.disabled = false; fine_tuning_settings.disabled = false; + use_model_pane.classList.remove('pane-disabled'); + select_model_pane.classList.remove('pane-disabled'); } } @@ -805,6 +822,9 @@ export async function init() { fine_tuning_button.disabled = true; fine_tuning_settings = document.querySelector('.tab-finetune-fine-settings'); + use_model_pane = document.querySelector('.use-model-pane'); + select_model_pane = document.querySelector('.select-model-pane'); + const log_container = document.querySelector('.log-container'); function handle_auto_scroll() { if 
(log_container.scrollHeight - log_container.scrollTop === log_container.clientHeight) { diff --git a/self_hosting_machinery/webgui/static/tab-upload.js b/self_hosting_machinery/webgui/static/tab-upload.js index 72063bf7..6102a178 100644 --- a/self_hosting_machinery/webgui/static/tab-upload.js +++ b/self_hosting_machinery/webgui/static/tab-upload.js @@ -317,7 +317,7 @@ function save_filter_setup() { } let success_toast_div = document.querySelector('.upload-tab-scan-success-toast'); const success_toast = bootstrap.Toast.getOrCreateInstance(success_toast_div); - if(!show_info_toast) { + if(!show_info_toast && checked_types.length > 0) { show_info_toast = true; document.querySelector('.upload-tab-scan-success-toast .toast-body').innerHTML = 'Source files are ready, you can start GPU filtering now.'; setTimeout(function() { diff --git a/self_hosting_machinery/webgui/tab_finetune.py b/self_hosting_machinery/webgui/tab_finetune.py index c9ab357e..5b4ab7e0 100644 --- a/self_hosting_machinery/webgui/tab_finetune.py +++ b/self_hosting_machinery/webgui/tab_finetune.py @@ -59,7 +59,7 @@ class TabFinetuneConfig(BaseModel): class TabFinetuneActivate(BaseModel): model: str - lora_mode: str = Query(default="default", regex="off|latest-best|specific") + lora_mode: str = Query(default="off", regex="off|latest-best|specific") specific_lora_run_id: str = Query(default="") specific_checkpoint: str = Query(default="") @@ -238,6 +238,8 @@ async def _tab_finetune_remove(self, run_id: str): return JSONResponse("OK") async def _tab_finetune_activate(self, activate: TabFinetuneActivate): + active_loras = get_active_loras() + active_loras[activate.model] = activate.dict() with open(env.CONFIG_ACTIVE_LORA, "w") as f: - f.write(activate.json(indent=4)) + json.dump(active_loras, f, indent=4) return JSONResponse("OK") diff --git a/setup.py b/setup.py index 9588b496..9e6cbf6c 100644 --- a/setup.py +++ b/setup.py @@ -33,21 +33,22 @@ class PyPackage: requires=["termcolor", "torch"], requires_packages=["refact_encoding", "code_contrast", "refact_scratchpads_no_gpu"]), "refact_scratchpads_no_gpu": PyPackage( - requires=["termcolor", "aiohttp", "tiktoken", "openai", "ujson"]), + requires=["termcolor", "aiohttp", "tiktoken", "openai", "ujson", "setproctitle"]), "refact_data_pipeline": PyPackage( requires=["numpy", "tokenizers", "torch", "requests", "cloudpickle", "tqdm", "dataclasses_json", "termcolor", 'more_itertools', "cdifflib", "ujson", "zstandard", "scipy", "deepspeed==0.9.2", "peft", "einops", "mpi4py", "matplotlib", "transformers", - "triton==2.0.0.post1", "torchinfo", "giturlparse", "jsonlines"], + "triton==2.0.0.post1", "torchinfo", "giturlparse", "jsonlines", + "binpacking", "filelock", "tables"], requires_packages=["refact_encoding", "code_contrast", "self_hosting_machinery", "refact_models"], data=["git_command.exp"], ), "self_hosting_machinery": PyPackage( requires=["aiohttp", "cryptography", "fastapi==0.100.0", "giturlparse", "pydantic==1.10.10", - "starlette==0.27.0", "uvicorn", "uvloop", "python-multipart", "auto-gptq==0.3.2", - "torch", "transformers", "termcolor"], + "starlette==0.27.0", "uvicorn", "uvloop", "python-multipart", "auto-gptq==0.4.2", + "torch", "transformers", "termcolor", "accelerate", "bitsandbytes"], requires_packages=["refact_scratchpads", "refact_scratchpads_no_gpu", "refact_models", "known_models_db", "refact_data_pipeline"], data=["webgui/static/*", "webgui/static/js/*", "watchdog/watchdog.d/*"]),
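
The core of this patch is splitting the old ScratchpadHuggingface fill-in-the-middle scratchpad into two prompt orders: ScratchpadPSM (prefix-suffix-middle, now used for the StarCoder and WizardCoder models) and ScratchpadSPM (suffix-prefix-middle, now used for Refact/1.6B). A minimal sketch of the difference follows; it assumes StarCoder-style FIM special tokens, since the literal token strings do not survive in the diff text above, so "<fim_prefix>", "<fim_suffix>" and "<fim_middle>" here are assumptions rather than quotes of the repository code.

    # Illustration only: how the PSM and SPM scratchpads lay out a FIM prompt.
    # Token names below are assumed (StarCoder-style); the real classes resolve
    # them with self._encode_one_token(...) and operate on token ids, not strings.
    FIM_PREFIX, FIM_SUFFIX, FIM_MIDDLE = "<fim_prefix>", "<fim_suffix>", "<fim_middle>"

    def psm_prompt(prefix: str, suffix: str) -> str:
        # ScratchpadPSM: prefix, then suffix, then ask the model for the middle
        return f"{FIM_PREFIX}{prefix}{FIM_SUFFIX}{suffix}{FIM_MIDDLE}"

    def spm_prompt(prefix: str, suffix: str) -> str:
        # ScratchpadSPM: suffix first, then prefix, then ask the model for the middle
        return f"{FIM_SUFFIX}{suffix}{FIM_PREFIX}{prefix}{FIM_MIDDLE}"

    if __name__ == "__main__":
        prefix = "def add(a, b):\n    return "
        suffix = "\n\nprint(add(1, 2))\n"
        print(repr(psm_prompt(prefix, suffix)))
        print(repr(spm_prompt(prefix, suffix)))

The other behavioural change worth a note is in _tab_finetune_activate: the active-lora config file now holds one entry per model instead of a single record, so activating a lora for one model no longer overwrites the choice made for another. A rough sketch of that merge is below; the read is inlined here instead of calling the repository's get_active_loras helper, and the missing-file fallback is an assumption.

    import json

    def activate_lora(config_path: str, model: str, activate: dict) -> None:
        # Load the existing per-model mapping, update only this model's entry,
        # and write the whole mapping back, mirroring the json.dump(..., indent=4)
        # call in tab_finetune.py.
        try:
            with open(config_path) as f:
                active_loras = json.load(f)
        except FileNotFoundError:
            active_loras = {}  # assumption: start empty if no config exists yet
        active_loras[model] = activate
        with open(config_path, "w") as f:
            json.dump(active_loras, f, indent=4)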