Share gpu (#75)
* share gpu initial

* list of model statuses for one device

* share gpu + multiple statuses

* gui-backend integration fixes

* add estimated required memory

* low memory alert

* check if models data is ready

* space fix

* jumping status fix

---------

Co-authored-by: oxyplay <max@oxyplay.com>
mitya52 and oxyplay authored Aug 18, 2023
1 parent dc56fa8 commit d09e260
Showing 8 changed files with 176 additions and 75 deletions.
8 changes: 8 additions & 0 deletions known_models_db/refact_known_models/huggingface.py
@@ -5,6 +5,7 @@
         "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface",
         "chat_scratchpad_class": None,
         "model_class_kwargs": {},
+        "required_memory_mb": 18000,
         "filter_caps": ["completion"],
     },
     "starcoder/15b/plus": {
@@ -13,6 +14,7 @@
         "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface",
         "chat_scratchpad_class": None,
         "model_class_kwargs": {},
+        "required_memory_mb": 18000,
         "filter_caps": ["completion"],
     },
     "starchat/15b/beta": {
@@ -21,6 +23,7 @@
         "diff_scratchpad_class": None,
         "chat_scratchpad_class": "refact_scratchpads:ScratchpadHuggingfaceStarChat",
         "model_class_kwargs": {},
+        "required_memory_mb": 18000,
         "filter_caps": ["starchat"],
     },
     "wizardcoder/15b": {
@@ -29,6 +32,7 @@
         "diff_scratchpad_class": "refact_scratchpads:ScratchpadHuggingface",
         "chat_scratchpad_class": None,
         "model_class_kwargs": {},
+        "required_memory_mb": 18000,
         "filter_caps": ["completion"],
     },
     "wizardlm/7b": {
@@ -39,6 +43,7 @@
         "model_class_kwargs": {
             "model_basename": "wizardlm-7b-v1.0-uncensored-GPTQ-4bit-128g.no-act.order",
         },
+        "required_memory_mb": 8000,
         "filter_caps": ["wizardlm"],
     },
     "wizardlm/13b": {
@@ -49,6 +54,7 @@
         "model_class_kwargs": {
             "model_basename": "wizardlm-13b-v1.1-GPTQ-4bit-128g.no-act.order",
         },
+        "required_memory_mb": 14000,
         "filter_caps": ["wizardlm"],
     },
     "llama2/7b": {
@@ -59,6 +65,7 @@
         "model_class_kwargs": {
             "model_basename": "gptq_model-4bit-128g",
         },
+        "required_memory_mb": 8000,
         "filter_caps": ["llama2"],
     },
     "llama2/13b": {
@@ -69,6 +76,7 @@
         "model_class_kwargs": {
             "model_basename": "gptq_model-4bit-128g",
         },
+        "required_memory_mb": 14000,
         "filter_caps": ["llama2"],
     },
     "wizardlm/30b/4bit": {
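
These entries only declare metadata; the check behind the GUI's new low-memory alert would look roughly like the sketch below. The helper name and call site are hypothetical, only the required_memory_mb field comes from this diff:

    # Hypothetical helper, not the repo's actual code: compare the sum of
    # declared model memory on one GPU against that GPU's total memory (MB).
    from typing import List

    def required_memory_exceeds(models: List[dict], gpu_mem_total_mb: int) -> bool:
        required = sum(m.get("required_memory_mb", 0) for m in models)
        return required > gpu_mem_total_mb

    # Two 15b-class models declared at 18000 MB each overflow a 24 GB card:
    assert required_memory_exceeds([{"required_memory_mb": 18000}] * 2, 24000)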
2 changes: 2 additions & 0 deletions known_models_db/refact_known_models/refact.py
@@ -6,6 +6,7 @@
         "chat_scratchpad_class": None,
         "model_class": "refact_models:CodifyModel",
         "T": 2048,
+        "required_memory_mb": 3500,
         "filter_caps": ["CONTRASTcode", "completion"],
     },

@@ -16,6 +17,7 @@
         "chat_scratchpad_class": None,
         "model_class": "refact_models:CodifyModel",
         "T": 2048,
+        "required_memory_mb": 8500,
         "filter_caps": ["CONTRASTcode", "completion", "finetune"],
     },
45 changes: 25 additions & 20 deletions self_hosting_machinery/watchdog/docker_watchdog.py
@@ -6,7 +6,7 @@
 import sys
 import time
 import uuid
-from typing import Dict, Optional
+from typing import Dict, Optional, List

 from self_hosting_machinery import env

@@ -188,7 +188,7 @@ def maybe_can_start(self):
             if can_start:
                 self._start()
         elif "always_on_low_priority" in policy:
-            can_start = low_priority_can_start(self.cfg.get("gpus", []))
+            can_start = low_priority_can_start(self)
             if can_start:
                 self._start()
         elif "at_night" in policy:
@@ -197,6 +197,15 @@
             if self.start_ts + self.cfg["restart_every"] < time.time():
                 self._start()

+    def __str__(self):
+        return f"TrackedJob:\n" \
+               f"  pid: {self.p.pid if self.p else None}\n" \
+               f"  cmd: '{self.cmdline_str}'\n" \
+               f"  start_ts: {self.start_ts}\n" \
+               f"  cfg: {self.cfg}\n" \
+               f"  shutdown: {self.please_shutdown}\n" \
+               f"  remove: {self.remove_this}\n" \
+               f"  status: {self.status_from_stderr}\n"


 tracked: Dict[str, TrackedJob] = {}
@@ -244,11 +253,13 @@ def preempt_low_priority(gpus):
     return can_start


-def low_priority_can_start(gpus):
+def low_priority_can_start(job: TrackedJob):
     can_start = True
-    for job in tracked.values():
-        if set(gpus) & set(job.cfg["gpus"]):
-            if job.p is not None:
+    for tracked_job in tracked.values():
+        if job.cfg.get("share_gpu", False) and tracked_job.cfg.get("share_gpu", False):
+            continue
+        if set(job.cfg.get("gpus", [])) & set(tracked_job.cfg.get("gpus", [])):
+            if tracked_job.p is not None:
                 can_start = False
     return can_start
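
Restated outside the watchdog (hypothetical standalone helper; the real function consults the global tracked dict and live processes): a low-priority job may start on an occupied GPU only when both it and the job already on that GPU opted in via share_gpu.

    def may_start(new_cfg: dict, running_cfgs: list) -> bool:
        # Sketch of the rule in low_priority_can_start above.
        for running in running_cfgs:
            if new_cfg.get("share_gpu", False) and running.get("share_gpu", False):
                continue  # both sides opted in, so no conflict
            if set(new_cfg.get("gpus", [])) & set(running.get("gpus", [])):
                return False  # GPU overlap without mutual share_gpu
        return True

    # may_start({"gpus": [0], "share_gpu": True}, [{"gpus": [0], "share_gpu": True}])   -> True
    # may_start({"gpus": [0], "share_gpu": True}, [{"gpus": [0], "share_gpu": False}])  -> False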

@@ -258,26 +269,20 @@ def low_priority_can_start(gpus):

 def inform_about_gpu_status():
     global _inform_about_gpu_status
-    MAX = 16
-    gpu_command = [""] * MAX
-    gpu_status = [""] * MAX
+    gpu_status: Dict[int, List[Dict]] = {}
     for job in tracked.values():
         if job.p is None:
             continue
-        for gpu in job.cfg["gpus"]:
-            if gpu >= 0 and gpu < len(gpu_status):
+        for gpu in map(int, job.cfg["gpus"]):
+            if gpu >= 0:
                 t = job.cmdline_str
                 if t.startswith("python -m"):
                     t = t[len("python -m"):]
-                gpu_command[gpu] = t.strip()
-                gpu_status[gpu] = job.status_from_stderr
-    j = {"gpus": [{}]*16}
-    for i in range(MAX):
-        j["gpus"][i] = {
-            "command": gpu_command[i],
-            "status": gpu_status[i],
-        }
-    s = json.dumps(j, indent=4) + "\n"
+                gpu_status.setdefault(gpu, []).append({
+                    "command": t.strip(),
+                    "status": job.status_from_stderr,
+                })
+    s = json.dumps({"gpus": gpu_status}, indent=4) + "\n"
     if s != _inform_about_gpu_status:
         with open(env.CONFIG_BUSY_GPUS + ".tmp", "w") as f:
             f.write(s)
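
Because the status map is now keyed by GPU index with a list per device, the JSON written to env.CONFIG_BUSY_GPUS can report several sharing models on one card. An illustrative payload (commands and statuses invented; note that json.dumps renders the integer keys as strings):

    {
        "gpus": {
            "0": [
                {"command": "self_hosting_machinery.inference.inference_worker ...", "status": "working"},
                {"command": "self_hosting_machinery.inference.inference_worker ...", "status": "loading"}
            ]
        }
    }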
3 changes: 2 additions & 1 deletion self_hosting_machinery/watchdog/watchdog.d/model.cfg
@@ -3,5 +3,6 @@
     "command_line": ["python", "-m", "self_hosting_machinery.inference.inference_worker"],
     "unfinished": true,
     "needs_compile": true,
-    "gpus": []
+    "gpus": [],
+    "share_gpu": false
 }
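
A rendered config for a job that opts into sharing would presumably differ only in the last two fields; illustrative values (the hunk hides the file's first two lines, which are unchanged):

    {
        ...
        "gpus": [0],
        "share_gpu": true
    }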
7 changes: 6 additions & 1 deletion self_hosting_machinery/webgui/static/style.css
@@ -233,7 +233,7 @@ h3 {
 }
 .gpus-item {
     display: flex;
-    justify-content: center;
+    justify-content: flex-start;
     text-align: center;
     border: 1px solid rgba(0,0,0,0.15);
     border-radius: 8px;
@@ -626,6 +626,11 @@ h3 {
 }
 .model-hosting-error {
     color: darkred;
+    text-align: right;
+}
+.model-memory-error {
+    color: darkred;
+    text-align: right;
 }
 .disabled-group {
     opacity: 0.4;
6 changes: 5 additions & 1 deletion self_hosting_machinery/webgui/static/tab-model-hosting.html
@@ -10,6 +10,7 @@ <h3>Hosted Models</h3>
                     <th>Model</th>
                     <th>Completion</th>
                     <th>Sharding</th>
+                    <th>Sharing</th>
                     <th></th>
                 </tr>
             </thead>
@@ -19,7 +20,10 @@ <h3>Hosted Models</h3>
             </table>
             <div class="model-hosting-controls">
                 <button data-bs-toggle="modal" data-bs-target="#add-model-modal" class="btn btn-primary model-hosting-add"><i class="bi bi-plus-lg"></i> Add Model</button>
-                <div class="model-hosting-error d-none"><i class="bi bi-exclamation-triangle-fill"></i> You have more models selected than GPUs available.</div>
+                <div>
+                    <div class="model-hosting-error"><i class="bi bi-exclamation-triangle-fill"></i> You have more models selected than GPUs available.</div>
+                    <div class="model-memory-error"><i class="bi bi-exclamation-triangle-fill"></i> Required memory exceeds the GPU's memory.</div>
+                </div>
             </div>
         </div>
         <div class="pane">
71 changes: 49 additions & 22 deletions self_hosting_machinery/webgui/static/tab-model-hosting.js
@@ -34,37 +34,39 @@ function render_gpus(gpus) {
         gpu_mem.classList.add('gpus-mem');
         const gpu_temp = document.createElement("div");
         gpu_temp.classList.add('gpus-temp');
-        const gpu_command = document.createElement("div");
-        gpu_command.classList.add('gpus-command');
-        const gpu_status = document.createElement("div");
-        gpu_status.classList.add('gpus-status');

         const used_gb = format_memory(element.mem_used_mb);
         const total_gb = format_memory(element.mem_total_mb);
         const used_mem = Math.round(element.mem_used_mb / (element.mem_total_mb / 100));
         gpu_name.innerHTML = element.name;
         gpu_mem.innerHTML = `<b>Mem</b><div class="gpus-mem-wrap"><div class="gpus-mem-bar"><span style="width: ${used_mem}%"></span></div>${used_gb}/${total_gb} GB</div>`;
         gpu_temp.innerHTML = `<b>Temp</b>` + element.temp_celsius + '°C';
-        gpu_command.innerHTML = `<span class="gpus-current-status">${element.status}</span>`;
-        gpu_status.innerHTML += `<div><b>Command</b>${element.command}</div>`;
-        gpu_status.innerHTML += `<div><b>Status</b>${element.status}</div>`;
-        gpu_command.appendChild(gpu_status);
-        gpu_command.addEventListener('mouseover',function(e) {
-            gpus_popup = true;
-            this.querySelector('.gpus-status').classList.add('gpus-status-visible');
-        });
-        gpu_command.addEventListener('mouseout',function(e) {
-            gpus_popup = false;
-            this.querySelector('.gpus-status').classList.remove('gpus-status-visible');
-        });
-        if(!element.status || element.status === '') {
-            gpu_command.classList.add('gpus-status-invisible');
-        }
         row.appendChild(gpu_image);
         gpu_wrapper.appendChild(gpu_name);
         gpu_wrapper.appendChild(gpu_mem);
         gpu_wrapper.appendChild(gpu_temp);
-        gpu_wrapper.appendChild(gpu_command);
+        element.statuses.forEach(status => {
+            const gpu_command = document.createElement("div");
+            gpu_command.classList.add('gpus-command');
+            const gpu_status = document.createElement("div");
+            gpu_status.classList.add('gpus-status');
+            gpu_command.innerHTML = `<span class="gpus-current-status">${status.status}</span>`;
+            gpu_status.innerHTML += `<div><b>Command</b>${status.command}</div>`;
+            gpu_status.innerHTML += `<div><b>Status</b>${status.status}</div>`;
+            gpu_command.appendChild(gpu_status);
+            gpu_command.addEventListener('mouseover',function(e) {
+                gpus_popup = true;
+                this.querySelector('.gpus-status').classList.add('gpus-status-visible');
+            });
+            gpu_command.addEventListener('mouseout',function(e) {
+                gpus_popup = false;
+                this.querySelector('.gpus-status').classList.remove('gpus-status-visible');
+            });
+            if(!status.status || status.status === '') {
+                gpu_command.classList.add('gpus-status-invisible');
+            }
+            gpu_wrapper.appendChild(gpu_command);
+        });

         row.appendChild(gpu_wrapper);
         gpus_list.appendChild(row);
     });
@@ -84,11 +86,17 @@ function get_models()
         enable_chat_gpt_switch.checked = models_data['openai_api_enable'];
         enable_chat_gpt_switch.addEventListener('change', save_model_assigned);
         const more_gpus_notification = document.querySelector('.model-hosting-error');
-        if(models_data.more_models_than_gpus) {
+        if(models_data && models_data.length > 0 && models_data.more_models_than_gpus) {
             more_gpus_notification.classList.remove('d-none');
         } else {
             more_gpus_notification.classList.add('d-none');
         }
+        const required_memory_exceed_available = document.querySelector('.model-memory-error');
+        if(models_data && models_data.length > 0 && models_data.required_memory_exceed_available) {
+            required_memory_exceed_available.classList.remove('d-none');
+        } else {
+            required_memory_exceed_available.classList.add('d-none');
+        }
     });
 }

@@ -187,6 +195,24 @@ function render_models_assigned(models) {
         // // models_gpus_change = true;
         // });
         gpus.appendChild(gpus_input);
+        const gpus_share = document.createElement("td");
+        const gpus_checkbox = document.createElement("input");
+        gpus_checkbox.setAttribute('type','checkbox');
+        gpus_checkbox.setAttribute('value',index);
+        gpus_checkbox.setAttribute('name',`share-${index}`);
+        gpus_checkbox.classList.add('form-check-input');
+        if(models_data.model_assign[index].share_gpu) {
+            gpus_checkbox.checked = true;
+        }
+        gpus_checkbox.addEventListener('change', function() {
+            if(this.checked) {
+                models_data.model_assign[index].share_gpu = true;
+            } else {
+                models_data.model_assign[index].share_gpu = false;
+            }
+            save_model_assigned();
+        });
+        gpus_share.appendChild(gpus_checkbox);
         del_button.innerHTML = `<i class="bi bi-trash3-fill"></i>`;
         del_button.dataset.model = index;
         del_button.addEventListener('click', function() {
@@ -199,6 +225,7 @@ function render_models_assigned(models) {
         row.appendChild(model_name);
         row.appendChild(completion);
         row.appendChild(select_gpus);
+        row.appendChild(gpus_share);
         row.appendChild(del);
         models_table.appendChild(row);
     }