Skip to content

Commit

Permalink
Interface update & use huggingface models
Browse files Browse the repository at this point in the history
  • Loading branch information
leo4life2 committed Sep 30, 2024
1 parent 73ffde0 commit 425d78d
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 27 deletions.
52 changes: 49 additions & 3 deletions exo/api/chatgpt_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from exo.orchestration import Node
from exo.models import model_base_shards
from typing import Callable
import requests # Add this import


class Message:
Expand Down Expand Up @@ -208,12 +209,41 @@ async def handle_post_chat_completions(self, request):
if DEBUG >= 2: print(f"Handling chat completions request from {request.remote}: {data}")
stream = data.get("stream", False)
chat_request = parse_chat_request(data)

if chat_request.model and chat_request.model.startswith("gpt-"): # to be compatible with ChatGPT tools, point all gpt- model requests to llama instead
chat_request.model = "llama-3.1-8b"
if not chat_request.model or chat_request.model not in model_base_shards:
if DEBUG >= 1: print(f"Invalid model: {chat_request.model}. Supported: {list(model_base_shards.keys())}. Defaulting to llama-3.1-8b")

if not chat_request.model:
if DEBUG >= 1: print(f"No model provided. Defaulting to llama-3.1-8b")
chat_request.model = "llama-3.1-8b"
shard = model_base_shards[chat_request.model].get(self.inference_engine_classname, None)

shard = None
if chat_request.model in model_base_shards:
shard = model_base_shards[chat_request.model].get(self.inference_engine_classname, None)
else:
# HF models
hf_model_url = f"https://huggingface.co/{chat_request.model}"
response = requests.get(hf_model_url)
if response.status_code == 404:
return web.json_response(
{"detail": f"Model {chat_request.model} does not exist on HuggingFace."},
status=400,
)
elif response.status_code == 200:
# Placeholder logic for creating a shard
try:
shard = create_shard_from_hf_model(chat_request.model, self.inference_engine_classname)
except Exception as e:
return web.json_response(
{"detail": f"Error loading model {chat_request.model}: {str(e)}"},
status=500,
)
else:
return web.json_response(
{"detail": f"Error finding model on HuggingFace: {response.status_code}"},
status=500,
)

if not shard:
supported_models = [model for model, engines in model_base_shards.items() if self.inference_engine_classname in engines]
return web.json_response(
Expand Down Expand Up @@ -338,3 +368,19 @@ async def run(self, host: str = "0.0.0.0", port: int = 8000):
await runner.setup()
site = web.TCPSite(runner, host, port)
await site.start()


def create_shard_from_hf_model(model_name: str, inference_engine_classname: str):
  """Build a base Shard for an arbitrary HuggingFace-hosted model.

  Downloads the model's ``config.json`` from the HuggingFace Hub to discover
  its layer count, then returns a base shard (start_layer == end_layer == 0,
  matching the model_base_shards convention) that the partitioning logic can
  expand across nodes.

  Args:
    model_name: HuggingFace repo id, e.g. ``mlx-community/Meta-Llama-3-8B``.
    inference_engine_classname: class name of the local inference engine;
      MLX engines require ``mlx-community`` models and vice versa.

  Raises:
    ValueError: if the engine/model pairing is invalid, or the fetched
      config has no ``num_hidden_layers`` entry.
    requests.RequestException: if the config download fails or times out.
  """
  # Validate engine/model compatibility up front so an invalid pairing
  # fails fast instead of after a wasted network round-trip.
  is_mlx_engine = inference_engine_classname.startswith("MLX")
  is_mlx_model = model_name.startswith("mlx-community")
  if is_mlx_engine and not is_mlx_model:
    raise ValueError("MLX engine only supports mlx models")
  if not is_mlx_engine and is_mlx_model:
    raise ValueError("Non-MLX engine cannot use mlx models")

  config_url = f"https://huggingface.co/{model_name}/resolve/main/config.json"
  # Explicit timeout: a hung Hub request would otherwise block the API
  # handler indefinitely (requests has no default timeout).
  response = requests.get(config_url, timeout=15)
  response.raise_for_status()
  config = response.json()

  n_layers = config.get("num_hidden_layers")
  if n_layers is None:
    raise ValueError("num_hidden_layers not found in config")

  return Shard(model_id=model_name, start_layer=0, end_layer=0, n_layers=n_layers)
42 changes: 40 additions & 2 deletions tinychat/examples/tinychat/index.css
Original file line number Diff line number Diff line change
Expand Up @@ -310,16 +310,39 @@ p {
display: flex;
justify-content: center;
padding: 20px 0;
gap: 10px; /* Add gap between elements */
}
.model-selector select {

.model-selector select,
.model-selector .model-input,
.model-selector .model-add-button {
padding: 10px 20px;
font-size: 16px;
border: 1px solid #ccc;
border-radius: 5px;
background-color: #f8f8f8;
cursor: pointer;
}
.model-selector select:focus {

.model-selector .model-input {
flex: 1; /* Make the input take up available space */
}

.model-selector .model-add-button {
background-color: var(--secondary-color);
color: var(--foreground-color);
border: none;
cursor: pointer;
transition: all 0.3s ease;
}

.model-selector .model-add-button:hover {
background-color: var(--secondary-color-transparent);
}

.model-selector select:focus,
.model-selector .model-input:focus,
.model-selector .model-add-button:focus {
outline: none;
border-color: #007bff;
box-shadow: 0 0 0 2px rgba(0,123,255,.25);
Expand Down Expand Up @@ -393,4 +416,19 @@ p {
max-width: 100%;
max-height: 100%;
object-fit: contain;
}

/* Destructive counterpart to .model-add-button: removes the currently
   selected model <option>. Mirrors the add button's sizing and typography
   but uses the theme's red palette to signal a destructive action. */
.model-selector .model-remove-button {
background-color: var(--red-color);
color: var(--foreground-color);
border: none;
padding: 10px 20px;
font-size: 16px;
border-radius: 5px;
cursor: pointer;
transition: all 0.3s ease;
}

/* Slightly faded red on hover, consistent with the add button's hover style. */
.model-selector .model-remove-button:hover {
background-color: var(--red-color-transparent);
}
45 changes: 24 additions & 21 deletions tinychat/examples/tinychat/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,29 @@
<div x-show="errorMessage" x-transition.opacity x-text="errorMessage" class="toast">
</div>
<div class="model-selector">
<select @change="if (cstate) cstate.selectedModel = $event.target.value" x-model="cstate.selectedModel">
<option selected="" value="llama-3.2-1b">Llama 3.2 1B</option>
<option value="llama-3.2-3b">Llama 3.2 3B</option>
<option value="llama-3.1-8b">Llama 3.1 8B</option>
<option value="llama-3.1-70b">Llama 3.1 70B</option>
<option value="llama-3.1-70b-bf16">Llama 3.1 70B (BF16)</option>
<option value="llama-3.1-405b">Llama 3.1 405B</option>
<option value="llama-3-8b">Llama 3 8B</option>
<option value="llama-3-70b">Llama 3 70B</option>
<option value="mistral-nemo">Mistral Nemo</option>
<option value="mistral-large">Mistral Large</option>
<option value="deepseek-coder-v2-lite">Deepseek Coder V2 Lite</option>
<option value="deepseek-coder-v2.5">Deepseek Coder V2.5</option>
<option value="llava-1.5-7b-hf">LLaVa 1.5 7B (Vision Model)</option>
<option value="qwen-2.5-7b">Qwen 2.5 7B</option>
<option value="qwen-2.5-math-7b">Qwen 2.5 7B (Math)</option>
<option value="qwen-2.5-14b">Qwen 2.5 14B</option>
<option value="qwen-2.5-72b">Qwen 2.5 72B</option>
<option value="qwen-2.5-math-72b">Qwen 2.5 72B (Math)</option>
</select>
<button @click="removeModel" class="model-remove-button">Remove</button>
<select @change="if (cstate) cstate.selectedModel = $event.target.value" x-model="cstate.selectedModel">
<option selected="" value="llama-3.2-1b">Llama 3.2 1B</option>
<option value="llama-3.2-3b">Llama 3.2 3B</option>
<option value="llama-3.1-8b">Llama 3.1 8B</option>
<option value="llama-3.1-70b">Llama 3.1 70B</option>
<option value="llama-3.1-70b-bf16">Llama 3.1 70B (BF16)</option>
<option value="llama-3.1-405b">Llama 3.1 405B</option>
<option value="llama-3-8b">Llama 3 8B</option>
<option value="llama-3-70b">Llama 3 70B</option>
<option value="mistral-nemo">Mistral Nemo</option>
<option value="mistral-large">Mistral Large</option>
<option value="deepseek-coder-v2-lite">Deepseek Coder V2 Lite</option>
<option value="deepseek-coder-v2.5">Deepseek Coder V2.5</option>
<option value="llava-1.5-7b-hf">LLaVa 1.5 7B (Vision Model)</option>
<option value="qwen-2.5-7b">Qwen 2.5 7B</option>
<option value="qwen-2.5-math-7b">Qwen 2.5 7B (Math)</option>
<option value="qwen-2.5-14b">Qwen 2.5 14B</option>
<option value="qwen-2.5-72b">Qwen 2.5 72B</option>
<option value="qwen-2.5-math-72b">Qwen 2.5 72B (Math)</option>
</select>
<input type="text" placeholder="Add model" x-model="newModel" class="model-input"/>
<button @click="addModel" class="model-add-button">Add</button>
</div>
<div @popstate.window="
if (home === 2) {
Expand Down Expand Up @@ -198,4 +201,4 @@ <h3 x-text="new Date(_state.time).toLocaleString()"></h3>
</div>
</div>
</main>
</body>
</body>
23 changes: 22 additions & 1 deletion tinychat/examples/tinychat/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,27 @@ document.addEventListener("alpine:init", () => {
}
}
},

newModel: '',

addModel() {
if (this.newModel.trim() !== '') {
const newOption = document.createElement('option');
newOption.value = this.newModel;
newOption.text = this.newModel;
document.querySelector('.model-selector select').appendChild(newOption);
this.newModel = '';
}
},

removeModel() {
const select = document.querySelector('.model-selector select');
const selectedOption = select.options[select.selectedIndex];
if (selectedOption) {
select.removeChild(selectedOption);
this.cstate.selectedModel = select.options.length > 0 ? select.options[0].value : '';
}
},
}));
});

Expand Down Expand Up @@ -413,4 +434,4 @@ function createParser(onParse) {
const BOM = [239, 187, 191];
function hasBom(buffer) {
return BOM.every((charCode, index) => buffer.charCodeAt(index) === charCode);
}
}

0 comments on commit 425d78d

Please sign in to comment.