[Model] Add Llama 3.1 to prebuilt models (mlc-ai#513)
Add Llama 3.1 8B and 70B to the prebuilt models. Update the examples from
Llama 3 to Llama 3.1; the Llama 3 models are still kept.

Related PRs:
- mlc-ai/binary-mlc-llm-libs#131
- mlc-ai/mlc-llm#2682
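
For reference, loading one of the newly added models follows the same pattern as the README example updated below. A minimal sketch; the model id is one added to prebuiltAppConfig in this commit, and the prompt is illustrative:

import { CreateMLCEngine } from "@mlc-ai/web-llm";

// Model id added to the prebuilt list by this commit.
const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";

const engine = await CreateMLCEngine(selectedModel, {
  initProgressCallback: (report) => console.log(report.text),
});

// web-llm exposes an OpenAI-style chat completions API.
const reply = await engine.chat.completions.create({
  messages: [{ role: "user", content: "Hello!" }],
});
console.log(reply.choices[0].message.content);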
CharlieFRuan authored and jzhao62 committed Dec 8, 2024
1 parent 4f59b45 commit e460e3c
Showing 14 changed files with 110 additions and 44 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -124,7 +124,7 @@ import { CreateMLCEngine } from "@mlc-ai/web-llm";
const initProgressCallback = (initProgress) => {
console.log(initProgress);
}
-const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";

const engine = await CreateMLCEngine(
selectedModel,
4 changes: 2 additions & 2 deletions examples/get-started-web-worker/src/main.ts
@@ -17,7 +17,7 @@ async function mainNonStreaming() {
const initProgressCallback = (report: webllm.InitProgressReport) => {
setLabel("init-label", report.text);
};
-const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";

const engine: webllm.MLCEngineInterface =
await webllm.CreateWebWorkerMLCEngine(
@@ -56,7 +56,7 @@ async function mainStreaming() {
const initProgressCallback = (report: webllm.InitProgressReport) => {
setLabel("init-label", report.text);
};
-const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";

const engine: webllm.MLCEngineInterface =
await webllm.CreateWebWorkerMLCEngine(
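
The engine-creation calls above are truncated in this view. A hedged sketch of the full web-worker pattern; the worker path and module options are assumptions based on the example's usual layout, not shown in this diff:

import * as webllm from "@mlc-ai/web-llm";

// Sketch: run the model in a Web Worker so inference stays off the main thread.
const engine: webllm.MLCEngineInterface =
  await webllm.CreateWebWorkerMLCEngine(
    // Assumed worker path and bundler-style module worker.
    new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
    "Llama-3.1-8B-Instruct-q4f32_1-MLC",
    { initProgressCallback: (report) => console.log(report.text) },
  );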
10 changes: 5 additions & 5 deletions examples/get-started/src/get_started.ts
@@ -13,7 +13,7 @@ async function main() {
setLabel("init-label", report.text);
};
// Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts`
-const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
selectedModel,
{
@@ -32,12 +32,12 @@ async function main() {
// const appConfig: webllm.AppConfig = {
// model_list: [
// {
// model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC",
// model_id: "Llama-3-8B-Instruct-q4f32_1-MLC",
// model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC",
// model_id: "Llama-3.1-8B-Instruct-q4f32_1-MLC",
// model_lib:
// webllm.modelLibURLPrefix +
// webllm.modelVersion +
// "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
// "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
// overrides: {
// context_window_size: 2048,
// },
@@ -62,7 +62,7 @@ async function main() {
n: 3,
temperature: 1.5,
max_tokens: 256,
-// 46510 and 7188 are "California", and 8421 and 51325 are "Texas" in Llama-3-8B-Instruct
+// 46510 and 7188 are "California", and 8421 and 51325 are "Texas" in Llama-3.1-8B-Instruct
// So we would have a higher chance of seeing the latter two, but never the first in the answer
logit_bias: {
"46510": -100,
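
A note on the logit_bias example above: Llama 3.1 uses the same tokenizer as Llama 3, so the token ids in the comment stay valid and only the model name needed updating. A minimal sketch of the pattern; only the "46510": -100 entry is visible in this diff, so the remaining values are illustrative (-100 effectively bans a token, +100 strongly favors one):

// `engine` is an MLCEngineInterface created as in the example above.
const reply = await engine.chat.completions.create({
  messages: [{ role: "user", content: "Name a US state." }],
  logit_bias: {
    "46510": -100, // "California", first token (value from the diff)
    "7188": -100, // "California", second token (illustrative value)
    "8421": 100, // "Texas", first token (illustrative value)
    "51325": 100, // "Texas", second token (illustrative value)
  },
});
console.log(reply.choices[0].message.content);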
2 changes: 1 addition & 1 deletion examples/json-mode/src/json_mode.ts
@@ -12,7 +12,7 @@ async function main() {
const initProgressCallback = (report: webllm.InitProgressReport) => {
setLabel("init-label", report.text);
};
-const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
selectedModel,
{ initProgressCallback: initProgressCallback },
4 changes: 2 additions & 2 deletions examples/json-schema/src/json_schema.ts
@@ -38,7 +38,7 @@ async function simpleStructuredTextExample() {
setLabel("init-label", report.text);
};
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
"Llama-3-8B-Instruct-q4f16_1-MLC",
"Llama-3.1-8B-Instruct-q4f16_1-MLC",
{ initProgressCallback: initProgressCallback },
);

@@ -107,7 +107,7 @@ async function harryPotterExample() {
};

const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
"Llama-3-8B-Instruct-q4f16_1-MLC",
"Llama-3.1-8B-Instruct-q4f16_1-MLC",
{ initProgressCallback: initProgressCallback },
);

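
Both examples in this file constrain generation with a JSON schema passed through response_format. A hedged sketch of the pattern, with an illustrative schema and prompt; web-llm's ResponseFormat takes the schema as a JSON string:

// Illustrative schema, serialized because the schema field is a string.
const schema = JSON.stringify({
  type: "object",
  properties: { name: { type: "string" }, house: { type: "string" } },
  required: ["name", "house"],
});

// `engine` is an MLCEngineInterface created as in the examples above.
const reply = await engine.chat.completions.create({
  messages: [{ role: "user", content: "Invent a Hogwarts student as JSON." }],
  response_format: { type: "json_object", schema } as webllm.ResponseFormat,
});
console.log(reply.choices[0].message.content); // JSON conforming to the schema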
2 changes: 1 addition & 1 deletion examples/multi-round-chat/src/multi_round_chat.ts
@@ -17,7 +17,7 @@ async function main() {
const initProgressCallback = (report: webllm.InitProgressReport) => {
setLabel("init-label", report.text);
};
-const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
selectedModel,
{ initProgressCallback: initProgressCallback },
2 changes: 1 addition & 1 deletion examples/next-simple-chat/src/utils/chat_ui.ts
@@ -70,7 +70,7 @@ export default class ChatUI {
this.engine.setInitProgressCallback(initProgressCallback);

try {
-const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
// const selectedModel = "TinyLlama-1.1B-Chat-v0.4-q4f16_1-MLC-1k";
await this.engine.reload(selectedModel);
} catch (err: unknown) {
2 changes: 1 addition & 1 deletion examples/seed-to-reproduce/src/seed.ts
@@ -18,7 +18,7 @@ async function main() {
const initProgressCallback = (report: webllm.InitProgressReport) => {
setLabel("init-label", report.text);
};
-const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
selectedModel,
{ initProgressCallback: initProgressCallback },
4 changes: 2 additions & 2 deletions examples/service-worker/src/main.ts
@@ -37,7 +37,7 @@ async function mainNonStreaming() {
const initProgressCallback = (report: webllm.InitProgressReport) => {
setLabel("init-label", report.text);
};
-const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";

const engine: webllm.MLCEngineInterface =
await webllm.CreateServiceWorkerMLCEngine(selectedModel, {
@@ -75,7 +75,7 @@ async function mainStreaming() {
const initProgressCallback = (report: webllm.InitProgressReport) => {
setLabel("init-label", report.text);
};
-const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";

const engine: webllm.ServiceWorkerMLCEngine =
await webllm.CreateServiceWorkerMLCEngine(selectedModel, {
2 changes: 1 addition & 1 deletion examples/simple-chat-js/index.js
@@ -11,7 +11,7 @@ const messages = [
const availableModels = webllm.prebuiltAppConfig.model_list.map(
(m) => m.model_id,
);
-let selectedModel = "Llama-3-8B-Instruct-q4f32_1-1k";
+let selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-1k";

// Callback function for initializing progress
function updateEngineInitProgressCallback(report) {
2 changes: 1 addition & 1 deletion examples/streaming/src/streaming.ts
@@ -15,7 +15,7 @@ async function main() {
const initProgressCallback = (report: webllm.InitProgressReport) => {
setLabel("init-label", report.text);
};
-const selectedModel = "Llama-3-8B-Instruct-q4f32_1-MLC";
+const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
selectedModel,
{ initProgressCallback: initProgressCallback },
92 changes: 79 additions & 13 deletions src/config.ts
@@ -300,51 +300,51 @@ export const prebuiltAppConfig: AppConfig = {
model_list: [
// Llama-3
{
model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC",
model_id: "Llama-3-8B-Instruct-q4f32_1-MLC-1k",
model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC",
model_id: "Llama-3.1-8B-Instruct-q4f32_1-MLC-1k",
model_lib:
modelLibURLPrefix +
modelVersion +
"/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
"/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
vram_required_MB: 5295.7,
low_resource_required: true,
overrides: {
context_window_size: 1024,
},
},
{
model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC",
model_id: "Llama-3-8B-Instruct-q4f16_1-MLC-1k",
model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f16_1-MLC",
model_id: "Llama-3.1-8B-Instruct-q4f16_1-MLC-1k",
model_lib:
modelLibURLPrefix +
modelVersion +
"/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
"/Llama-3_1-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
vram_required_MB: 4598.34,
low_resource_required: true,
overrides: {
context_window_size: 1024,
},
},
{
model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC",
model_id: "Llama-3-8B-Instruct-q4f32_1-MLC",
model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC",
model_id: "Llama-3.1-8B-Instruct-q4f32_1-MLC",
model_lib:
modelLibURLPrefix +
modelVersion +
"/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
"/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
vram_required_MB: 6101.01,
low_resource_required: false,
overrides: {
context_window_size: 4096,
},
},
{
model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC",
model_id: "Llama-3-8B-Instruct-q4f16_1-MLC",
model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f16_1-MLC",
model_id: "Llama-3.1-8B-Instruct-q4f16_1-MLC",
model_lib:
modelLibURLPrefix +
modelVersion +
"/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
"/Llama-3_1-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
vram_required_MB: 5001.0,
low_resource_required: false,
overrides: {
@@ -1031,7 +1031,73 @@
},
},
// BELOW ARE MODELS OF OLDER VERSIONS OR NOT AS PRACTICAL
-// Llama-3 70B
+// Llama-3.1 70B
+{
+model: "https://huggingface.co/mlc-ai/Llama-3.1-70B-Instruct-q3f16_1-MLC",
+model_id: "Llama-3.1-70B-Instruct-q3f16_1-MLC",
+model_lib:
+modelLibURLPrefix +
+modelVersion +
+"/Llama-3_1-70B-Instruct-q3f16_1-ctx4k_cs1k-webgpu.wasm",
+vram_required_MB: 31153.13,
+low_resource_required: false,
+overrides: {
+context_window_size: 4096,
+},
+},
+// Llama-3
+{
+model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC",
+model_id: "Llama-3-8B-Instruct-q4f32_1-MLC-1k",
+model_lib:
+modelLibURLPrefix +
+modelVersion +
+"/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
+vram_required_MB: 5295.7,
+low_resource_required: true,
+overrides: {
+context_window_size: 1024,
+},
+},
+{
+model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC",
+model_id: "Llama-3-8B-Instruct-q4f16_1-MLC-1k",
+model_lib:
+modelLibURLPrefix +
+modelVersion +
+"/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
+vram_required_MB: 4598.34,
+low_resource_required: true,
+overrides: {
+context_window_size: 1024,
+},
+},
+{
+model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f32_1-MLC",
+model_id: "Llama-3-8B-Instruct-q4f32_1-MLC",
+model_lib:
+modelLibURLPrefix +
+modelVersion +
+"/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
+vram_required_MB: 6101.01,
+low_resource_required: false,
+overrides: {
+context_window_size: 4096,
+},
+},
+{
+model: "https://huggingface.co/mlc-ai/Llama-3-8B-Instruct-q4f16_1-MLC",
+model_id: "Llama-3-8B-Instruct-q4f16_1-MLC",
+model_lib:
+modelLibURLPrefix +
+modelVersion +
+"/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
+vram_required_MB: 5001.0,
+low_resource_required: false,
+overrides: {
+context_window_size: 4096,
+},
+},
{
model: "https://huggingface.co/mlc-ai/Llama-3-70B-Instruct-q3f16_1-MLC",
model_id: "Llama-3-70B-Instruct-q3f16_1-MLC",
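
Each prebuilt entry above carries vram_required_MB and low_resource_required, so an application can filter the list at runtime. A minimal sketch using the fields shown in this diff; the 6000 MB budget is an arbitrary example:

import { prebuiltAppConfig } from "@mlc-ai/web-llm";

// Pick the Llama 3.1 variants that fit a given VRAM budget.
const budgetMB = 6000;
const candidates = prebuiltAppConfig.model_list
  .filter((m) => m.model_id.startsWith("Llama-3.1"))
  .filter((m) => (m.vram_required_MB ?? Infinity) <= budgetMB)
  .map((m) => m.model_id);
console.log(candidates);
// e.g. ["Llama-3.1-8B-Instruct-q4f32_1-MLC-1k",
//       "Llama-3.1-8B-Instruct-q4f16_1-MLC-1k",
//       "Llama-3.1-8B-Instruct-q4f16_1-MLC"]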
2 changes: 1 addition & 1 deletion src/engine.ts
@@ -673,7 +673,7 @@ export class MLCEngine implements MLCEngineInterface {
`WARNING: the current maxStorageBufferBindingSize ` +
`(${computeMB(maxStorageBufferBindingSize)}) ` +
`may only work for a limited number of models, e.g.: \n` +
-`- Llama-3-8B-Instruct-q4f16_1-MLC-1k \n` +
+`- Llama-3.1-8B-Instruct-q4f16_1-MLC-1k \n` +
`- Llama-2-7b-chat-hf-q4f16_1-MLC-1k \n` +
`- RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC-1k \n` +
`- RedPajama-INCITE-Chat-3B-v1-q4f32_1-MLC-1k \n` +
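
The warning above is driven by the device's WebGPU limits. A minimal sketch of checking the same limit before picking a model; this is plain WebGPU API, independent of web-llm:

// Query the adapter limit that the warning above is based on.
const adapter = await navigator.gpu.requestAdapter();
if (adapter) {
  const maxMB = adapter.limits.maxStorageBufferBindingSize / (1024 * 1024);
  console.log(`maxStorageBufferBindingSize: ${maxMB.toFixed(0)} MB`);
  // On devices with a small limit, prefer the 1k-context variants named in
  // the warning, e.g. Llama-3.1-8B-Instruct-q4f16_1-MLC-1k.
}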