Map model library locally #240

Merged 2 commits on Dec 19, 2023
README.md (14 additions, 15 deletions)
@@ -120,7 +120,7 @@ To generate the wasm needed by WebLLM, you can run with `--target webgpu` in the
There are two elements of the WebLLM package that enable new models and weight variants.

- model_url: Contains a URL to model artifacts, such as weights and meta-data.
-- model_lib: The web assembly libary that contains the executables to accelerate the model computations.
+- model_lib_url: A URL to the web assembly library (i.e. wasm file) that contains the executables to accelerate the model computations.

Both are customizable in WebLLM.

@@ -132,11 +132,9 @@ async main() {
{
"model_url": myLlamaUrl,
"local_id": "MyLlama-3b-v1-q4f32_0"
"model_lib_url": "/url/to/myllama3b.wasm",
}
],
"model_lib_map": {
"llama-v1-3b-q4f32_0": "/url/to/myllama3b.wasm",
}
};
// override default
const chatOpts = {
@@ -148,22 +146,23 @@ async main() {
// with a chat option override and app config
// under the hood, it will load the model from myLlamaUrl
// and cache it in the browser cache
-//
-// Let us assume that myLlamaUrl/mlc-config.json contains a model_lib
-// field that points to "llama-v1-3b-q4f32_0"
-// then chat module will initialize with these information
+// The chat will also load the model library from "/url/to/myllama3b.wasm",
+// assuming that it is compatible with the model in myLlamaUrl.
await chat.reload("MyLlama-3b-v1-q4f32_0", chatOpts, appConfig);
}
```

In many cases, we only want to supply the model weight variant, but
-not necessarily a new model. In such cases, we can reuse the model lib.
-In such cases, we can just pass in the `model_list` field and skip the model lib,
-and make sure the `mlc-chat-config.json` in the model url has a model lib
-that points to a prebuilt version, right now the prebuilt lib includes
-
-- `Llama-2-7b-chat-hf-q4f32_1`: llama-7b models.
-- `RedPajama-INCITE-Chat-3B-v1-q4f32_1`: RedPajama-3B variant.
+not necessarily a new model (e.g. `NeuralHermes-Mistral` can reuse `Mistral`'s
+model library; `WizardMath` can reuse `Llama-2`'s model library). For
+an example of how a model library is shared by different model variants,
+see `examples/simple-chat/src/gh-config.js`. We also provide
+a plethora of prebuilt model libraries, including:
+
+- `Llama-2-7b-chat-hf-q4f32_1`: Llama-7b models.
+- `RedPajama-INCITE-Chat-3B-v1-q4f32_1`: RedPajama-3B variants.
+- `Mistral-7B-Instruct-v0.1-q4f16_1`: Mistral-7B variants.
+- and many more at [binary-mlc-llm-libs](https://github.com/mlc-ai/binary-mlc-llm-libs).

## Use WebLLM Package

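To make the README's point about shared model libraries concrete, here is a minimal `AppConfig` sketch in this PR's new per-model `model_lib_url` style. The URLs are copied verbatim from `examples/simple-chat/src/gh-config.js` below, and the `WizardMath`/`Llama-2` pairing mirrors that file; the surrounding snippet is an illustration, not code from this PR.

```js
// Minimal sketch: two model_list entries sharing one model library.
// WizardMath-7B is a Llama-2-7b weight variant, so both entries point
// model_lib_url at the same Llama-2 wasm; only the weights differ.
const appConfig = {
  "model_list": [
    {
      "model_url": "https://huggingface.co/mlc-ai/mlc-chat-Llama-2-7b-chat-hf-q4f32_1/resolve/main/",
      "local_id": "Llama-2-7b-chat-hf-q4f32_1",
      "model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f32_1-webgpu.wasm",
    },
    {
      "model_url": "https://huggingface.co/mlc-ai/mlc-chat-WizardMath-7B-V1.0-q4f32_1/resolve/main/",
      "local_id": "WizardMath-7B-V1.0-q4f32_1",
      // Same wasm as the entry above.
      "model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f32_1-webgpu.wasm",
    },
  ],
};
```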
examples/chrome-extension/src/popup.ts (18 additions, 20 deletions)
@@ -5,8 +5,8 @@

import './popup.css';

-import {ChatModule, AppConfig, InitProgressReport} from "@mlc-ai/web-llm";
-import {ProgressBar, Line} from "progressbar.js";
+import { ChatModule, AppConfig, InitProgressReport } from "@mlc-ai/web-llm";
+import { ProgressBar, Line } from "progressbar.js";

// TODO: Surface this as an experimental option to the user
const useWebGPU = true;
@@ -38,19 +38,17 @@ if (useWebGPU) {
color: '#ffd166',
trailColor: '#eee',
trailWidth: 1,
-svgStyle: {width: '100%', height: '100%'}
+svgStyle: { width: '100%', height: '100%' }
});

-const appConfig : AppConfig = {
+const appConfig: AppConfig = {
model_list: [
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-Mistral-7B-Instruct-v0.1-q4f32_1/resolve/main/",
"local_id": "Mistral-7B-Instruct-v0.1-q4f32_1"
"local_id": "Mistral-7B-Instruct-v0.1-q4f32_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f32_1-sw4096_cs1024-webgpu.wasm",
}
-],
-model_lib_map: {
-"Mistral-7B-Instruct-v0.1-q4f32_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f32_1-sw4096_cs1024-webgpu.wasm"
-}
+]
}

cm.setInitProgressCallback((report: InitProgressReport) => {
@@ -120,7 +118,7 @@ async function handleClick() {
// Generate response
var inp = message;
if (context.length > 0) {
inp = "Use only the following context when answering the question at the end. Don't use any other knowledge.\n"+ context + "\n\nQuestion: " + message + "\n\nHelpful Answer: ";
inp = "Use only the following context when answering the question at the end. Don't use any other knowledge.\n" + context + "\n\nQuestion: " + message + "\n\nHelpful Answer: ";
}
console.log("Input:", inp);
const response = await cm.generate(inp, generateProgressCallback);
@@ -143,12 +141,12 @@ function updateAnswer(answer: string) {
document.getElementById("answer")!.innerHTML = answerWithBreaks;
// Add event listener to copy button
document.getElementById("copyAnswer")!.addEventListener("click", () => {
// Get the answer text
const answerText = answer;
// Copy the answer text to the clipboard
navigator.clipboard.writeText(answerText)
.then(() => console.log("Answer text copied to clipboard"))
.catch((err) => console.error("Could not copy text: ", err));
});
const options: Intl.DateTimeFormatOptions = { month: 'short', day: '2-digit', hour: '2-digit', minute: '2-digit', second: '2-digit' };
const time = new Date().toLocaleString('en-US', options);
@@ -159,10 +157,10 @@ function updateAnswer(answer: string) {
}

function fetchPageContents() {
-chrome.tabs.query({currentWindow: true, active: true}, function(tabs){
-var port = chrome.tabs.connect(tabs[0].id,{name: "channelName"});
+chrome.tabs.query({ currentWindow: true, active: true }, function (tabs) {
+var port = chrome.tabs.connect(tabs[0].id, { name: "channelName" });
port.postMessage({});
-port.onMessage.addListener(function(msg) {
+port.onMessage.addListener(function (msg) {
console.log("Page contents:", msg.contents);
if (useWebGPU) {
context = msg.contents
@@ -174,7 +172,7 @@ function fetchPageContents() {
}

// Grab the page contents when the popup is opened
-window.onload = function() {
+window.onload = function () {
if (!useWebGPU) {
fetchPageContents();
}
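For orientation, this is how a caller consumes the `appConfig` above: `reload` receives a `local_id` plus the `AppConfig`, finds the matching `model_list` entry, and loads the weights from `model_url` and the wasm from `model_lib_url`; no global `model_lib_map` lookup is needed anymore. This is a sketch only: the `ChatModule.reload` argument order comes from the README above, but this exact call site (and the `undefined` chat-options argument) is an assumption, not code from this PR.

```ts
import { ChatModule } from "@mlc-ai/web-llm";

// Sketch: initialize the extension's chat module with the appConfig
// defined above. reload() resolves both the weights URL and the wasm
// URL from the "Mistral-7B-Instruct-v0.1-q4f32_1" model_list entry.
async function initChat(): Promise<ChatModule> {
  const cm = new ChatModule();
  await cm.reload("Mistral-7B-Instruct-v0.1-q4f32_1", undefined, appConfig);
  return cm;
}
```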
examples/next-simple-chat/src/utils/chat_ui.ts (3 additions, 5 deletions)
@@ -62,12 +62,10 @@ export default class ChatUI {
"model_list": [
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-Llama-2-7b-chat-hf-q4f32_1/resolve/main/",
"local_id": "Llama-2-7b-chat-hf-q4f32_1"
"local_id": "Llama-2-7b-chat-hf-q4f32_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f32_1-webgpu.wasm",
},
-],
-"model_lib_map": {
-"Llama-2-7b-chat-hf-q4f32_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f32_1-webgpu.wasm",
-},
+]
});
} catch (err: unknown) {
messageUpdate("error", "Init error, " + (err?.toString() ?? ""), true);
examples/simple-chat/src/gh-config.js (28 additions, 31 deletions)
@@ -2,131 +2,128 @@ export default {
"model_list": [
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-Llama-2-7b-chat-hf-q4f32_1/resolve/main/",
"local_id": "Llama-2-7b-chat-hf-q4f32_1"
"local_id": "Llama-2-7b-chat-hf-q4f32_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f32_1-webgpu.wasm",
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-Llama-2-13b-chat-hf-q4f32_1/resolve/main/",
"local_id": "Llama-2-13b-chat-hf-q4f32_1"
"local_id": "Llama-2-13b-chat-hf-q4f32_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-13b-chat-hf-q4f32_1-webgpu.wasm",
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-Llama-2-7b-chat-hf-q4f16_1/resolve/main/",
"local_id": "Llama-2-7b-chat-hf-q4f16_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f16_1-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-Llama-2-13b-chat-hf-q4f16_1/resolve/main/",
"local_id": "Llama-2-13b-chat-hf-q4f16_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-13b-chat-hf-q4f16_1-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-Llama-2-70b-chat-hf-q4f16_1/resolve/main/",
"local_id": "Llama-2-70b-chat-hf-q4f16_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-70b-chat-hf-q4f16_1-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-RedPajama-INCITE-Chat-3B-v1-q4f16_1/resolve/main/",
"local_id": "RedPajama-INCITE-Chat-3B-v1-q4f16_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f16_1-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-RedPajama-INCITE-Chat-3B-v1-q4f32_1/resolve/main/",
"local_id": "RedPajama-INCITE-Chat-3B-v1-q4f32_1"
"local_id": "RedPajama-INCITE-Chat-3B-v1-q4f32_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f32_1-webgpu.wasm",
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-WizardCoder-15B-V1.0-q4f16_1/resolve/main/",
"local_id": "WizardCoder-15B-V1.0-q4f16_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/WizardCoder-15B-V1.0-q4f16_1-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-WizardCoder-15B-V1.0-q4f32_1/resolve/main/",
"local_id": "WizardCoder-15B-V1.0-q4f32_1"
"local_id": "WizardCoder-15B-V1.0-q4f32_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/WizardCoder-15B-V1.0-q4f32_1-webgpu.wasm",
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-WizardMath-7B-V1.0-q4f16_1/resolve/main/",
"local_id": "WizardMath-7B-V1.0-q4f16_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f16_1-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-WizardMath-7B-V1.0-q4f32_1/resolve/main/",
"local_id": "WizardMath-7B-V1.0-q4f32_1"
"local_id": "WizardMath-7B-V1.0-q4f32_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f32_1-webgpu.wasm",
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-WizardMath-13B-V1.0-q4f16_1/resolve/main/",
"local_id": "WizardMath-13B-V1.0-q4f16_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-13b-chat-hf-q4f16_1-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-WizardMath-70B-V1.0-q4f16_1/resolve/main/",
"local_id": "WizardMath-70B-V1.0-q4f16_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-70b-chat-hf-q4f16_1-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-Mistral-7B-Instruct-v0.1-q4f16_1/resolve/main/",
"local_id": "Mistral-7B-Instruct-v0.1-q4f16_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f16_1-sw4k_cs1k-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-Mistral-7B-Instruct-v0.1-q4f32_1/resolve/main/",
"local_id": "Mistral-7B-Instruct-v0.1-q4f32_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f32_1-sw4k_cs1k-webgpu.wasm",
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-OpenHermes-2.5-Mistral-7B-q4f16_1/resolve/main/",
"local_id": "OpenHermes-2.5-Mistral-7B-q4f16_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f16_1-sw4k_cs1k-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-OpenHermes-2.5-Mistral-7B-q4f32_1/resolve/main/",
"local_id": "OpenHermes-2.5-Mistral-7B-q4f32_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f32_1-sw4k_cs1k-webgpu.wasm",
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-NeuralHermes-2.5-Mistral-7B-q4f16_1/resolve/main/",
"local_id": "NeuralHermes-2.5-Mistral-7B-q4f16_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f16_1-sw4k_cs1k-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-NeuralHermes-2.5-Mistral-7B-q4f32_1/resolve/main/",
"local_id": "NeuralHermes-2.5-Mistral-7B-q4f32_1",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f32_1-sw4k_cs1k-webgpu.wasm",
},
// Models below fit for 128MB buffer limit (e.g. webgpu on Android)
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-Llama-2-7b-chat-hf-q4f16_1-1k/resolve/main/",
"local_id": "Llama-2-7b-chat-hf-q4f16_1-1k",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f16_1-1k-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-RedPajama-INCITE-Chat-3B-v1-q4f16_1-1k/resolve/main/",
"local_id": "RedPajama-INCITE-Chat-3B-v1-q4f16_1-1k",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f16_1-1k-webgpu.wasm",
"required_features": ["shader-f16"],
},
{
"model_url": "https://huggingface.co/mlc-ai/mlc-chat-RedPajama-INCITE-Chat-3B-v1-q4f32_1-1k/resolve/main/",
"local_id": "RedPajama-INCITE-Chat-3B-v1-q4f32_1-1k"
"local_id": "RedPajama-INCITE-Chat-3B-v1-q4f32_1-1k",
"model_lib_url": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f32_1-1k-webgpu.wasm",
},
],
"model_lib_map": {
"Llama-2-7b-chat-hf-q4f32_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f32_1-webgpu.wasm",
"Llama-2-13b-chat-hf-q4f32_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-13b-chat-hf-q4f32_1-webgpu.wasm",
"Llama-2-7b-chat-hf-q4f16_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f16_1-webgpu.wasm",
"Llama-2-13b-chat-hf-q4f16_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-13b-chat-hf-q4f16_1-webgpu.wasm",
"Llama-2-70b-chat-hf-q4f16_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-70b-chat-hf-q4f16_1-webgpu.wasm",
"RedPajama-INCITE-Chat-3B-v1-q4f32_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f32_1-webgpu.wasm",
"RedPajama-INCITE-Chat-3B-v1-q4f16_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f16_1-webgpu.wasm",
"WizardCoder-15B-V1.0-q4f16_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/WizardCoder-15B-V1.0-q4f16_1-webgpu.wasm",
"WizardCoder-15B-V1.0-q4f32_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/WizardCoder-15B-V1.0-q4f32_1-webgpu.wasm",
"WizardMath-7B-V1.0-q4f16_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f16_1-webgpu.wasm",
"WizardMath-7B-V1.0-q4f32_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f32_1-webgpu.wasm",
"WizardMath-13B-V1.0-q4f16_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-13b-chat-hf-q4f16_1-webgpu.wasm",
"WizardMath-70B-V1.0-q4f16_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-70b-chat-hf-q4f16_1-webgpu.wasm",
"Mistral-7B-Instruct-v0.1-q4f16_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f16_1-sw4k_cs1k-webgpu.wasm",
"Mistral-7B-Instruct-v0.1-q4f32_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f32_1-sw4k_cs1k-webgpu.wasm",
"OpenHermes-2.5-Mistral-7B-q4f16_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f32_1-sw4k_cs1k-webgpu.wasm",
"OpenHermes-2.5-Mistral-7B-q4f32_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f32_1-sw4k_cs1k-webgpu.wasm",
"NeuralHermes-2.5-Mistral-7B-q4f16_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f32_1-sw4k_cs1k-webgpu.wasm",
"NeuralHermes-2.5-Mistral-7B-q4f32_1": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Mistral-7B-Instruct-v0.1-q4f32_1-sw4k_cs1k-webgpu.wasm",
// Models below fit for 128MB buffer limit (e.g. webgpu on Android)
"Llama-2-7b-chat-hf-q4f16_1-1k": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/Llama-2-7b-chat-hf-q4f16_1-1k-webgpu.wasm",
"RedPajama-INCITE-Chat-3B-v1-q4f16_1-1k": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f16_1-1k-webgpu.wasm",
"RedPajama-INCITE-Chat-3B-v1-q4f32_1-1k": "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/RedPajama-INCITE-Chat-3B-v1-q4f32_1-1k-webgpu.wasm",
},
"use_web_worker": true
}
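A number of entries above set `"required_features": ["shader-f16"]`, meaning the q4f16 model libraries only run on a WebGPU adapter with half-precision shader support. The snippet below sketches the kind of check an app could run before choosing between a q4f16 and a q4f32 `local_id`; it uses the standard WebGPU `requestAdapter`/`features` API (and assumes WebGPU type definitions such as `@webgpu/types`), while the fallback logic itself is an assumption rather than code from this PR.

```ts
// Sketch: pick a local_id from the config above based on adapter features.
async function pickLocalId(): Promise<string> {
  const adapter = await navigator.gpu?.requestAdapter();
  if (!adapter) {
    throw new Error("WebGPU is not available in this browser");
  }
  // "shader-f16" gates the q4f16 libraries ("required_features" above).
  return adapter.features.has("shader-f16")
    ? "Mistral-7B-Instruct-v0.1-q4f16_1"
    : "Mistral-7B-Instruct-v0.1-q4f32_1";
}
```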