From 75f93d877bbed21c352ea9f634f53c7f697c2d89 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 13 May 2024 23:21:11 +0200 Subject: [PATCH] gguf: update README (#663) Follow up #655 and https://github.com/huggingface/huggingface.js/pull/656#issuecomment-2104265961 Added some examples on how to use local file + strictly typed --------- Co-authored-by: Julien Chaumond Co-authored-by: Mishig --- packages/gguf/README.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/packages/gguf/README.md b/packages/gguf/README.md index 260d60298..763b49470 100644 --- a/packages/gguf/README.md +++ b/packages/gguf/README.md @@ -18,6 +18,8 @@ npm install @huggingface/gguf ## Usage +### Basic usage + ```ts import { GGMLQuantizationType, gguf } from "@huggingface/gguf"; @@ -56,6 +58,44 @@ console.log(tensorInfos); ``` +### Reading a local file + +```ts +// Reading a local file. (Not supported in the browser) +const { metadata, tensorInfos } = await gguf( + './my_model.gguf', + { allowLocalFile: true }, +); +``` + +### Strictly typed + +By default, known fields in `metadata` are typed. This includes various fields found in [llama.cpp](https://github.com/ggerganov/llama.cpp), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and [ggml](https://github.com/ggerganov/ggml). + +```ts +const { metadata, tensorInfos } = await gguf(URL_MODEL); + +// Type check for model architecture at runtime +if (metadata["general.architecture"] === "llama") { + + // "llama.attention.head_count" is a valid key for llama architecture, this is typed as a number + console.log(metadata["llama.attention.head_count"]); + + // "mamba.ssm.conv_kernel" is an invalid key, because it requires model architecture to be mamba + console.log(metadata["mamba.ssm.conv_kernel"]); // error +} +``` + +### Disable strictly typed + +Because GGUF format can be used to store tensors, we can technically use it for other usages. 
For example, storing [control vectors](https://github.com/ggerganov/llama.cpp/pull/5970), [LoRA weights](https://github.com/ggerganov/llama.cpp/pull/2632), etc. + +In case you want to use your own GGUF metadata structure, you can disable strict typing by casting the parse output to `GGUFParseOutput<{ strict: false }>`: + +```ts +const { metadata, tensorInfos }: GGUFParseOutput<{ strict: false }> = await gguf(URL_LLAMA); +``` + ## Hugging Face Hub The Hub supports all file formats and has built-in features for GGUF format.