From 75f93d877bbed21c352ea9f634f53c7f697c2d89 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 13 May 2024 23:21:11 +0200 Subject: [PATCH] gguf: update README (#663) Follow up #655 and https://github.com/huggingface/huggingface.js/pull/656#issuecomment-2104265961 Added some examples on how to use local file + strictly typed --------- Co-authored-by: Julien Chaumond Co-authored-by: Mishig --- packages/gguf/README.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/packages/gguf/README.md b/packages/gguf/README.md index 260d60298..763b49470 100644 --- a/packages/gguf/README.md +++ b/packages/gguf/README.md @@ -18,6 +18,8 @@ npm install @huggingface/gguf ## Usage +### Basic usage + ```ts import { GGMLQuantizationType, gguf } from "@huggingface/gguf"; @@ -56,6 +58,44 @@ console.log(tensorInfos); ``` +### Reading a local file + +```ts +// Reading a local file. (Not supported in the browser) +const { metadata, tensorInfos } = await gguf( + './my_model.gguf', + { allowLocalFile: true }, +); +``` + +### Strictly typed + +By default, known fields in `metadata` are typed. This includes various fields found in [llama.cpp](https://github.com/ggerganov/llama.cpp), [whisper.cpp](https://github.com/ggerganov/whisper.cpp) and [ggml](https://github.com/ggerganov/ggml). + +```ts +const { metadata, tensorInfos } = await gguf(URL_MODEL); + +// Type check for model architecture at runtime +if (metadata["general.architecture"] === "llama") { + + // "llama.attention.head_count" is a valid key for llama architecture, this is typed as a number + console.log(metadata["llama.attention.head_count"]); + + // "mamba.ssm.conv_kernel" is an invalid key, because it requires model architecture to be mamba + console.log(metadata["mamba.ssm.conv_kernel"]); // error +} +``` + +### Disable strictly typed + +Because GGUF format can be used to store tensors, we can technically use it for other usages. 
For example, storing [control vectors](https://github.com/ggerganov/llama.cpp/pull/5970), [LoRA weights](https://github.com/ggerganov/llama.cpp/pull/2632), etc. + +In case you want to use your own GGUF metadata structure, you can disable strict typing by casting the parse output to `GGUFParseOutput<{ strict: false }>`: + +```ts +const { metadata, tensorInfos }: GGUFParseOutput<{ strict: false }> = await gguf(URL_LLAMA); +``` + ## Hugging Face Hub The Hub supports all file formats and has built-in features for GGUF format.