From e0610178cd93fa2051edba529c989bb11030a3db Mon Sep 17 00:00:00 2001
From: ngxson <thichthat@gmail.com>
Date: Sat, 4 May 2024 22:52:44 +0200
Subject: [PATCH 01/10] refactor typing + usage

---
 packages/gguf/scripts/generate-llm.ts | 57 ++++++++++++----------
 packages/gguf/src/transformer-llm.ts  | 70 +++++++++++++++------------
 packages/gguf/src/types.ts            | 37 +++++++++-----
 3 files changed, 95 insertions(+), 69 deletions(-)
diff --git a/packages/gguf/scripts/generate-llm.ts b/packages/gguf/scripts/generate-llm.ts
index 4da57c46e..6cbdb3798 100644
--- a/packages/gguf/scripts/generate-llm.ts
+++ b/packages/gguf/scripts/generate-llm.ts
@@ -8,28 +8,39 @@ import { writeFileSync } from "node:fs";
 const SOURCE_CPP_URL = "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/llama.cpp";
 const DEST_FILE_PATH = "./src/transformer-llm.ts";
 const DEST_COMMON_SOURCE = `
-type Attention<TArchitecture extends string> =
-	& { [K in \`\${TArchitecture}.attention.head_count\`]: number }
-	& { [K in \`\${TArchitecture}.attention.head_count_kv\`]: number }
-	& { [K in \`\${TArchitecture}.attention.layer_norm_epsilon\`]: number }
-	& { [K in \`\${TArchitecture}.attention.layer_norm_rms_epsilon\`]: number }
-	& { [K in \`\${TArchitecture}.attention.alibi_bias_max\`]: number }
-	& { [K in \`\${TArchitecture}.attention.clip_kqv\`]: number }
-	& { [K in \`\${TArchitecture}.attention.use_norm\`]: number };
-
-type Rope<TArchitecture extends LLMArchitecture> =
-	& { [K in \`\${TArchitecture}.rope.dimension_count\`]: number }
-	& { [K in \`\${TArchitecture}.rope.freq_base\`]: number }
-	& { [K in \`\${TArchitecture}.rope.scale\`]: number }
-	& { [K in \`\${TArchitecture}.rope.scale_linear\`]: number };
-
-type MOE<TArchitecture extends LLMArchitecture> =
-	& { [K in \`\${TArchitecture}.expert_count\`]: number }
-	& { [K in \`\${TArchitecture}.expert_used_count\`]: number };
+/** This file is auto-generated by generate-llm.ts */
+
+import type { ModelBase, GGUFGeneralInfo } from "./types";
+
+type Attention<TArchitecture extends string> = Record<
+	| \`\${TArchitecture}.attention.head_count\`
+	| \`\${TArchitecture}.attention.head_count_kv\`
+	| \`\${TArchitecture}.attention.layer_norm_epsilon\`
+	| \`\${TArchitecture}.attention.layer_norm_rms_epsilon\`
+	| \`\${TArchitecture}.attention.alibi_bias_max\`
+	| \`\${TArchitecture}.attention.clip_kqv\`
+	| \`\${TArchitecture}.attention.use_norm\`,
+	number
+>;
+
+type Rope<TArchitecture extends LLMArchitecture> = Record<
+	| \`\${TArchitecture}.rope.dimension_count\`
+	| \`\${TArchitecture}.rope.freq_base\`
+	| \`\${TArchitecture}.rope.scale\`
+	| \`\${TArchitecture}.rope.scale_linear\`,
+	number
+>;
+
+type MOE<TArchitecture extends LLMArchitecture> = Record<
+	| \`\${TArchitecture}.expert_count\`
+	| \`\${TArchitecture}.expert_used_count\`,
+	number
+>;
 
 export type TransformerLLMArchitecture = LLMArchitecture; // type alias
-export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = ModelBase<TArchitecture>
-	& MOE<TArchitecture>
+export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = GGUFGeneralInfo<TArchitecture>
+	& ModelBase<TArchitecture>
+	& Partial<MOE<TArchitecture>>
 	& Attention<TArchitecture>
 	& Rope<TArchitecture>;
 
@@ -163,15 +174,11 @@ async function main() {
 	/////////////////////////////////////
 	// write result to file
 	const content = [
-		"/** This file is auto-generated by generate-llm.ts */",
-		"",
-		'import type { ModelBase } from "./types";',
-		"",
+		DEST_COMMON_SOURCE,
 		"export const LLM_ARCHITECTURES = [",
 		...archList.map((a) => `\t${JSON.stringify(a.name)},`),
 		"] as const;",
 		"type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];",
-		DEST_COMMON_SOURCE,
 		...archList.map((a) => {
 			let code = `export type ${a.tsName} = TransformerLLMBase<${JSON.stringify(a.name)}>`;
 			if (a.hparams.length) {
diff --git a/packages/gguf/src/transformer-llm.ts b/packages/gguf/src/transformer-llm.ts
index a9ef34296..aea40459e 100644
--- a/packages/gguf/src/transformer-llm.ts
+++ b/packages/gguf/src/transformer-llm.ts
@@ -1,6 +1,44 @@
 /** This file is auto-generated by generate-llm.ts */
 
-import type { ModelBase } from "./types";
+import type { ModelBase, GGUFGeneralInfo } from "./types";
+
+type Attention<TArchitecture extends string> = Record<
+	| `${TArchitecture}.attention.head_count`
+	| `${TArchitecture}.attention.head_count_kv`
+	| `${TArchitecture}.attention.layer_norm_epsilon`
+	| `${TArchitecture}.attention.layer_norm_rms_epsilon`
+	| `${TArchitecture}.attention.alibi_bias_max`
+	| `${TArchitecture}.attention.clip_kqv`
+	| `${TArchitecture}.attention.use_norm`,
+	number
+>;
+
+type Rope<TArchitecture extends LLMArchitecture> = Record<
+	| `${TArchitecture}.rope.dimension_count`
+	| `${TArchitecture}.rope.freq_base`
+	| `${TArchitecture}.rope.scale`
+	| `${TArchitecture}.rope.scale_linear`,
+	number
+>;
+
+type MOE<TArchitecture extends LLMArchitecture> = Record<
+	`${TArchitecture}.expert_count` | `${TArchitecture}.expert_used_count`,
+	number
+>;
+
+export type TransformerLLMArchitecture = LLMArchitecture; // type alias
+export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = GGUFGeneralInfo<TArchitecture> &
+	ModelBase<TArchitecture> &
+	Partial<MOE<TArchitecture>> &
+	Attention<TArchitecture> &
+	Rope<TArchitecture>;
+
+export enum TransformerLLMPoolingType {
+	UNSPECIFIED = -1,
+	NONE = 0,
+	MEAN = 1,
+	CLS = 2,
+}
 
 export const LLM_ARCHITECTURES = [
 	"llama",
@@ -37,36 +75,6 @@ export const LLM_ARCHITECTURES = [
 	"olmo",
 ] as const;
 type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];
-
-type Attention<TArchitecture extends string> = { [K in `${TArchitecture}.attention.head_count`]: number } & {
-	[K in `${TArchitecture}.attention.head_count_kv`]: number;
-} & { [K in `${TArchitecture}.attention.layer_norm_epsilon`]: number } & {
-	[K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number;
-} & { [K in `${TArchitecture}.attention.alibi_bias_max`]: number } & {
-	[K in `${TArchitecture}.attention.clip_kqv`]: number;
-} & { [K in `${TArchitecture}.attention.use_norm`]: number };
-
-type Rope<TArchitecture extends LLMArchitecture> = { [K in `${TArchitecture}.rope.dimension_count`]: number } & {
-	[K in `${TArchitecture}.rope.freq_base`]: number;
-} & { [K in `${TArchitecture}.rope.scale`]: number } & { [K in `${TArchitecture}.rope.scale_linear`]: number };
-
-type MOE<TArchitecture extends LLMArchitecture> = { [K in `${TArchitecture}.expert_count`]: number } & {
-	[K in `${TArchitecture}.expert_used_count`]: number;
-};
-
-export type TransformerLLMArchitecture = LLMArchitecture; // type alias
-export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = ModelBase<TArchitecture> &
-	MOE<TArchitecture> &
-	Attention<TArchitecture> &
-	Rope<TArchitecture>;
-
-export enum TransformerLLMPoolingType {
-	UNSPECIFIED = -1,
-	NONE = 0,
-	MEAN = 1,
-	CLS = 2,
-}
-
 export type ArchLlama = TransformerLLMBase<"llama"> & {
 	"llama.attention.layer_norm_rms_epsilon": number;
 };
diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts
index 69405d66a..0bba630a6 100644
--- a/packages/gguf/src/types.ts
+++ b/packages/gguf/src/types.ts
@@ -50,11 +50,11 @@ export enum GGUFValueType {
 const ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"] as const;
 export type Architecture = (typeof ARCHITECTURES)[number];
 
-interface General {
-	"general.architecture": Architecture;
+export interface GGUFGeneralInfo<TArchitecture extends Architecture> {
+	"general.architecture": TArchitecture;
 	"general.name": string;
-	"general.file_type": number;
-	"general.quantization_version": number;
+	"general.file_type"?: number;
+	"general.quantization_version"?: number;
 }
 
 export type ModelBase<
@@ -62,9 +62,16 @@ export type ModelBase<
 		| Architecture
 		| `encoder.${Extract<Architecture, "whisper">}`
 		| `decoder.${Extract<Architecture, "whisper">}`,
-> = { [K in `${TArchitecture}.layer_count`]: number } & { [K in `${TArchitecture}.feed_forward_length`]: number } & {
-	[K in `${TArchitecture}.context_length`]: number;
-} & { [K in `${TArchitecture}.embedding_length`]: number } & { [K in `${TArchitecture}.block_count`]: number };
+> = Record<
+	| `${TArchitecture}.layer_count`
+	| `${TArchitecture}.feed_forward_length`
+	| `${TArchitecture}.context_length`
+	| `${TArchitecture}.embedding_length`
+	| `${TArchitecture}.block_count`,
+	number
+>;
+
+/// Tokenizer
 
 type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert";
 interface Tokenizer {
@@ -77,18 +84,22 @@ interface Tokenizer {
 	"tokenizer.ggml.add_bos_token": boolean;
 	"tokenizer.chat_template": string;
 }
+type NoTokenizer = Record<keyof Tokenizer, undefined>;
+
+/// Models outside of llama.cpp: "rwkv" and "whisper"
+
+export type RWKV = GGUFGeneralInfo<"rwkv"> & ModelBase<"rwkv"> & { "rwkv.architecture_version": number };
+
+export type Whisper = GGUFGeneralInfo<"whisper"> & ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">;
 
-export type RWKV = ModelBase<"rwkv"> & { "rwkv.architecture_version": number };
-export type LLM = TransformerLLM | RWKV;
-export type Whisper = ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">;
-export type Model = (LLM | Whisper) & Partial<Tokenizer>;
+/// Types for parse output
 
 export type GGUFMetadata = {
 	version: Version;
 	tensor_count: bigint;
 	kv_count: bigint;
-} & Partial<General> &
-	Partial<Model> &
+} & (Whisper | RWKV | TransformerLLM) &
+	(NoTokenizer | Tokenizer) &
 	Record<string, MetadataValue>;
 
 export interface GGUFTensorInfo {

From 9a9e77100a99baf3cbc6ac070ea9ed4ca05d06ef Mon Sep 17 00:00:00 2001
From: ngxson <thichthat@gmail.com>
Date: Sun, 5 May 2024 12:10:24 +0200
Subject: [PATCH 02/10] correct optional fields

---
 packages/gguf/scripts/generate-llm.ts | 53 +++++++++++++++++----------
 packages/gguf/src/transformer-llm.ts  | 50 +++++++++++++++----------
 packages/gguf/src/types.ts            | 25 +++++++++----
 3 files changed, 82 insertions(+), 46 deletions(-)

diff --git a/packages/gguf/scripts/generate-llm.ts b/packages/gguf/scripts/generate-llm.ts
index 6cbdb3798..2ad1d46b9 100644
--- a/packages/gguf/scripts/generate-llm.ts
+++ b/packages/gguf/scripts/generate-llm.ts
@@ -12,35 +12,50 @@ const DEST_COMMON_SOURCE = `
 
 import type { ModelBase, GGUFGeneralInfo } from "./types";
 
-type Attention<TArchitecture extends string> = Record<
-	| \`\${TArchitecture}.attention.head_count\`
-	| \`\${TArchitecture}.attention.head_count_kv\`
-	| \`\${TArchitecture}.attention.layer_norm_epsilon\`
-	| \`\${TArchitecture}.attention.layer_norm_rms_epsilon\`
-	| \`\${TArchitecture}.attention.alibi_bias_max\`
-	| \`\${TArchitecture}.attention.clip_kqv\`
-	| \`\${TArchitecture}.attention.use_norm\`,
+type LLMBase<TArchitecture extends string> = Partial<Record<
+	  \`\${TArchitecture}.vocab_size\`
+	| \`\${TArchitecture}.use_parallel_residual\`
+	| \`\${TArchitecture}.tensor_data_layout\`,
 	number
->;
+>>;
 
-type Rope<TArchitecture extends LLMArchitecture> = Record<
-	| \`\${TArchitecture}.rope.dimension_count\`
-	| \`\${TArchitecture}.rope.freq_base\`
-	| \`\${TArchitecture}.rope.scale\`
-	| \`\${TArchitecture}.rope.scale_linear\`,
+type Attention<TArchitecture extends string> = Record<
+	  \`\${TArchitecture}.attention.head_count\`,
 	number
+> & Partial<Record<
+	  \`\${TArchitecture}.attention.head_count_kv\`
+	| \`\${TArchitecture}.attention.key_length\`
+	| \`\${TArchitecture}.attention.value_length\`,
+	number
+>>;
+
+type RopeScalingType = "none" | "linear" | "yarn";
+type Rope<TArchitecture extends LLMArchitecture> = Partial<
+	Record<
+			\`\${TArchitecture}.rope.dimension_count\`
+		| \`\${TArchitecture}.rope.freq_base\`
+		| \`\${TArchitecture}.rope.scale_linear\`
+		| \`\${TArchitecture}.rope.scaling.factor\`
+		| \`\${TArchitecture}.rope.scaling.original_context_length\`,
+		number
+	>
+	& Record<\`\${TArchitecture}.rope.scaling.type\`, RopeScalingType>
+	& Record<\`\${TArchitecture}.rope.finetuned\`, boolean>
 >;
 
-type MOE<TArchitecture extends LLMArchitecture> = Record<
-	| \`\${TArchitecture}.expert_count\`
-	| \`\${TArchitecture}.expert_used_count\`,
-	number
+type MOE<TArchitecture extends LLMArchitecture> = Partial<
+	Record<
+			\`\${TArchitecture}.expert_count\`
+		| \`\${TArchitecture}.expert_used_count\`,
+		number
+	>
 >;
 
 export type TransformerLLMArchitecture = LLMArchitecture; // type alias
 export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = GGUFGeneralInfo<TArchitecture>
+	& LLMBase<TArchitecture>
 	& ModelBase<TArchitecture>
-	& Partial<MOE<TArchitecture>>
+	& MOE<TArchitecture>
 	& Attention<TArchitecture>
 	& Rope<TArchitecture>;
 
diff --git a/packages/gguf/src/transformer-llm.ts b/packages/gguf/src/transformer-llm.ts
index aea40459e..1449125b8 100644
--- a/packages/gguf/src/transformer-llm.ts
+++ b/packages/gguf/src/transformer-llm.ts
@@ -2,34 +2,46 @@
 
 import type { ModelBase, GGUFGeneralInfo } from "./types";
 
-type Attention<TArchitecture extends string> = Record<
-	| `${TArchitecture}.attention.head_count`
-	| `${TArchitecture}.attention.head_count_kv`
-	| `${TArchitecture}.attention.layer_norm_epsilon`
-	| `${TArchitecture}.attention.layer_norm_rms_epsilon`
-	| `${TArchitecture}.attention.alibi_bias_max`
-	| `${TArchitecture}.attention.clip_kqv`
-	| `${TArchitecture}.attention.use_norm`,
-	number
+type LLMBase<TArchitecture extends string> = Partial<
+	Record<
+		`${TArchitecture}.vocab_size` | `${TArchitecture}.use_parallel_residual` | `${TArchitecture}.tensor_data_layout`,
+		number
+	>
 >;
 
-type Rope<TArchitecture extends LLMArchitecture> = Record<
-	| `${TArchitecture}.rope.dimension_count`
-	| `${TArchitecture}.rope.freq_base`
-	| `${TArchitecture}.rope.scale`
-	| `${TArchitecture}.rope.scale_linear`,
-	number
+type Attention<TArchitecture extends string> = Record<`${TArchitecture}.attention.head_count`, number> &
+	Partial<
+		Record<
+			| `${TArchitecture}.attention.head_count_kv`
+			| `${TArchitecture}.attention.key_length`
+			| `${TArchitecture}.attention.value_length`,
+			number
+		>
+	>;
+
+type RopeScalingType = "none" | "linear" | "yarn";
+type Rope<TArchitecture extends LLMArchitecture> = Partial<
+	Record<
+		| `${TArchitecture}.rope.dimension_count`
+		| `${TArchitecture}.rope.freq_base`
+		| `${TArchitecture}.rope.scale_linear`
+		| `${TArchitecture}.rope.scaling.factor`
+		| `${TArchitecture}.rope.scaling.original_context_length`,
+		number
+	> &
+		Record<`${TArchitecture}.rope.scaling.type`, RopeScalingType> &
+		Record<`${TArchitecture}.rope.finetuned`, boolean>
 >;
 
-type MOE<TArchitecture extends LLMArchitecture> = Record<
-	`${TArchitecture}.expert_count` | `${TArchitecture}.expert_used_count`,
-	number
+type MOE<TArchitecture extends LLMArchitecture> = Partial<
+	Record<`${TArchitecture}.expert_count` | `${TArchitecture}.expert_used_count`, number>
 >;
 
 export type TransformerLLMArchitecture = LLMArchitecture; // type alias
 export type TransformerLLMBase<TArchitecture extends LLMArchitecture> = GGUFGeneralInfo<TArchitecture> &
+	LLMBase<TArchitecture> &
 	ModelBase<TArchitecture> &
-	Partial<MOE<TArchitecture>> &
+	MOE<TArchitecture> &
 	Attention<TArchitecture> &
 	Rope<TArchitecture>;
 
diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts
index 0bba630a6..7a13b44d3 100644
--- a/packages/gguf/src/types.ts
+++ b/packages/gguf/src/types.ts
@@ -52,7 +52,7 @@ export type Architecture = (typeof ARCHITECTURES)[number];
 
 export interface GGUFGeneralInfo<TArchitecture extends Architecture> {
 	"general.architecture": TArchitecture;
-	"general.name": string;
+	"general.name"?: string;
 	"general.file_type"?: number;
 	"general.quantization_version"?: number;
 }
@@ -63,11 +63,10 @@ export type ModelBase<
 		| `encoder.${Extract<Architecture, "whisper">}`
 		| `decoder.${Extract<Architecture, "whisper">}`,
 > = Record<
-	| `${TArchitecture}.layer_count`
-	| `${TArchitecture}.feed_forward_length`
 	| `${TArchitecture}.context_length`
+	| `${TArchitecture}.block_count`
 	| `${TArchitecture}.embedding_length`
-	| `${TArchitecture}.block_count`,
+	| `${TArchitecture}.feed_forward_length`,
 	number
 >;
 
@@ -82,15 +81,25 @@ interface Tokenizer {
 	"tokenizer.ggml.bos_token_id": number;
 	"tokenizer.ggml.eos_token_id": number;
 	"tokenizer.ggml.add_bos_token": boolean;
-	"tokenizer.chat_template": string;
+	"tokenizer.chat_template"?: string;
 }
 type NoTokenizer = Record<keyof Tokenizer, undefined>;
 
 /// Models outside of llama.cpp: "rwkv" and "whisper"
 
-export type RWKV = GGUFGeneralInfo<"rwkv"> & ModelBase<"rwkv"> & { "rwkv.architecture_version": number };
-
-export type Whisper = GGUFGeneralInfo<"whisper"> & ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">;
+export type RWKV = GGUFGeneralInfo<"rwkv"> &
+	ModelBase<"rwkv"> & {
+		"rwkv.architecture_version": number;
+	};
+
+// TODO: whisper.cpp doesn't yet support gguf. This may change in the future.
+export type Whisper = GGUFGeneralInfo<"whisper"> &
+	ModelBase<"encoder.whisper"> &
+	ModelBase<"decoder.whisper"> & {
+		"whisper.encoder.mels_count": number;
+		"whisper.encoder.attention.head_count": number;
+		"whisper.decoder.attention.head_count": number;
+	};
 
 /// Types for parse output
 

From 6d704bc6927f4ed89485c8ef1433cc766c7e7f38 Mon Sep 17 00:00:00 2001
From: ngxson <thichthat@gmail.com>
Date: Sun, 5 May 2024 13:10:19 +0200
Subject: [PATCH 03/10] add non-strict & strict typing mode

---
 packages/gguf/src/gguf.spec.ts  | 33 ++++++++++++++------------
 packages/gguf/src/gguf.ts       |  3 +++
 packages/gguf/src/types.spec.ts | 42 +++++++++++++++++++++++++++++++++
 packages/gguf/src/types.ts      | 24 +++++++++++++------
 4 files changed, 80 insertions(+), 22 deletions(-)
 create mode 100644 packages/gguf/src/types.spec.ts

diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts
index 2e6f2c21a..84604348d 100644
--- a/packages/gguf/src/gguf.spec.ts
+++ b/packages/gguf/src/gguf.spec.ts
@@ -37,22 +37,25 @@ describe("gguf", () => {
 			"llama.rope.dimension_count": 128,
 		});
 
-		const tokens = metadata["tokenizer.ggml.tokens"];
-		if (!Array.isArray(tokens)) {
-			throw new Error();
+		expect(!!metadata["tokenizer.ggml.model"]);
+		if (!!metadata["tokenizer.ggml.model"]) {
+			const tokens = metadata["tokenizer.ggml.tokens"];
+			if (!Array.isArray(tokens)) {
+				throw new Error();
+			}
+			expect(tokens.slice(0, 10)).toEqual([
+				"<unk>",
+				"<s>",
+				"</s>",
+				"<0x00>",
+				"<0x01>",
+				"<0x02>",
+				"<0x03>",
+				"<0x04>",
+				"<0x05>",
+				"<0x06>",
+			]);
 		}
-		expect(tokens.slice(0, 10)).toEqual([
-			"<unk>",
-			"<s>",
-			"</s>",
-			"<0x00>",
-			"<0x01>",
-			"<0x02>",
-			"<0x03>",
-			"<0x04>",
-			"<0x05>",
-			"<0x06>",
-		]);
 
 		/// Tensor infos
 		/// By convention we test the first and last tensor.
diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts
index 25f41252f..d39fd367b 100644
--- a/packages/gguf/src/gguf.ts
+++ b/packages/gguf/src/gguf.ts
@@ -308,6 +308,9 @@ export async function gguf(
 			}
 		}
 		offset += valueResult.length;
+		/// TODO(fix typing)
+		// eslint-disable-next-line @typescript-eslint/ban-ts-comment
+		// @ts-ignore
 		metadata[keyResult.value] = valueResult.value;
 	}
 
diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts
new file mode 100644
index 000000000..a4afb699e
--- /dev/null
+++ b/packages/gguf/src/types.spec.ts
@@ -0,0 +1,42 @@
+import { describe, it } from "vitest";
+import { GGUFStrictType, GGUFMetadata, GGUFNonStrictType } from "./types";
+
+describe("gguf-types", () => {
+  it("GGUFNonStrictType should be correct (at compile time)", async () => {
+		const model: GGUFMetadata<GGUFNonStrictType> = null as any;
+    model.kv_count = 123n;
+    model.abc = 456; // PASS, because it can be anything
+	});
+
+	it("GGUFStrictType should be correct (at compile time)", async () => {
+		const model: GGUFMetadata<GGUFStrictType> = null as any;
+
+		if (model["general.architecture"] === "whisper") {
+			model["encoder.whisper.block_count"] = 0;
+			// @ts-expect-error
+			model["encoder.whisper.block_count"] = "abc"; // error, because it must be a number
+		}
+
+		if (model["tokenizer.ggml.model"] === undefined) {
+			// @ts-expect-error
+			model["tokenizer.ggml.eos_token_id"] = 1; // error, because it's undefined
+		}
+		if (model["tokenizer.ggml.model"] === "gpt2") {
+			// @ts-expect-error
+			model["tokenizer.ggml.eos_token_id"] = undefined; // error, because it must be a number
+			model["tokenizer.ggml.eos_token_id"] = 1;
+		}
+
+		if (model["general.architecture"] === "mamba") {
+			model["mamba.ssm.conv_kernel"] = 0;
+			// @ts-expect-error
+			model["mamba.ssm.conv_kernel"] = "abc"; // error, because it must be a number
+		}
+		if (model["general.architecture"] === "llama") {
+      // @ts-expect-error
+			model["mamba.ssm.conv_kernel"] = 0;
+      // @ts-expect-error
+			model["mamba.ssm.conv_kernel"] = "abc"; // PASS, because it can be anything
+		}
+	});
+});
diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts
index 7a13b44d3..1a13ea70e 100644
--- a/packages/gguf/src/types.ts
+++ b/packages/gguf/src/types.ts
@@ -57,6 +57,11 @@ export interface GGUFGeneralInfo<TArchitecture extends Architecture> {
 	"general.quantization_version"?: number;
 }
 
+type ModelMetadata = Whisper | RWKV | TransformerLLM;
+type NoModelMetadata = {
+	"general.architecture"?: undefined,
+};
+
 export type ModelBase<
 	TArchitecture extends
 		| Architecture
@@ -83,7 +88,9 @@ interface Tokenizer {
 	"tokenizer.ggml.add_bos_token": boolean;
 	"tokenizer.chat_template"?: string;
 }
-type NoTokenizer = Record<keyof Tokenizer, undefined>;
+type NoTokenizer = {
+	"tokenizer.ggml.model"?: undefined,
+};
 
 /// Models outside of llama.cpp: "rwkv" and "whisper"
 
@@ -103,13 +110,16 @@ export type Whisper = GGUFGeneralInfo<"whisper"> &
 
 /// Types for parse output
 
-export type GGUFMetadata = {
+export type GGUFStrictType = true;
+export type GGUFNonStrictType = false;
+
+export type GGUFMetadata<T extends GGUFStrictType | GGUFNonStrictType = GGUFStrictType> = {
 	version: Version;
 	tensor_count: bigint;
 	kv_count: bigint;
-} & (Whisper | RWKV | TransformerLLM) &
-	(NoTokenizer | Tokenizer) &
-	Record<string, MetadataValue>;
+} & (T extends GGUFStrictType ? GGUFModelKV : Record<string, MetadataValue>);
+
+export type GGUFModelKV = (NoModelMetadata | ModelMetadata) & (NoTokenizer | Tokenizer);
 
 export interface GGUFTensorInfo {
 	name: string;
@@ -119,7 +129,7 @@ export interface GGUFTensorInfo {
 	offset: bigint;
 }
 
-export interface GGUFParseOutput {
-	metadata: GGUFMetadata;
+export interface GGUFParseOutput<T extends GGUFStrictType | GGUFNonStrictType = GGUFStrictType> {
+	metadata: GGUFMetadata<T>;
 	tensorInfos: GGUFTensorInfo[];
 }

From 87677264d9d23a1d388695dbe06484eafeaa0ac7 Mon Sep 17 00:00:00 2001
From: ngxson <thichthat@gmail.com>
Date: Sun, 5 May 2024 13:14:16 +0200
Subject: [PATCH 04/10] lint & format

---
 packages/gguf/src/gguf.spec.ts  |  4 ++--
 packages/gguf/src/types.spec.ts | 30 +++++++++++++++---------------
 packages/gguf/src/types.ts      | 12 ++++++------
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts
index 84604348d..04ceda7ab 100644
--- a/packages/gguf/src/gguf.spec.ts
+++ b/packages/gguf/src/gguf.spec.ts
@@ -37,8 +37,8 @@ describe("gguf", () => {
 			"llama.rope.dimension_count": 128,
 		});
 
-		expect(!!metadata["tokenizer.ggml.model"]);
-		if (!!metadata["tokenizer.ggml.model"]) {
+		expect(metadata["tokenizer.ggml.model"]).toBeTruthy(); // a bare expect() asserts nothing
+		if (metadata["tokenizer.ggml.model"]) {
 			const tokens = metadata["tokenizer.ggml.tokens"];
 			if (!Array.isArray(tokens)) {
 				throw new Error();
diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts
index a4afb699e..617db7e37 100644
--- a/packages/gguf/src/types.spec.ts
+++ b/packages/gguf/src/types.spec.ts
@@ -1,42 +1,42 @@
 import { describe, it } from "vitest";
-import { GGUFStrictType, GGUFMetadata, GGUFNonStrictType } from "./types";
+import type { GGUFStrictType, GGUFMetadata, GGUFNonStrictType } from "./types";
 
 describe("gguf-types", () => {
-  it("GGUFNonStrictType should be correct (at compile time)", async () => {
+	it("GGUFNonStrictType should be correct (at compile time)", async () => {
+		// eslint-disable-next-line @typescript-eslint/no-explicit-any
 		const model: GGUFMetadata<GGUFNonStrictType> = null as any;
-    model.kv_count = 123n;
-    model.abc = 456; // PASS, because it can be anything
+		model.kv_count = 123n;
+		model.abc = 456; // PASS, because it can be anything
 	});
 
 	it("GGUFStrictType should be correct (at compile time)", async () => {
+		// eslint-disable-next-line @typescript-eslint/no-explicit-any
 		const model: GGUFMetadata<GGUFStrictType> = null as any;
 
 		if (model["general.architecture"] === "whisper") {
 			model["encoder.whisper.block_count"] = 0;
-			// @ts-expect-error
-			model["encoder.whisper.block_count"] = "abc"; // error, because it must be a number
+			// @ts-expect-error because it must be a number
+			model["encoder.whisper.block_count"] = "abc";
 		}
 
 		if (model["tokenizer.ggml.model"] === undefined) {
-			// @ts-expect-error
-			model["tokenizer.ggml.eos_token_id"] = 1; // error, because it's undefined
+			// @ts-expect-error because it's undefined
+			model["tokenizer.ggml.eos_token_id"] = 1;
 		}
 		if (model["tokenizer.ggml.model"] === "gpt2") {
-			// @ts-expect-error
-			model["tokenizer.ggml.eos_token_id"] = undefined; // error, because it must be a number
+			// @ts-expect-error because it must be a number
+			model["tokenizer.ggml.eos_token_id"] = undefined;
 			model["tokenizer.ggml.eos_token_id"] = 1;
 		}
 
 		if (model["general.architecture"] === "mamba") {
 			model["mamba.ssm.conv_kernel"] = 0;
-			// @ts-expect-error
-			model["mamba.ssm.conv_kernel"] = "abc"; // error, because it must be a number
+			// @ts-expect-error because it must be a number
+			model["mamba.ssm.conv_kernel"] = "abc";
 		}
 		if (model["general.architecture"] === "llama") {
-      // @ts-expect-error
+			// @ts-expect-error llama does not have ssm.* keys
 			model["mamba.ssm.conv_kernel"] = 0;
-      // @ts-expect-error
-			model["mamba.ssm.conv_kernel"] = "abc"; // PASS, because it can be anything
 		}
 	});
 });
diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts
index 1a13ea70e..6089cb1db 100644
--- a/packages/gguf/src/types.ts
+++ b/packages/gguf/src/types.ts
@@ -58,9 +58,9 @@ export interface GGUFGeneralInfo<TArchitecture extends Architecture> {
 }
 
 type ModelMetadata = Whisper | RWKV | TransformerLLM;
-type NoModelMetadata = {
-	"general.architecture"?: undefined,
-};
+interface NoModelMetadata {
+	"general.architecture"?: undefined;
+}
 
 export type ModelBase<
 	TArchitecture extends
@@ -88,9 +88,9 @@ interface Tokenizer {
 	"tokenizer.ggml.add_bos_token": boolean;
 	"tokenizer.chat_template"?: string;
 }
-type NoTokenizer = {
-	"tokenizer.ggml.model"?: undefined,
-};
+interface NoTokenizer {
+	"tokenizer.ggml.model"?: undefined;
+}
 
 /// Models outside of llama.cpp: "rwkv" and "whisper"
 

From c2afbdc080980c81ad37a933dfcea840dde9d3bc Mon Sep 17 00:00:00 2001
From: ngxson <thichthat@gmail.com>
Date: Mon, 6 May 2024 11:54:39 +0200
Subject: [PATCH 05/10] cast between strict & nonStrict

---
 packages/gguf/scripts/generate-llm.ts |  4 ++--
 packages/gguf/src/transformer-llm.ts  |  4 ++--
 packages/gguf/src/types.spec.ts       | 23 ++++++++++++++++++-----
 packages/gguf/src/types.ts            | 15 +++++++++------
 4 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/packages/gguf/scripts/generate-llm.ts b/packages/gguf/scripts/generate-llm.ts
index 2ad1d46b9..6ac73149a 100644
--- a/packages/gguf/scripts/generate-llm.ts
+++ b/packages/gguf/scripts/generate-llm.ts
@@ -29,7 +29,7 @@ type Attention<TArchitecture extends string> = Record<
 	number
 >>;
 
-type RopeScalingType = "none" | "linear" | "yarn";
+export type TransformerLLMRopeScalingType = "none" | "linear" | "yarn";
 type Rope<TArchitecture extends LLMArchitecture> = Partial<
 	Record<
 			\`\${TArchitecture}.rope.dimension_count\`
@@ -39,7 +39,7 @@ type Rope<TArchitecture extends LLMArchitecture> = Partial<
 		| \`\${TArchitecture}.rope.scaling.original_context_length\`,
 		number
 	>
-	& Record<\`\${TArchitecture}.rope.scaling.type\`, RopeScalingType>
+	& Record<\`\${TArchitecture}.rope.scaling.type\`, TransformerLLMRopeScalingType>
 	& Record<\`\${TArchitecture}.rope.finetuned\`, boolean>
 >;
 
diff --git a/packages/gguf/src/transformer-llm.ts b/packages/gguf/src/transformer-llm.ts
index 1449125b8..8bad0261e 100644
--- a/packages/gguf/src/transformer-llm.ts
+++ b/packages/gguf/src/transformer-llm.ts
@@ -19,7 +19,7 @@ type Attention<TArchitecture extends string> = Record<`${TArchitecture}.attentio
 		>
 	>;
 
-type RopeScalingType = "none" | "linear" | "yarn";
+export type TransformerLLMRopeScalingType = "none" | "linear" | "yarn";
 type Rope<TArchitecture extends LLMArchitecture> = Partial<
 	Record<
 		| `${TArchitecture}.rope.dimension_count`
@@ -29,7 +29,7 @@ type Rope<TArchitecture extends LLMArchitecture> = Partial<
 		| `${TArchitecture}.rope.scaling.original_context_length`,
 		number
 	> &
-		Record<`${TArchitecture}.rope.scaling.type`, RopeScalingType> &
+		Record<`${TArchitecture}.rope.scaling.type`, TransformerLLMRopeScalingType> &
 		Record<`${TArchitecture}.rope.finetuned`, boolean>
 >;
 
diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts
index 617db7e37..7c5d3787b 100644
--- a/packages/gguf/src/types.spec.ts
+++ b/packages/gguf/src/types.spec.ts
@@ -1,17 +1,30 @@
 import { describe, it } from "vitest";
-import type { GGUFStrictType, GGUFMetadata, GGUFNonStrictType } from "./types";
+import type { gguf } from "./gguf";
+import type { GGUFMetadata, GGUFParseOutput, GGUFType } from "./types";
 
 describe("gguf-types", () => {
-	it("GGUFNonStrictType should be correct (at compile time)", async () => {
+	it("gguf() type can be casted (at compile time)", async () => {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const model: GGUFMetadata<GGUFNonStrictType> = null as any;
+		const result: Awaited<ReturnType<typeof gguf>> = null as any;
+		const strictType = result as GGUFParseOutput<GGUFType.strict>;
+		// @ts-expect-error because the key "abc" does not exist
+		strictType.metadata.abc = 123;
+		const nonStrictType = result as GGUFParseOutput<GGUFType.nonStrict>;
+		nonStrictType.metadata.abc = 123; // PASS, because it can be anything
+		// @ts-expect-error because ArrayBuffer is not a MetadataValue
+		nonStrictType.metadata.fff = ArrayBuffer;
+	});
+
+	it("GGUFType.nonStrict should be correct (at compile time)", async () => {
+		// eslint-disable-next-line @typescript-eslint/no-explicit-any
+		const model: GGUFMetadata<GGUFType.nonStrict> = null as any;
 		model.kv_count = 123n;
 		model.abc = 456; // PASS, because it can be anything
 	});
 
-	it("GGUFStrictType should be correct (at compile time)", async () => {
+	it("GGUFType.strict should be correct (at compile time)", async () => {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const model: GGUFMetadata<GGUFStrictType> = null as any;
+		const model: GGUFMetadata<GGUFType.strict> = null as any;
 
 		if (model["general.architecture"] === "whisper") {
 			model["encoder.whisper.block_count"] = 0;
diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts
index 6089cb1db..b96b899ef 100644
--- a/packages/gguf/src/types.ts
+++ b/packages/gguf/src/types.ts
@@ -110,14 +110,17 @@ export type Whisper = GGUFGeneralInfo<"whisper"> &
 
 /// Types for parse output
 
-export type GGUFStrictType = true;
-export type GGUFNonStrictType = false;
+export enum GGUFType {
+	strict,
+	nonStrict,
+}
 
-export type GGUFMetadata<T extends GGUFStrictType | GGUFNonStrictType = GGUFStrictType> = {
+export type GGUFMetadata<TGGUFType extends GGUFType = GGUFType.strict> = {
 	version: Version;
 	tensor_count: bigint;
 	kv_count: bigint;
-} & (T extends GGUFStrictType ? GGUFModelKV : Record<string, MetadataValue>);
+} & GGUFModelKV &
+	(TGGUFType extends GGUFType.strict ? unknown : Record<string, MetadataValue>);
 
 export type GGUFModelKV = (NoModelMetadata | ModelMetadata) & (NoTokenizer | Tokenizer);
 
@@ -129,7 +132,7 @@ export interface GGUFTensorInfo {
 	offset: bigint;
 }
 
-export interface GGUFParseOutput<T extends GGUFStrictType | GGUFNonStrictType = GGUFStrictType> {
-	metadata: GGUFMetadata<T>;
+export interface GGUFParseOutput<TGGUFType extends GGUFType = GGUFType.strict> {
+	metadata: GGUFMetadata<TGGUFType>;
 	tensorInfos: GGUFTensorInfo[];
 }

From 5f547dd882ad07dad612225f6b8193845fb318a9 Mon Sep 17 00:00:00 2001
From: ngxson <thichthat@gmail.com>
Date: Mon, 6 May 2024 12:16:00 +0200
Subject: [PATCH 06/10] style nits: rename GGUFType members to STRICT/NON_STRICT

---
 packages/gguf/src/types.spec.ts | 14 +++++++-------
 packages/gguf/src/types.ts      | 10 +++++-----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts
index 7c5d3787b..693b47159 100644
--- a/packages/gguf/src/types.spec.ts
+++ b/packages/gguf/src/types.spec.ts
@@ -3,28 +3,28 @@ import type { gguf } from "./gguf";
 import type { GGUFMetadata, GGUFParseOutput, GGUFType } from "./types";
 
 describe("gguf-types", () => {
-	it("gguf() type can be casted (at compile time)", async () => {
+	it("gguf() type can be casted between STRICT and NON_STRICT (at compile time)", async () => {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any
 		const result: Awaited<ReturnType<typeof gguf>> = null as any;
-		const strictType = result as GGUFParseOutput<GGUFType.strict>;
+		const strictType = result as GGUFParseOutput<GGUFType.STRICT>;
 		// @ts-expect-error because the key "abc" does not exist
 		strictType.metadata.abc = 123;
-		const nonStrictType = result as GGUFParseOutput<GGUFType.nonStrict>;
+		const nonStrictType = result as GGUFParseOutput<GGUFType.NON_STRICT>;
 		nonStrictType.metadata.abc = 123; // PASS, because it can be anything
 		// @ts-expect-error because ArrayBuffer is not a MetadataValue
 		nonStrictType.metadata.fff = ArrayBuffer;
 	});
 
-	it("GGUFType.nonStrict should be correct (at compile time)", async () => {
+	it("GGUFType.NON_STRICT should be correct (at compile time)", async () => {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const model: GGUFMetadata<GGUFType.nonStrict> = null as any;
+		const model: GGUFMetadata<GGUFType.NON_STRICT> = null as any;
 		model.kv_count = 123n;
 		model.abc = 456; // PASS, because it can be anything
 	});
 
-	it("GGUFType.strict should be correct (at compile time)", async () => {
+	it("GGUFType.STRICT should be correct (at compile time)", async () => {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const model: GGUFMetadata<GGUFType.strict> = null as any;
+		const model: GGUFMetadata<GGUFType.STRICT> = null as any;
 
 		if (model["general.architecture"] === "whisper") {
 			model["encoder.whisper.block_count"] = 0;
diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts
index b96b899ef..6b8e76595 100644
--- a/packages/gguf/src/types.ts
+++ b/packages/gguf/src/types.ts
@@ -111,16 +111,16 @@ export type Whisper = GGUFGeneralInfo<"whisper"> &
 /// Types for parse output
 
 export enum GGUFType {
-	strict,
-	nonStrict,
+	STRICT,
+	NON_STRICT,
 }
 
-export type GGUFMetadata<TGGUFType extends GGUFType = GGUFType.strict> = {
+export type GGUFMetadata<TGGUFType extends GGUFType = GGUFType.STRICT> = {
 	version: Version;
 	tensor_count: bigint;
 	kv_count: bigint;
 } & GGUFModelKV &
-	(TGGUFType extends GGUFType.strict ? unknown : Record<string, MetadataValue>);
+	(TGGUFType extends GGUFType.STRICT ? unknown : Record<string, MetadataValue>);
 
 export type GGUFModelKV = (NoModelMetadata | ModelMetadata) & (NoTokenizer | Tokenizer);
 
@@ -132,7 +132,7 @@ export interface GGUFTensorInfo {
 	offset: bigint;
 }
 
-export interface GGUFParseOutput<TGGUFType extends GGUFType = GGUFType.strict> {
+export interface GGUFParseOutput<TGGUFType extends GGUFType = GGUFType.STRICT> {
 	metadata: GGUFMetadata<TGGUFType>;
 	tensorInfos: GGUFTensorInfo[];
 }

From 31bac8bbba365e13c517c48452fdee1d5843abd3 Mon Sep 17 00:00:00 2001
From: ngxson <thichthat@gmail.com>
Date: Tue, 7 May 2024 15:15:10 +0200
Subject: [PATCH 07/10] refactor options: replace GGUFType enum with GGUFMetadataOptions

---
 packages/gguf/src/gguf.ts       |  5 +----
 packages/gguf/src/types.spec.ts | 10 +++++-----
 packages/gguf/src/types.ts      | 18 +++++++++++-------
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts
index d39fd367b..775623eae 100644
--- a/packages/gguf/src/gguf.ts
+++ b/packages/gguf/src/gguf.ts
@@ -273,7 +273,7 @@ export async function gguf(
 	offset += tensorCount.length;
 	const numKv = readVersionedSize(r.view, offset, version, littleEndian);
 	offset += numKv.length;
-	const metadata: GGUFMetadata = {
+	const metadata: GGUFMetadata<{ strict: false }> = {
 		version,
 		tensor_count: tensorCount.value,
 		kv_count: numKv.value,
@@ -308,9 +308,6 @@ export async function gguf(
 			}
 		}
 		offset += valueResult.length;
-		/// TODO(fix typing)
-		// eslint-disable-next-line @typescript-eslint/ban-ts-comment
-		// @ts-ignore
 		metadata[keyResult.value] = valueResult.value;
 	}
 
diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts
index 693b47159..0911b81a1 100644
--- a/packages/gguf/src/types.spec.ts
+++ b/packages/gguf/src/types.spec.ts
@@ -1,15 +1,15 @@
 import { describe, it } from "vitest";
 import type { gguf } from "./gguf";
-import type { GGUFMetadata, GGUFParseOutput, GGUFType } from "./types";
+import type { GGUFMetadata, GGUFParseOutput } from "./types";
 
 describe("gguf-types", () => {
 	it("gguf() type can be casted between STRICT and NON_STRICT (at compile time)", async () => {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any
 		const result: Awaited<ReturnType<typeof gguf>> = null as any;
-		const strictType = result as GGUFParseOutput<GGUFType.STRICT>;
+		const strictType = result as GGUFParseOutput<{ strict: true }>;
 		// @ts-expect-error because the key "abc" does not exist
 		strictType.metadata.abc = 123;
-		const nonStrictType = result as GGUFParseOutput<GGUFType.NON_STRICT>;
+		const nonStrictType = result as GGUFParseOutput<{ strict: false }>;
 		nonStrictType.metadata.abc = 123; // PASS, because it can be anything
 		// @ts-expect-error because ArrayBuffer is not a MetadataValue
 		nonStrictType.metadata.fff = ArrayBuffer;
@@ -17,14 +17,14 @@ describe("gguf-types", () => {
 
 	it("GGUFType.NON_STRICT should be correct (at compile time)", async () => {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const model: GGUFMetadata<GGUFType.NON_STRICT> = null as any;
+		const model: GGUFMetadata<{ strict: false }> = null as any;
 		model.kv_count = 123n;
 		model.abc = 456; // PASS, because it can be anything
 	});
 
 	it("GGUFType.STRICT should be correct (at compile time)", async () => {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const model: GGUFMetadata<GGUFType.STRICT> = null as any;
+		const model: GGUFMetadata<{ strict: true }> = null as any;
 
 		if (model["general.architecture"] === "whisper") {
 			model["encoder.whisper.block_count"] = 0;
diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts
index 6b8e76595..e0a75eaf4 100644
--- a/packages/gguf/src/types.ts
+++ b/packages/gguf/src/types.ts
@@ -110,17 +110,21 @@ export type Whisper = GGUFGeneralInfo<"whisper"> &
 
 /// Types for parse output
 
-export enum GGUFType {
-	STRICT,
-	NON_STRICT,
+export interface GGUFMetadataOptions {
+	/**
+   * Enable strict type for known GGUF fields.
+   * 
+	 * @default true
+   */
+  strict: boolean;
 }
 
-export type GGUFMetadata<TGGUFType extends GGUFType = GGUFType.STRICT> = {
+export type GGUFMetadata<Options extends GGUFMetadataOptions = { strict: true }> = {
 	version: Version;
 	tensor_count: bigint;
 	kv_count: bigint;
 } & GGUFModelKV &
-	(TGGUFType extends GGUFType.STRICT ? unknown : Record<string, MetadataValue>);
+	(Options extends { strict: true } ? unknown : Record<string, MetadataValue>);
 
 export type GGUFModelKV = (NoModelMetadata | ModelMetadata) & (NoTokenizer | Tokenizer);
 
@@ -132,7 +136,7 @@ export interface GGUFTensorInfo {
 	offset: bigint;
 }
 
-export interface GGUFParseOutput<TGGUFType extends GGUFType = GGUFType.STRICT> {
-	metadata: GGUFMetadata<TGGUFType>;
+export interface GGUFParseOutput<Options extends GGUFMetadataOptions = { strict: true }> {
+	metadata: GGUFMetadata<Options>;
 	tensorInfos: GGUFTensorInfo[];
 }

From 8c1bce07b3a13978f8c6a3ff7a59046f1bd8c3d6 Mon Sep 17 00:00:00 2001
From: ngxson <thichthat@gmail.com>
Date: Tue, 7 May 2024 15:15:54 +0200
Subject: [PATCH 08/10] format: reindent GGUFMetadataOptions doc comment with tabs

---
 packages/gguf/src/types.ts | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts
index e0a75eaf4..9e6f89dbf 100644
--- a/packages/gguf/src/types.ts
+++ b/packages/gguf/src/types.ts
@@ -112,11 +112,11 @@ export type Whisper = GGUFGeneralInfo<"whisper"> &
 
 export interface GGUFMetadataOptions {
 	/**
-   * Enable strict type for known GGUF fields.
-   * 
+	 * Enable strict type for known GGUF fields.
+	 *
 	 * @default true
-   */
-  strict: boolean;
+	 */
+	strict: boolean;
 }
 
 export type GGUFMetadata<Options extends GGUFMetadataOptions = { strict: true }> = {

From 2e62e41ef028322fcd9afbd42602efecc9d2882f Mon Sep 17 00:00:00 2001
From: ngxson <thichthat@gmail.com>
Date: Tue, 7 May 2024 15:18:10 +0200
Subject: [PATCH 09/10] fix CI: use {} instead of null as type-test placeholder

---
 packages/gguf/src/types.spec.ts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts
index 0911b81a1..547886d31 100644
--- a/packages/gguf/src/types.spec.ts
+++ b/packages/gguf/src/types.spec.ts
@@ -5,7 +5,7 @@ import type { GGUFMetadata, GGUFParseOutput } from "./types";
 describe("gguf-types", () => {
 	it("gguf() type can be casted between STRICT and NON_STRICT (at compile time)", async () => {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const result: Awaited<ReturnType<typeof gguf>> = null as any;
+		const result: Awaited<ReturnType<typeof gguf>> = {} as any;
 		const strictType = result as GGUFParseOutput<{ strict: true }>;
 		// @ts-expect-error because the key "abc" does not exist
 		strictType.metadata.abc = 123;
@@ -17,14 +17,14 @@ describe("gguf-types", () => {
 
 	it("GGUFType.NON_STRICT should be correct (at compile time)", async () => {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const model: GGUFMetadata<{ strict: false }> = null as any;
+		const model: GGUFMetadata<{ strict: false }> = {} as any;
 		model.kv_count = 123n;
 		model.abc = 456; // PASS, because it can be anything
 	});
 
 	it("GGUFType.STRICT should be correct (at compile time)", async () => {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const model: GGUFMetadata<{ strict: true }> = null as any;
+		const model: GGUFMetadata<{ strict: true }> = {} as any;
 
 		if (model["general.architecture"] === "whisper") {
 			model["encoder.whisper.block_count"] = 0;

From a2250d3b5502727eb9fa14cb2d7ce10880183510 Mon Sep 17 00:00:00 2001
From: ngxson <thichthat@gmail.com>
Date: Tue, 7 May 2024 15:19:40 +0200
Subject: [PATCH 10/10] fix CI (2): add metadata object to gguf() test placeholder

---
 packages/gguf/src/types.spec.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts
index 547886d31..9d20bfa8c 100644
--- a/packages/gguf/src/types.spec.ts
+++ b/packages/gguf/src/types.spec.ts
@@ -5,7 +5,7 @@ import type { GGUFMetadata, GGUFParseOutput } from "./types";
 describe("gguf-types", () => {
 	it("gguf() type can be casted between STRICT and NON_STRICT (at compile time)", async () => {
 		// eslint-disable-next-line @typescript-eslint/no-explicit-any
-		const result: Awaited<ReturnType<typeof gguf>> = {} as any;
+		const result: Awaited<ReturnType<typeof gguf>> = { metadata: {} } as any;
 		const strictType = result as GGUFParseOutput<{ strict: true }>;
 		// @ts-expect-error because the key "abc" does not exist
 		strictType.metadata.abc = 123;