From e0610178cd93fa2051edba529c989bb11030a3db Mon Sep 17 00:00:00 2001 From: ngxson Date: Sat, 4 May 2024 22:52:44 +0200 Subject: [PATCH 01/10] refactor typing + usage --- packages/gguf/scripts/generate-llm.ts | 57 ++++++++++++---------- packages/gguf/src/transformer-llm.ts | 70 +++++++++++++++------------ packages/gguf/src/types.ts | 37 +++++++++----- 3 files changed, 95 insertions(+), 69 deletions(-) diff --git a/packages/gguf/scripts/generate-llm.ts b/packages/gguf/scripts/generate-llm.ts index 4da57c46e..6cbdb3798 100644 --- a/packages/gguf/scripts/generate-llm.ts +++ b/packages/gguf/scripts/generate-llm.ts @@ -8,28 +8,39 @@ import { writeFileSync } from "node:fs"; const SOURCE_CPP_URL = "https://raw.githubusercontent.com/ggerganov/llama.cpp/master/llama.cpp"; const DEST_FILE_PATH = "./src/transformer-llm.ts"; const DEST_COMMON_SOURCE = ` -type Attention = - & { [K in \`\${TArchitecture}.attention.head_count\`]: number } - & { [K in \`\${TArchitecture}.attention.head_count_kv\`]: number } - & { [K in \`\${TArchitecture}.attention.layer_norm_epsilon\`]: number } - & { [K in \`\${TArchitecture}.attention.layer_norm_rms_epsilon\`]: number } - & { [K in \`\${TArchitecture}.attention.alibi_bias_max\`]: number } - & { [K in \`\${TArchitecture}.attention.clip_kqv\`]: number } - & { [K in \`\${TArchitecture}.attention.use_norm\`]: number }; - -type Rope = - & { [K in \`\${TArchitecture}.rope.dimension_count\`]: number } - & { [K in \`\${TArchitecture}.rope.freq_base\`]: number } - & { [K in \`\${TArchitecture}.rope.scale\`]: number } - & { [K in \`\${TArchitecture}.rope.scale_linear\`]: number }; - -type MOE = - & { [K in \`\${TArchitecture}.expert_count\`]: number } - & { [K in \`\${TArchitecture}.expert_used_count\`]: number }; +/** This file is auto-generated by generate-llm.ts */ + +import type { ModelBase, GGUFGeneralInfo } from "./types"; + +type Attention = Record< + | \`\${TArchitecture}.attention.head_count\` + | \`\${TArchitecture}.attention.head_count_kv\` + | \`\${TArchitecture}.attention.layer_norm_epsilon\` + | \`\${TArchitecture}.attention.layer_norm_rms_epsilon\` + | \`\${TArchitecture}.attention.alibi_bias_max\` + | \`\${TArchitecture}.attention.clip_kqv\` + | \`\${TArchitecture}.attention.use_norm\`, + number +>; + +type Rope = Record< + | \`\${TArchitecture}.rope.dimension_count\` + | \`\${TArchitecture}.rope.freq_base\` + | \`\${TArchitecture}.rope.scale\` + | \`\${TArchitecture}.rope.scale_linear\`, + number +>; + +type MOE = Record< + | \`\${TArchitecture}.expert_count\` + | \`\${TArchitecture}.expert_used_count\`, + number +>; export type TransformerLLMArchitecture = LLMArchitecture; // type alias -export type TransformerLLMBase = ModelBase - & MOE +export type TransformerLLMBase = GGUFGeneralInfo + & ModelBase + & Partial> & Attention & Rope; @@ -163,15 +174,11 @@ async function main() { ///////////////////////////////////// // write result to file const content = [ - "/** This file is auto-generated by generate-llm.ts */", - "", - 'import type { ModelBase } from "./types";', - "", + DEST_COMMON_SOURCE, "export const LLM_ARCHITECTURES = [", ...archList.map((a) => `\t${JSON.stringify(a.name)},`), "] as const;", "type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];", - DEST_COMMON_SOURCE, ...archList.map((a) => { let code = `export type ${a.tsName} = TransformerLLMBase<${JSON.stringify(a.name)}>`; if (a.hparams.length) { diff --git a/packages/gguf/src/transformer-llm.ts b/packages/gguf/src/transformer-llm.ts index a9ef34296..aea40459e 100644 --- a/packages/gguf/src/transformer-llm.ts +++ b/packages/gguf/src/transformer-llm.ts @@ -1,6 +1,44 @@ /** This file is auto-generated by generate-llm.ts */ -import type { ModelBase } from "./types"; +import type { ModelBase, GGUFGeneralInfo } from "./types"; + +type Attention = Record< + | `${TArchitecture}.attention.head_count` + | `${TArchitecture}.attention.head_count_kv` + | `${TArchitecture}.attention.layer_norm_epsilon` + | `${TArchitecture}.attention.layer_norm_rms_epsilon` + | `${TArchitecture}.attention.alibi_bias_max` + | `${TArchitecture}.attention.clip_kqv` + | `${TArchitecture}.attention.use_norm`, + number +>; + +type Rope = Record< + | `${TArchitecture}.rope.dimension_count` + | `${TArchitecture}.rope.freq_base` + | `${TArchitecture}.rope.scale` + | `${TArchitecture}.rope.scale_linear`, + number +>; + +type MOE = Record< + `${TArchitecture}.expert_count` | `${TArchitecture}.expert_used_count`, + number +>; + +export type TransformerLLMArchitecture = LLMArchitecture; // type alias +export type TransformerLLMBase = GGUFGeneralInfo & + ModelBase & + Partial> & + Attention & + Rope; + +export enum TransformerLLMPoolingType { + UNSPECIFIED = -1, + NONE = 0, + MEAN = 1, + CLS = 2, +} export const LLM_ARCHITECTURES = [ "llama", @@ -37,36 +75,6 @@ export const LLM_ARCHITECTURES = [ "olmo", ] as const; type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number]; - -type Attention = { [K in `${TArchitecture}.attention.head_count`]: number } & { - [K in `${TArchitecture}.attention.head_count_kv`]: number; -} & { [K in `${TArchitecture}.attention.layer_norm_epsilon`]: number } & { - [K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number; -} & { [K in `${TArchitecture}.attention.alibi_bias_max`]: number } & { - [K in `${TArchitecture}.attention.clip_kqv`]: number; -} & { [K in `${TArchitecture}.attention.use_norm`]: number }; - -type Rope = { [K in `${TArchitecture}.rope.dimension_count`]: number } & { - [K in `${TArchitecture}.rope.freq_base`]: number; -} & { [K in `${TArchitecture}.rope.scale`]: number } & { [K in `${TArchitecture}.rope.scale_linear`]: number }; - -type MOE = { [K in `${TArchitecture}.expert_count`]: number } & { - [K in `${TArchitecture}.expert_used_count`]: number; -}; - -export type TransformerLLMArchitecture = LLMArchitecture; // type alias -export type TransformerLLMBase = ModelBase & - MOE & - Attention & - Rope; - -export enum TransformerLLMPoolingType { - UNSPECIFIED = -1, - NONE = 0, - MEAN = 1, - CLS = 2, -} - export type ArchLlama = TransformerLLMBase<"llama"> & { "llama.attention.layer_norm_rms_epsilon": number; }; diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts index 69405d66a..0bba630a6 100644 --- a/packages/gguf/src/types.ts +++ b/packages/gguf/src/types.ts @@ -50,11 +50,11 @@ export enum GGUFValueType { const ARCHITECTURES = [...LLM_ARCHITECTURES, "rwkv", "whisper"] as const; export type Architecture = (typeof ARCHITECTURES)[number]; -interface General { - "general.architecture": Architecture; +export interface GGUFGeneralInfo { + "general.architecture": TArchitecture; "general.name": string; - "general.file_type": number; - "general.quantization_version": number; + "general.file_type"?: number; + "general.quantization_version"?: number; } export type ModelBase< @@ -62,9 +62,16 @@ export type ModelBase< | Architecture | `encoder.${Extract}` | `decoder.${Extract}`, -> = { [K in `${TArchitecture}.layer_count`]: number } & { [K in `${TArchitecture}.feed_forward_length`]: number } & { - [K in `${TArchitecture}.context_length`]: number; -} & { [K in `${TArchitecture}.embedding_length`]: number } & { [K in `${TArchitecture}.block_count`]: number }; +> = Record< + | `${TArchitecture}.layer_count` + | `${TArchitecture}.feed_forward_length` + | `${TArchitecture}.context_length` + | `${TArchitecture}.embedding_length` + | `${TArchitecture}.block_count`, + number +>; + +/// Tokenizer type TokenizerModel = "no_vocab" | "llama" | "gpt2" | "bert"; interface Tokenizer { @@ -77,18 +84,22 @@ interface Tokenizer { "tokenizer.ggml.add_bos_token": boolean; "tokenizer.chat_template": string; } +type NoTokenizer = Record; + +/// Models outside of llama.cpp: "rwkv" and "whisper" + +export type RWKV = GGUFGeneralInfo<"rwkv"> & ModelBase<"rwkv"> & { "rwkv.architecture_version": number }; + +export type Whisper = GGUFGeneralInfo<"whisper"> & ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">; -export type RWKV = ModelBase<"rwkv"> & { "rwkv.architecture_version": number }; -export type LLM = TransformerLLM | RWKV; -export type Whisper = ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">; -export type Model = (LLM | Whisper) & Partial; +/// Types for parse output export type GGUFMetadata = { version: Version; tensor_count: bigint; kv_count: bigint; -} & Partial & - Partial & +} & (Whisper | RWKV | TransformerLLM) & + (NoTokenizer | Tokenizer) & Record; export interface GGUFTensorInfo { From 9a9e77100a99baf3cbc6ac070ea9ed4ca05d06ef Mon Sep 17 00:00:00 2001 From: ngxson Date: Sun, 5 May 2024 12:10:24 +0200 Subject: [PATCH 02/10] correct optional fields --- packages/gguf/scripts/generate-llm.ts | 53 +++++++++++++++++---------- packages/gguf/src/transformer-llm.ts | 50 +++++++++++++++---------- packages/gguf/src/types.ts | 25 +++++++++---- 3 files changed, 82 insertions(+), 46 deletions(-) diff --git a/packages/gguf/scripts/generate-llm.ts b/packages/gguf/scripts/generate-llm.ts index 6cbdb3798..2ad1d46b9 100644 --- a/packages/gguf/scripts/generate-llm.ts +++ b/packages/gguf/scripts/generate-llm.ts @@ -12,35 +12,50 @@ const DEST_COMMON_SOURCE = ` import type { ModelBase, GGUFGeneralInfo } from "./types"; -type Attention = Record< - | \`\${TArchitecture}.attention.head_count\` - | \`\${TArchitecture}.attention.head_count_kv\` - | \`\${TArchitecture}.attention.layer_norm_epsilon\` - | \`\${TArchitecture}.attention.layer_norm_rms_epsilon\` - | \`\${TArchitecture}.attention.alibi_bias_max\` - | \`\${TArchitecture}.attention.clip_kqv\` - | \`\${TArchitecture}.attention.use_norm\`, +type LLMBase = Partial; +>>; -type Rope = Record< - | \`\${TArchitecture}.rope.dimension_count\` - | \`\${TArchitecture}.rope.freq_base\` - | \`\${TArchitecture}.rope.scale\` - | \`\${TArchitecture}.rope.scale_linear\`, +type Attention = Record< + \`\${TArchitecture}.attention.head_count\`, number +> & Partial>; + +type RopeScalingType = "none" | "linear" | "yarn"; +type Rope = Partial< + Record< + \`\${TArchitecture}.rope.dimension_count\` + | \`\${TArchitecture}.rope.freq_base\` + | \`\${TArchitecture}.rope.scale_linear\` + | \`\${TArchitecture}.rope.scaling.factor\` + | \`\${TArchitecture}.rope.scaling.original_context_length\`, + number + > + & Record<\`\${TArchitecture}.rope.scaling.type\`, RopeScalingType> + & Record<\`\${TArchitecture}.rope.finetuned\`, boolean> >; -type MOE = Record< - | \`\${TArchitecture}.expert_count\` - | \`\${TArchitecture}.expert_used_count\`, - number +type MOE = Partial< + Record< + \`\${TArchitecture}.expert_count\` + | \`\${TArchitecture}.expert_used_count\`, + number + > >; export type TransformerLLMArchitecture = LLMArchitecture; // type alias export type TransformerLLMBase = GGUFGeneralInfo + & LLMBase & ModelBase - & Partial> + & MOE & Attention & Rope; diff --git a/packages/gguf/src/transformer-llm.ts b/packages/gguf/src/transformer-llm.ts index aea40459e..1449125b8 100644 --- a/packages/gguf/src/transformer-llm.ts +++ b/packages/gguf/src/transformer-llm.ts @@ -2,34 +2,46 @@ import type { ModelBase, GGUFGeneralInfo } from "./types"; -type Attention = Record< - | `${TArchitecture}.attention.head_count` - | `${TArchitecture}.attention.head_count_kv` - | `${TArchitecture}.attention.layer_norm_epsilon` - | `${TArchitecture}.attention.layer_norm_rms_epsilon` - | `${TArchitecture}.attention.alibi_bias_max` - | `${TArchitecture}.attention.clip_kqv` - | `${TArchitecture}.attention.use_norm`, - number +type LLMBase = Partial< + Record< + `${TArchitecture}.vocab_size` | `${TArchitecture}.use_parallel_residual` | `${TArchitecture}.tensor_data_layout`, + number + > >; -type Rope = Record< - | `${TArchitecture}.rope.dimension_count` - | `${TArchitecture}.rope.freq_base` - | `${TArchitecture}.rope.scale` - | `${TArchitecture}.rope.scale_linear`, - number +type Attention = Record<`${TArchitecture}.attention.head_count`, number> & + Partial< + Record< + | `${TArchitecture}.attention.head_count_kv` + | `${TArchitecture}.attention.key_length` + | `${TArchitecture}.attention.value_length`, + number + > + >; + +type RopeScalingType = "none" | "linear" | "yarn"; +type Rope = Partial< + Record< + | `${TArchitecture}.rope.dimension_count` + | `${TArchitecture}.rope.freq_base` + | `${TArchitecture}.rope.scale_linear` + | `${TArchitecture}.rope.scaling.factor` + | `${TArchitecture}.rope.scaling.original_context_length`, + number + > & + Record<`${TArchitecture}.rope.scaling.type`, RopeScalingType> & + Record<`${TArchitecture}.rope.finetuned`, boolean> >; -type MOE = Record< - `${TArchitecture}.expert_count` | `${TArchitecture}.expert_used_count`, - number +type MOE = Partial< + Record<`${TArchitecture}.expert_count` | `${TArchitecture}.expert_used_count`, number> >; export type TransformerLLMArchitecture = LLMArchitecture; // type alias export type TransformerLLMBase = GGUFGeneralInfo & + LLMBase & ModelBase & - Partial> & + MOE & Attention & Rope; diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts index 0bba630a6..7a13b44d3 100644 --- a/packages/gguf/src/types.ts +++ b/packages/gguf/src/types.ts @@ -52,7 +52,7 @@ export type Architecture = (typeof ARCHITECTURES)[number]; export interface GGUFGeneralInfo { "general.architecture": TArchitecture; - "general.name": string; + "general.name"?: string; "general.file_type"?: number; "general.quantization_version"?: number; } @@ -63,11 +63,10 @@ export type ModelBase< | `encoder.${Extract}` | `decoder.${Extract}`, > = Record< - | `${TArchitecture}.layer_count` - | `${TArchitecture}.feed_forward_length` | `${TArchitecture}.context_length` + | `${TArchitecture}.block_count` | `${TArchitecture}.embedding_length` - | `${TArchitecture}.block_count`, + | `${TArchitecture}.feed_forward_length`, number >; @@ -82,15 +81,25 @@ interface Tokenizer { "tokenizer.ggml.bos_token_id": number; "tokenizer.ggml.eos_token_id": number; "tokenizer.ggml.add_bos_token": boolean; - "tokenizer.chat_template": string; + "tokenizer.chat_template"?: string; } type NoTokenizer = Record; /// Models outside of llama.cpp: "rwkv" and "whisper" -export type RWKV = GGUFGeneralInfo<"rwkv"> & ModelBase<"rwkv"> & { "rwkv.architecture_version": number }; - -export type Whisper = GGUFGeneralInfo<"whisper"> & ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">; +export type RWKV = GGUFGeneralInfo<"rwkv"> & + ModelBase<"rwkv"> & { + "rwkv.architecture_version": number; + }; + +// TODO: whisper.cpp doesn't yet support gguf. This maybe changed in the future. +export type Whisper = GGUFGeneralInfo<"whisper"> & + ModelBase<"encoder.whisper"> & + ModelBase<"decoder.whisper"> & { + "whisper.encoder.mels_count": number; + "whisper.encoder.attention.head_count": number; + "whisper.decoder.attention.head_count": number; + }; /// Types for parse output From 6d704bc6927f4ed89485c8ef1433cc766c7e7f38 Mon Sep 17 00:00:00 2001 From: ngxson Date: Sun, 5 May 2024 13:10:19 +0200 Subject: [PATCH 03/10] add non-strict & strict typing mode --- packages/gguf/src/gguf.spec.ts | 33 ++++++++++++++------------ packages/gguf/src/gguf.ts | 3 +++ packages/gguf/src/types.spec.ts | 42 +++++++++++++++++++++++++++++++++ packages/gguf/src/types.ts | 24 +++++++++++++------ 4 files changed, 80 insertions(+), 22 deletions(-) create mode 100644 packages/gguf/src/types.spec.ts diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index 2e6f2c21a..84604348d 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -37,22 +37,25 @@ describe("gguf", () => { "llama.rope.dimension_count": 128, }); - const tokens = metadata["tokenizer.ggml.tokens"]; - if (!Array.isArray(tokens)) { - throw new Error(); + expect(!!metadata["tokenizer.ggml.model"]); + if (!!metadata["tokenizer.ggml.model"]) { + const tokens = metadata["tokenizer.ggml.tokens"]; + if (!Array.isArray(tokens)) { + throw new Error(); + } + expect(tokens.slice(0, 10)).toEqual([ + "", + "", + "", + "<0x00>", + "<0x01>", + "<0x02>", + "<0x03>", + "<0x04>", + "<0x05>", + "<0x06>", + ]); } - expect(tokens.slice(0, 10)).toEqual([ - "", - "", - "", - "<0x00>", - "<0x01>", - "<0x02>", - "<0x03>", - "<0x04>", - "<0x05>", - "<0x06>", - ]); /// Tensor infos /// By convention we test the first and last tensor. diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 25f41252f..d39fd367b 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -308,6 +308,9 @@ export async function gguf( } } offset += valueResult.length; + /// TODO(fix typing) + // eslint-disable-next-line @typescript-eslint/ban-ts-comment + // @ts-ignore metadata[keyResult.value] = valueResult.value; } diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts new file mode 100644 index 000000000..a4afb699e --- /dev/null +++ b/packages/gguf/src/types.spec.ts @@ -0,0 +1,42 @@ +import { describe, it } from "vitest"; +import { GGUFStrictType, GGUFMetadata, GGUFNonStrictType } from "./types"; + +describe("gguf-types", () => { + it("GGUFNonStrictType should be correct (at compile time)", async () => { + const model: GGUFMetadata = null as any; + model.kv_count = 123n; + model.abc = 456; // PASS, because it can be anything + }); + + it("GGUFStrictType should be correct (at compile time)", async () => { + const model: GGUFMetadata = null as any; + + if (model["general.architecture"] === "whisper") { + model["encoder.whisper.block_count"] = 0; + // @ts-expect-error + model["encoder.whisper.block_count"] = "abc"; // error, because it must be a number + } + + if (model["tokenizer.ggml.model"] === undefined) { + // @ts-expect-error + model["tokenizer.ggml.eos_token_id"] = 1; // error, because it's undefined + } + if (model["tokenizer.ggml.model"] === "gpt2") { + // @ts-expect-error + model["tokenizer.ggml.eos_token_id"] = undefined; // error, because it must be a number + model["tokenizer.ggml.eos_token_id"] = 1; + } + + if (model["general.architecture"] === "mamba") { + model["mamba.ssm.conv_kernel"] = 0; + // @ts-expect-error + model["mamba.ssm.conv_kernel"] = "abc"; // error, because it must be a number + } + if (model["general.architecture"] === "llama") { + // @ts-expect-error + model["mamba.ssm.conv_kernel"] = 0; + // @ts-expect-error + model["mamba.ssm.conv_kernel"] = "abc"; // PASS, because it can be anything + } + }); +}); diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts index 7a13b44d3..1a13ea70e 100644 --- a/packages/gguf/src/types.ts +++ b/packages/gguf/src/types.ts @@ -57,6 +57,11 @@ export interface GGUFGeneralInfo { "general.quantization_version"?: number; } +type ModelMetadata = Whisper | RWKV | TransformerLLM; +type NoModelMetadata = { + "general.architecture"?: undefined, +}; + export type ModelBase< TArchitecture extends | Architecture @@ -83,7 +88,9 @@ interface Tokenizer { "tokenizer.ggml.add_bos_token": boolean; "tokenizer.chat_template"?: string; } -type NoTokenizer = Record; +type NoTokenizer = { + "tokenizer.ggml.model"?: undefined, +}; /// Models outside of llama.cpp: "rwkv" and "whisper" @@ -103,13 +110,16 @@ export type Whisper = GGUFGeneralInfo<"whisper"> & /// Types for parse output -export type GGUFMetadata = { +export type GGUFStrictType = true; +export type GGUFNonStrictType = false; + +export type GGUFMetadata = { version: Version; tensor_count: bigint; kv_count: bigint; -} & (Whisper | RWKV | TransformerLLM) & - (NoTokenizer | Tokenizer) & - Record; +} & (T extends GGUFStrictType ? GGUFModelKV : Record); + +export type GGUFModelKV = (NoModelMetadata | ModelMetadata) & (NoTokenizer | Tokenizer); export interface GGUFTensorInfo { name: string; @@ -119,7 +129,7 @@ export interface GGUFTensorInfo { offset: bigint; } -export interface GGUFParseOutput { - metadata: GGUFMetadata; +export interface GGUFParseOutput { + metadata: GGUFMetadata; tensorInfos: GGUFTensorInfo[]; } From 87677264d9d23a1d388695dbe06484eafeaa0ac7 Mon Sep 17 00:00:00 2001 From: ngxson Date: Sun, 5 May 2024 13:14:16 +0200 Subject: [PATCH 04/10] lint & format --- packages/gguf/src/gguf.spec.ts | 4 ++-- packages/gguf/src/types.spec.ts | 30 +++++++++++++++--------------- packages/gguf/src/types.ts | 12 ++++++------ 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index 84604348d..04ceda7ab 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -37,8 +37,8 @@ describe("gguf", () => { "llama.rope.dimension_count": 128, }); - expect(!!metadata["tokenizer.ggml.model"]); - if (!!metadata["tokenizer.ggml.model"]) { + expect(metadata["tokenizer.ggml.model"]); + if (metadata["tokenizer.ggml.model"]) { const tokens = metadata["tokenizer.ggml.tokens"]; if (!Array.isArray(tokens)) { throw new Error(); diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts index a4afb699e..617db7e37 100644 --- a/packages/gguf/src/types.spec.ts +++ b/packages/gguf/src/types.spec.ts @@ -1,42 +1,42 @@ import { describe, it } from "vitest"; -import { GGUFStrictType, GGUFMetadata, GGUFNonStrictType } from "./types"; +import type { GGUFStrictType, GGUFMetadata, GGUFNonStrictType } from "./types"; describe("gguf-types", () => { - it("GGUFNonStrictType should be correct (at compile time)", async () => { + it("GGUFNonStrictType should be correct (at compile time)", async () => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any const model: GGUFMetadata = null as any; - model.kv_count = 123n; - model.abc = 456; // PASS, because it can be anything + model.kv_count = 123n; + model.abc = 456; // PASS, because it can be anything }); it("GGUFStrictType should be correct (at compile time)", async () => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any const model: GGUFMetadata = null as any; if (model["general.architecture"] === "whisper") { model["encoder.whisper.block_count"] = 0; - // @ts-expect-error - model["encoder.whisper.block_count"] = "abc"; // error, because it must be a number + // @ts-expect-error because it must be a number + model["encoder.whisper.block_count"] = "abc"; } if (model["tokenizer.ggml.model"] === undefined) { - // @ts-expect-error - model["tokenizer.ggml.eos_token_id"] = 1; // error, because it's undefined + // @ts-expect-error because it's undefined + model["tokenizer.ggml.eos_token_id"] = 1; } if (model["tokenizer.ggml.model"] === "gpt2") { - // @ts-expect-error - model["tokenizer.ggml.eos_token_id"] = undefined; // error, because it must be a number + // @ts-expect-error because it must be a number + model["tokenizer.ggml.eos_token_id"] = undefined; model["tokenizer.ggml.eos_token_id"] = 1; } if (model["general.architecture"] === "mamba") { model["mamba.ssm.conv_kernel"] = 0; - // @ts-expect-error - model["mamba.ssm.conv_kernel"] = "abc"; // error, because it must be a number + // @ts-expect-error because it must be a number + model["mamba.ssm.conv_kernel"] = "abc"; } if (model["general.architecture"] === "llama") { - // @ts-expect-error + // @ts-expect-error llama does not have ssm.* keys model["mamba.ssm.conv_kernel"] = 0; - // @ts-expect-error - model["mamba.ssm.conv_kernel"] = "abc"; // PASS, because it can be anything } }); }); diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts index 1a13ea70e..6089cb1db 100644 --- a/packages/gguf/src/types.ts +++ b/packages/gguf/src/types.ts @@ -58,9 +58,9 @@ export interface GGUFGeneralInfo { } type ModelMetadata = Whisper | RWKV | TransformerLLM; -type NoModelMetadata = { - "general.architecture"?: undefined, -}; +interface NoModelMetadata { + "general.architecture"?: undefined; +} export type ModelBase< TArchitecture extends @@ -88,9 +88,9 @@ interface Tokenizer { "tokenizer.ggml.add_bos_token": boolean; "tokenizer.chat_template"?: string; } -type NoTokenizer = { - "tokenizer.ggml.model"?: undefined, -}; +interface NoTokenizer { + "tokenizer.ggml.model"?: undefined; +} /// Models outside of llama.cpp: "rwkv" and "whisper" From c2afbdc080980c81ad37a933dfcea840dde9d3bc Mon Sep 17 00:00:00 2001 From: ngxson Date: Mon, 6 May 2024 11:54:39 +0200 Subject: [PATCH 05/10] cast between strict & nonStrict --- packages/gguf/scripts/generate-llm.ts | 4 ++-- packages/gguf/src/transformer-llm.ts | 4 ++-- packages/gguf/src/types.spec.ts | 23 ++++++++++++++++++----- packages/gguf/src/types.ts | 15 +++++++++------ 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/packages/gguf/scripts/generate-llm.ts b/packages/gguf/scripts/generate-llm.ts index 2ad1d46b9..6ac73149a 100644 --- a/packages/gguf/scripts/generate-llm.ts +++ b/packages/gguf/scripts/generate-llm.ts @@ -29,7 +29,7 @@ type Attention = Record< number >>; -type RopeScalingType = "none" | "linear" | "yarn"; +export type TransformerLLMRopeScalingType = "none" | "linear" | "yarn"; type Rope = Partial< Record< \`\${TArchitecture}.rope.dimension_count\` @@ -39,7 +39,7 @@ type Rope = Partial< | \`\${TArchitecture}.rope.scaling.original_context_length\`, number > - & Record<\`\${TArchitecture}.rope.scaling.type\`, RopeScalingType> + & Record<\`\${TArchitecture}.rope.scaling.type\`, TransformerLLMRopeScalingType> & Record<\`\${TArchitecture}.rope.finetuned\`, boolean> >; diff --git a/packages/gguf/src/transformer-llm.ts b/packages/gguf/src/transformer-llm.ts index 1449125b8..8bad0261e 100644 --- a/packages/gguf/src/transformer-llm.ts +++ b/packages/gguf/src/transformer-llm.ts @@ -19,7 +19,7 @@ type Attention = Record<`${TArchitecture}.attentio > >; -type RopeScalingType = "none" | "linear" | "yarn"; +export type TransformerLLMRopeScalingType = "none" | "linear" | "yarn"; type Rope = Partial< Record< | `${TArchitecture}.rope.dimension_count` @@ -29,7 +29,7 @@ type Rope = Partial< | `${TArchitecture}.rope.scaling.original_context_length`, number > & - Record<`${TArchitecture}.rope.scaling.type`, RopeScalingType> & + Record<`${TArchitecture}.rope.scaling.type`, TransformerLLMRopeScalingType> & Record<`${TArchitecture}.rope.finetuned`, boolean> >; diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts index 617db7e37..7c5d3787b 100644 --- a/packages/gguf/src/types.spec.ts +++ b/packages/gguf/src/types.spec.ts @@ -1,17 +1,30 @@ import { describe, it } from "vitest"; -import type { GGUFStrictType, GGUFMetadata, GGUFNonStrictType } from "./types"; +import type { gguf } from "./gguf"; +import type { GGUFMetadata, GGUFParseOutput, GGUFType } from "./types"; describe("gguf-types", () => { - it("GGUFNonStrictType should be correct (at compile time)", async () => { + it("gguf() type can be casted (at compile time)", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any - const model: GGUFMetadata = null as any; + const result: Awaited> = null as any; + const strictType = result as GGUFParseOutput; + // @ts-expect-error because the key "abc" does not exist + strictType.metadata.abc = 123; + const nonStrictType = result as GGUFParseOutput; + nonStrictType.metadata.abc = 123; // PASS, because it can be anything + // @ts-expect-error because ArrayBuffer is not a MetadataValue + nonStrictType.metadata.fff = ArrayBuffer; + }); + + it("GGUFType.nonStrict should be correct (at compile time)", async () => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const model: GGUFMetadata = null as any; model.kv_count = 123n; model.abc = 456; // PASS, because it can be anything }); - it("GGUFStrictType should be correct (at compile time)", async () => { + it("GGUFType.strict should be correct (at compile time)", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any - const model: GGUFMetadata = null as any; + const model: GGUFMetadata = null as any; if (model["general.architecture"] === "whisper") { model["encoder.whisper.block_count"] = 0; diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts index 6089cb1db..b96b899ef 100644 --- a/packages/gguf/src/types.ts +++ b/packages/gguf/src/types.ts @@ -110,14 +110,17 @@ export type Whisper = GGUFGeneralInfo<"whisper"> & /// Types for parse output -export type GGUFStrictType = true; -export type GGUFNonStrictType = false; +export enum GGUFType { + strict, + nonStrict, +} -export type GGUFMetadata = { +export type GGUFMetadata = { version: Version; tensor_count: bigint; kv_count: bigint; -} & (T extends GGUFStrictType ? GGUFModelKV : Record); +} & GGUFModelKV & + (TGGUFType extends GGUFType.strict ? unknown : Record); export type GGUFModelKV = (NoModelMetadata | ModelMetadata) & (NoTokenizer | Tokenizer); @@ -129,7 +132,7 @@ export interface GGUFTensorInfo { offset: bigint; } -export interface GGUFParseOutput { - metadata: GGUFMetadata; +export interface GGUFParseOutput { + metadata: GGUFMetadata; tensorInfos: GGUFTensorInfo[]; } From 5f547dd882ad07dad612225f6b8193845fb318a9 Mon Sep 17 00:00:00 2001 From: ngxson Date: Mon, 6 May 2024 12:16:00 +0200 Subject: [PATCH 06/10] style nits --- packages/gguf/src/types.spec.ts | 14 +++++++------- packages/gguf/src/types.ts | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts index 7c5d3787b..693b47159 100644 --- a/packages/gguf/src/types.spec.ts +++ b/packages/gguf/src/types.spec.ts @@ -3,28 +3,28 @@ import type { gguf } from "./gguf"; import type { GGUFMetadata, GGUFParseOutput, GGUFType } from "./types"; describe("gguf-types", () => { - it("gguf() type can be casted (at compile time)", async () => { + it("gguf() type can be casted between STRICT and NON_STRICT (at compile time)", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any const result: Awaited> = null as any; - const strictType = result as GGUFParseOutput; + const strictType = result as GGUFParseOutput; // @ts-expect-error because the key "abc" does not exist strictType.metadata.abc = 123; - const nonStrictType = result as GGUFParseOutput; + const nonStrictType = result as GGUFParseOutput; nonStrictType.metadata.abc = 123; // PASS, because it can be anything // @ts-expect-error because ArrayBuffer is not a MetadataValue nonStrictType.metadata.fff = ArrayBuffer; }); - it("GGUFType.nonStrict should be correct (at compile time)", async () => { + it("GGUFType.NON_STRICT should be correct (at compile time)", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any - const model: GGUFMetadata = null as any; + const model: GGUFMetadata = null as any; model.kv_count = 123n; model.abc = 456; // PASS, because it can be anything }); - it("GGUFType.strict should be correct (at compile time)", async () => { + it("GGUFType.STRICT should be correct (at compile time)", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any - const model: GGUFMetadata = null as any; + const model: GGUFMetadata = null as any; if (model["general.architecture"] === "whisper") { model["encoder.whisper.block_count"] = 0; diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts index b96b899ef..6b8e76595 100644 --- a/packages/gguf/src/types.ts +++ b/packages/gguf/src/types.ts @@ -111,16 +111,16 @@ export type Whisper = GGUFGeneralInfo<"whisper"> & /// Types for parse output export enum GGUFType { - strict, - nonStrict, + STRICT, + NON_STRICT, } -export type GGUFMetadata = { +export type GGUFMetadata = { version: Version; tensor_count: bigint; kv_count: bigint; } & GGUFModelKV & - (TGGUFType extends GGUFType.strict ? unknown : Record); + (TGGUFType extends GGUFType.STRICT ? unknown : Record); export type GGUFModelKV = (NoModelMetadata | ModelMetadata) & (NoTokenizer | Tokenizer); @@ -132,7 +132,7 @@ export interface GGUFTensorInfo { offset: bigint; } -export interface GGUFParseOutput { +export interface GGUFParseOutput { metadata: GGUFMetadata; tensorInfos: GGUFTensorInfo[]; } From 31bac8bbba365e13c517c48452fdee1d5843abd3 Mon Sep 17 00:00:00 2001 From: ngxson Date: Tue, 7 May 2024 15:15:10 +0200 Subject: [PATCH 07/10] refactor options --- packages/gguf/src/gguf.ts | 5 +---- packages/gguf/src/types.spec.ts | 10 +++++----- packages/gguf/src/types.ts | 18 +++++++++++------- 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index d39fd367b..775623eae 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -273,7 +273,7 @@ export async function gguf( offset += tensorCount.length; const numKv = readVersionedSize(r.view, offset, version, littleEndian); offset += numKv.length; - const metadata: GGUFMetadata = { + const metadata: GGUFMetadata<{ strict: false }> = { version, tensor_count: tensorCount.value, kv_count: numKv.value, @@ -308,9 +308,6 @@ export async function gguf( } } offset += valueResult.length; - /// TODO(fix typing) - // eslint-disable-next-line @typescript-eslint/ban-ts-comment - // @ts-ignore metadata[keyResult.value] = valueResult.value; } diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts index 693b47159..0911b81a1 100644 --- a/packages/gguf/src/types.spec.ts +++ b/packages/gguf/src/types.spec.ts @@ -1,15 +1,15 @@ import { describe, it } from "vitest"; import type { gguf } from "./gguf"; -import type { GGUFMetadata, GGUFParseOutput, GGUFType } from "./types"; +import type { GGUFMetadata, GGUFParseOutput } from "./types"; describe("gguf-types", () => { it("gguf() type can be casted between STRICT and NON_STRICT (at compile time)", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any const result: Awaited> = null as any; - const strictType = result as GGUFParseOutput; + const strictType = result as GGUFParseOutput<{ strict: true }>; // @ts-expect-error because the key "abc" does not exist strictType.metadata.abc = 123; - const nonStrictType = result as GGUFParseOutput; + const nonStrictType = result as GGUFParseOutput<{ strict: false }>; nonStrictType.metadata.abc = 123; // PASS, because it can be anything // @ts-expect-error because ArrayBuffer is not a MetadataValue nonStrictType.metadata.fff = ArrayBuffer; @@ -17,14 +17,14 @@ describe("gguf-types", () => { it("GGUFType.NON_STRICT should be correct (at compile time)", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any - const model: GGUFMetadata = null as any; + const model: GGUFMetadata<{ strict: false }> = null as any; model.kv_count = 123n; model.abc = 456; // PASS, because it can be anything }); it("GGUFType.STRICT should be correct (at compile time)", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any - const model: GGUFMetadata = null as any; + const model: GGUFMetadata<{ strict: true }> = null as any; if (model["general.architecture"] === "whisper") { model["encoder.whisper.block_count"] = 0; diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts index 6b8e76595..e0a75eaf4 100644 --- a/packages/gguf/src/types.ts +++ b/packages/gguf/src/types.ts @@ -110,17 +110,21 @@ export type Whisper = GGUFGeneralInfo<"whisper"> & /// Types for parse output -export enum GGUFType { - STRICT, - NON_STRICT, +export interface GGUFMetadataOptions { + /** + * Enable strict type for known GGUF fields. + * + * @default true + */ + strict: boolean; } -export type GGUFMetadata = { +export type GGUFMetadata = { version: Version; tensor_count: bigint; kv_count: bigint; } & GGUFModelKV & - (TGGUFType extends GGUFType.STRICT ? unknown : Record); + (Options extends { strict: true } ? unknown : Record); export type GGUFModelKV = (NoModelMetadata | ModelMetadata) & (NoTokenizer | Tokenizer); @@ -132,7 +136,7 @@ export interface GGUFTensorInfo { offset: bigint; } -export interface GGUFParseOutput { - metadata: GGUFMetadata; +export interface GGUFParseOutput { + metadata: GGUFMetadata; tensorInfos: GGUFTensorInfo[]; } From 8c1bce07b3a13978f8c6a3ff7a59046f1bd8c3d6 Mon Sep 17 00:00:00 2001 From: ngxson Date: Tue, 7 May 2024 15:15:54 +0200 Subject: [PATCH 08/10] format --- packages/gguf/src/types.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts index e0a75eaf4..9e6f89dbf 100644 --- a/packages/gguf/src/types.ts +++ b/packages/gguf/src/types.ts @@ -112,11 +112,11 @@ export type Whisper = GGUFGeneralInfo<"whisper"> & export interface GGUFMetadataOptions { /** - * Enable strict type for known GGUF fields. - * + * Enable strict type for known GGUF fields. + * * @default true - */ - strict: boolean; + */ + strict: boolean; } export type GGUFMetadata = { From 2e62e41ef028322fcd9afbd42602efecc9d2882f Mon Sep 17 00:00:00 2001 From: ngxson Date: Tue, 7 May 2024 15:18:10 +0200 Subject: [PATCH 09/10] fix CI --- packages/gguf/src/types.spec.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts index 0911b81a1..547886d31 100644 --- a/packages/gguf/src/types.spec.ts +++ b/packages/gguf/src/types.spec.ts @@ -5,7 +5,7 @@ import type { GGUFMetadata, GGUFParseOutput } from "./types"; describe("gguf-types", () => { it("gguf() type can be casted between STRICT and NON_STRICT (at compile time)", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any - const result: Awaited> = null as any; + const result: Awaited> = {} as any; const strictType = result as GGUFParseOutput<{ strict: true }>; // @ts-expect-error because the key "abc" does not exist strictType.metadata.abc = 123; @@ -17,14 +17,14 @@ describe("gguf-types", () => { it("GGUFType.NON_STRICT should be correct (at compile time)", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any - const model: GGUFMetadata<{ strict: false }> = null as any; + const model: GGUFMetadata<{ strict: false }> = {} as any; model.kv_count = 123n; model.abc = 456; // PASS, because it can be anything }); it("GGUFType.STRICT should be correct (at compile time)", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any - const model: GGUFMetadata<{ strict: true }> = null as any; + const model: GGUFMetadata<{ strict: true }> = {} as any; if (model["general.architecture"] === "whisper") { model["encoder.whisper.block_count"] = 0; From a2250d3b5502727eb9fa14cb2d7ce10880183510 Mon Sep 17 00:00:00 2001 From: ngxson Date: Tue, 7 May 2024 15:19:40 +0200 Subject: [PATCH 10/10] fix CI (2) --- packages/gguf/src/types.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/gguf/src/types.spec.ts b/packages/gguf/src/types.spec.ts index 547886d31..9d20bfa8c 100644 --- a/packages/gguf/src/types.spec.ts +++ b/packages/gguf/src/types.spec.ts @@ -5,7 +5,7 @@ import type { GGUFMetadata, GGUFParseOutput } from "./types"; describe("gguf-types", () => { it("gguf() type can be casted between STRICT and NON_STRICT (at compile time)", async () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any - const result: Awaited> = {} as any; + const result: Awaited> = { metadata: {} } as any; const strictType = result as GGUFParseOutput<{ strict: true }>; // @ts-expect-error because the key "abc" does not exist strictType.metadata.abc = 123;