From e745ba58d5ccd1a4690b0ccef1ad7f618b43782d Mon Sep 17 00:00:00 2001 From: Mishig Date: Wed, 20 Mar 2024 05:32:09 -0700 Subject: [PATCH] [gguf] Add types (#562) GGUF add types. Follow-up to https://github.com/huggingface/huggingface.js/pull/540#issuecomment-2000249807. No validation of any kind, just types. cc: @biw also --- packages/gguf/src/gguf.ts | 67 ++---------------- packages/gguf/src/types.ts | 141 +++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 62 deletions(-) create mode 100644 packages/gguf/src/types.ts diff --git a/packages/gguf/src/gguf.ts b/packages/gguf/src/gguf.ts index 9f99b5e08..8c1b1cfa9 100644 --- a/packages/gguf/src/gguf.ts +++ b/packages/gguf/src/gguf.ts @@ -1,7 +1,9 @@ -export type MetadataBaseValue = string | number | bigint | boolean; -export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested. +import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types"; +import { GGUFValueType } from "./types"; + +export type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types"; +export { GGUFValueType, GGMLQuantizationType } from "./types"; -type Version = 1 | 2 | 3; const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3; /** @@ -12,46 +14,6 @@ const isVersion = (version: number): version is Version => version === 1 || vers */ const ggufMagicNumber = new Uint8Array([0x47, 0x47, 0x55, 0x46]); /// "GGUF" -export enum GGMLQuantizationType { - F32 = 0, - F16 = 1, - Q4_0 = 2, - Q4_1 = 3, - Q5_0 = 6, - Q5_1 = 7, - Q8_0 = 8, - Q8_1 = 9, - Q2_K = 10, - Q3_K = 11, - Q4_K = 12, - Q5_K = 13, - Q6_K = 14, - Q8_K = 15, - IQ2_XXS = 16, - IQ2_XS = 17, - IQ3_XXS = 18, - IQ1_S = 19, - IQ4_NL = 20, - IQ3_S = 21, - IQ2_S = 22, - IQ4_XS = 23, -} - -enum GGUFValueType { - UINT8 = 0, - INT8 = 1, - UINT16 = 2, - INT16 = 3, - UINT32 = 4, - INT32 = 5, - FLOAT32 
= 6, - BOOL = 7, - STRING = 8, - ARRAY = 9, - UINT64 = 10, - INT64 = 11, - FLOAT64 = 12, -} function isGGUFValueType(n: number): n is GGUFValueType { return typeof GGUFValueType[n] === "string"; } @@ -185,25 +147,6 @@ function readMetadataValue( } } -export type GGUFMetadata = { - version: Version; - tensor_count: bigint; - kv_count: bigint; -} & Record; - -export interface GGUFTensorInfo { - name: string; - n_dims: number; - shape: bigint[]; - dtype: GGMLQuantizationType; - offset: bigint; -} - -export interface GGUFParseOutput { - metadata: GGUFMetadata; - tensorInfos: GGUFTensorInfo[]; -} - export async function gguf( url: string, params?: { diff --git a/packages/gguf/src/types.ts b/packages/gguf/src/types.ts new file mode 100644 index 000000000..ec6d86d52 --- /dev/null +++ b/packages/gguf/src/types.ts @@ -0,0 +1,141 @@ +export type MetadataBaseValue = string | number | bigint | boolean; +export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested. 
+ +export type Version = 1 | 2 | 3; + +export enum GGMLQuantizationType { + F32 = 0, + F16 = 1, + Q4_0 = 2, + Q4_1 = 3, + Q5_0 = 6, + Q5_1 = 7, + Q8_0 = 8, + Q8_1 = 9, + Q2_K = 10, + Q3_K = 11, + Q4_K = 12, + Q5_K = 13, + Q6_K = 14, + Q8_K = 15, + IQ2_XXS = 16, + IQ2_XS = 17, + IQ3_XXS = 18, + IQ1_S = 19, + IQ4_NL = 20, + IQ3_S = 21, + IQ2_S = 22, + IQ4_XS = 23, +} + +export enum GGUFValueType { + UINT8 = 0, + INT8 = 1, + UINT16 = 2, + INT16 = 3, + UINT32 = 4, + INT32 = 5, + FLOAT32 = 6, + BOOL = 7, + STRING = 8, + ARRAY = 9, + UINT64 = 10, + INT64 = 11, + FLOAT64 = 12, +} + +export const ARCHITECTURES = [ + "llama", + "mpt", + "gptneox", + "gptj", + "gpt2", + "bloom", + "falcon", + "gemma", + "rwkv", + "whisper", +] as const; + +export type Architecture = (typeof ARCHITECTURES)[number]; + +interface General { + "general.architecture": Architecture; + "general.name": string; + "general.file_type": number; + "general.quantization_version": number; +} + +type Attention = + | { [K in `${TArchitecture}.attention.head_count`]: number } + | { [K in `${TArchitecture}.attention.head_count_kv`]: number } + | { [K in `${TArchitecture}.attention.layer_norm_epsilon`]: number } + | { [K in `${TArchitecture}.attention.layer_norm_rms_epsilon`]: number } + | { [K in `${TArchitecture}.attention.alibi_bias_max`]: number } + | { [K in `${TArchitecture}.attention.clip_kqv`]: number } + | { [K in `${TArchitecture}.attention.use_norm`]: number }; + +type Rope = + | { [K in `${TArchitecture}.rope.dimension_count`]: number } + | { [K in `${TArchitecture}.rope.freq_base`]: number } + | { [K in `${TArchitecture}.rope.scale`]: number } + | { [K in `${TArchitecture}.rope.scale_linear`]: number }; + +type ModelBase< + TArchitecture extends + | Architecture + | `encoder.${Extract}` + | `decoder.${Extract}`, +> = + | { [K in `${TArchitecture}.layer_count`]: number } + | { [K in `${TArchitecture}.feed_forward_length`]: number } + | { [K in `${TArchitecture}.context_length`]: number } + | { [K in 
`${TArchitecture}.embedding_length`]: number } + | { [K in `${TArchitecture}.block_count`]: number }; + +type MOE = + | { [K in `${TArchitecture}.expert_count`]: number } + | { [K in `${TArchitecture}.expert_used_count`]: number }; + +interface Tokenizer { + "tokenizer.ggml.model": Architecture; + "tokenizer.ggml.tokens": string[]; + "tokenizer.ggml.scores": number[]; + "tokenizer.ggml.token_type": number[]; + "tokenizer.ggml.bos_token_id": number; + "tokenizer.ggml.eos_token_id": number; + "tokenizer.ggml.add_bos_token": boolean; + "tokenizer.chat_template": string; +} + +type TransformerLLMArchitecture = Exclude; +type TransformerLLM = ModelBase & + MOE & + Attention & + Rope; + +export type RWKV = ModelBase<"rwkv"> & { "rwkv.architecture_version": number }; +export type LLM = TransformerLLM | RWKV; +export type Whisper = ModelBase<"encoder.whisper"> & ModelBase<"decoder.whisper">; +export type Model = (LLM | Whisper) & Partial; + +export type GGUFMetadata = { + version: Version; + tensor_count: bigint; + kv_count: bigint; +} & Partial & + Partial & + Record; + +export interface GGUFTensorInfo { + name: string; + n_dims: number; + shape: bigint[]; + dtype: GGMLQuantizationType; + offset: bigint; +} + +export interface GGUFParseOutput { + metadata: GGUFMetadata; + tensorInfos: GGUFTensorInfo[]; +}