From 3887e3c45c8cb21c57e993ac57343209589cb22f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Thu, 7 Aug 2025 12:01:48 +0200 Subject: [PATCH 1/3] add MXFP4 QuantizationType --- packages/tasks/src/gguf.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/tasks/src/gguf.ts b/packages/tasks/src/gguf.ts index a5bac82a0b..828a40a8f5 100644 --- a/packages/tasks/src/gguf.ts +++ b/packages/tasks/src/gguf.ts @@ -40,6 +40,7 @@ export enum GGMLFileQuantizationType { Q4_0_8_8 = 35, TQ1_0 = 36, TQ2_0 = 37, + MXFP4_MOE = 38, // custom quants used by unsloth // they are not officially a scheme enum value in GGUF, but only here for naming @@ -95,6 +96,7 @@ export const GGUF_QUANT_ORDER: GGMLFileQuantizationType[] = [ GGMLFileQuantizationType.Q4_1, GGMLFileQuantizationType.Q4_2, GGMLFileQuantizationType.Q4_3, + GGMLFileQuantizationType.MXFP4_MOE, // 3-bit quantizations GGMLFileQuantizationType.Q3_K_XL, @@ -197,4 +199,5 @@ export enum GGMLQuantizationType { BF16 = 30, TQ1_0 = 34, TQ2_0 = 35, + MXFP4 = 39, } From 520386481b765915f9517054ded25367019d5128 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Thu, 7 Aug 2025 12:09:28 +0200 Subject: [PATCH 2/3] add MXFP4 size and description --- packages/gguf/src/quant-descriptions.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/gguf/src/quant-descriptions.ts b/packages/gguf/src/quant-descriptions.ts index 0f68d1f687..a09f3fb16c 100644 --- a/packages/gguf/src/quant-descriptions.ts +++ b/packages/gguf/src/quant-descriptions.ts @@ -132,6 +132,10 @@ export const GGUF_QUANT_DESCRIPTIONS: Record Date: Thu, 7 Aug 2025 20:58:49 +0200 Subject: [PATCH 3/3] fix test --- packages/gguf/src/gguf.spec.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/gguf/src/gguf.spec.ts b/packages/gguf/src/gguf.spec.ts index 0978b2ca29..f68b8fa2b5 100644 --- a/packages/gguf/src/gguf.spec.ts +++ b/packages/gguf/src/gguf.spec.ts @@ -294,8 +294,8 @@ describe("gguf", () => { // Quantization handler - it("should have GGUF_QUANT_ORDER in sync with GGMLQuantizationType enum", () => { - const enumValues = Object.values(GGMLQuantizationType).filter((value) => typeof value === "number") as number[]; + it("should have GGUF_QUANT_ORDER in sync with GGMLFileQuantizationType enum", () => { + const enumValues = Object.values(GGMLFileQuantizationType).filter((value) => typeof value === "number") as number[]; const checkValues = new Set(GGUF_QUANT_ORDER); for (const value of enumValues) { expect(checkValues).toContain(value);