From a404527e90173bacfd55a9c8c0b9fc47dda32e4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=87a=C4=9Fr=C4=B1=20Tu=C4=9Frul=20Canbol?=
 <103742287+ctcanbol@users.noreply.github.com>
Date: Thu, 24 Jul 2025 13:09:50 +0200
Subject: [PATCH 1/4] support loading qwen3 gguf

---
 src/transformers/integrations/ggml.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/src/transformers/integrations/ggml.py b/src/transformers/integrations/ggml.py
index 17f86e166737..ecf34bbf5e19 100644
--- a/src/transformers/integrations/ggml.py
+++ b/src/transformers/integrations/ggml.py
@@ -102,6 +102,20 @@
         "attention.layer_norm_rms_epsilon": "rms_norm_eps",
         "vocab_size": "vocab_size",
     },
+    "qwen3moe": {
+        "context_length": "max_position_embeddings",
+        "block_count": "num_hidden_layers",
+        "feed_forward_length": "intermediate_size",
+        "embedding_length": "hidden_size",
+        "rope.dimension_count": None,
+        "rope.freq_base": "rope_theta",
+        "attention.head_count": "num_attention_heads",
+        "attention.head_count_kv": "num_key_value_heads",
+        "attention.layer_norm_rms_epsilon": "rms_norm_eps",
+        "vocab_size": "vocab_size",
+        "expert_count": "num_experts",
+        "expert_used_count": "num_experts_per_tok",
+    },
     "falcon": {
         "context_length": "max_position_embeddings",
         "block_count": "num_hidden_layers",
@@ -689,6 +703,7 @@ def converted(self) -> Tokenizer:
     "qwen2": GGUFQwen2Converter,
     "qwen2_moe": GGUFQwen2Converter,
     "qwen3": GGUFQwen2Converter,
+    "qwen3_moe": GGUFQwen2Converter,
     "phi3": GGUFPhi3Converter,
     "bloom": GGUFGPTConverter,
     "falcon": GGUFGPTConverter,

From 6982becb69009efba40f206005b55ebac5e519dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=87a=C4=9Fr=C4=B1=20Tu=C4=9Frul=20Canbol?=
 <103742287+ctcanbol@users.noreply.github.com>
Date: Thu, 24 Jul 2025 13:43:07 +0200
Subject: [PATCH 2/4] qwen3moe test cases

---
 tests/quantization/ggml/test_ggml.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/quantization/ggml/test_ggml.py b/tests/quantization/ggml/test_ggml.py
index d2ed7f7a745d..6c60bab12bd5 100644
--- a/tests/quantization/ggml/test_ggml.py
+++ b/tests/quantization/ggml/test_ggml.py
@@ -302,6 +302,7 @@ class GgufModelTests(unittest.TestCase):
     gemma3_text_model_id = "unsloth/gemma-3-1b-it-GGUF"
     gemma3_vision_model_id = "unsloth/gemma-3-4b-it-GGUF"
     qwen3_model_id = "Qwen/Qwen3-0.6B-GGUF"
+    qwen3moe_model_id = "Qwen/Qwen3-235B-A22B-Instruct-2507"
     q4_0_phi3_model_id = "Phi-3-mini-4k-instruct-q4.gguf"
     q4_0_mistral_model_id = "mistral-7b-instruct-v0.2.Q4_0.gguf"
 
@@ -335,6 +336,7 @@ class GgufModelTests(unittest.TestCase):
     bf16_gemma3_text_model_id = "gemma-3-1b-it-BF16.gguf"
     bf16_gemma3_vision_model_id = "gemma-3-4b-it-BF16.gguf"
     q8_0_qwen3_model_id = "Qwen3-0.6B-Q8_0.gguf"
+    q8_qwen3moe_model_id = "unsloth/Qwen3-235B-A22B-Instruct-2507-GGUF"
 
     example_text = "Hello"
 
@@ -973,3 +975,17 @@ def test_qwen3_q8_0(self):
 
         EXPECTED_TEXT = "HelloED\nI need to find the value of the"
         self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
+
+    def test_qwen3moe_q8(self):
+        tokenizer = AutoTokenizer.from_pretrained(self.qwen3moe_model_id, gguf_file=self.q8_qwen3moe_model_id)
+        model = AutoModelForCausalLM.from_pretrained(
+            self.qwen3moe_model_id,
+            gguf_file=self.q8_qwen3moe_model_id,
+            torch_dtype=torch.float16,
+        )
+
+        text = tokenizer(self.example_text, return_tensors="pt")
+        out = model.generate(**text, max_new_tokens=10)
+
+        EXPECTED_TEXT = "Hello, I am a 20 year old male"
+        self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)

From 237bc7043b5f74a3a1de413f40c8ea9f27b5b7cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=87a=C4=9Fr=C4=B1=20Tu=C4=9Frul=20Canbol?=
 <103742287+ctcanbol@users.noreply.github.com>
Date: Thu, 24 Jul 2025 17:12:14 +0200
Subject: [PATCH 3/4] fix whitespaces

---
 tests/quantization/ggml/test_ggml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/quantization/ggml/test_ggml.py b/tests/quantization/ggml/test_ggml.py
index 6c60bab12bd5..13f49015909f 100644
--- a/tests/quantization/ggml/test_ggml.py
+++ b/tests/quantization/ggml/test_ggml.py
@@ -975,7 +975,7 @@ def test_qwen3_q8_0(self):
 
         EXPECTED_TEXT = "HelloED\nI need to find the value of the"
         self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
-    
+
     def test_qwen3moe_q8(self):
         tokenizer = AutoTokenizer.from_pretrained(self.qwen3moe_model_id, gguf_file=self.q8_qwen3moe_model_id)
         model = AutoModelForCausalLM.from_pretrained(

From 60118d2e1bfdaf2c3715fe0d24cef99c047a9ae8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=87a=C4=9Fr=C4=B1=20Tu=C4=9Frul=20Canbol?=
 <103742287+ctcanbol@users.noreply.github.com>
Date: Tue, 29 Jul 2025 02:08:26 +0200
Subject: [PATCH 4/4] fix ggml tests

---
 tests/quantization/ggml/test_ggml.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/quantization/ggml/test_ggml.py b/tests/quantization/ggml/test_ggml.py
index 13f49015909f..408ab47a6296 100644
--- a/tests/quantization/ggml/test_ggml.py
+++ b/tests/quantization/ggml/test_ggml.py
@@ -302,7 +302,7 @@ class GgufModelTests(unittest.TestCase):
     gemma3_text_model_id = "unsloth/gemma-3-1b-it-GGUF"
     gemma3_vision_model_id = "unsloth/gemma-3-4b-it-GGUF"
     qwen3_model_id = "Qwen/Qwen3-0.6B-GGUF"
-    qwen3moe_model_id = "Qwen/Qwen3-235B-A22B-Instruct-2507"
+    qwen3moe_model_id = "Qwen/Qwen3-30B-A3B-GGUF"
     q4_0_phi3_model_id = "Phi-3-mini-4k-instruct-q4.gguf"
     q4_0_mistral_model_id = "mistral-7b-instruct-v0.2.Q4_0.gguf"
 
@@ -336,7 +336,7 @@ class GgufModelTests(unittest.TestCase):
     bf16_gemma3_text_model_id = "gemma-3-1b-it-BF16.gguf"
     bf16_gemma3_vision_model_id = "gemma-3-4b-it-BF16.gguf"
     q8_0_qwen3_model_id = "Qwen3-0.6B-Q8_0.gguf"
-    q8_qwen3moe_model_id = "unsloth/Qwen3-235B-A22B-Instruct-2507-GGUF"
+    q4_k_m_qwen3moe_model_id = "Qwen3-30B-A3B-Q4_K_M.gguf"
 
     example_text = "Hello"
 
@@ -976,11 +976,11 @@ def test_qwen3_q8_0(self):
         EXPECTED_TEXT = "HelloED\nI need to find the value of the"
         self.assertEqual(tokenizer.decode(out[0], skip_special_tokens=True), EXPECTED_TEXT)
 
-    def test_qwen3moe_q8(self):
-        tokenizer = AutoTokenizer.from_pretrained(self.qwen3moe_model_id, gguf_file=self.q8_qwen3moe_model_id)
+    def test_qwen3moe_q4_k_m(self):
+        tokenizer = AutoTokenizer.from_pretrained(self.qwen3moe_model_id, gguf_file=self.q4_k_m_qwen3moe_model_id)
         model = AutoModelForCausalLM.from_pretrained(
             self.qwen3moe_model_id,
-            gguf_file=self.q8_qwen3moe_model_id,
+            gguf_file=self.q4_k_m_qwen3moe_model_id,
             torch_dtype=torch.float16,
         )
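
For context, the end-to-end path this series enables looks like the following minimal sketch, mirroring the final test. The model id and GGUF filename are the ones patch 4 settles on; the generated text will vary by checkpoint and transformers version.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/Qwen3-30B-A3B-GGUF"
gguf_file = "Qwen3-30B-A3B-Q4_K_M.gguf"

# Passing gguf_file routes loading through the GGUF integration, which maps
# GGUF metadata keys to config fields via GGUF_CONFIG_MAPPING["qwen3moe"]
# (e.g. expert_count -> num_experts) and dequantizes the checkpoint tensors.
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=gguf_file, torch_dtype=torch.float16)

inputs = tokenizer("Hello", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=10)
print(tokenizer.decode(out[0], skip_special_tokens=True))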