Add rope_theta to llama models
According to the [Hugging Face Transformers CodeLlama PR](https://github.com/huggingface/transformers/pull/25740),
an additional parameter `rope_theta` is added to llama models.
It is the base of the rotary position embedding (RoPE).
ChieloNewctle committed Sep 4, 2023
1 parent 2d3cd33 commit 0c4f6b5
Showing 1 changed file with 9 additions and 3 deletions.
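
The commit message describes `rope_theta` as the base of the rotary position embedding. As a rough illustration of what that base controls, here is a minimal, self-contained sketch of how a RoPE base feeds into the inverse-frequency table; the function name, the `head_dim` value, and the example configs are illustrative, not lightllm's actual code.

```python
import torch

def rope_inv_freq(head_dim: int, base: float = 10000.0) -> torch.Tensor:
    # Inverse frequencies for rotary position embeddings (RoPE).
    # A larger base (e.g. CodeLlama ships rope_theta = 1e6 instead of the
    # Llama default 1e4) slows the rotation of the low-frequency channels,
    # which is what lets the model keep distant positions distinguishable.
    exponents = torch.arange(0, head_dim, 2, dtype=torch.float32) / head_dim
    return 1.0 / (base ** exponents)

# Rotation angle at position p for frequency index i is p * inv_freq[i].
positions = torch.arange(8, dtype=torch.float32)
angles_llama = torch.outer(positions, rope_inv_freq(128, base=10000.0))
angles_codellama = torch.outer(positions, rope_inv_freq(128, base=1000000.0))
```

With the larger base, the rotation periods are stretched, so much longer contexts remain addressable; that is the mechanism the new `rope_theta` config key exposes.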
lightllm/models/llama/model.py (12 changes: 9 additions & 3 deletions)
@@ -62,16 +62,22 @@ def _init_custom(self):
         return
 
 
-    def _init_to_get_rotary(self, base=10000):
+    def _init_to_get_rotary(self, default_base=10000.0):
         if self.config.get("rope_scaling", {}) is None:
             rope_scaling_factor = 1.0
         else:
             rope_scaling_factor = self.config.get("rope_scaling", {}).get("factor", 1.0)
+
+        base = self.config.get("rope_theta", float(default_base))
+
         if "max_sequence_length" in self.config:
             max_seq_len = self.config["max_sequence_length"]
         else:
-            max_seq_len = self.config.get("max_position_embeddings", 2048) * rope_scaling_factor
-        base = float(base)
+            max_position_embeddings = self.config.get(
+                "max_position_embeddings",
+                2048 if base <= 10000.0 + 1e-5 else 16384
+            )
+            max_seq_len = max_position_embeddings * rope_scaling_factor
 
         # NTK
         try:
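
To see how the new lookup behaves for different checkpoints, here is a small stand-alone sketch of the same fallback rules pulled out of the model class; `resolve_rope_params` and the example config dicts are hypothetical, not part of lightllm.

```python
def resolve_rope_params(config: dict, default_base: float = 10000.0):
    # Mirrors the lookup added in this commit, outside the model class.
    if config.get("rope_scaling", {}) is None:
        rope_scaling_factor = 1.0
    else:
        rope_scaling_factor = config.get("rope_scaling", {}).get("factor", 1.0)

    # rope_theta from the checkpoint config wins; otherwise fall back to 1e4.
    base = config.get("rope_theta", float(default_base))

    if "max_sequence_length" in config:
        max_seq_len = config["max_sequence_length"]
    else:
        # A large base signals a long-context (rope_theta-style) model,
        # so default to 16384 positions instead of 2048.
        max_position_embeddings = config.get(
            "max_position_embeddings",
            2048 if base <= 10000.0 + 1e-5 else 16384,
        )
        max_seq_len = max_position_embeddings * rope_scaling_factor
    return base, max_seq_len

# Hypothetical config fragments:
print(resolve_rope_params({"max_position_embeddings": 4096}))  # (10000.0, 4096.0)
print(resolve_rope_params({"rope_theta": 1000000.0}))          # (1000000.0, 16384.0)
```

The second config mimics a CodeLlama-style checkpoint: with `rope_theta` set and no `max_position_embeddings`, the commit's heuristic assumes a 16384-token context rather than Llama's 2048.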
