
Commit: Rename Qwen2 recipe files.
fyabc committed Aug 6, 2024
1 parent 07b2f3d commit 856555b
Showing 5 changed files with 62 additions and 46 deletions.
@@ -10,12 +10,12 @@
# pip install bitsandbytes
#
# To launch on a single device, run the following command from root:
-# tune run full_finetune_single_device --config qwen2/0.5B_full_low_memory
+# tune run full_finetune_single_device --config qwen2/0.5B_full_single_device
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
-# tune run full_finetune_single_device --config qwen2/0.5B_full_low_memory checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
+# tune run full_finetune_single_device --config qwen2/0.5B_full_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works only for training on single device.

@@ -10,12 +10,12 @@
# pip install bitsandbytes
#
# To launch on a single device, run the following command from root:
-# tune run full_finetune_single_device --config qwen2/1.5B_full_low_memory
+# tune run full_finetune_single_device --config qwen2/1.5B_full_single_device
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
-# tune run full_finetune_single_device --config qwen2/1.5B_full_low_memory checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
+# tune run full_finetune_single_device --config qwen2/1.5B_full_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works only for training on single device.

@@ -10,12 +10,12 @@
# pip install bitsandbytes
#
# To launch on a single device, run the following command from root:
-# tune run full_finetune_single_device --config qwen2/7B_full_low_memory
+# tune run full_finetune_single_device --config qwen2/7B_full_single_device
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
-# tune run full_finetune_single_device --config qwen2/7B_full_low_memory checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
+# tune run full_finetune_single_device --config qwen2/7B_full_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works only for training on single device.

12 changes: 6 additions & 6 deletions torchtune/_recipe_registry.py
@@ -52,16 +52,16 @@ class Recipe:
file_path="phi3/mini_full_low_memory.yaml",
),
Config(
-name="qwen2/7B_full_low_memory",
-file_path="qwen2/7B_full_low_memory.yaml",
+name="qwen2/7B_full_single_device",
+file_path="qwen2/7B_full_single_device.yaml",
),
Config(
-name="qwen2/0.5B_full_low_memory",
-file_path="qwen2/0.5B_full_low_memory.yaml",
+name="qwen2/0.5B_full_single_device",
+file_path="qwen2/0.5B_full_single_device.yaml",
),
Config(
-name="qwen2/1.5B_full_low_memory",
-file_path="qwen2/1.5B_full_low_memory.yaml",
+name="qwen2/1.5B_full_single_device",
+file_path="qwen2/1.5B_full_single_device.yaml",
),
],
supports_distributed=False,
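
A minimal sketch, not part of this commit, of how the renamed entries above can be listed programmatically. The Config fields `name` and `file_path` are taken from the diff itself; `get_all_recipes()` and the `configs` attribute on `Recipe` are assumed accessors and may not match the exact registry API.

from torchtune._recipe_registry import get_all_recipes  # assumed accessor

# Collect the Qwen2 single-device full-finetune config names registered above.
qwen2_single_device = [
    cfg.name
    for recipe in get_all_recipes()   # assumed to yield Recipe objects
    for cfg in recipe.configs         # assumed list of the Config entries shown above
    if cfg.name.startswith("qwen2/") and cfg.name.endswith("_full_single_device")
]
print(qwen2_single_device)
# Expected after this commit:
# ['qwen2/7B_full_single_device', 'qwen2/0.5B_full_single_device', 'qwen2/1.5B_full_single_device']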
84 changes: 50 additions & 34 deletions torchtune/models/qwen2/_model_builders.py
@@ -18,13 +18,13 @@
"""


-def qwen2_7b() -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
+def qwen2_7b() -> TransformerDecoder:
"""
Builder for creating a Qwen2 model initialized w/ the default 7B parameter values
from https://huggingface.co/Qwen/Qwen2-7B-Instruct
Returns:
-Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]: Instantiation of Qwen2 7B model
+TransformerDecoder: Instantiation of Qwen2 7B model
"""
return qwen2(
vocab_size=152064,
@@ -40,13 +40,17 @@ def qwen2_7b() -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
)


-def qwen2_0_5b() -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
+def qwen2_0_5b() -> TiedEmbeddingTransformerDecoder:
"""
Builder for creating a Qwen2 model initialized w/ the default 0.5B parameter values
from https://huggingface.co/Qwen/Qwen2-0.5B-Instruct
Returns:
-Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]: Instantiation of Qwen2 0.5B model
+TiedEmbeddingTransformerDecoder: Instantiation of Qwen2 0.5B model
+Note:
+Qwen2 0.5B and Qwen2 1.5B model builders will enable `tie_word_embeddings` by default
+and returns an instance of `TiedEmbeddingTransformerDecoder`.
"""
return qwen2(
vocab_size=151936,
@@ -63,13 +67,17 @@ def qwen2_0_5b() -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
)


-def qwen2_1_5b() -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
+def qwen2_1_5b() -> TiedEmbeddingTransformerDecoder:
"""
Builder for creating a Qwen2 model initialized w/ the default 1.5B parameter values
from https://huggingface.co/Qwen/Qwen2-1.5B-Instruct
Returns:
-Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]: Instantiation of Qwen2 1.5B model
+TiedEmbeddingTransformerDecoder: Instantiation of Qwen2 1.5B model
+Note:
+Qwen2 0.5B and Qwen2 1.5B model builders will enable `tie_word_embeddings` by default
+and returns an instance of `TiedEmbeddingTransformerDecoder`.
"""
return qwen2(
vocab_size=151936,
@@ -101,21 +109,21 @@ def qwen2_tokenizer(
structured similarly. Default is None to use the canonical Qwen2 special tokens.
Returns:
-Llama3Tokenizer: Instantiation of the Qwen2 tokenizer
+Qwen2Tokenizer: Instantiation of the Qwen2 tokenizer
"""
special_tokens = parse_hf_tokenizer_json(special_tokens_path) if special_tokens_path is not None else None
return Qwen2Tokenizer(path=path, merges_file=merges_file, special_tokens=special_tokens, **kwargs)


def lora_qwen2_7b(
-lora_attn_modules: List[LORA_ATTN_MODULES],
-apply_lora_to_mlp: bool = False,
-apply_lora_to_output: bool = False,
-lora_rank: int = 8,
-lora_alpha: float = 16,
-lora_dropout: float = 0.05,
-quantize_base: bool = False,
-) -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
+lora_attn_modules: List[LORA_ATTN_MODULES],
+apply_lora_to_mlp: bool = False,
+apply_lora_to_output: bool = False,
+lora_rank: int = 8,
+lora_alpha: float = 16,
+lora_dropout: float = 0.05,
+quantize_base: bool = False,
+) -> TransformerDecoder:
"""
Builder for creating a Qwen2 7B model with LoRA enabled.
@@ -136,7 +144,7 @@ def lora_qwen2_7b(
quantize_base (bool): Whether to quantize base model weights
Returns:
-Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]: Instantiation of Qwen2 7B model with LoRA applied
+TransformerDecoder: Instantiation of Qwen2 7B model with LoRA applied
"""
return lora_qwen2(
lora_attn_modules=lora_attn_modules,
@@ -160,14 +168,14 @@ def lora_qwen2_7b(


def lora_qwen2_0_5b(
-lora_attn_modules: List[LORA_ATTN_MODULES],
-apply_lora_to_mlp: bool = False,
-apply_lora_to_output: bool = False,
-lora_rank: int = 8,
-lora_alpha: float = 16,
-lora_dropout: float = 0.05,
-quantize_base: bool = False,
-) -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
+lora_attn_modules: List[LORA_ATTN_MODULES],
+apply_lora_to_mlp: bool = False,
+apply_lora_to_output: bool = False,
+lora_rank: int = 8,
+lora_alpha: float = 16,
+lora_dropout: float = 0.05,
+quantize_base: bool = False,
+) -> TiedEmbeddingTransformerDecoder:
"""
Builder for creating a Qwen2 0.5B model with LoRA enabled.
@@ -188,7 +196,11 @@ def lora_qwen2_0_5b(
quantize_base (bool): Whether to quantize base model weights
Returns:
-Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]: Instantiation of Qwen2 0.5B model with LoRA applied
+TiedEmbeddingTransformerDecoder: Instantiation of Qwen2 0.5B model with LoRA applied
+Note:
+Qwen2 0.5B and Qwen2 1.5B model builders will enable `tie_word_embeddings` by default
+and returns an instance of `TiedEmbeddingTransformerDecoder`.
"""
return lora_qwen2(
lora_attn_modules=lora_attn_modules,
@@ -213,14 +225,14 @@ def lora_qwen2_0_5b(


def lora_qwen2_1_5b(
-lora_attn_modules: List[LORA_ATTN_MODULES],
-apply_lora_to_mlp: bool = False,
-apply_lora_to_output: bool = False,
-lora_rank: int = 8,
-lora_alpha: float = 16,
-lora_dropout: float = 0.05,
-quantize_base: bool = False,
-) -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
+lora_attn_modules: List[LORA_ATTN_MODULES],
+apply_lora_to_mlp: bool = False,
+apply_lora_to_output: bool = False,
+lora_rank: int = 8,
+lora_alpha: float = 16,
+lora_dropout: float = 0.05,
+quantize_base: bool = False,
+) -> TiedEmbeddingTransformerDecoder:
"""
Builder for creating a Qwen2 1.5B model with LoRA enabled.
@@ -241,7 +253,11 @@ def lora_qwen2_1_5b(
quantize_base (bool): Whether to quantize base model weights
Returns:
-Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]: Instantiation of Qwen2 1.5B model with LoRA applied
+TiedEmbeddingTransformerDecoder: Instantiation of Qwen2 1.5B model with LoRA applied
+Note:
+Qwen2 0.5B and Qwen2 1.5B model builders will enable `tie_word_embeddings` by default
+and returns an instance of `TiedEmbeddingTransformerDecoder`.
"""
return lora_qwen2(
lora_attn_modules=lora_attn_modules,
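
To illustrate the narrowed return types above, a hedged sketch of calling the plain builders. It assumes TiedEmbeddingTransformerDecoder is importable from torchtune.modules as in this commit's codebase; later releases may differ.

from torchtune.models.qwen2 import qwen2_0_5b
from torchtune.modules import TiedEmbeddingTransformerDecoder

# The 0.5B and 1.5B builders tie the input and output embeddings (tie_word_embeddings),
# so they return the tied-embedding decoder described in the docstring notes above.
model = qwen2_0_5b()
assert isinstance(model, TiedEmbeddingTransformerDecoder)
# qwen2_7b() keeps a separate output projection and returns a plain TransformerDecoder.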
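
Similarly, a sketch of the LoRA builders using only parameters visible in the signatures above; the q_proj/v_proj module names are illustrative LORA_ATTN_MODULES values, not prescribed by this commit.

from torchtune.models.qwen2 import lora_qwen2_0_5b

# Apply rank-8 LoRA to the query and value projections of the 0.5B model.
lora_model = lora_qwen2_0_5b(
    lora_attn_modules=["q_proj", "v_proj"],
    lora_rank=8,
    lora_alpha=16,
    lora_dropout=0.05,
)
# As with qwen2_0_5b(), the result is a TiedEmbeddingTransformerDecoder with LoRA
# adapters injected into the selected attention projections.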
