
Commit: Rename Qwen2 recipe files.
fyabc committed Aug 6, 2024
1 parent 07b2f3d commit 856555b
Showing 5 changed files with 62 additions and 46 deletions.
@@ -10,12 +10,12 @@
# pip install bitsandbytes
#
# To launch on a single device, run the following command from root:
-# tune run full_finetune_single_device --config qwen2/0.5B_full_low_memory
+# tune run full_finetune_single_device --config qwen2/0.5B_full_single_device
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
-# tune run full_finetune_single_device --config qwen2/0.5B_full_low_memory checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
+# tune run full_finetune_single_device --config qwen2/0.5B_full_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works only for training on single device.

@@ -10,12 +10,12 @@
# pip install bitsandbytes
#
# To launch on a single device, run the following command from root:
-# tune run full_finetune_single_device --config qwen2/1.5B_full_low_memory
+# tune run full_finetune_single_device --config qwen2/1.5B_full_single_device
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
-# tune run full_finetune_single_device --config qwen2/1.5B_full_low_memory checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
+# tune run full_finetune_single_device --config qwen2/1.5B_full_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works only for training on single device.

@@ -10,12 +10,12 @@
# pip install bitsandbytes
#
# To launch on a single device, run the following command from root:
-# tune run full_finetune_single_device --config qwen2/7B_full_low_memory
+# tune run full_finetune_single_device --config qwen2/7B_full_single_device
#
# You can add specific overrides through the command line. For example
# to override the checkpointer directory while launching training
# you can run:
-# tune run full_finetune_single_device --config qwen2/7B_full_low_memory checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
+# tune run full_finetune_single_device --config qwen2/7B_full_single_device checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
#
# This config works only for training on single device.

12 changes: 6 additions & 6 deletions torchtune/_recipe_registry.py
@@ -52,16 +52,16 @@ class Recipe:
file_path="phi3/mini_full_low_memory.yaml",
),
Config(
-name="qwen2/7B_full_low_memory",
-file_path="qwen2/7B_full_low_memory.yaml",
+name="qwen2/7B_full_single_device",
+file_path="qwen2/7B_full_single_device.yaml",
),
Config(
-name="qwen2/0.5B_full_low_memory",
-file_path="qwen2/0.5B_full_low_memory.yaml",
+name="qwen2/0.5B_full_single_device",
+file_path="qwen2/0.5B_full_single_device.yaml",
),
Config(
-name="qwen2/1.5B_full_low_memory",
-file_path="qwen2/1.5B_full_low_memory.yaml",
+name="qwen2/1.5B_full_single_device",
+file_path="qwen2/1.5B_full_single_device.yaml",
),
],
supports_distributed=False,
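
A minimal sketch, not part of this commit, of how the renamed entries above can be listed programmatically. The Config fields `name` and `file_path` are taken from the diff itself; `get_all_recipes()` and the `configs` attribute on `Recipe` are assumed accessors and may not match the exact registry API.

from torchtune._recipe_registry import get_all_recipes  # assumed accessor

# Collect the Qwen2 single-device full-finetune config names registered above.
qwen2_single_device = [
    cfg.name
    for recipe in get_all_recipes()   # assumed to yield Recipe objects
    for cfg in recipe.configs         # assumed list of the Config entries shown above
    if cfg.name.startswith("qwen2/") and cfg.name.endswith("_full_single_device")
]
print(qwen2_single_device)
# Expected after this commit:
# ['qwen2/7B_full_single_device', 'qwen2/0.5B_full_single_device', 'qwen2/1.5B_full_single_device']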
84 changes: 50 additions & 34 deletions torchtune/models/qwen2/_model_builders.py
@@ -18,13 +18,13 @@
"""


-def qwen2_7b() -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
+def qwen2_7b() -> TransformerDecoder:
"""
Builder for creating a Qwen2 model initialized w/ the default 7B parameter values
from https://huggingface.co/Qwen/Qwen2-7B-Instruct
Returns:
-Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]: Instantiation of Qwen2 7B model
+TransformerDecoder: Instantiation of Qwen2 7B model
"""
return qwen2(
vocab_size=152064,
@@ -40,13 +40,17 @@ def qwen2_7b() -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
)


-def qwen2_0_5b() -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
+def qwen2_0_5b() -> TiedEmbeddingTransformerDecoder:
"""
Builder for creating a Qwen2 model initialized w/ the default 0.5B parameter values
from https://huggingface.co/Qwen/Qwen2-0.5B-Instruct
Returns:
-Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]: Instantiation of Qwen2 0.5B model
+TiedEmbeddingTransformerDecoder: Instantiation of Qwen2 0.5B model
+Note:
+Qwen2 0.5B and Qwen2 1.5B model builders will enable `tie_word_embeddings` by default
+and returns an instance of `TiedEmbeddingTransformerDecoder`.
"""
return qwen2(
vocab_size=151936,
@@ -63,13 +67,17 @@ def qwen2_0_5b() -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
)


-def qwen2_1_5b() -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
+def qwen2_1_5b() -> TiedEmbeddingTransformerDecoder:
"""
Builder for creating a Qwen2 model initialized w/ the default 1.5B parameter values
from https://huggingface.co/Qwen/Qwen2-1.5B-Instruct
Returns:
-Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]: Instantiation of Qwen2 1.5B model
+TiedEmbeddingTransformerDecoder: Instantiation of Qwen2 1.5B model
+Note:
+Qwen2 0.5B and Qwen2 1.5B model builders will enable `tie_word_embeddings` by default
+and returns an instance of `TiedEmbeddingTransformerDecoder`.
"""
return qwen2(
vocab_size=151936,
@@ -101,21 +109,21 @@ def qwen2_tokenizer(
structured similarly. Default is None to use the canonical Qwen2 special tokens.
Returns:
-Llama3Tokenizer: Instantiation of the Qwen2 tokenizer
+Qwen2Tokenizer: Instantiation of the Qwen2 tokenizer
"""
special_tokens = parse_hf_tokenizer_json(special_tokens_path) if special_tokens_path is not None else None
return Qwen2Tokenizer(path=path, merges_file=merges_file, special_tokens=special_tokens, **kwargs)


def lora_qwen2_7b(
-lora_attn_modules: List[LORA_ATTN_MODULES],
-apply_lora_to_mlp: bool = False,
-apply_lora_to_output: bool = False,
-lora_rank: int = 8,
-lora_alpha: float = 16,
-lora_dropout: float = 0.05,
-quantize_base: bool = False,
-) -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
+lora_attn_modules: List[LORA_ATTN_MODULES],
+apply_lora_to_mlp: bool = False,
+apply_lora_to_output: bool = False,
+lora_rank: int = 8,
+lora_alpha: float = 16,
+lora_dropout: float = 0.05,
+quantize_base: bool = False,
+) -> TransformerDecoder:
"""
Builder for creating a Qwen2 7B model with LoRA enabled.
@@ -136,7 +144,7 @@ def lora_qwen2_7b(
quantize_base (bool): Whether to quantize base model weights
Returns:
-Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]: Instantiation of Qwen2 7B model with LoRA applied
+TransformerDecoder: Instantiation of Qwen2 7B model with LoRA applied
"""
return lora_qwen2(
lora_attn_modules=lora_attn_modules,
@@ -160,14 +168,14 @@ def lora_qwen2_7b(


def lora_qwen2_0_5b(
-lora_attn_modules: List[LORA_ATTN_MODULES],
-apply_lora_to_mlp: bool = False,
-apply_lora_to_output: bool = False,
-lora_rank: int = 8,
-lora_alpha: float = 16,
-lora_dropout: float = 0.05,
-quantize_base: bool = False,
-) -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
+lora_attn_modules: List[LORA_ATTN_MODULES],
+apply_lora_to_mlp: bool = False,
+apply_lora_to_output: bool = False,
+lora_rank: int = 8,
+lora_alpha: float = 16,
+lora_dropout: float = 0.05,
+quantize_base: bool = False,
+) -> TiedEmbeddingTransformerDecoder:
"""
Builder for creating a Qwen2 0.5B model with LoRA enabled.
@@ -188,7 +196,11 @@ def lora_qwen2_0_5b(
quantize_base (bool): Whether to quantize base model weights
Returns:
-Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]: Instantiation of Qwen2 0.5B model with LoRA applied
+TiedEmbeddingTransformerDecoder: Instantiation of Qwen2 0.5B model with LoRA applied
+Note:
+Qwen2 0.5B and Qwen2 1.5B model builders will enable `tie_word_embeddings` by default
+and returns an instance of `TiedEmbeddingTransformerDecoder`.
"""
return lora_qwen2(
lora_attn_modules=lora_attn_modules,
@@ -213,14 +225,14 @@ def lora_qwen2_0_5b(


def lora_qwen2_1_5b(
-lora_attn_modules: List[LORA_ATTN_MODULES],
-apply_lora_to_mlp: bool = False,
-apply_lora_to_output: bool = False,
-lora_rank: int = 8,
-lora_alpha: float = 16,
-lora_dropout: float = 0.05,
-quantize_base: bool = False,
-) -> Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]:
+lora_attn_modules: List[LORA_ATTN_MODULES],
+apply_lora_to_mlp: bool = False,
+apply_lora_to_output: bool = False,
+lora_rank: int = 8,
+lora_alpha: float = 16,
+lora_dropout: float = 0.05,
+quantize_base: bool = False,
+) -> TiedEmbeddingTransformerDecoder:
"""
Builder for creating a Qwen2 1.5B model with LoRA enabled.
@@ -241,7 +253,11 @@ def lora_qwen2_1_5b(
quantize_base (bool): Whether to quantize base model weights
Returns:
-Union[TransformerDecoder, TiedEmbeddingTransformerDecoder]: Instantiation of Qwen2 1.5B model with LoRA applied
+TiedEmbeddingTransformerDecoder: Instantiation of Qwen2 1.5B model with LoRA applied
+Note:
+Qwen2 0.5B and Qwen2 1.5B model builders will enable `tie_word_embeddings` by default
+and returns an instance of `TiedEmbeddingTransformerDecoder`.
"""
return lora_qwen2(
lora_attn_modules=lora_attn_modules,
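
To illustrate the narrowed return types above, a hedged sketch of calling the plain builders. It assumes TiedEmbeddingTransformerDecoder is importable from torchtune.modules as in this commit's codebase; later releases may differ.

from torchtune.models.qwen2 import qwen2_0_5b
from torchtune.modules import TiedEmbeddingTransformerDecoder

# The 0.5B and 1.5B builders tie the input and output embeddings (tie_word_embeddings),
# so they return the tied-embedding decoder described in the docstring notes above.
model = qwen2_0_5b()
assert isinstance(model, TiedEmbeddingTransformerDecoder)
# qwen2_7b() keeps a separate output projection and returns a plain TransformerDecoder.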
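
Similarly, a sketch of the LoRA builders using only parameters visible in the signatures above; the q_proj/v_proj module names are illustrative LORA_ATTN_MODULES values, not prescribed by this commit.

from torchtune.models.qwen2 import lora_qwen2_0_5b

# Apply rank-8 LoRA to the query and value projections of the 0.5B model.
lora_model = lora_qwen2_0_5b(
    lora_attn_modules=["q_proj", "v_proj"],
    lora_rank=8,
    lora_alpha=16,
    lora_dropout=0.05,
)
# As with qwen2_0_5b(), the result is a TiedEmbeddingTransformerDecoder with LoRA
# adapters injected into the selected attention projections.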
