expose and set lora_dropout = 0.0 (#1492)
Co-authored-by: Felipe Mello <felipemello@fb.com>
felipemello1 and Felipe Mello authored Sep 11, 2024
1 parent 221031a commit d7fae96
Showing 48 changed files with 90 additions and 28 deletions.
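Every LoRA and QLoRA recipe config below gains the same one-line "lora_dropout: 0.0" entry under its model section, and the model builders shown further down change their lora_dropout default from 0.05 to 0.0 (the Gemma builders additionally expose the argument instead of hard-coding it). As a rough sketch of how the exposed field reaches a model through the usual torchtune config flow (the component and lora_attn_modules values here are illustrative, not taken from this commit):

    from omegaconf import OmegaConf
    from torchtune import config

    # Hypothetical model section mirroring the entry added across the configs in this commit.
    model_cfg = OmegaConf.create({
        "_component_": "torchtune.models.llama2.lora_llama2_7b",
        "lora_attn_modules": ["q_proj", "v_proj"],
        "apply_lora_to_mlp": False,
        "apply_lora_to_output": False,
        "lora_rank": 8,
        "lora_alpha": 16,
        "lora_dropout": 0.0,  # the newly exposed field; 0.0 disables dropout on the LoRA layers
    })
    model = config.instantiate(model_cfg)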
1 change: 1 addition & 0 deletions recipes/configs/code_llama2/7B_lora_single_device.yaml
@@ -23,6 +23,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

# Tokenizer
tokenizer:

1 change: 1 addition & 0 deletions recipes/configs/code_llama2/7B_qlora_single_device.yaml
@@ -23,6 +23,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

# Tokenizer
tokenizer:

1 change: 1 addition & 0 deletions recipes/configs/gemma/2B_lora.yaml
@@ -34,6 +34,7 @@ model:
apply_lora_to_mlp: True
lora_rank: 64
lora_alpha: 16
lora_dropout: 0.0

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer

1 change: 1 addition & 0 deletions recipes/configs/gemma/2B_lora_single_device.yaml
@@ -33,6 +33,7 @@ model:
apply_lora_to_mlp: True
lora_rank: 64
lora_alpha: 16
lora_dropout: 0.0

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer

1 change: 1 addition & 0 deletions recipes/configs/gemma/2B_qlora_single_device.yaml
@@ -33,6 +33,7 @@ model:
apply_lora_to_mlp: True
lora_rank: 64
lora_alpha: 16
lora_dropout: 0.0

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer

1 change: 1 addition & 0 deletions recipes/configs/gemma/7B_lora.yaml
@@ -34,6 +34,7 @@ model:
apply_lora_to_mlp: True
lora_rank: 64
lora_alpha: 16
lora_dropout: 0.0

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer

1 change: 1 addition & 0 deletions recipes/configs/gemma/7B_lora_single_device.yaml
@@ -33,6 +33,7 @@ model:
apply_lora_to_mlp: True
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer

1 change: 1 addition & 0 deletions recipes/configs/gemma/7B_qlora_single_device.yaml
@@ -33,6 +33,7 @@ model:
apply_lora_to_mlp: True
lora_rank: 64
lora_alpha: 16
lora_dropout: 0.0

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer

1 change: 1 addition & 0 deletions recipes/configs/llama2/13B_lora.yaml
@@ -27,6 +27,7 @@ model:
apply_lora_to_output: True
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer

1 change: 1 addition & 0 deletions recipes/configs/llama2/13B_qlora_single_device.yaml
@@ -23,6 +23,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.llama2.llama2_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/llama2/70B_lora.yaml
@@ -17,6 +17,7 @@ model:
apply_lora_to_output: False
lora_rank: 16
lora_alpha: 32
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.llama2.llama2_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/llama2/70B_qlora.yaml
@@ -22,6 +22,7 @@ model:
apply_lora_to_output: False
lora_rank: 16
lora_alpha: 32
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.llama2.llama2_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_lora.yaml
@@ -26,6 +26,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.llama2.llama2_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_lora_dpo.yaml
@@ -25,6 +25,7 @@ model:
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0
lora_dropout: 0.0

# Tokenizer
tokenizer:

1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_lora_dpo_single_device.yaml
@@ -24,6 +24,7 @@ model:
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0
lora_dropout: 0.0

# Tokenizer
tokenizer:

1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_lora_single_device.yaml
@@ -24,6 +24,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.llama2.llama2_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_qlora.yaml
@@ -25,6 +25,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.llama2.llama2_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/llama2/7B_qlora_single_device.yaml
@@ -23,6 +23,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.llama2.llama2_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/llama3/70B_lora.yaml
@@ -17,6 +17,7 @@ model:
apply_lora_to_output: False
lora_rank: 16
lora_alpha: 32
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/llama3/8B_lora.yaml
@@ -31,6 +31,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

checkpointer:
_component_: torchtune.training.FullModelMetaCheckpointer

1 change: 1 addition & 0 deletions recipes/configs/llama3/8B_lora_single_device.yaml
@@ -24,6 +24,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

# Tokenizer
tokenizer:

1 change: 1 addition & 0 deletions recipes/configs/llama3/8B_qlora_single_device.yaml
@@ -23,6 +23,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

# Tokenizer
tokenizer:

1 change: 1 addition & 0 deletions recipes/configs/llama3_1/70B_lora.yaml
@@ -16,6 +16,7 @@ model:
apply_lora_to_output: False
lora_rank: 16
lora_alpha: 32
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.llama3.llama3_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/llama3_1/8B_lora.yaml
@@ -31,6 +31,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer

1 change: 1 addition & 0 deletions recipes/configs/llama3_1/8B_lora_single_device.yaml
@@ -24,6 +24,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

# Tokenizer
tokenizer:

1 change: 1 addition & 0 deletions recipes/configs/llama3_1/8B_qlora_single_device.yaml
@@ -23,6 +23,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

# Tokenizer
tokenizer:

1 change: 1 addition & 0 deletions recipes/configs/mistral/7B_lora.yaml
@@ -42,6 +42,7 @@ model:
apply_lora_to_output: True
lora_rank: 64
lora_alpha: 16
lora_dropout: 0.0

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer

1 change: 1 addition & 0 deletions recipes/configs/mistral/7B_lora_single_device.yaml
@@ -39,6 +39,7 @@ model:
apply_lora_to_output: True
lora_rank: 64
lora_alpha: 16
lora_dropout: 0.0

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer

1 change: 1 addition & 0 deletions recipes/configs/mistral/7B_qlora_single_device.yaml
@@ -40,6 +40,7 @@ model:
apply_lora_to_output: False
lora_rank: 64
lora_alpha: 16
lora_dropout: 0.0

checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer

1 change: 1 addition & 0 deletions recipes/configs/phi3/mini_lora.yaml
@@ -25,6 +25,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

# Tokenizer
tokenizer:

1 change: 1 addition & 0 deletions recipes/configs/phi3/mini_lora_single_device.yaml
@@ -23,6 +23,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

# Tokenizer
tokenizer:

1 change: 1 addition & 0 deletions recipes/configs/phi3/mini_qlora_single_device.yaml
@@ -23,6 +23,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

# Tokenizer
tokenizer:

1 change: 1 addition & 0 deletions recipes/configs/qwen2/0.5B_lora.yaml
@@ -25,6 +25,7 @@ model:
apply_lora_to_mlp: False
lora_rank: 32
lora_alpha: 64
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.qwen2.qwen2_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/qwen2/0.5B_lora_single_device.yaml
@@ -23,6 +23,7 @@ model:
apply_lora_to_mlp: False
lora_rank: 32
lora_alpha: 64
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.qwen2.qwen2_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/qwen2/1.5B_lora.yaml
@@ -23,6 +23,7 @@ model:
apply_lora_to_mlp: False
lora_rank: 32
lora_alpha: 64
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.qwen2.qwen2_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/qwen2/1.5B_lora_single_device.yaml
@@ -23,6 +23,7 @@ model:
apply_lora_to_mlp: False
lora_rank: 32
lora_alpha: 64
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.qwen2.qwen2_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/qwen2/7B_lora.yaml
@@ -26,6 +26,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.qwen2.qwen2_tokenizer

1 change: 1 addition & 0 deletions recipes/configs/qwen2/7B_lora_single_device.yaml
@@ -24,6 +24,7 @@ model:
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
lora_dropout: 0.0

tokenizer:
_component_: torchtune.models.qwen2.qwen2_tokenizer

9 changes: 5 additions & 4 deletions torchtune/models/code_llama2/_model_builders.py
@@ -41,7 +41,7 @@ def lora_code_llama2_7b(
apply_lora_to_output: bool = False,
lora_rank: int = 8,
lora_alpha: float = 16,
lora_dropout: float = 0.05,
lora_dropout: float = 0.0,
use_dora: bool = False,
quantize_base: bool = False,
) -> TransformerDecoder:
@@ -62,7 +62,7 @@ def lora_code_llama2_7b(
Default: False
lora_rank (int): rank of each low-rank approximation
lora_alpha (float): scaling factor for the low-rank approximation
lora_dropout (float): dropout probability for LoRA linear layers. Default: 0.05
lora_dropout (float): dropout probability for LoRA linear layers. Default: 0.0
quantize_base (bool): Whether to quantize base model weights
Returns:
@@ -125,7 +125,7 @@ def lora_code_llama2_13b(
apply_lora_to_output: bool = False,
lora_rank: int = 8,
lora_alpha: float = 16,
lora_dropout: float = 0.05,
lora_dropout: float = 0.0,
use_dora: bool = False,
quantize_base: bool = False,
) -> TransformerDecoder:
@@ -212,7 +212,7 @@ def lora_code_llama2_70b(
apply_lora_to_output: bool = False,
lora_rank: int = 8,
lora_alpha: float = 16,
lora_dropout: float = 0.05,
lora_dropout: float = 0.0,
use_dora: bool = False,
quantize_base: bool = False,
) -> TransformerDecoder:
@@ -233,6 +233,7 @@ def lora_code_llama2_70b(
Default: False
lora_rank (int): rank of each low-rank approximation
lora_alpha (float): scaling factor for the low-rank approximation
lora_dropout (float): LoRA dropout probability. Default: 0.0
use_dora (bool): Decompose the LoRA weight into magnitude and direction, as
introduced in "DoRA: Weight-Decomposed Low-Rank Adaptation" (https://arxiv.org/abs/2402.09353).
quantize_base (bool): Whether to quantize base model weights

8 changes: 6 additions & 2 deletions torchtune/models/gemma/_model_builders.py
@@ -68,6 +68,7 @@ def lora_gemma_2b(
apply_lora_to_mlp: bool = False,
lora_rank: int = 8,
lora_alpha: float = 16,
lora_dropout: float = 0.0,
use_dora: bool = False,
quantize_base: bool = False,
) -> GemmaTransformerDecoder:
@@ -86,6 +87,7 @@ def lora_gemma_2b(
Default: False
lora_rank (int): rank of each low-rank approximation
lora_alpha (float): scaling factor for the low-rank approximation
lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
use_dora (bool): Decompose the LoRA weight into magnitude and direction, as
introduced in "DoRA: Weight-Decomposed Low-Rank Adaptation" (https://arxiv.org/abs/2402.09353).
quantize_base (bool): Whether to quantize base model weights
@@ -108,7 +110,7 @@ def lora_gemma_2b(
norm_eps=1e-6,
lora_rank=lora_rank,
lora_alpha=lora_alpha,
lora_dropout=0.05,
lora_dropout=lora_dropout,
use_dora=use_dora,
quantize_base=quantize_base,
)
@@ -150,6 +152,7 @@ def lora_gemma_7b(
apply_lora_to_mlp: bool = False,
lora_rank: int = 8,
lora_alpha: float = 16,
lora_dropout: float = 0.0,
use_dora: bool = False,
quantize_base: bool = False,
) -> GemmaTransformerDecoder:
@@ -168,6 +171,7 @@ def lora_gemma_7b(
Default: False
lora_rank (int): rank of each low-rank approximation
lora_alpha (float): scaling factor for the low-rank approximation
lora_dropout (float): dropout probability for the low-rank approximation. Default: 0.0
use_dora (bool): Decompose the LoRA weight into magnitude and direction, as
introduced in "DoRA: Weight-Decomposed Low-Rank Adaptation" (https://arxiv.org/abs/2402.09353).
quantize_base (bool): Whether to quantize base model weights
@@ -190,7 +194,7 @@ def lora_gemma_7b(
norm_eps=1e-6,
lora_rank=lora_rank,
lora_alpha=lora_alpha,
lora_dropout=0.05,
lora_dropout=lora_dropout,
use_dora=use_dora,
quantize_base=quantize_base,
)
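
For completeness, the exposed argument can also be passed directly to a builder in Python; a minimal sketch based on the lora_gemma_2b signature shown above (the choice of lora_attn_modules is illustrative):

    from torchtune.models.gemma import lora_gemma_2b

    # Dropout on the LoRA layers is now off by default; pass a nonzero
    # lora_dropout to re-enable it for extra regularization.
    model = lora_gemma_2b(
        lora_attn_modules=["q_proj", "v_proj"],
        apply_lora_to_mlp=False,
        lora_rank=8,
        lora_alpha=16,
        lora_dropout=0.0,
    )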
(Diffs for the remaining changed files are not shown.)
