
Commit

rm ffn_dropout, default num_nodes
Signed-off-by: Jimmy Zhang <jiemingz@nvidia.com>
jiemingz committed Oct 31, 2024
1 parent b08148b commit 2e64c06
Showing 2 changed files with 7 additions and 3 deletions.
1 change: 0 additions & 1 deletion nemo/collections/llm/gpt/model/base.py
@@ -317,7 +317,6 @@ class GPTConfig175B(GPTConfig):
     num_attention_heads: int = 96
     hidden_dropout: float = 0.0
     attention_dropout: float = 0.0
-    ffn_dropout: float = 0.0
     bias_activation_fusion: bool = True
     bias_dropout_add_fusion: bool = True
     use_transformer_engine_full_layer_spec: bool = True
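Since GPTConfig175B is a dataclass that inherits from GPTConfig, a field that is not redeclared in the subclass keeps the base class default, so this override can be dropped without changing behavior, assuming ffn_dropout is still defined on the base config with the same 0.0 default. A minimal standalone sketch of that inheritance pattern (hypothetical names, not NeMo code):

```python
from dataclasses import dataclass

@dataclass
class BaseConfig:
    hidden_dropout: float = 0.0
    ffn_dropout: float = 0.0   # assumed to live on the base config

@dataclass
class Config175B(BaseConfig):
    num_attention_heads: int = 96
    hidden_dropout: float = 0.0   # redundant override, same value as the base
    # ffn_dropout is not redeclared, so BaseConfig's 0.0 default still applies

print(Config175B().ffn_dropout)  # 0.0
```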
9 changes: 7 additions & 2 deletions nemo/collections/llm/recipes/llama3_70b.py
@@ -244,7 +244,7 @@ def pretrain_performance_optimizations(recipe: run.Partial) -> run.Partial:
 def finetune_recipe(
     dir: Optional[str] = None,
     name: str = "default",
-    num_nodes: int = 1,
+    num_nodes: int = None,
     num_gpus_per_node: int = 8,
     peft_scheme: Optional[str] = 'lora',
     seq_length: Optional[int] = None,
@@ -293,11 +293,16 @@ def finetune_recipe(
     if seq_length is None:
         seq_length = 4096 if packed_sequence else 2048
 
+    if num_nodes is None:
+        if peft_scheme is None or peft_scheme.lower() == 'none':
+            num_nodes = 4
+        elif peft_scheme.lower() == 'lora':
+            num_nodes = 1
+
     recipe = default_finetune_recipe(
         model(), "meta-llama/Meta-Llama-3-70B", dir, name, num_nodes, num_gpus_per_node, packed_sequence
     )
     if peft_scheme is None or peft_scheme.lower() == 'none':
-        assert num_nodes >= 4
         recipe.trainer.strategy.tensor_model_parallel_size = 8
         recipe.trainer.strategy.pipeline_model_parallel_size = 4
         recipe.optim.config.lr = 5e-6
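Net effect: num_nodes now defaults to None and is resolved from peft_scheme before the recipe is built, giving 1 node for LoRA and 4 nodes for full fine-tuning. The 4-node default matches the parallelism set below it: tensor_model_parallel_size=8 times pipeline_model_parallel_size=4 is 32 GPUs, i.e. 4 nodes at 8 GPUs per node. An explicitly passed num_nodes is left untouched, so the old assert is no longer needed. A minimal standalone sketch of the resolution logic (hypothetical helper name, not part of the NeMo API):

```python
from typing import Optional

def resolve_num_nodes(num_nodes: Optional[int], peft_scheme: Optional[str]) -> Optional[int]:
    """Mirrors the block added to finetune_recipe (hypothetical helper)."""
    if num_nodes is None:
        if peft_scheme is None or peft_scheme.lower() == 'none':
            num_nodes = 4   # full fine-tuning: TP=8 x PP=4 = 32 GPUs = 4 nodes
        elif peft_scheme.lower() == 'lora':
            num_nodes = 1   # LoRA fine-tuning fits on a single 8-GPU node
    return num_nodes

assert resolve_num_nodes(None, 'lora') == 1
assert resolve_num_nodes(None, None) == 4
assert resolve_num_nodes(2, 'lora') == 2   # explicit values are respected
```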

