add alpha scaling to lora #8248

Merged
13 commits merged on Feb 25, 2024
@@ -96,6 +96,7 @@ model:
lora_tuning:
target_modules: ['attention_qkv'] # this can either be 'attention_qkv','attention_dense','mlp_fc1','mlp_fc2', attention (qkv & dense), mlp (fc1 & fc2)
adapter_dim: 32
alpha: ${model.peft.lora_tuning.adapter_dim}
adapter_dropout: 0.0
column_init_method: 'xavier' # IGNORED if linear_adapter is used, options: xavier, zero or normal
row_init_method: 'zero' # IGNORED if linear_adapter is used, options: xavier, zero or normal
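The default above uses OmegaConf interpolation so that `alpha` falls back to `adapter_dim`, which keeps the effective LoRA scale at 1. A minimal sketch of how that interpolation resolves, using a hypothetical trimmed-down config rather than the full NeMo config tree:

```python
from omegaconf import OmegaConf

# Hypothetical, trimmed-down config mirroring the lora_tuning section above.
cfg = OmegaConf.create(
    {
        "model": {
            "peft": {
                "lora_tuning": {
                    "adapter_dim": 32,
                    # Same interpolation as in the diff: alpha defaults to adapter_dim.
                    "alpha": "${model.peft.lora_tuning.adapter_dim}",
                }
            }
        }
    }
)

lora = cfg.model.peft.lora_tuning
print(lora.alpha)                     # 32
print(lora.alpha / lora.adapter_dim)  # 1.0 -> default scaling is a no-op
```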
@@ -139,6 +139,7 @@ def __init__(
input_is_parallel: bool = False, # NOTE: (@ertkonuk) we need this for LoRA adapters that are applied to RowParallelLinear layers
dropout: float = 0.0,
model_parallel_config: Optional[ModelParallelConfig] = None,
alpha: float | None = None,
**kwargs,
):
super().__init__()
@@ -151,7 +152,9 @@ def __init__(
self.activation = activation_registry[activation]()
self.norm_position = norm_position
self.dim = dim
self.alpha = alpha if alpha is not None else self.dim
self.input_is_parallel = input_is_parallel

# megatron_gpt_peft_models will provide this arg, but deprecated ones do not.
# in case this arg is not provided, use the dummy default config.
if model_parallel_config is None:
@@ -274,6 +277,8 @@ def forward(self, x):
if self.dropout is not None:
x = self.dropout(x)

x = x * (self.alpha / self.dim)

return x
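With the change above, the adapter output is multiplied by alpha / dim, i.e. the alpha / r scaling from the LoRA paper; when alpha is left at its default (equal to adapter_dim) the multiplication is a no-op. A simplified, non-parallel sketch of that scaling, with hypothetical names and plain torch.nn.Linear standing in for the megatron parallel layers:

```python
import torch
import torch.nn as nn

class TinyLoRAAdapter(nn.Module):
    """Minimal stand-in for ParallelLinearAdapter, illustrating only the alpha scaling."""

    def __init__(self, in_features: int, out_features: int, dim: int = 32, alpha: float | None = None):
        super().__init__()
        self.dim = dim
        # Same default as the diff: alpha falls back to the adapter rank (dim).
        self.alpha = alpha if alpha is not None else dim
        self.linear_in = nn.Linear(in_features, dim, bias=False)
        self.linear_out = nn.Linear(dim, out_features, bias=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.linear_in(x)
        x = self.linear_out(x)
        # The scaling added in this PR: alpha / r, a no-op when alpha == dim.
        return x * (self.alpha / self.dim)

x = torch.randn(4, 16)
default = TinyLoRAAdapter(16, 16, dim=8)            # scale = 8 / 8 = 1
doubled = TinyLoRAAdapter(16, 16, dim=8, alpha=16)  # scale = 16 / 8 = 2
```

In the real adapter the scaling is applied after dropout, so it only affects the low-rank path, never the frozen base weights.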


@@ -290,6 +295,7 @@ class ParallelLinearAdapterConfig(AdapterConfig):
gather_output: bool = True
input_is_parallel: bool = False
dropout: float = 0.0
alpha: float | None = None
network_alpha: int | None = None
_target_: str = "{0}.{1}".format(ParallelLinearAdapter.__module__, ParallelLinearAdapter.__name__)

1 change: 1 addition & 0 deletions nemo/collections/nlp/parts/peft_config.py
@@ -182,6 +182,7 @@ def _create_lora_config(self, cfg, lora_cfg, in_features, out_features, adapter_
"row_init_method": lora_cfg.get("row_init_method", "zero"),
"gather_output": False,
"dropout": lora_cfg.adapter_dropout,
"alpha": lora_cfg.get("alpha", lora_cfg.adapter_dim),
}

if lora_cfg.weight_tying:
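The `lora_cfg.get("alpha", lora_cfg.adapter_dim)` lookup above is where a user-provided alpha (or the adapter_dim fallback) gets threaded into the adapter constructor. A hedged sketch of that wiring, using a hypothetical build_adapter_kwargs helper in place of the real _create_lora_config and adapter config classes:

```python
from omegaconf import OmegaConf

def build_adapter_kwargs(lora_cfg, in_features: int, out_features: int) -> dict:
    """Hypothetical helper mirroring how _create_lora_config handles alpha."""
    return {
        "in_features": in_features,
        "out_features": out_features,
        "dim": lora_cfg.adapter_dim,
        "dropout": lora_cfg.adapter_dropout,
        # Same fallback as the diff: a missing alpha means alpha == adapter_dim.
        "alpha": lora_cfg.get("alpha", lora_cfg.adapter_dim),
    }

lora_cfg = OmegaConf.create({"adapter_dim": 32, "adapter_dropout": 0.0})
print(build_adapter_kwargs(lora_cfg, 1024, 1024)["alpha"])  # 32 (fallback)

lora_cfg = OmegaConf.create({"adapter_dim": 32, "adapter_dropout": 0.0, "alpha": 64})
print(build_adapter_kwargs(lora_cfg, 1024, 1024)["alpha"])  # 64 (explicit override)
```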