Add dropout to bottleneck adapters (#667)
Closes #414.
calpt committed Apr 8, 2024
1 parent f38ce85 commit 07370a6
Showing 2 changed files with 6 additions and 2 deletions.
2 changes: 2 additions & 0 deletions src/adapters/configuration/adapter_config.py
@@ -182,6 +182,7 @@ class BnConfig(AdapterConfig):
             model. Defaults to False.
         leave_out (:obj:`List[int]`, optional):
             The IDs of the layers (starting at 0) where NO adapter modules should be added.
+        dropout (:obj:`float`, optional): The dropout rate used in the adapter layer. Defaults to 0.0.
         phm_layer (:obj:`bool`, optional): If True the down and up projection layers are a PHMLayer.
             Defaults to False
         phm_dim (:obj:`int`, optional): The dimension of the phm matrix.
@@ -234,6 +235,7 @@ class BnConfig(AdapterConfig):
     inv_adapter_reduction_factor: Optional[float] = None
     cross_adapter: bool = False
     leave_out: List[int] = field(default_factory=list)
+    dropout: float = 0.0
     phm_layer: bool = False
     phm_dim: int = 4
     factorized_phm_W: Optional[bool] = True
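For context, here is a minimal sketch of how the new option might be used once this config change is in place. The adapter name, the `reduction_factor`/`non_linearity` values, and the commented `add_adapter` call are illustrative assumptions, not part of this commit.

# Hypothetical usage sketch (assumptions, not part of this commit):
# constructing a bottleneck adapter config with the new `dropout` field.
from adapters import BnConfig

config = BnConfig(
    mh_adapter=True,       # adapters after the multi-head attention block
    output_adapter=True,   # adapters after the feed-forward block
    reduction_factor=16,   # bottleneck size = hidden_size / 16
    non_linearity="relu",
    dropout=0.1,           # new: dropout applied inside the adapter layer
)
# The config would then be registered as usual, e.g. (assumed API):
# model.add_adapter("bottleneck_with_dropout", config=config)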
6 changes: 4 additions & 2 deletions src/adapters/methods/modeling.py
@@ -108,6 +108,8 @@ def __init__(
         if self.use_gating:
             self.gate = nn.Linear(self.input_size, 1)

+        self.dropout = nn.Dropout(p=config["dropout"])
+
         # if we want to initialize with the bert strategy then this function is called for all the linear layers
         if config["init_weights"] == "bert":
             self.adapter_down.apply(self.init_bert_weights)
@@ -173,7 +175,7 @@ def forward(self, x, residual_input, output_gating=False):

         up = self.adapter_up(down)
         up = up * self.scaling
-        output = up
+        output = self.dropout(up)

         if self.use_gating:
             # x.shape = (batch_size, seq_len, hidden_size)
@@ -271,7 +273,7 @@ def forward(self, x, residual_input, output_gating=False):
         up = self.adapter_up(down)
         up = up * self.scaling

-        output = up
+        output = self.dropout(up)

         if self.use_gating:
             # x.shape = (batch_size, seq_len, hidden_size)
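Taken together, the change places dropout on the adapter's scaled up-projection output, before gating and the residual connection. The following simplified module is a sketch of that data flow only; the class name, the ReLU non-linearity, and the residual handling are assumptions, and gating is omitted. The real implementation lives in src/adapters/methods/modeling.py.

# Simplified, illustrative sketch of a bottleneck adapter forward pass with dropout.
import torch
import torch.nn as nn


class SimpleBottleneckAdapter(nn.Module):
    def __init__(self, hidden_size: int, reduction_factor: int = 16,
                 dropout: float = 0.0, scaling: float = 1.0):
        super().__init__()
        bottleneck = hidden_size // reduction_factor
        self.adapter_down = nn.Linear(hidden_size, bottleneck)
        self.non_linearity = nn.ReLU()
        self.adapter_up = nn.Linear(bottleneck, hidden_size)
        self.scaling = scaling
        # Mirrors the new line: self.dropout = nn.Dropout(p=config["dropout"])
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x: torch.Tensor, residual_input: torch.Tensor) -> torch.Tensor:
        down = self.non_linearity(self.adapter_down(x))
        up = self.adapter_up(down)
        up = up * self.scaling
        output = self.dropout(up)        # dropout applied to the scaled up-projection
        return output + residual_input   # residual connection (simplified; gating omitted)


# Example: dropout only takes effect in training mode.
adapter = SimpleBottleneckAdapter(hidden_size=768, dropout=0.1)
hidden_states = torch.randn(2, 5, 768)
out = adapter(hidden_states, residual_input=hidden_states)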
