pytorch · parmeet · Nov 24, 2021 · Nov 24, 2021
diff --git a/torchtext/models/roberta/model.py b/torchtext/models/roberta/model.py
@@ -10,6 +10,7 @@
 
 from .modules import (
  TransformerEncoder,
+ ProjectionLayer,
 )
 import logging
 logger = logging.getLogger(__name__)
@@ -25,6 +26,8 @@ class RobertaEncoderConf:
  num_attention_heads: int = 12
  num_encoder_layers: int = 12
  dropout: float = 0.1
+ projection_dim: Optional[int] = None
+ projection_dropout: Optional[float] = None
  scaling: Optional[float] = None
  normalize_before: bool = False
 
@@ -40,6 +43,8 @@ def __init__(
  num_attention_heads: int,
  num_encoder_layers: int,
  dropout: float = 0.1,
+ projection_dim: Optional[int] = None,
+ projection_dropout: Optional[float] = None,
  scaling: Optional[float] = None,
  normalize_before: bool = False,
  ):
@@ -62,6 +67,10 @@ def __init__(
  return_all_layers=False,
  )
 
+ self.project = nn.Identity()
+ if projection_dim is not None:
+ self.project = ProjectionLayer(embed_dim=embedding_dim, projection_dim=projection_dim, dropout=projection_dropout)
+
  @classmethod
  def from_config(cls, config: RobertaEncoderConf):  # Alternate constructor: build an instance of cls from a config object.
  return cls(**asdict(config))  # the mapping returned by asdict(config) is unpacked as keyword arguments to cls(...); NOTE(review): presumably dataclasses.asdict -- its import is not visible in this diff
@@ -73,6 +82,9 @@ def forward(self, tokens: Tensor, mask: Optional[Tensor] = None) -> Tensor:
  output = output.transpose(1, 0)
  if mask is not None:
  output = output[mask.to(torch.bool), :]
+
+ output = self.project(output)
+
  return output
 
 

diff --git a/torchtext/models/roberta/modules.py b/torchtext/models/roberta/modules.py
@@ -31,6 +31,27 @@ def _make_positions(self, tensor, pad_index: int):
  return torch.cumsum(masked, dim=1) * masked + pad_index
 
 
+class ProjectionLayer(Module):  # Linear projection -> LayerNorm -> optional dropout, applied in that order by forward().
+ def __init__(self,  # Only constructs the three sub-layers; nothing is computed here.
+ embed_dim: int,  # input feature size handed to nn.Linear
+ projection_dim: int,  # output feature size of nn.Linear, and the shape nn.LayerNorm normalizes over
+ dropout: Optional[float] = None) -> None:  # forwarded to nn.Dropout when given; None (the default) disables dropout
+ super().__init__()
+ # Sub-layers are created in the same order forward() applies them.
+ self.projection_layer = nn.Linear(embed_dim, projection_dim)
+ self.norm_layer = nn.LayerNorm(projection_dim)
+ if dropout is not None:  # explicit None test, so dropout=0.0 still creates an nn.Dropout module
+ self.dropout_layer = nn.Dropout(dropout)
+ else:
+ self.dropout_layer = nn.Identity()  # pass-through stand-in so forward() needs no branch
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:  # NOTE(review): presumably x's last dimension equals embed_dim -- confirm against callers
+ x = self.projection_layer(x)  # project the input features with the linear layer
+ x = self.norm_layer(x)  # layer-normalize the projected features
+ x = self.dropout_layer(x)  # nn.Dropout, or nn.Identity when no dropout was configured
+ return x
+
+
 class ResidualMLP(Module):
  def __init__(
  self,