[feat] changes from make-fix-copies to keep consistency
Lorenzobattistela committed Jul 20, 2023
1 parent 234e2be commit 56e3e9e
Showing 3 changed files with 49 additions and 47 deletions.
src/transformers/models/conditional_detr/modeling_conditional_detr.py
@@ -555,16 +555,16 @@ def __init__(
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()

- def with_pos_embed(self, tensor: torch.Tensor, position_embeddings: Optional[Tensor]):
- return tensor if position_embeddings is None else tensor + position_embeddings
+ def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
+ return tensor if object_queries is None else tensor + object_queries

def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
- position_embeddings: Optional[torch.Tensor] = None,
+ object_queries: Optional[torch.Tensor] = None,
key_value_states: Optional[torch.Tensor] = None,
- key_value_position_embeddings: Optional[torch.Tensor] = None,
+ spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
@@ -575,14 +575,14 @@ def forward(
batch_size, target_len, embed_dim = hidden_states.size()

# add position embeddings to the hidden states before projecting to queries and keys
- if position_embeddings is not None:
+ if object_queries is not None:
hidden_states_original = hidden_states
- hidden_states = self.with_pos_embed(hidden_states, position_embeddings)
+ hidden_states = self.with_pos_embed(hidden_states, object_queries)

# add key-value position embeddings to the key value states
- if key_value_position_embeddings is not None:
+ if spatial_position_embeddings is not None:
key_value_states_original = key_value_states
- key_value_states = self.with_pos_embed(key_value_states, key_value_position_embeddings)
+ key_value_states = self.with_pos_embed(key_value_states, spatial_position_embeddings)

# get query proj
query_states = self.q_proj(hidden_states) * self.scaling
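This hunk ends right at the query projection. For readers tracking the rename, here is a minimal, self-contained sketch of where the two renamed inputs end up in a DETR-style attention block; the module sizes are hypothetical, and multi-head reshaping, masking, and dropout are omitted, so it illustrates the pattern rather than the library implementation:

```python
import torch
import torch.nn as nn

embed_dim = 8
q_proj, k_proj, v_proj = (nn.Linear(embed_dim, embed_dim) for _ in range(3))
scaling = embed_dim ** -0.5  # the real layer scales by head_dim ** -0.5

hidden_states = torch.randn(1, 10, embed_dim)               # e.g. decoder-side states
object_queries = torch.randn(1, 10, embed_dim)              # formerly `position_embeddings`
key_value_states = torch.randn(1, 6, embed_dim)             # e.g. encoder memory
spatial_position_embeddings = torch.randn(1, 6, embed_dim)  # formerly `key_value_position_embeddings`

# Embeddings are folded in *before* the projections, mirroring `with_pos_embed` above.
query_states = q_proj(hidden_states + object_queries) * scaling
key_states = k_proj(key_value_states + spatial_position_embeddings)
# Values are projected from the original, un-embedded states.
value_states = v_proj(key_value_states)

attn_weights = torch.softmax(query_states @ key_states.transpose(-1, -2), dim=-1)
attn_output = attn_weights @ value_states  # (1, 10, embed_dim)
```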
@@ -790,7 +790,7 @@ def forward(
self,
hidden_states: torch.Tensor,
attention_mask: torch.Tensor,
- position_embeddings: torch.Tensor = None,
+ object_queries: torch.Tensor = None,
output_attentions: bool = False,
):
"""
@@ -799,7 +799,8 @@ def forward(
attention_mask (`torch.FloatTensor`): attention mask of size
`(batch, 1, target_len, source_len)` where padding elements are indicated by very large negative
values.
- position_embeddings (`torch.FloatTensor`, *optional*): position embeddings, to be added to hidden_states.
+ object_queries (`torch.FloatTensor`, *optional*):
+ Object queries (also called content embeddings), to be added to the hidden states.
output_attentions (`bool`, *optional*):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
@@ -808,7 +809,7 @@ def forward(
hidden_states, attn_weights = self.self_attn(
hidden_states=hidden_states,
attention_mask=attention_mask,
- position_embeddings=position_embeddings,
+ object_queries=object_queries,
output_attentions=output_attentions,
)

@@ -1150,7 +1151,7 @@ class ConditionalDetrEncoder(ConditionalDetrPreTrainedModel):
Small tweak for ConditionalDETR:
- - position_embeddings are added to the forward pass.
+ - object_queries are added to the forward pass.
Args:
config: ConditionalDetrConfig
@@ -1173,7 +1174,7 @@ def forward(
self,
inputs_embeds=None,
attention_mask=None,
- position_embeddings=None,
+ object_queries=None,
output_attentions=None,
output_hidden_states=None,
return_dict=None,
@@ -1191,8 +1192,8 @@ def forward(
[What are attention masks?](../glossary#attention-mask)
- position_embeddings (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
- Position embeddings that are added to the queries and keys in each self-attention layer.
+ object_queries (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
+ Object queries that are added to the queries in each self-attention layer.
output_attentions (`bool`, *optional*):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
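The shape in this docstring ties `object_queries` to the flattened backbone features fed to the encoder. A small, self-contained illustration of that contract with hypothetical feature-map sizes; the flatten-and-permute step mirrors how DETR-style models prepare encoder inputs, simplified here:

```python
import torch

# Hypothetical backbone output and its per-pixel position encoding.
batch_size, channels, height, width = 1, 256, 25, 34
feature_map = torch.randn(batch_size, channels, height, width)
position_encoding = torch.randn(batch_size, channels, height, width)

# Both are flattened the same way before entering the encoder, so shapes stay aligned.
inputs_embeds = feature_map.flatten(2).permute(0, 2, 1)         # (1, 850, 256)
object_queries = position_encoding.flatten(2).permute(0, 2, 1)  # (1, 850, 256)
assert inputs_embeds.shape == object_queries.shape
```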
@@ -1232,11 +1233,11 @@ def forward(
if to_drop:
layer_outputs = (None, None)
else:
- # we add position_embeddings as extra input to the encoder_layer
+ # we add object_queries as extra input to the encoder_layer
layer_outputs = encoder_layer(
hidden_states,
attention_mask,
- position_embeddings=position_embeddings,
+ object_queries=object_queries,
output_attentions=output_attentions,
)
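The loop in this hunk is where the renamed keyword reaches each encoder layer, guarded by LayerDrop. A simplified, self-contained sketch of that control flow, with a stub standing in for the real encoder layer:

```python
import random
import torch

def stub_encoder_layer(hidden_states, attention_mask, object_queries=None, output_attentions=False):
    # Stand-in for the real layer: just applies the positional offset so the data flow is visible.
    out = hidden_states if object_queries is None else hidden_states + object_queries
    return (out, None)

def run_encoder(hidden_states, attention_mask=None, object_queries=None,
                num_layers=6, layerdrop=0.0, training=False):
    for _ in range(num_layers):
        # During training a layer is skipped with probability `layerdrop`, as in the diff.
        if training and random.uniform(0, 1) < layerdrop:
            layer_outputs = (None, None)
        else:
            layer_outputs = stub_encoder_layer(
                hidden_states,
                attention_mask,
                object_queries=object_queries,  # renamed keyword, formerly `position_embeddings`
                output_attentions=False,
            )
            hidden_states = layer_outputs[0]
    return hidden_states

out = run_encoder(torch.zeros(1, 4, 8), object_queries=torch.randn(1, 4, 8),
                  training=True, layerdrop=0.1)
```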

28 changes: 14 additions & 14 deletions src/transformers/models/maskformer/modeling_maskformer.py
@@ -437,16 +437,16 @@ def __init__(
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()

- def with_pos_embed(self, tensor: torch.Tensor, position_embeddings: Optional[Tensor]):
- return tensor if position_embeddings is None else tensor + position_embeddings
+ def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
+ return tensor if object_queries is None else tensor + object_queries

def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
- position_embeddings: Optional[torch.Tensor] = None,
+ object_queries: Optional[torch.Tensor] = None,
key_value_states: Optional[torch.Tensor] = None,
- key_value_position_embeddings: Optional[torch.Tensor] = None,
+ spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
@@ -457,14 +457,14 @@ def forward(
batch_size, target_len, embed_dim = hidden_states.size()

# add position embeddings to the hidden states before projecting to queries and keys
- if position_embeddings is not None:
+ if object_queries is not None:
hidden_states_original = hidden_states
- hidden_states = self.with_pos_embed(hidden_states, position_embeddings)
+ hidden_states = self.with_pos_embed(hidden_states, object_queries)

# add key-value position embeddings to the key value states
- if key_value_position_embeddings is not None:
+ if spatial_position_embeddings is not None:
key_value_states_original = key_value_states
- key_value_states = self.with_pos_embed(key_value_states, key_value_position_embeddings)
+ key_value_states = self.with_pos_embed(key_value_states, spatial_position_embeddings)

# get query proj
query_states = self.q_proj(hidden_states) * self.scaling
@@ -563,7 +563,7 @@ def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
- position_embeddings: Optional[torch.Tensor] = None,
+ object_queries: Optional[torch.Tensor] = None,
query_position_embeddings: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.Tensor] = None,
@@ -575,8 +575,8 @@ def forward(
attention_mask (`torch.FloatTensor`): attention mask of size
`(batch, 1, target_len, source_len)` where padding elements are indicated by very large negative
values.
- position_embeddings (`torch.FloatTensor`, *optional*):
- position embeddings that are added to the queries and keys
+ object_queries (`torch.FloatTensor`, *optional*):
+ object_queries that are added to the hidden states
in the cross-attention layer.
query_position_embeddings (`torch.FloatTensor`, *optional*):
position embeddings that are added to the queries and keys
@@ -595,7 +595,7 @@ def forward(
# Self Attention
hidden_states, self_attn_weights = self.self_attn(
hidden_states=hidden_states,
- position_embeddings=query_position_embeddings,
+ object_queries=query_position_embeddings,
attention_mask=attention_mask,
output_attentions=output_attentions,
)
@@ -611,10 +611,10 @@ def forward(

hidden_states, cross_attn_weights = self.encoder_attn(
hidden_states=hidden_states,
- position_embeddings=query_position_embeddings,
+ object_queries=object_queries,
key_value_states=encoder_hidden_states,
attention_mask=encoder_attention_mask,
- key_value_position_embeddings=position_embeddings,
+ spatial_position_embeddings=query_position_embeddings,
output_attentions=output_attentions,
)
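The last two hunks cover the decoder layer's self-attention and cross-attention calls. As a grounding example for the self-attention step only, here is a compact stand-in that uses torch's built-in `nn.MultiheadAttention` rather than the model's own attention class; sizes are hypothetical and dropout is omitted:

```python
import torch
import torch.nn as nn

num_queries, embed_dim = 100, 8
# In DETR-style decoders the queries start from zeros and the learned per-query
# position embeddings are added on top; after this commit they travel through the
# renamed `object_queries` keyword of self-attention.
hidden_states = torch.zeros(1, num_queries, embed_dim)
query_position_embeddings = torch.randn(1, num_queries, embed_dim)

self_attn = nn.MultiheadAttention(embed_dim, num_heads=2, batch_first=True)
layer_norm = nn.LayerNorm(embed_dim)

residual = hidden_states
q = k = hidden_states + query_position_embeddings  # values stay un-embedded
attn_output, _ = self_attn(q, k, hidden_states)
hidden_states = layer_norm(residual + attn_output)
```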

src/transformers/models/table_transformer/modeling_table_transformer.py
@@ -461,16 +461,16 @@ def __init__(
def _shape(self, tensor: torch.Tensor, seq_len: int, batch_size: int):
return tensor.view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()

- def with_pos_embed(self, tensor: torch.Tensor, position_embeddings: Optional[Tensor]):
- return tensor if position_embeddings is None else tensor + position_embeddings
+ def with_pos_embed(self, tensor: torch.Tensor, object_queries: Optional[Tensor]):
+ return tensor if object_queries is None else tensor + object_queries

def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
- position_embeddings: Optional[torch.Tensor] = None,
+ object_queries: Optional[torch.Tensor] = None,
key_value_states: Optional[torch.Tensor] = None,
- key_value_position_embeddings: Optional[torch.Tensor] = None,
+ spatial_position_embeddings: Optional[torch.Tensor] = None,
output_attentions: bool = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
@@ -481,14 +481,14 @@ def forward(
batch_size, target_len, embed_dim = hidden_states.size()

# add position embeddings to the hidden states before projecting to queries and keys
- if position_embeddings is not None:
+ if object_queries is not None:
hidden_states_original = hidden_states
- hidden_states = self.with_pos_embed(hidden_states, position_embeddings)
+ hidden_states = self.with_pos_embed(hidden_states, object_queries)

# add key-value position embeddings to the key value states
- if key_value_position_embeddings is not None:
+ if spatial_position_embeddings is not None:
key_value_states_original = key_value_states
- key_value_states = self.with_pos_embed(key_value_states, key_value_position_embeddings)
+ key_value_states = self.with_pos_embed(key_value_states, spatial_position_embeddings)

# get query proj
query_states = self.q_proj(hidden_states) * self.scaling
@@ -962,7 +962,7 @@ class TableTransformerDecoder(TableTransformerPreTrainedModel):
Some small tweaks for TABLE_TRANSFORMER:
- - position_embeddings and query_position_embeddings are added to the forward pass.
+ - object_queries and query_position_embeddings are added to the forward pass.
- if self.config.auxiliary_loss is set to True, also returns a stack of activations from all decoding layers.
Args:
@@ -988,7 +988,7 @@ def forward(
attention_mask=None,
encoder_hidden_states=None,
encoder_attention_mask=None,
- position_embeddings=None,
+ object_queries=None,
query_position_embeddings=None,
output_attentions=None,
output_hidden_states=None,
@@ -1016,10 +1016,11 @@ def forward(
- 1 for pixels that are real (i.e. **not masked**),
- 0 for pixels that are padding (i.e. **masked**).
- position_embeddings (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
- Position embeddings that are added to the queries and keys in each cross-attention layer.
+ object_queries (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
+ Object queries that are added to the queries and keys in each cross-attention layer.
query_position_embeddings (`torch.FloatTensor` of shape `(batch_size, num_queries, hidden_size)`):
- , *optional*): Position embeddings that are added to the queries and keys in each self-attention layer.
+ , *optional*): Position embeddings that are added to the values and keys in each self-attention layer.
output_attentions (`bool`, *optional*):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
@@ -1091,7 +1092,7 @@ def custom_forward(*inputs):
layer_outputs = decoder_layer(
hidden_states,
attention_mask=combined_attention_mask,
- position_embeddings=position_embeddings,
+ object_queries=object_queries,
query_position_embeddings=query_position_embeddings,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
@@ -1150,8 +1151,8 @@ def __init__(self, config: TableTransformerConfig):

# Create backbone + positional encoding
backbone = TableTransformerConvEncoder(config)
- position_embeddings = build_position_encoding(config)
- self.backbone = TableTransformerConvModel(backbone, position_embeddings)
+ object_queries = build_position_encoding(config)
+ self.backbone = TableTransformerConvModel(backbone, object_queries)

# Create projection layer
self.input_projection = nn.Conv2d(backbone.intermediate_channel_sizes[-1], config.d_model, kernel_size=1)
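`build_position_encoding(config)` returns the per-pixel embedding module that this commit now stores under the name `object_queries`. As a rough, self-contained sketch of what such a module produces, here is a simplified sine/cosine variant; the library version also handles normalization, a scale factor, and a learned alternative:

```python
import torch

def sine_position_embeddings(mask: torch.Tensor, num_pos_feats: int = 64, temperature: int = 10000):
    # `mask` is (batch, height, width) with 1 for valid pixels;
    # output is (batch, 2 * num_pos_feats, height, width).
    y_embed = mask.cumsum(1, dtype=torch.float32)
    x_embed = mask.cumsum(2, dtype=torch.float32)
    dim_t = torch.arange(num_pos_feats, dtype=torch.float32)
    dim_t = temperature ** (2 * torch.div(dim_t, 2, rounding_mode="floor") / num_pos_feats)
    pos_x = x_embed[:, :, :, None] / dim_t
    pos_y = y_embed[:, :, :, None] / dim_t
    pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3)
    pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3)
    return torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)

# Hypothetical usage on an 8x8 feature map with no padding:
pos = sine_position_embeddings(torch.ones(2, 8, 8))
print(pos.shape)  # torch.Size([2, 128, 8, 8])
```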
