[MT5Config] add relative_attention_max_distance in config (#16170)
Authored by patil-suraj, committed by FrancescoSaverioZuppichini on Mar 17, 2022
Parent: 9689a81 · Commit: f5f8086
Showing 1 changed file with 4 additions and 0 deletions.
src/transformers/models/mt5/configuration_mt5.py: 4 additions, 0 deletions

@@ -50,6 +50,8 @@ class MT5Config(PretrainedConfig):
             Number of attention heads for each attention layer in the Transformer encoder.
         relative_attention_num_buckets (`int`, *optional*, defaults to 32):
             The number of buckets to use for each attention layer.
+        relative_attention_max_distance (`int`, *optional*, defaults to 128):
+            The maximum distance of the longer sequences for the bucket separation.
         dropout_rate (`float`, *optional*, defaults to 0.1):
             The ratio for all dropout layers.
         layer_norm_eps (`float`, *optional*, defaults to 1e-6):
@@ -75,6 +77,7 @@ def __init__(
         num_decoder_layers=None,
         num_heads=6,
         relative_attention_num_buckets=32,
+        relative_attention_max_distance=128,
         dropout_rate=0.1,
         layer_norm_epsilon=1e-6,
         initializer_factor=1.0,
@@ -107,6 +110,7 @@
         ) # default = symmetry
         self.num_heads = num_heads
         self.relative_attention_num_buckets = relative_attention_num_buckets
+        self.relative_attention_max_distance = relative_attention_max_distance
         self.dropout_rate = dropout_rate
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_factor = initializer_factor
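
For context, a minimal sketch of how the new argument can be passed once a transformers build including this commit is installed (the values shown are simply the documented defaults, not a recommendation):

from transformers import MT5Config

# relative_attention_max_distance caps the relative distance used when
# bucketing positions for the relative attention bias; 128 is the default
# documented in the docstring added above.
config = MT5Config(
    relative_attention_num_buckets=32,    # existing argument, default 32
    relative_attention_max_distance=128,  # new argument added by this commit
)
print(config.relative_attention_max_distance)  # 128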
