
Commit fecd3e1
Use tanh approximation in gelu (#1590)
Co-authored-by: Bartlomiej Gawrych <barlomiej.gawrych@intel.com>
bgawrych and Bartlomiej Gawrych authored Dec 16, 2022
1 parent ce0e0a2 commit fecd3e1
Showing 1 changed file with 6 additions and 2 deletions.
src/gluonnlp/layers.py (8 changes: 6 additions & 2 deletions)
@@ -32,6 +32,7 @@
 
 InitializerType = Optional[Union[mx.init.Initializer, str]]
 
+GELU_TANH_SUPPORT = 'gelu_tanh' in mx.symbol.LeakyReLU.__doc__
 
 @use_np
 def get_norm_layer(normalization: str = 'layer_norm',
@@ -322,8 +323,11 @@ def forward(self, x):
         if self._mode == 'erf':
             return npx.leaky_relu(x, act_type='gelu')
         elif self._mode == 'tanh':
-            return 0.5 * x\
-                * (1.0 + np.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * (x ** 3))))
+            if GELU_TANH_SUPPORT:
+                return npx.leaky_relu(x, act_type='gelu_tanh')
+            else:
+                return 0.5 * x\
+                    * (1.0 + np.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * (x ** 3))))
         elif self._mode == 'sigmoid':
             return x * npx.sigmoid(1.702 * x)
         else:
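The fallback branch computes the standard tanh approximation GELU(x) ≈ 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x**3))), used when the installed MXNet build's LeakyReLU does not advertise act_type='gelu_tanh'. Below is a minimal standalone NumPy sketch (not part of this commit; gelu_exact and gelu_tanh are illustrative names) comparing the approximation with the exact erf-based GELU that the 'erf' mode above delegates to MXNet:

    import math

    import numpy as np

    def gelu_exact(x):
        # Exact GELU: x * Phi(x), where Phi is the standard normal CDF (erf-based).
        return 0.5 * x * (1.0 + np.vectorize(math.erf)(x / math.sqrt(2.0)))

    def gelu_tanh(x):
        # Tanh approximation computed by the Python fallback in the diff above.
        return 0.5 * x * (1.0 + np.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * x ** 3)))

    x = np.linspace(-4.0, 4.0, 81)
    print(np.abs(gelu_exact(x) - gelu_tanh(x)).max())  # small, below 1e-3 over this range

When the gelu_tanh operator is available, delegating to npx.leaky_relu presumably lets MXNet evaluate the approximation in a single operator instead of composing several array ops in Python; the explicit formula is kept only as a fallback for older MXNet builds.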
