# gelu.py
from typing import Literal
import torch
import torch.nn as nn


class GeLU(nn.Module):
"""
Implements the Gaussian Error Linear Unit (GeLU) activation function.
Args:
approximate (Literal["none", "tanh"], optional): Specifies whether to use the exact
GeLU function or an approximation using tanh. Default is "none".
Methods:
forward(x: torch.Tensor) -> torch.Tensor:
Applies the GeLU activation function to the input tensor.
"""

    def __init__(self, approximate: Literal["none", "tanh"] = "none") -> None:
        # Call the parent constructor before assigning attributes, per nn.Module convention.
        super().__init__()
        self.approximate = approximate

    def forward(self, x: torch.Tensor) -> torch.Tensor:
"""
Applies the GeLU activation function to the input tensor.
Args:
x (torch.Tensor): Input tensor.
Returns:
torch.Tensor: Output tensor after applying GeLU activation.
"""
if self.approximate == "tanh":
return (
0.5
* x
* (1 + torch.tanh(((2 / torch.pi) ** 0.5) * (x + 0.044715 * (x**3))))
)
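        # Exact form: x * Phi(x), where Phi is the standard normal CDF,
        # computed here as 0.5 * (1 + erf(x / sqrt(2))).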
return x * 0.5 * (1 + torch.erf(x / (2**0.5)))
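

# --- Minimal usage sketch (not part of the original module) ---
# A hedged example of how this GeLU layer might be exercised. It assumes a
# reasonably recent PyTorch (>= 1.12), where torch.nn.functional.gelu accepts
# an `approximate` argument, and compares both variants against it.
if __name__ == "__main__":
    import torch.nn.functional as F

    x = torch.randn(4, 8)
    for mode in ("none", "tanh"):
        custom = GeLU(approximate=mode)(x)
        reference = F.gelu(x, approximate=mode)
        # Both results should agree to floating-point tolerance.
        print(mode, torch.allclose(custom, reference, atol=1e-6))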