diff --git a/model/model.py b/model/model.py
index ec7c385..5d249db 100755
--- a/model/model.py
+++ b/model/model.py
@@ -48,7 +48,7 @@ def __init__(
     *,
     dim: Union[int, Tuple[int, int]] = 512,  # hidden size of Transformer
     max_seq_len = 9600,                      # max sequence length
-    flash_attn = True,                       # wether to use flash attention
+    flash_attn = True,                       # whether to use flash attention
     attn_depth = 12,                         # number of layers
     attn_dim_head = 64,                      # dim for each head
     attn_heads = 16,                         # number of heads
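
For context, a minimal sketch of how these keyword-only constructor arguments might be passed. The diff only shows the `__init__` signature, so the class name `Model` and the `model.model` import path are assumptions here, not confirmed by the patch:

# hypothetical usage sketch; class name and import path are assumed
from model.model import Model

model = Model(
    dim=512,           # hidden size of Transformer
    max_seq_len=9600,  # max sequence length
    flash_attn=True,   # whether to use flash attention
    attn_depth=12,     # number of layers
    attn_dim_head=64,  # dim for each head
    attn_heads=16,     # number of heads
)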