Merge pull request #383 from WenjieDu/(refactor)unify_attn_opt
Enable all attention operators to work with `MultiHeadAttention`
WenjieDu authored May 6, 2024
2 parents 7b2ea9b + 4cfba42 commit dc975e5
Showing 46 changed files with 358 additions and 309 deletions.
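
The thrust of the refactoring is visible in the informer diff below: `MultiHeadAttention` now receives the attention operator as its last constructor argument, after `n_heads`, `d_model`, `d_k`, and `d_v`, so the same wrapper can host any operator. A minimal sketch of the new pattern (hyperparameter values are illustrative, and the import paths are assumed from the module layout shown in the diffs):

```python
from pypots.nn.modules.transformer import MultiHeadAttention
from pypots.nn.modules.informer import ProbAttention

d_model, n_heads, factor, dropout = 64, 4, 3, 0.1  # illustrative values

# After this PR the operator is the last argument, so ProbAttention
# (or any other attention operator) plugs straight into the wrapper.
attention = MultiHeadAttention(
    n_heads,
    d_model,
    d_model // n_heads,  # d_k
    d_model // n_heads,  # d_v
    ProbAttention(False, factor, dropout),
)
```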
5 changes: 4 additions & 1 deletion pypots/base.py
@@ -285,7 +285,10 @@ def save(
                     f"‼️ File {saving_path} exists. Argument `overwrite` is True. Overwriting now..."
                 )
             else:
-                logger.error(f"❌ File {saving_path} exists. Saving operation aborted.")
+                logger.error(
+                    f"❌ File {saving_path} exists. Saving operation aborted. "
+                    f"Use the arg `overwrite=True` to force overwrite."
+                )
 
         try:
             create_dir_if_not_exist(saving_dir)
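
The sharpened error message now tells users how to proceed. A short usage sketch (`save` and its `overwrite` argument are the ones touched in the hunk above; the path is hypothetical):

```python
# `model` is any trained PyPOTS model.
model.save("runs/model_run1.pypots")                  # aborts with the error above if the file exists
model.save("runs/model_run1.pypots", overwrite=True)  # warns, then overwrites
```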
4 changes: 2 additions & 2 deletions pypots/classification/raindrop/core.py
@@ -21,8 +21,8 @@ def __init__(
         n_features,
         n_layers,
         d_model,
-        d_ffn,
         n_heads,
+        d_ffn,
         n_classes,
         dropout=0.3,
         max_len=215,
@@ -41,8 +41,8 @@ def __init__(
             n_features,
             n_layers,
             d_model,
-            d_ffn,
             n_heads,
+            d_ffn,
             n_classes,
             dropout,
             max_len,
10 changes: 5 additions & 5 deletions pypots/classification/raindrop/model.py
@@ -43,12 +43,12 @@ class Raindrop(BaseNNClassifier):
         The dimension of the Transformer encoder backbone.
         It is the input dimension of the multi-head self-attention layers.
 
-    d_ffn :
-        The dimension of the layer in the Feed-Forward Networks (FFN).
-
     n_heads :
         The number of heads in the multi-head self-attention mechanism.
 
+    d_ffn :
+        The dimension of the layer in the Feed-Forward Networks (FFN).
+
     dropout :
         The dropout rate for all fully-connected layers in the model.
@@ -112,8 +112,8 @@ def __init__(
         n_classes,
         n_layers,
         d_model,
-        d_ffn,
         n_heads,
+        d_ffn,
         dropout,
         d_static=0,
         aggregation="mean",
@@ -147,8 +147,8 @@ def __init__(
             n_features,
             n_layers,
             d_model,
-            d_ffn,
             n_heads,
+            d_ffn,
             n_classes,
             dropout,
             n_steps,
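
Because `d_ffn` and `n_heads` swap positions here, code that passed them positionally would silently transpose the two sizes after upgrading; keyword arguments sidestep the reorder. A sketch with illustrative values, not taken from the PR:

```python
from pypots.classification import Raindrop

# Keyword arguments make the call robust to the d_ffn/n_heads reorder.
model = Raindrop(
    n_steps=48,
    n_features=37,
    n_classes=2,
    n_layers=2,
    d_model=64,
    n_heads=4,   # now comes before d_ffn
    d_ffn=128,
    dropout=0.3,
)
```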
36 changes: 11 additions & 25 deletions pypots/imputation/autoformer/core.py
@@ -8,13 +8,7 @@
 
 import torch.nn as nn
 
-from ...nn.modules.autoformer import (
-    SeasonalLayerNorm,
-    AutoformerEncoderLayer,
-    AutoCorrelation,
-    AutoCorrelationLayer,
-)
-from ...nn.modules.informer import InformerEncoder
+from ...nn.modules.autoformer import AutoformerEncoder
 from ...nn.modules.saits import SaitsLoss, SaitsEmbedding
@@ -24,8 +18,8 @@ def __init__(
         n_steps,
         n_features,
         n_layers,
-        n_heads,
         d_model,
+        n_heads,
         d_ffn,
         factor,
         moving_avg_window_size,
@@ -44,23 +38,15 @@ def __init__(
             with_pos=False,
             dropout=dropout,
         )
-        self.encoder = InformerEncoder(
-            [
-                AutoformerEncoderLayer(
-                    AutoCorrelationLayer(
-                        AutoCorrelation(factor, dropout),
-                        d_model,
-                        n_heads,
-                    ),
-                    d_model,
-                    d_ffn,
-                    moving_avg_window_size,
-                    dropout,
-                    activation,
-                )
-                for _ in range(n_layers)
-            ],
-            norm_layer=SeasonalLayerNorm(d_model),
+        self.encoder = AutoformerEncoder(
+            n_layers,
+            d_model,
+            n_heads,
+            d_ffn,
+            factor,
+            moving_avg_window_size,
+            dropout,
+            activation,
         )
 
         # for the imputation task, the output dim is the same as input dim
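
The hand-assembled stack (an `InformerEncoder` holding `AutoformerEncoderLayer`s around `AutoCorrelationLayer`/`AutoCorrelation`) collapses into a single `AutoformerEncoder`, so the imputation core only supplies hyperparameters. A hedged sketch of using the new encoder on its own — constructor arguments exactly as in the hunk above, values illustrative, and the forward signature an assumption:

```python
import torch

from pypots.nn.modules.autoformer import AutoformerEncoder

encoder = AutoformerEncoder(
    n_layers=2,
    d_model=64,
    n_heads=4,
    d_ffn=128,
    factor=3,
    moving_avg_window_size=25,
    dropout=0.1,
    activation="relu",
)

x = torch.randn(8, 24, 64)  # (batch_size, n_steps, d_model)
enc_out, attn_weights = encoder(x)  # assumed to return output plus attention maps
```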
10 changes: 5 additions & 5 deletions pypots/imputation/autoformer/model.py
@@ -37,12 +37,12 @@ class Autoformer(BaseNNImputer):
     n_layers :
         The number of layers in the Autoformer model.
 
-    n_heads :
-        The number of heads in each layer of Autoformer.
-
     d_model :
         The dimension of the model.
 
+    n_heads :
+        The number of heads in each layer of Autoformer.
+
     d_ffn :
         The dimension of the feed-forward network.
@@ -107,8 +107,8 @@ def __init__(
         n_steps: int,
         n_features: int,
         n_layers: int,
-        n_heads: int,
         d_model: int,
+        n_heads: int,
         d_ffn: int,
         factor: int,
         moving_avg_window_size: int,
@@ -152,8 +152,8 @@ def __init__(
             self.n_steps,
             self.n_features,
             self.n_layers,
-            self.n_heads,
             self.d_model,
+            self.n_heads,
             self.d_ffn,
             self.factor,
             self.moving_avg_window_size,
2 changes: 1 addition & 1 deletion pypots/imputation/crossformer/core.py
@@ -23,8 +23,8 @@ def __init__(
         n_steps,
         n_features,
         n_layers,
-        n_heads,
         d_model,
+        n_heads,
         d_ffn,
         factor,
         seg_len,
10 changes: 5 additions & 5 deletions pypots/imputation/crossformer/model.py
@@ -37,12 +37,12 @@ class Crossformer(BaseNNImputer):
     n_layers :
         The number of layers in the 1st and 2nd DMSA blocks in the SAITS model.
 
-    n_heads:
-        The number of heads in the multi-head attention mechanism.
-
     d_model :
         The dimension of the model.
 
+    n_heads:
+        The number of heads in the multi-head attention mechanism.
+
     d_ffn :
         The dimension of the feed-forward network.
@@ -110,8 +110,8 @@ def __init__(
         n_steps: int,
         n_features: int,
         n_layers: int,
-        n_heads: int,
         d_model: int,
+        n_heads: int,
         d_ffn: int,
         factor: int,
         seg_len: int,
@@ -157,8 +157,8 @@ def __init__(
             self.n_steps,
             self.n_features,
             self.n_layers,
-            self.n_heads,
             self.d_model,
+            self.n_heads,
             self.d_ffn,
             self.factor,
             self.seg_len,
4 changes: 2 additions & 2 deletions pypots/imputation/etsformer/core.py
@@ -24,8 +24,8 @@ def __init__(
         n_features,
         n_e_layers,
         n_d_layers,
-        n_heads,
         d_model,
+        n_heads,
         d_ffn,
         dropout,
         top_k,
@@ -53,7 +53,7 @@ def __init__(
             n_steps,
             n_steps,
             top_k,
-            dim_feedforward=d_ffn,
+            d_ffn=d_ffn,
             dropout=dropout,
             activation=activation,
         )
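
The second hunk is a keyword rename, `dim_feedforward` → `d_ffn`, aligning ETSformer's internals with the name used across the library. Any external caller still using the old keyword would now raise a `TypeError`; a toy stand-in class (hypothetical — only the keyword is the point):

```python
# Toy stand-in illustrating the rename's effect on keyword callers.
class FrequencyBlock:
    def __init__(self, in_steps, out_steps, top_k, d_ffn=256, dropout=0.0):
        self.d_ffn = d_ffn  # renamed from `dim_feedforward` in this PR

block = FrequencyBlock(24, 24, top_k=3, d_ffn=128)    # fine after the PR
# FrequencyBlock(24, 24, 3, dim_feedforward=128)      # would now raise TypeError
```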
10 changes: 5 additions & 5 deletions pypots/imputation/etsformer/model.py
@@ -40,12 +40,12 @@ class ETSformer(BaseNNImputer):
     n_d_layers :
         The number of layers in the ETSformer decoder.
 
-    n_heads :
-        The number of heads in each layer of ETSformer.
-
     d_model :
         The dimension of the model.
 
+    n_heads :
+        The number of heads in each layer of ETSformer.
+
     d_ffn :
         The dimension of the feed-forward network.
@@ -108,8 +108,8 @@ def __init__(
         n_features,
         n_e_layers,
         n_d_layers,
-        n_heads,
         d_model,
+        n_heads,
         d_ffn,
         top_k,
         dropout: float = 0,
@@ -153,8 +153,8 @@ def __init__(
             self.n_features,
             self.n_e_layers,
             self.n_d_layers,
-            self.n_heads,
             self.d_model,
+            self.n_heads,
             self.d_ffn,
             self.dropout,
             self.top_k,
4 changes: 2 additions & 2 deletions pypots/imputation/fedformer/core.py
@@ -18,8 +18,8 @@ def __init__(
         n_steps,
         n_features,
         n_layers,
-        n_heads,
         d_model,
+        n_heads,
         d_ffn,
         moving_avg_window_size,
         dropout,
@@ -43,8 +43,8 @@ def __init__(
         self.encoder = FEDformerEncoder(
             n_steps,
             n_layers,
-            n_heads,
             d_model,
+            n_heads,
             d_ffn,
             moving_avg_window_size,
             dropout,
4 changes: 2 additions & 2 deletions pypots/imputation/fedformer/model.py
@@ -115,8 +115,8 @@ def __init__(
         n_steps,
         n_features,
         n_layers,
-        n_heads,
         d_model,
+        n_heads,
         d_ffn,
         moving_avg_window_size,
         dropout: float = 0,
@@ -164,8 +164,8 @@ def __init__(
             self.n_steps,
             self.n_features,
             self.n_layers,
-            self.n_heads,
             self.d_model,
+            self.n_heads,
             self.d_ffn,
             self.moving_avg_window_size,
             self.dropout,
6 changes: 3 additions & 3 deletions pypots/imputation/informer/core.py
@@ -24,8 +24,8 @@ def __init__(
         n_steps,
         n_features,
         n_layers,
-        n_heads,
         d_model,
+        n_heads,
         d_ffn,
         factor,
         dropout,
@@ -47,11 +47,11 @@ def __init__(
             [
                 InformerEncoderLayer(
                     MultiHeadAttention(
-                        ProbAttention(False, factor, dropout),
+                        n_heads,
                         d_model,
-                        n_heads,
                         d_model // n_heads,
                         d_model // n_heads,
+                        ProbAttention(False, factor, dropout),
                     ),
                     d_model,
                     d_ffn,
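
This hunk shows the unification concretely: `ProbAttention` moves from the first to the last position in `MultiHeadAttention`. Swapping in another operator should now be a one-line change; a hedged sketch assuming `AutoCorrelation` keeps the `(factor, dropout)` constructor seen in the autoformer diff above:

```python
from pypots.nn.modules.autoformer import AutoCorrelation
from pypots.nn.modules.transformer import MultiHeadAttention

d_model, n_heads, factor, dropout = 64, 4, 3, 0.1  # illustrative values

# Same wrapper, different operator: AutoCorrelation in place of ProbAttention.
attention = MultiHeadAttention(
    n_heads,
    d_model,
    d_model // n_heads,
    d_model // n_heads,
    AutoCorrelation(factor, dropout),
)
```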
10 changes: 5 additions & 5 deletions pypots/imputation/informer/model.py
@@ -37,12 +37,12 @@ class Informer(BaseNNImputer):
     n_layers :
         The number of layers in the Informer model.
 
-    n_heads :
-        The number of heads in each layer of Informer.
-
     d_model :
         The dimension of the model.
 
+    n_heads :
+        The number of heads in each layer of Informer.
+
     d_ffn :
         The dimension of the feed-forward network.
@@ -104,8 +104,8 @@ def __init__(
         n_steps: int,
         n_features: int,
         n_layers: int,
-        n_heads: int,
         d_model: int,
+        n_heads: int,
         d_ffn: int,
         factor: int,
         dropout: float = 0,
@@ -147,8 +147,8 @@ def __init__(
             self.n_steps,
             self.n_features,
             self.n_layers,
-            self.n_heads,
             self.d_model,
+            self.n_heads,
             self.d_ffn,
             self.factor,
             self.dropout,
4 changes: 2 additions & 2 deletions pypots/imputation/itransformer/core.py
@@ -20,10 +20,10 @@ def __init__(
         n_features: int,
         n_layers: int,
         d_model: int,
-        d_ffn: int,
         n_heads: int,
         d_k: int,
         d_v: int,
+        d_ffn: int,
         dropout: float,
         attn_dropout: float,
         ORT_weight: float = 1,
@@ -41,10 +41,10 @@ def __init__(
         self.encoder = TransformerEncoder(
             n_layers,
             d_model,
-            d_ffn,
             n_heads,
             d_k,
             d_v,
+            d_ffn,
             dropout,
             attn_dropout,
         )
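
The same canonical ordering, `d_model` before `n_heads` before `d_ffn` (with `d_k`/`d_v` in between where they apply), now runs through every constructor touched by the PR, here the vanilla `TransformerEncoder`. A sketch with illustrative values (argument names as in the hunk above):

```python
from pypots.nn.modules.transformer import TransformerEncoder

# Argument order after this PR: n_layers, d_model, n_heads, d_k, d_v, d_ffn, ...
encoder = TransformerEncoder(
    n_layers=2,
    d_model=64,
    n_heads=4,
    d_k=16,
    d_v=16,
    d_ffn=128,
    dropout=0.1,
    attn_dropout=0.1,
)
```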
(Diffs for the remaining 32 changed files not loaded.)
