
Commit 9905d36

docs
kashif committed Jun 16, 2024
1 parent 13ec2da commit 9905d36
Showing 4 changed files with 20 additions and 7 deletions.
10 changes: 8 additions & 2 deletions src/gluonts/torch/model/samformer/estimator.py
@@ -49,7 +49,7 @@
class SamFormerEstimator(PyTorchLightningEstimator):
"""
An estimator training the SamFormer model for multivariate forecasting
- as described in TODO extended to be
+ as described in https://arxiv.org/abs/2402.10198 extended to be
probabilistic.
This class uses the model defined in ``SamFormerModel``,
@@ -66,10 +66,16 @@ class SamFormerEstimator(PyTorchLightningEstimator):
takes as inputs (default: ``10 * prediction_length``).
hidden_dim
Size of query and key projection (default: ``32``).
+ projection_dim
+ Size of the projection dimension (default: ``8``).
+ sam
+ Whether to use SAM optimizer (default: ``True``).
+ rho
+ Rho parameter for SAM optimizer (default: ``0.5``).
lr
Learning rate (default: ``1e-3``).
weight_decay
- Weight decay regularization parameter (default: ``1e-8``).
+ Weight decay regularization parameter (default: ``1e-5``).
scaling
Scaling parameter can be "mean", "std" or None.
distr_output
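For reference, a minimal sketch of how the estimator might be constructed with the defaults documented above. It assumes the constructor accepts the parameters listed in the docstring plus a prediction_length; any additional required arguments (for example a frequency) are not shown. This is an illustration, not code from this commit.

from gluonts.torch.distributions import StudentTOutput
from gluonts.torch.model.samformer.estimator import SamFormerEstimator

estimator = SamFormerEstimator(
    prediction_length=24,        # forecast horizon (parameter name assumed)
    context_length=10 * 24,      # default is ``10 * prediction_length``
    hidden_dim=32,               # size of query and key projection
    projection_dim=8,            # size of the projection dimension
    sam=True,                    # use the SAM optimizer
    rho=0.5,                     # SAM rho parameter
    lr=1e-3,
    weight_decay=1e-5,           # default documented in this commit
    scaling="mean",              # "mean", "std" or None
    distr_output=StudentTOutput(),
)
# predictor = estimator.train(training_data)  # training_data: a multivariate GluonTS dataset (not defined here)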
4 changes: 4 additions & 0 deletions src/gluonts/torch/model/samformer/lightning_module.py
@@ -39,6 +39,10 @@ class SamFormerLightningModule(pl.LightningModule):
Learning rate.
weight_decay
Weight decay regularization parameter.
+ rho
+ Rho parameter for SAM optimizer.
+ sam
+ Whether to use SAM optimizer.
"""

@validated()
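The rho and sam options refer to Sharpness-Aware Minimization, which takes two forward/backward passes per batch. The sketch below shows that usual pattern; the actual wiring inside SamFormerLightningModule is not shown in this diff, and the SAM class name, its constructor signature, and first_step are assumptions (only step(closure=None), second_step, and _grad_norm appear in the sam.py diff further down).

import torch
from torch import nn

from gluonts.torch.model.samformer.sam import SAM  # class name and import path assumed

model = nn.Linear(16, 1)          # placeholder for the forecasting network
criterion = nn.MSELoss()          # placeholder for the model's loss

# SAM wraps a base optimizer; extra kwargs are assumed to be forwarded to it.
optimizer = SAM(model.parameters(), torch.optim.Adam, rho=0.5, lr=1e-3, weight_decay=1e-5)

x, y = torch.randn(8, 16), torch.randn(8, 1)   # dummy batch

loss = criterion(model(x), y)
loss.backward()
optimizer.first_step(zero_grad=True)    # perturb weights towards the worst point in the rho-ball
criterion(model(x), y).backward()       # second pass at the perturbed weights
optimizer.second_step(zero_grad=True)   # restore the weights and apply the base optimizer's update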
7 changes: 5 additions & 2 deletions src/gluonts/torch/model/samformer/module.py
@@ -20,14 +20,14 @@
from gluonts.core.component import validated
from gluonts.model import Input, InputSpec
from gluonts.torch.distributions import StudentTOutput
- from gluonts.torch.scaler import StdScaler, MeanScaler, NOPScaler
+ from gluonts.torch.scaler import MeanScaler, NOPScaler, StdScaler
from gluonts.torch.util import weighted_average


class SamFormerModel(nn.Module):
"""
Module implementing the SamFormer model for multivariate forecasting as
- described in TODO extended to be probabilistic.
+ described in https://arxiv.org/abs/2402.10198 extended to be probabilistic.
Parameters
----------
@@ -37,6 +37,8 @@ class SamFormerModel(nn.Module):
Number of time steps prior to prediction time that the model.
hidden_dim
Dim of query and key projection.
+ projection_dim
+ Dim of projection layer.
scaling
Whether to scale the input using mean or std or None.
distr_output
@@ -78,6 +80,7 @@ def __init__(
self.scaler = NOPScaler(keepdim=True, dim=1)
self.nonnegative_pred_samples = nonnegative_pred_samples

+ # input is each variate together with the loc and scale
self.compute_keys = nn.Linear(context_length + 2, hidden_dim)
self.compute_queries = nn.Linear(context_length + 2, hidden_dim)
self.compute_values = nn.Linear(context_length + 2, context_length)
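The context_length + 2 input size above comes from treating each variate as one token: its (scaled) history concatenated with its own loc and scale before the query/key/value projections. A sketch of that shape bookkeeping follows; the tensor layout, the standardization, and the attention call are illustrative assumptions, not code from SamFormerModel.

import torch
from torch import nn

batch, context_length, num_variates, hidden_dim = 4, 96, 7, 32

past_target = torch.randn(batch, context_length, num_variates)
loc = past_target.mean(dim=1, keepdim=True)            # (batch, 1, num_variates)
scale = past_target.std(dim=1, keepdim=True) + 1e-6    # (batch, 1, num_variates), illustrative standardization
scaled = (past_target - loc) / scale

# one token per variate: its scaled history plus that variate's loc and scale
tokens = torch.cat([scaled, loc, scale], dim=1)         # (batch, context_length + 2, num_variates)
tokens = tokens.transpose(1, 2)                         # (batch, num_variates, context_length + 2)

compute_queries = nn.Linear(context_length + 2, hidden_dim)
compute_keys = nn.Linear(context_length + 2, hidden_dim)
compute_values = nn.Linear(context_length + 2, context_length)

q, k, v = compute_queries(tokens), compute_keys(tokens), compute_values(tokens)
out = nn.functional.scaled_dot_product_attention(q, k, v)  # attention across variates
print(out.shape)                                            # torch.Size([4, 7, 96])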
6 changes: 3 additions & 3 deletions src/gluonts/torch/model/samformer/sam.py
@@ -66,9 +66,9 @@ def step(self, closure=None):
self.second_step()

def _grad_norm(self):
- shared_device = (
-     self.param_groups[0]["params"][0].device
- ) # put everything on the same device, in case of model parallelism
+ shared_device = self.param_groups[0]["params"][
+     0
+ ].device # put everything on the same device, in case of model parallelism
norm = torch.norm(
torch.stack(
[
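For context on the reformatted _grad_norm above: the global gradient norm it computes is what scales SAM's weight perturbation. Below is a sketch of that use, following the reference SAM recipe rather than the exact GluonTS code; the helper name first_step_sketch is hypothetical, rho is assumed to live in the param-group defaults, and the adaptive-SAM variant is omitted.

import torch


@torch.no_grad()
def first_step_sketch(sam_optimizer, zero_grad: bool = False) -> None:
    # global L2 norm of all gradients, gathered on one device (as in _grad_norm)
    shared_device = sam_optimizer.param_groups[0]["params"][0].device
    grad_norm = torch.norm(
        torch.stack(
            [
                p.grad.norm(p=2).to(shared_device)
                for group in sam_optimizer.param_groups
                for p in group["params"]
                if p.grad is not None
            ]
        ),
        p=2,
    )
    for group in sam_optimizer.param_groups:
        scale = group["rho"] / (grad_norm + 1e-12)
        for p in group["params"]:
            if p.grad is None:
                continue
            e_w = p.grad * scale                   # epsilon(w): ascent step of size rho * g / ||g||
            p.add_(e_w)                            # w <- w + e_w
            sam_optimizer.state[p]["e_w"] = e_w    # remembered so second_step can subtract it again
    if zero_grad:
        sam_optimizer.zero_grad()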

