From 666101774f1f82e4be0a880d7d0b7d1bc6dcd6a4 Mon Sep 17 00:00:00 2001
From: Tony-Y <11532812+Tony-Y@users.noreply.github.com>
Date: Sat, 12 Oct 2024 09:31:48 +0900
Subject: [PATCH] Use "Warning" in documentation

---
 README.md                 | 10 ++++++++--
 docs/index.rst            |  4 +++-
 pytorch_warmup/base.py    |  4 ++--
 pytorch_warmup/radam.py   |  2 +-
 pytorch_warmup/untuned.py |  4 ++--
 5 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index fef7edf..33cbf15 100644
--- a/README.md
+++ b/README.md
@@ -41,6 +41,7 @@ The scheduled learning rate is dampened by the multiplication of the warmup fact
 
 [figure: Learning rate]
 
 #### Approach 1
+
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Tony-Y/colab-notebooks/blob/master/PyTorch_Warmup_Approach1_chaining.ipynb)
 When the learning rate schedule uses the global iteration number, the untuned linear warmup can be used
@@ -66,9 +67,12 @@ for epoch in range(1,num_epochs+1):
         with warmup_scheduler.dampening():
             lr_scheduler.step()
 ```
-Note that the warmup schedule must not be initialized before the initialization of the learning rate schedule.
+
+> [!Warning]
+> Note that the warmup schedule must not be initialized before the initialization of the learning rate schedule.
 
 If you want to use the learning rate schedule *chaining*, which is supported for PyTorch 1.4 or above, you may simply write a code of learning rate schedulers as a suite of the `with` statement:
+
 ```python
 lr_scheduler1 = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
 lr_scheduler2 = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
@@ -83,6 +87,7 @@ for epoch in range(1,num_epochs+1):
 ```
 
 If you want to start the learning rate schedule after the end of the linear warmup, delay it by the warmup period:
+
 ```python
 warmup_period = 2000
 num_steps = len(dataloader) * num_epochs - warmup_period
@@ -98,6 +103,7 @@ for epoch in range(1,num_epochs+1):
 ```
 
 #### Approach 2
+
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Tony-Y/colab-notebooks/blob/master/PyTorch_Warmup_Approach2_chaining.ipynb)
 
 When the learning rate schedule uses the epoch number, the warmup schedule can be used as follows:
@@ -133,6 +139,7 @@ for epoch in range(1,num_epochs+1):
 ```
 
 #### Approach 3
+
 When you use `CosineAnnealingWarmRestarts`, the warmup schedule can be used as follows:
 
 ```python
@@ -216,7 +223,6 @@ lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_s
 warmup_scheduler = warmup.UntunedLinearWarmup(optimizer)
 ```
 
-
 ## License
 
 MIT License
diff --git a/docs/index.rst b/docs/index.rst
index 862f56a..b26d10d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -80,7 +80,9 @@ together with :class:`Adam` or its variant (:class:`AdamW`, :class:`NAdam`, etc.
             with warmup_scheduler.dampening():
                 lr_scheduler.step()
 
-Note that the warmup schedule must not be initialized before the initialization of the learning rate schedule.
+.. warning::
+    Note that the warmup schedule must not be initialized before the initialization of the learning rate schedule.
+
 Other approaches can be found in `README `_.
 
 .. toctree::
diff --git a/pytorch_warmup/base.py b/pytorch_warmup/base.py
index ed0fe66..447875a 100644
--- a/pytorch_warmup/base.py
+++ b/pytorch_warmup/base.py
@@ -170,7 +170,7 @@ class LinearWarmup(BaseWarmup):
         >>>     with warmup_scheduler.dampening():
         >>>         lr_scheduler.step()
 
-    Note:
+    Warning:
         The warmup schedule must not be initialized before the initialization of the learning rate schedule.
     """
 
@@ -218,7 +218,7 @@ class ExponentialWarmup(BaseWarmup):
         >>>     with warmup_scheduler.dampening():
         >>>         lr_scheduler.step()
 
-    Note:
+    Warning:
         The warmup schedule must not be initialized before the initialization of the learning rate schedule.
""" diff --git a/pytorch_warmup/radam.py b/pytorch_warmup/radam.py index 54a64c7..8aa3c38 100644 --- a/pytorch_warmup/radam.py +++ b/pytorch_warmup/radam.py @@ -117,7 +117,7 @@ class RAdamWarmup(BaseWarmup): >>> with warmup_scheduler.dampening(): >>> lr_scheduler.step() - Note: + Warning: The warmup schedule must not be initialized before the initialization of the learning rate schedule. """ diff --git a/pytorch_warmup/untuned.py b/pytorch_warmup/untuned.py index 76c4155..2f4fc89 100644 --- a/pytorch_warmup/untuned.py +++ b/pytorch_warmup/untuned.py @@ -56,7 +56,7 @@ class UntunedLinearWarmup(LinearWarmup): >>> with warmup_scheduler.dampening(): >>> lr_scheduler.step() - Note: + Warning: The warmup schedule must not be initialized before the initialization of the learning rate schedule. """ @@ -133,7 +133,7 @@ class UntunedExponentialWarmup(ExponentialWarmup): >>> with warmup_scheduler.dampening(): >>> lr_scheduler.step() - Note: + Warning: The warmup schedule must not be initialized before the initialization of the learning rate schedule. """