Merge branch 'master' into more-gpu-test-fixes

pytorch · Dec 17, 2024 · 064cfda · 064cfda
2 parents 61a2c29 + 06fe8bd
commit 064cfda
Show file tree

Hide file tree

Showing 21 changed files with 278 additions and 136 deletions.
diff --git a/.github/workflows/gpu-tests.yml b/.github/workflows/gpu-tests.yml
@@ -16,7 +16,7 @@ concurrency:
   group: gpu-tests-${{ github.ref_name }}-${{ !(github.ref_protected) || github.sha }}
   cancel-in-progress: true
 
-# Cherry-picked from https://github.com/pytorch/test-infra/blob/main/.github/workflows/linux_job.yml
+# Cherry-picked from https://github.com/pytorch/test-infra/blob/main/.github/workflows/linux_job_v2.yml
 
 jobs:
   gpu-tests:
@@ -25,7 +25,7 @@ jobs:
         pytorch-channel: [pytorch, pytorch-nightly]
       fail-fast: false
     env:
-      DOCKER_IMAGE: "pytorch/conda-builder:cuda12.1"
+      DOCKER_IMAGE: "pytorch/almalinux-builder:cuda12.4"
       REPOSITORY: ${{ github.repository }}
       PR_NUMBER: ${{ github.event.pull_request.number }}
     runs-on: linux.8xlarge.nvidia.gpu
@@ -40,7 +40,7 @@ jobs:
           echo "::endgroup::"
 
       - name: Checkout repository (pytorch/test-infra)
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           # Support the use case where we need to checkout someone's fork
           repository: pytorch/test-infra
@@ -55,7 +55,7 @@ jobs:
           docker-image: ${{ env.DOCKER_IMAGE }}
 
       - name: Checkout repository (${{ github.repository }})
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           # Support the use case where we need to checkout someone's fork
           repository: ${{ github.repository }}
@@ -102,9 +102,9 @@ jobs:
 
           # Install PyTorch
           if [ "${{ matrix.pytorch-channel }}" == "pytorch" ]; then
-            pip install --upgrade torch torchvision --index-url https://download.pytorch.org/whl/cu121
+            pip install --upgrade torch torchvision --index-url https://download.pytorch.org/whl/cu124
           else
-            pip install --upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu121
+            pip install --upgrade --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu124
           fi
 
           python -c "import torch; print(torch.__version__, ', CUDA is available: ', torch.cuda.is_available()); exit(not torch.cuda.is_available())"
@@ -124,7 +124,7 @@ jobs:
         uses: nick-fields/retry@v2.9.0
         with:
           max_attempts: 5
-          timeout_minutes: 25
+          timeout_minutes: 45
           shell: bash
           command: docker exec -t pthd /bin/bash -xec 'bash tests/run_gpu_tests.sh 2'
           new_command_on_retry: docker exec -e USE_LAST_FAILED=1 -t pthd /bin/bash -xec 'bash tests/run_gpu_tests.sh 2'
@@ -139,7 +139,7 @@ jobs:
       - name: Run examples in container
         continue-on-error: false
         run: |
-          SCRIPT=$(cat << EOF
+          script=$(cat << EOF
 
           set -xe
 

diff --git a/.github/workflows/pytorch-version-tests.yml b/.github/workflows/pytorch-version-tests.yml
@@ -15,24 +15,25 @@ jobs:
       max-parallel: 5
       fail-fast: false
       matrix:
-        # Here we keep python 3.8 tests until the end of the 2024 and 
-        # will drop python version and related pytorch versions
-        python-version: [3.8, 3.9, "3.10"]
+        python-version: [3.9, "3.10", "3.11"]
         pytorch-version:
-          [2.4.1, 2.3.1, 2.2.2, 2.0.1, 1.13.1, 1.12.1, 1.10.0, 1.8.1]
+          [2.4.1, 2.3.1, 2.2.2, 2.0.1, 1.13.1, 1.12.1, 1.10.0]
         exclude:
-          # disabling python 3.9 support with PyTorch 1.7.1 and 1.8.1, to stop repeated pytorch-version test fail.
-          # https://github.com/pytorch/ignite/issues/2383
-          - pytorch-version: 1.8.1
-            python-version: 3.9
-          - pytorch-version: 1.8.1
-            python-version: "3.10"
-
           - pytorch-version: 1.10.0
             python-version: "3.10"
+          - pytorch-version: 1.10.0
+            python-version: "3.11"
 
           - pytorch-version: 1.11.0
             python-version: "3.10"
+          - pytorch-version: 1.11.0
+            python-version: "3.11"
+          - pytorch-version: 1.12.1
+            python-version: "3.11"
+          # Conda fails to install the cpuonly version, and a few CPU distributed
+          # tests fail with unrelated errors
+          - pytorch-version: 1.13.1
+            python-version: "3.11"
 
     steps:
       - uses: actions/checkout@v4

diff --git a/examples/cifar10/main.py b/examples/cifar10/main.py
@@ -7,7 +7,8 @@
 import torch.nn as nn
 import torch.optim as optim
 import utils
-from torch.cuda.amp import autocast, GradScaler
+from torch.amp import autocast
+from torch.cuda.amp import GradScaler
 
 import ignite
 import ignite.distributed as idist
@@ -299,7 +300,7 @@ def train_step(engine, batch):
 
         model.train()
 
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(x)
             loss = criterion(y_pred, y)
 
@@ -355,7 +356,7 @@ def evaluate_step(engine: Engine, batch):
             x = x.to(device, non_blocking=True)
             y = y.to(device, non_blocking=True)
 
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             output = model(x)
         return output, y
 

diff --git a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py
@@ -1,6 +1,7 @@
 import fire
 import torch
-from torch.cuda.amp import autocast, GradScaler
+from torch.amp import autocast
+from torch.cuda.amp import GradScaler
 from torch.nn import CrossEntropyLoss
 from torch.optim import SGD
 from torchvision.models import wide_resnet50_2
@@ -34,7 +35,7 @@ def train_step(engine, batch):
         optimizer.zero_grad()
 
         # Runs the forward pass with autocasting.
-        with autocast():
+        with autocast("cuda"):
             y_pred = model(x)
             loss = criterion(y_pred, y)
 

diff --git a/examples/cifar10_qat/main.py b/examples/cifar10_qat/main.py
@@ -6,7 +6,8 @@
 import torch.nn as nn
 import torch.optim as optim
 import utils
-from torch.cuda.amp import autocast, GradScaler
+from torch.amp import autocast
+from torch.cuda.amp import GradScaler
 
 import ignite
 import ignite.distributed as idist
@@ -283,7 +284,7 @@ def train_step(engine, batch):
 
         model.train()
 
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(x)
             loss = criterion(y_pred, y)
 

diff --git a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb
@@ -887,7 +887,7 @@
     "id": "JE8dLeEfIl_Z"
    },
    "source": [
-    "We will use [`torch.cuda.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)."
+    "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)."
    ]
   },
   {
@@ -896,7 +896,8 @@
     "id": "vrJls4p-FRcA"
    },
    "source": [
-    "from torch.cuda.amp import autocast, GradScaler\n",
+    "from torch.cuda.amp import GradScaler\n",
+    "from torch.amp import autocast\n",
     "\n",
     "from ignite.utils import convert_tensor\n",
     "import torch.nn.functional as F\n",

diff --git a/examples/references/classification/imagenet/main.py b/examples/references/classification/imagenet/main.py
@@ -6,9 +6,10 @@
 import torch
 
 try:
-    from torch.cuda.amp import autocast, GradScaler
+    from torch.amp import autocast
+    from torch.cuda.amp import GradScaler
 except ImportError:
-    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.6.0")
+    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0")
 
 import dataflow as data
 import utils
@@ -144,7 +145,7 @@ def create_trainer(model, optimizer, criterion, train_sampler, config, logger, w
     def training_step(engine, batch):
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=True)
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(x)
             y_pred = model_output_transform(y_pred)
             loss = criterion(y_pred, y) / accumulation_steps
@@ -235,7 +236,7 @@ def create_evaluator(model, metrics, config, with_clearml, tag="val"):
     @torch.no_grad()
     def evaluate_step(engine, batch):
         model.eval()
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             x, y = prepare_batch(batch, device=config.device, non_blocking=True)
             y_pred = model(x)
             y_pred = model_output_transform(y_pred)

diff --git a/examples/references/segmentation/pascal_voc2012/main.py b/examples/references/segmentation/pascal_voc2012/main.py
@@ -6,9 +6,10 @@
 import torch
 
 try:
-    from torch.cuda.amp import autocast, GradScaler
+    from torch.amp import autocast
+    from torch.cuda.amp import GradScaler
 except ImportError:
-    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.6.0")
+    raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0")
 
 import dataflow as data
 import utils
@@ -191,7 +192,7 @@ def create_trainer(model, optimizer, criterion, train_sampler, config, logger, w
     def forward_pass(batch):
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=True)
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(x)
             y_pred = model_output_transform(y_pred)
             loss = criterion(y_pred, y) / accumulation_steps
@@ -272,7 +273,7 @@ def create_evaluator(model, metrics, config, with_clearml, tag="val"):
     @torch.no_grad()
     def evaluate_step(engine, batch):
         model.eval()
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             x, y = prepare_batch(batch, device=config.device, non_blocking=True)
             y_pred = model(x)
             y_pred = model_output_transform(y_pred)

diff --git a/examples/transformers/main.py b/examples/transformers/main.py
@@ -7,7 +7,8 @@
 import torch.nn as nn
 import torch.optim as optim
 import utils
-from torch.cuda.amp import autocast, GradScaler
+from torch.amp import autocast
+from torch.cuda.amp import GradScaler
 
 import ignite
 import ignite.distributed as idist
@@ -309,7 +310,7 @@ def train_step(engine, batch):
 
         model.train()
 
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             y_pred = model(input_batch)
             loss = criterion(y_pred, labels)
 
@@ -373,7 +374,7 @@ def evaluate_step(engine, batch):
             input_batch = {k: v.to(device, non_blocking=True, dtype=torch.long) for k, v in batch[0].items()}
             labels = labels.to(device, non_blocking=True, dtype=torch.float)
 
-        with autocast(enabled=with_amp):
+        with autocast("cuda", enabled=with_amp):
             output = model(input_batch)
         return output, labels
 

diff --git a/ignite/contrib/engines/common.py b/ignite/contrib/engines/common.py
@@ -78,7 +78,7 @@ def setup_common_training_handlers(
         lr_scheduler: learning rate scheduler
             as native torch LRScheduler or ignite's parameter scheduler.
         with_gpu_stats: if True, :class:`~ignite.metrics.GpuInfo` is attached to the
-            trainer. This requires `pynvml` package to be installed.
+            trainer. This requires the `pynvml<12` package to be installed.
         output_names: list of names associated with `update_function` output dictionary.
         with_pbars: if True, two progress bars on epochs and optionally on iterations are attached.
             Default, True.

diff --git a/ignite/engine/__init__.py b/ignite/engine/__init__.py
@@ -185,9 +185,9 @@ def supervised_training_step_amp(
     """
 
     try:
-        from torch.cuda.amp import autocast
+        from torch.amp import autocast
     except ImportError:
-        raise ImportError("Please install torch>=1.6.0 to use amp_mode='amp'.")
+        raise ImportError("Please install torch>=1.12.0 to use amp_mode='amp'.")
 
     if gradient_accumulation_steps <= 0:
         raise ValueError(
@@ -200,7 +200,7 @@ def update(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[to
             optimizer.zero_grad()
         model.train()
         x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)
-        with autocast(enabled=True):
+        with autocast("cuda", enabled=True):
             output = model_fn(model, x)
             y_pred = model_transform(output)
             loss = loss_fn(y_pred, y)
@@ -726,15 +726,15 @@ def supervised_evaluation_step_amp(
         Added `model_fn` to customize model's application on the sample
     """
     try:
-        from torch.cuda.amp import autocast
+        from torch.amp import autocast
     except ImportError:
-        raise ImportError("Please install torch>=1.6.0 to use amp_mode='amp'.")
+        raise ImportError("Please install torch>=1.12.0 to use amp_mode='amp'.")
 
     def evaluate_step(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[torch.Tensor]]:
         model.eval()
         with torch.no_grad():
             x, y = prepare_batch(batch, device=device, non_blocking=non_blocking)
-            with autocast(enabled=True):
+            with autocast("cuda", enabled=True):
                 output = model_fn(model, x)
                 y_pred = model_transform(output)
             return output_transform(x, y, y_pred)