diff --git a/.github/workflows/cron-mmar.yml b/.github/workflows/cron-mmar.yml new file mode 100644 index 0000000000..735c23117c --- /dev/null +++ b/.github/workflows/cron-mmar.yml @@ -0,0 +1,42 @@ +name: cron-mmar + +on: + schedule: + - cron: "0 2 * * *" # at 02:00 UTC + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +concurrency: + # automatically cancel the previously triggered workflows when there's a newer version + group: mmar-tests-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + cron-load: + if: github.repository == 'Project-MONAI/MONAI' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: cache weekly timestamp + id: pip-cache + run: echo "::set-output name=datew::$(date '+%Y-%V')" + - name: cache for pip + uses: actions/cache@v2 + id: cache + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ steps.pip-cache.outputs.datew }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip wheel + python -m pip install -r requirements-dev.txt + - name: Loading MMARs + run: | + # clean up temporary files + $(pwd)/runtests.sh --clean + # run tests + python -m tests.ngc_mmar_loading diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index a36cfbcdb9..9f90d4b8e8 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -62,7 +62,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' strategy: matrix: - container: ["pytorch:21.02", "pytorch:21.08"] # 21.02 for backward comp. + container: ["pytorch:21.02", "pytorch:21.09"] # 21.02 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image options: "--gpus all" @@ -106,7 +106,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' strategy: matrix: - container: ["pytorch:21.02", "pytorch:21.08"] # 21.02 for backward comp. + container: ["pytorch:21.02", "pytorch:21.09"] # 21.02 for backward comp. 
container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image options: "--gpus all" @@ -173,7 +173,7 @@ jobs: cron-docker: if: github.repository == 'Project-MONAI/MONAI' container: - image: localhost:5000/local_monai:dockerhub # use currently latest, locally available dockerhub image + image: docker://projectmonai/monai:latest # this might be slow and has the pull count limitations options: "--gpus all" runs-on: [self-hosted, linux, x64, common] steps: @@ -204,7 +204,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' needs: cron-gpu # so that monai itself is verified first container: - image: nvcr.io/nvidia/pytorch:21.08-py3 # testing with the latest pytorch base image + image: nvcr.io/nvidia/pytorch:21.09-py3 # testing with the latest pytorch base image options: "--gpus all --ipc=host" runs-on: [self-hosted, linux, x64, common] steps: @@ -215,7 +215,7 @@ jobs: which python python -m pip install --upgrade pip wheel python -m pip install -r requirements-dev.txt - BUILD_MONAI=0 python setup.py develop # install monai + BUILD_MONAI=1 python setup.py develop # install monai nvidia-smi export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) echo $CUDA_VISIBLE_DEVICES @@ -234,5 +234,7 @@ jobs: trap 'if pgrep python; then pkill python; fi;' ERR python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & cd /opt/tutorials + python -c 'import monai; monai.config.print_debug_info()' $(pwd)/runner.sh + python -c 'import monai; monai.config.print_debug_info()' if pgrep python; then pkill python; fi diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 3104224e2b..7140cd7dd8 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -13,20 +13,21 @@ on: workflow_dispatch: jobs: - versioning: + versioning_dev: # compute versioning file from python setup.py # upload as artifact - # (also used in release.yml) if: github.repository == 'Project-MONAI/MONAI' - container: - image: localhost:5000/local_monai:latest - runs-on: [self-hosted, linux, x64, build_only] + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 # full history so that we can git describe with: ref: dev fetch-depth: 0 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 - shell: bash run: | git describe @@ -43,13 +44,11 @@ jobs: ls -al rm -rf {*,.[^.]*} - local_docker: - # builds two versions: local_monai:latest and local_monai:dockerhub - # latest: used for local tests - # dockerhub: release, no flake package + docker_build_dev: + # builds projectmonai/monai:latest if: github.repository == 'Project-MONAI/MONAI' - needs: versioning - runs-on: [self-hosted, linux, x64, build_only] + needs: versioning_dev + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 with: @@ -58,67 +57,47 @@ jobs: uses: actions/download-artifact@v2 with: name: _version.py + - name: Install Latest Docker + run: | + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" + sudo apt-get update + sudo apt-get install docker-ce - name: docker_build shell: bash run: | # get tag info for versioning cat _version.py mv _version.py monai/ - # build and run original docker image for local registry - docker build -t localhost:5000/local_monai:latest -f Dockerfile . 
- docker push localhost:5000/local_monai:latest - # build once more w/ tag "latest": remove flake package as it is not needed on hub.docker.com + + # build "latest": remove flake package as it is not needed on hub.docker.com sed -i '/flake/d' requirements-dev.txt docker build -t projectmonai/monai:latest -f Dockerfile . - # also push as tag "dockerhub" to local registry - docker image tag projectmonai/monai:latest localhost:5000/local_monai:dockerhub - docker push localhost:5000/local_monai:dockerhub + # distribute as always w/ tag "latest" to hub.docker.com echo "${{ secrets.DOCKER_PW }}" | docker login -u projectmonai --password-stdin + docker push projectmonai/monai:latest docker logout docker image prune -f - docker_test_latest: - if: github.repository == 'Project-MONAI/MONAI' - needs: local_docker - container: - image: localhost:5000/local_monai:latest - runs-on: [self-hosted, linux, x64, common] - steps: - - name: Import - run: | - export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) - echo $CUDA_VISIBLE_DEVICES - trap 'if pgrep python; then pkill python; fi;' ERR - python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & - python -c 'import monai; monai.config.print_config()' - cd /opt/monai - ls -al - ngc --version - python -m tests.min_tests - if pgrep python; then pkill python; fi - env: - QUICKTEST: True - docker_test_dockerhub: if: github.repository == 'Project-MONAI/MONAI' - needs: local_docker + needs: docker_build_dev container: - image: localhost:5000/local_monai:dockerhub - runs-on: [self-hosted, linux, x64, common] + image: docker://projectmonai/monai:latest + options: "--shm-size=4g --ipc=host" + runs-on: ubuntu-latest steps: - name: Import run: | export CUDA_VISIBLE_DEVICES=$(python -m tests.utils) echo $CUDA_VISIBLE_DEVICES - trap 'if pgrep python; then pkill python; fi;' ERR - python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null & - python -c 'import monai; monai.config.print_config()' + python -c 'import monai; monai.config.print_debug_info()' cd /opt/monai ls -al ngc --version - python -m tests.min_tests - if pgrep python; then pkill python; fi + ./runtests.sh --min + shell: bash env: QUICKTEST: True diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index ed025e98fe..ff77a171ee 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -34,7 +34,7 @@ jobs: which python python -m pip install --upgrade pip wheel python -m pip uninstall -y torch torchvision - python -m pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html + python -m pip install torch==1.10.0+cu111 torchvision==0.11.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html python -m pip install -r requirements-dev.txt - name: Run integration tests run: | diff --git a/.github/workflows/pythonapp-gpu.yml b/.github/workflows/pythonapp-gpu.yml index 999567ae16..2ca0492e5c 100644 --- a/.github/workflows/pythonapp-gpu.yml +++ b/.github/workflows/pythonapp-gpu.yml @@ -24,7 +24,7 @@ jobs: - "PT17+CUDA110" - "PT18+CUDA102" - "PT19+CUDA114" - - "PT19+CUDA102" + - "PT110+CUDA102" include: - environment: PT16+CUDA110 # we explicitly set pytorch to -h to avoid pip install error @@ -43,11 +43,11 @@ jobs: - environment: PT19+CUDA114 # we explicitly set pytorch to -h to avoid pip install error # 
https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes - # 21.08: 1.10.0a0+3fd9dcf + # 21.09: 1.10.0a0+3fd9dcf pytorch: "-h" - base: "nvcr.io/nvidia/pytorch:21.08-py3" - - environment: PT19+CUDA102 - pytorch: "torch==1.9.0 torchvision==0.10.0" + base: "nvcr.io/nvidia/pytorch:21.09-py3" + - environment: PT110+CUDA102 + pytorch: "torch==1.10.0 torchvision==0.11.1" base: "nvcr.io/nvidia/cuda:10.2-devel-ubuntu18.04" container: image: ${{ matrix.base }} @@ -59,7 +59,7 @@ jobs: run: | if [ ${{ matrix.environment }} = "PT17+CUDA102" ] || \ [ ${{ matrix.environment }} = "PT18+CUDA102" ] || \ - [ ${{ matrix.environment }} = "PT19+CUDA102" ] + [ ${{ matrix.environment }} = "PT110+CUDA102" ] then PYVER=3.6 PYSFX=3 DISTUTILS=python3-distutils && \ apt-get update && apt-get install -y --no-install-recommends \ @@ -100,6 +100,8 @@ jobs: run: | which python python -m pip install --upgrade pip wheel + # fixes preinstalled ruamel_yaml error from the docker image + rm -rf $(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")/ruamel* python -m pip install ${{ matrix.pytorch }} python -m pip install -r requirements-dev.txt python -m pip list @@ -121,7 +123,7 @@ jobs: python -c "import monai; monai.config.print_config()" # build for the current self-hosted CI Tesla V100 BUILD_MONAI=1 TORCH_CUDA_ARCH_LIST="7.0" ./runtests.sh --quick --unittests - if [ ${{ matrix.environment }} = "PT19+CUDA102" ]; then + if [ ${{ matrix.environment }} = "PT110+CUDA102" ]; then # test the clang-format tool downloading once coverage run -m tests.clang_format_utils fi diff --git a/.github/workflows/pythonapp-min.yml b/.github/workflows/pythonapp-min.yml new file mode 100644 index 0000000000..002701c5ad --- /dev/null +++ b/.github/workflows/pythonapp-min.yml @@ -0,0 +1,170 @@ +name: build-min + +on: + # quick tests for pull requests and the releasing branches + push: + branches: + - dev + - main + - releasing/* + pull_request: + +concurrency: + # automatically cancel the previously triggered workflows when there's a newer version + group: build-min-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + # caching of these jobs: + # - docker-py3-pip- (shared) + # - ubuntu py37 pip- + # - os-latest-pip- (shared) + min-dep-os: # min dependencies installed tests for different OS + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [windows-latest, macOS-latest, ubuntu-latest] + timeout-minutes: 40 + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Prepare pip wheel + run: | + which python + python -m pip install --upgrade pip wheel + - name: cache weekly timestamp + id: pip-cache + run: | + echo "::set-output name=datew::$(date '+%Y-%V')" + echo "::set-output name=dir::$(pip cache dir)" + shell: bash + - name: cache for pip + uses: actions/cache@v2 + id: cache + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: ${{ matrix.os }}-latest-pip-${{ steps.pip-cache.outputs.datew }} + - if: runner.os == 'windows' + name: Install torch cpu from pytorch.org (Windows only) + run: | + python -m pip install torch==1.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html + - name: Install the dependencies + run: | + # min. 
requirements + python -m pip install torch==1.10.0 + python -m pip install -r requirements-min.txt + python -m pip list + BUILD_MONAI=0 python setup.py develop # no compile of extensions + shell: bash + - name: Run quick tests (CPU ${{ runner.os }}) + run: | + python -c 'import torch; print(torch.__version__); print(torch.rand(5,3))' + python -c "import monai; monai.config.print_config()" + ./runtests.sh --min + shell: bash + env: + QUICKTEST: True + + min-dep-py3: # min dependencies installed tests for different python + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [3.6, 3.7, 3.8, 3.9] + timeout-minutes: 40 + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Prepare pip wheel + run: | + which python + python -m pip install --user --upgrade pip setuptools wheel + - name: cache weekly timestamp + id: pip-cache + run: | + echo "::set-output name=datew::$(date '+%Y-%V')" + echo "::set-output name=dir::$(pip cache dir)" + shell: bash + - name: cache for pip + uses: actions/cache@v2 + id: cache + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: ubuntu-latest-latest-pip-${{ steps.pip-cache.outputs.datew }} + - name: Install the dependencies + run: | + # min. requirements + python -m pip install torch==1.10.0 + python -m pip install -r requirements-min.txt + python -m pip list + BUILD_MONAI=0 python setup.py develop # no compile of extensions + shell: bash + - name: Run quick tests (CPU ${{ runner.os }}) + run: | + python -c 'import torch; print(torch.__version__); print(torch.rand(5,3))' + python -c "import monai; monai.config.print_config()" + ./runtests.sh --min + env: + QUICKTEST: True + + min-dep-pytorch: # min dependencies installed tests for different pytorch + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + pytorch-version: [1.5.1, 1.6.0, 1.7.1, 1.8.1, latest] + timeout-minutes: 40 + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Prepare pip wheel + run: | + which python + python -m pip install --user --upgrade pip setuptools wheel + - name: cache weekly timestamp + id: pip-cache + run: | + echo "::set-output name=datew::$(date '+%Y-%V')" + echo "::set-output name=dir::$(pip cache dir)" + shell: bash + - name: cache for pip + uses: actions/cache@v2 + id: cache + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: ubuntu-latest-latest-pip-${{ steps.pip-cache.outputs.datew }} + - name: Install the dependencies + run: | + # min. 
requirements + if [ ${{ matrix.pytorch-version }} == "latest" ]; then + python -m pip install torch + elif [ ${{ matrix.pytorch-version }} == "1.5.1" ]; then + python -m pip install torch==1.5.1 + elif [ ${{ matrix.pytorch-version }} == "1.6.0" ]; then + python -m pip install torch==1.6.0 + elif [ ${{ matrix.pytorch-version }} == "1.7.1" ]; then + python -m pip install torch==1.7.1 + elif [ ${{ matrix.pytorch-version }} == "1.8.1" ]; then + python -m pip install torch==1.8.1 + fi + python -m pip install -r requirements-min.txt + python -m pip list + BUILD_MONAI=0 python setup.py develop # no compile of extensions + shell: bash + - name: Run quick tests (pytorch ${{ matrix.pytorch-version }}) + run: | + python -c 'import torch; print(torch.__version__); print(torch.rand(5,3))' + python -c "import monai; monai.config.print_config()" + ./runtests.sh --min + env: + QUICKTEST: True diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml index 3f18263e9e..4d69206247 100644 --- a/.github/workflows/pythonapp.yml +++ b/.github/workflows/pythonapp.yml @@ -87,10 +87,10 @@ jobs: - if: runner.os == 'windows' name: Install torch cpu from pytorch.org (Windows only) run: | - python -m pip install torch==1.9.0+cpu torchvision==0.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html + python -m pip install torch==1.10.0+cpu torchvision==0.11.1+cpu -f https://download.pytorch.org/whl/torch_stable.html - name: Install the dependencies run: | - python -m pip install torch==1.9.0 torchvision==0.10.0 + python -m pip install torch==1.10.0 torchvision==0.11.1 cat "requirements-dev.txt" python -m pip install -r requirements-dev.txt python -m pip list @@ -106,100 +106,6 @@ jobs: env: QUICKTEST: True - min-dep-os: # min dependencies installed tests for different OS - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [windows-latest, macOS-latest, ubuntu-latest] - timeout-minutes: 40 - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Prepare pip wheel - run: | - which python - python -m pip install --upgrade pip wheel - - name: cache weekly timestamp - id: pip-cache - run: | - echo "::set-output name=datew::$(date '+%Y-%V')" - echo "::set-output name=dir::$(pip cache dir)" - shell: bash - - name: cache for pip - uses: actions/cache@v2 - id: cache - with: - path: ${{ steps.pip-cache.outputs.dir }} - key: ${{ matrix.os }}-latest-pip-${{ steps.pip-cache.outputs.datew }} - - if: runner.os == 'windows' - name: Install torch cpu from pytorch.org (Windows only) - run: | - python -m pip install torch==1.9.0+cpu -f https://download.pytorch.org/whl/torch_stable.html - - name: Install the dependencies - run: | - # min. 
requirements - python -m pip install torch==1.9.0 - python -m pip install -r requirements-min.txt - python -m pip list - BUILD_MONAI=0 python setup.py develop # no compile of extensions - shell: bash - - name: Run quick tests (CPU ${{ runner.os }}) - run: | - python -c 'import torch; print(torch.__version__); print(torch.rand(5,3))' - python -c "import monai; monai.config.print_config()" - ./runtests.sh --min - env: - QUICKTEST: True - - min-dep-py3: # min dependencies installed tests for different python - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: [3.6, 3.7, 3.8, 3.9] - timeout-minutes: 40 - steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Prepare pip wheel - run: | - which python - python -m pip install --user --upgrade pip setuptools wheel - - name: cache weekly timestamp - id: pip-cache - run: | - echo "::set-output name=datew::$(date '+%Y-%V')" - echo "::set-output name=dir::$(pip cache dir)" - shell: bash - - name: cache for pip - uses: actions/cache@v2 - id: cache - with: - path: ${{ steps.pip-cache.outputs.dir }} - key: ubuntu-latest-latest-pip-${{ steps.pip-cache.outputs.datew }} - - name: Install the dependencies - run: | - # min. requirements - python -m pip install torch==1.9.0 - python -m pip install -r requirements-min.txt - python -m pip list - BUILD_MONAI=0 python setup.py develop # no compile of extensions - shell: bash - - name: Run quick tests (CPU ${{ runner.os }}) - run: | - python -c 'import torch; print(torch.__version__); print(torch.rand(5,3))' - python -c "import monai; monai.config.print_config()" - ./runtests.sh --min - env: - QUICKTEST: True - packaging: runs-on: ubuntu-latest env: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bfdc639788..34f8390fa9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -87,18 +87,18 @@ jobs: versioning: # compute versioning file from python setup.py # upload as artifact - # (also used in docker.yml) if: github.repository == 'Project-MONAI/MONAI' needs: packaging - container: - image: localhost:5000/local_monai:latest - runs-on: [self-hosted, linux, x64, build_only] + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 # full history so that we can git describe with: - ref: main fetch-depth: 0 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 - shell: bash run: | git describe @@ -118,11 +118,9 @@ jobs: release_tag_docker: if: github.repository == 'Project-MONAI/MONAI' needs: versioning - runs-on: [self-hosted, linux, x64, build_only] + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - with: - ref: main - name: Download version uses: actions/download-artifact@v2 with: @@ -136,6 +134,13 @@ jobs: run: | echo "$RELEASE_VERSION" cat _version.py + - if: startsWith(github.ref, 'refs/tags/') + name: Install latest docker + run: | + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" + sudo apt-get update + sudo apt-get install docker-ce - if: startsWith(github.ref, 'refs/tags/') name: build with the tag env: @@ -144,6 +149,17 @@ jobs: run: | # get tag info for versioning mv _version.py monai/ + # version checks + target=" \"version\": \"$RELEASE_VERSION\"" + local=`grep "\"version\"" monai/_version.py` + echo 
"$target" + echo "$local" + if [[ "$local" == "$target" ]]; then + echo "matched version string" + else + echo "unmatched version string, please check the tagging branch." + exit 1 + fi # remove flake package as it is not needed on hub.docker.com sed -i '/flake/d' requirements-dev.txt docker build -t projectmonai/monai:"$RELEASE_VERSION" -f Dockerfile . diff --git a/.github/workflows/setupapp.yml b/.github/workflows/setupapp.yml index d0dc3a9f10..eaf91f8876 100644 --- a/.github/workflows/setupapp.yml +++ b/.github/workflows/setupapp.yml @@ -43,7 +43,8 @@ jobs: which python python -m pip install --upgrade pip wheel python -m pip uninstall -y torch torchvision - python -m pip install torch==1.9.0 torchvision==0.10.0 + rm -rf $(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")/ruamel* + python -m pip install torch==1.10.0 torchvision==0.11.1 python -m pip install -r requirements-dev.txt - name: Run unit tests report coverage run: | @@ -97,7 +98,7 @@ jobs: - name: Install the dependencies run: | python -m pip install --upgrade pip wheel - python -m pip install torch==1.9.0 torchvision==0.10.0 + python -m pip install torch==1.10.0 torchvision==0.11.1 python -m pip install -r requirements-dev.txt - name: Run quick tests CPU ubuntu run: | diff --git a/.github/workflows/weekly-preview.yml b/.github/workflows/weekly-preview.yml index df0b5dd759..d5c4e5ae05 100644 --- a/.github/workflows/weekly-preview.yml +++ b/.github/workflows/weekly-preview.yml @@ -33,7 +33,7 @@ jobs: export YEAR_WEEK=$(date +'%y%U') echo "Year week for tag is ${YEAR_WEEK}" if ! [[ $YEAR_WEEK =~ ^[0-9]{4}$ ]] ; then echo "Wrong 'year week' format. Should be 4 digits."; exit 1 ; fi - git tag "0.7.dev${YEAR_WEEK}" + git tag "0.8.dev${YEAR_WEEK}" git log -1 git tag --list python setup.py sdist bdist_wheel diff --git a/.gitignore b/.gitignore index 7444d7f2f9..13155c3088 100644 --- a/.gitignore +++ b/.gitignore @@ -135,3 +135,4 @@ tests/testing_data/*.tiff # VSCode .vscode/ +*.zip diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c36c96186c..ea637fe329 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,18 +22,35 @@ repos: args: ['--maxkb=1024'] - id: detect-private-key - #- repo: https://github.com/asottile/pyupgrade - # rev: v2.23.2 - # hooks: - # - id: pyupgrade - # args: [--py36-plus] - # name: Upgrade code + - repo: https://github.com/asottile/pyupgrade + rev: v2.29.0 + hooks: + - id: pyupgrade + args: [--py36-plus] + name: Upgrade code + exclude: | + (?x)^( + versioneer.py| + monai/_version.py + )$ - #- repo: https://github.com/asottile/yesqa - # rev: v1.2.3 - # hooks: - # - id: yesqa - # name: Unused noqa + - repo: https://github.com/asottile/yesqa + rev: v1.2.3 + hooks: + - id: yesqa + name: Unused noqa + additional_dependencies: + - flake8>=3.8.1 + - flake8-bugbear + - flake8-comprehensions + - flake8-executable + - flake8-pyi + - pep8-naming + exclude: | + (?x)^( + monai/__init__.py| + docs/source/conf.py + )$ #- repo: https://github.com/PyCQA/isort # rev: 5.9.3 diff --git a/CHANGELOG.md b/CHANGELOG.md index bdbd23e7dd..7dea15cd0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,50 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
## [Unreleased] -* renamed model's `n_classes` to `num_classes` + +## [0.7.0] - 2021-09-24 +### Added +* Overview of [new features in v0.7](docs/source/whatsnew_0_7.md) +* Initial phase of major usability improvements in `monai.transforms` to support input and backend in PyTorch and NumPy +* Performance enhancements, with [profiling and tuning guides](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_model_training_guide.md) for typical use cases +* Reproducing [training modules and workflows](https://github.com/Project-MONAI/tutorials/tree/master/kaggle/RANZCR/4th_place_solution) of state-of-the-art Kaggle competition solutions +* 24 new transforms, including + * `OneOf` meta transform + * DeepEdit guidance signal transforms for interactive segmentation + * Transforms for self-supervised pre-training + * Integration of [NVIDIA Tools Extension](https://developer.nvidia.com/blog/nvidia-tools-extension-api-nvtx-annotation-tool-for-profiling-code-in-python-and-c-c/) (NVTX) + * Integration of [cuCIM](https://github.com/rapidsai/cucim) + * Stain normalization and contextual grid for digital pathology +* `Transchex` network for vision-language transformers for chest X-ray analysis +* `DatasetSummary` utility in `monai.data` +* `WarmupCosineSchedule` +* Deprecation warnings and documentation support for better backwards compatibility +* Padding with additional `kwargs` and different backend API +* Additional options such as `dropout` and `norm` in various networks and their submodules + +### Changed +* Base Docker image upgraded to `nvcr.io/nvidia/pytorch:21.08-py3` from `nvcr.io/nvidia/pytorch:21.06-py3` +* Deprecated input argument `n_classes`, in favor of `num_classes` +* Deprecated input argument `dimensions` and `ndims`, in favor of `spatial_dims` +* Updated the Sphinx-based documentation theme for better readability +* `NdarrayTensor` type is replaced by `NdarrayOrTensor` for simpler annotations +* Self-attention-based network blocks now support both 2D and 3D inputs + +### Removed +* The deprecated `TransformInverter`, in favor of `monai.transforms.InvertD` +* GitHub self-hosted CI/CD pipelines for nightly and post-merge tests +* `monai.handlers.utils.evenly_divisible_all_gather` +* `monai.handlers.utils.string_list_all_gather` + +### Fixed +* A Multi-thread cache writing issue in `LMDBDataset` +* Output shape convention inconsistencies of the image readers +* Output directory and file name flexibility issue for `NiftiSaver`, `PNGSaver` +* Requirement of the `label` field in test-time augmentation +* Input argument flexibility issues for `ThreadDataLoader` +* Decoupled `Dice` and `CrossEntropy` intermediate results in `DiceCELoss` +* Improved documentation, code examples, and warning messages in various modules +* Various usability issues reported by users ## [0.6.0] - 2021-07-08 ### Added @@ -25,6 +68,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Fully compatible with PyTorch 1.9 * `--disttests` and `--min` options for `runtests.sh` * Initial support of pre-merge tests with Nvidia Blossom system + ### Changed * Base Docker image upgraded to `nvcr.io/nvidia/pytorch:21.06-py3` from `nvcr.io/nvidia/pytorch:21.04-py3` @@ -34,11 +78,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
* Unified the terms: `post_transform` is renamed to `postprocessing`, `pre_transform` is renamed to `preprocessing` * Unified the postprocessing transforms and event handlers to accept the "channel-first" data format * `evenly_divisible_all_gather` and `string_list_all_gather` moved to `monai.utils.dist` + ### Removed * Support of 'batched' input for postprocessing transforms and event handlers * `TorchVisionFullyConvModel` * `set_visible_devices` utility function * `SegmentationSaver` and `TransformsInverter` handlers + ### Fixed * Issue of handling big-endian image headers * Multi-thread issue for non-random transforms in the cache-based datasets @@ -269,9 +315,11 @@ the postprocessing steps should be used before calling the metrics methods * Optionally depend on PyTorch-Ignite v0.4.2 instead of v0.3.0 * Optionally depend on torchvision, ITK * Enhanced CI tests with 8 new testing environments + ### Removed * `MONAI/examples` folder (relocated into [`Project-MONAI/tutorials`](https://github.com/Project-MONAI/tutorials)) * `MONAI/research` folder (relocated to [`Project-MONAI/research-contributions`](https://github.com/Project-MONAI/research-contributions)) + ### Fixed * `dense_patch_slices` incorrect indexing * Data type issue in `GeneralizedWassersteinDiceLoss` @@ -302,6 +350,7 @@ the postprocessing steps should be used before calling the metrics methods * Cross-platform CI tests supporting multiple Python versions * Optional import mechanism * Experimental features for third-party transforms integration + ### Changed > For more details please visit [the project wiki](https://github.com/Project-MONAI/MONAI/wiki/Notable-changes-between-0.1.0-and-0.2.0) * Core modules now require numpy >= 1.17 @@ -311,9 +360,11 @@ the postprocessing steps should be used before calling the metrics methods * Base Docker image upgraded to `nvcr.io/nvidia/pytorch:20.03-py3` from `nvcr.io/nvidia/pytorch:19.10-py3` * Enhanced local testing tools * Documentation website domain changed to https://docs.monai.io + ### Removed * Support of Python < 3.6 * Automatic installation of optional dependencies including pytorch-ignite, nibabel, tensorboard, pillow, scipy, scikit-image + ### Fixed * Various issues in type and argument names consistency * Various issues in docstring and documentation site @@ -336,7 +387,8 @@ the postprocessing steps should be used before calling the metrics methods [highlights]: https://github.com/Project-MONAI/MONAI/blob/master/docs/source/highlights.md -[Unreleased]: https://github.com/Project-MONAI/MONAI/compare/0.6.0...HEAD +[Unreleased]: https://github.com/Project-MONAI/MONAI/compare/0.7.0...HEAD +[0.7.0]: https://github.com/Project-MONAI/MONAI/compare/0.6.0...0.7.0 [0.6.0]: https://github.com/Project-MONAI/MONAI/compare/0.5.3...0.6.0 [0.5.3]: https://github.com/Project-MONAI/MONAI/compare/0.5.0...0.5.3 [0.5.0]: https://github.com/Project-MONAI/MONAI/compare/0.4.0...0.5.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0dce26582a..954549581a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -289,9 +289,9 @@ When major features are ready for a milestone, to prepare for a new release: repository's artifacts (e.g. the file at https://github.com/Project-MONAI/MONAI/actions/runs/66570977). - Check the release test at [TestPyPI](https://test.pypi.org/project/monai/), download the artifacts when the CI finishes. - Optionally run [the cron testing jobs](https://github.com/Project-MONAI/MONAI/blob/dev/.github/workflows/cron.yml) on `releasing/[version number]`. 
+- Rebase `releasing/[version number]` to `main` and make sure all the test pipelines succeed. - Once the release candidate is verified, tag and push a milestone, for example, `git push origin 0.1.0`. The tag must be on the latest commit of `releasing/[version number]`. -- Rebase `releasing/[version number]` to `main`, make sure all the test pipelines succeed. - Upload the packages to [PyPI](https://pypi.org/project/monai/). This could be done manually by ``twine upload dist/*``, given the artifacts are unzipped to the folder ``dist/``. - Merge `releasing/[version number]` into `dev`; this step must make sure that the tagging commit remains unchanged on `dev`. diff --git a/Dockerfile b/Dockerfile index 77fe1f828f..ce4306c639 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ # To build with a different base image # please run `docker build` using the `--build-arg PYTORCH_IMAGE=...` flag. -ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:21.08-py3 +ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:21.09-py3 FROM ${PYTORCH_IMAGE} LABEL maintainer="monai.contact@gmail.com" diff --git a/README.md b/README.md index e9facef64d..e08b1d07a8 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Its ambitions are: ## Features > _The codebase is currently under active development._ -> _Please see [the technical highlights](https://docs.monai.io/en/latest/highlights.html) and [What's New in 0.6](https://docs.monai.io/en/latest/whatsnew_0_6.html) of the current milestone release._ +> _Please see [the technical highlights](https://docs.monai.io/en/latest/highlights.html) and [What's New](https://docs.monai.io/en/latest/whatsnew.html) of the current milestone release._ - flexible pre-processing for multi-dimensional medical imaging data; - compositional & portable APIs for ease of integration in existing workflows; diff --git a/docs/images/fast_training.png b/docs/images/fast_training.png index d0584b9dac..34e47bcb21 100644 Binary files a/docs/images/fast_training.png and b/docs/images/fast_training.png differ diff --git a/docs/images/nsight_comparison.png b/docs/images/nsight_comparison.png new file mode 100644 index 0000000000..9b91826513 Binary files /dev/null and b/docs/images/nsight_comparison.png differ diff --git a/docs/images/threaddataloader.png b/docs/images/threaddataloader.png new file mode 100644 index 0000000000..565df8d0d4 Binary files /dev/null and b/docs/images/threaddataloader.png differ diff --git a/docs/requirements.txt b/docs/requirements.txt index 00dd4d2c1e..cefb47e7e0 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,6 +1,6 @@ -f https://download.pytorch.org/whl/cpu/torch-1.6.0%2Bcpu-cp37-cp37m-linux_x86_64.whl torch>=1.5 -pytorch-ignite==0.4.5 +pytorch-ignite==0.4.6 numpy>=1.17 itk>=5.2 nibabel @@ -20,3 +20,5 @@ sphinxcontrib-serializinghtml sphinx-autodoc-typehints==1.11.1 pandas einops +transformers +mlflow diff --git a/docs/source/conf.py b/docs/source/conf.py index 324be8a0fd..fe10c546cd 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -107,16 +107,8 @@ def generate_apidocs(*args): html_theme_options = { "external_links": [{"url": "https://github.com/Project-MONAI/tutorials", "name": "Tutorials"}], "icon_links": [ - { - "name": "GitHub", - "url": "https://github.com/project-monai/monai", - "icon": "fab fa-github-square", - }, - { - "name": "Twitter",
"url": "https://twitter.com/projectmonai", "icon": "fab fa-twitter-square"}, ], "collapse_navigation": True, "navigation_depth": 3, diff --git a/docs/source/handlers.rst b/docs/source/handlers.rst index 5caccc6b4b..d32b6d88e3 100644 --- a/docs/source/handlers.rst +++ b/docs/source/handlers.rst @@ -150,11 +150,6 @@ GarbageCollector handler .. autoclass:: GarbageCollector :members: -Transform inverter ------------------- -.. autoclass:: TransformInverter - :members: - Post processing --------------- .. autoclass:: PostProcessing @@ -165,6 +160,11 @@ Decollate batch .. autoclass:: DecollateBatch :members: +MLFlow handler +-------------- +.. autoclass:: MLFlowHandler + :members: + NVTX Handlers ------------- .. automodule:: monai.handlers.nvtx_handlers diff --git a/docs/source/highlights.md b/docs/source/highlights.md index 141c0846d1..2db79b4821 100644 --- a/docs/source/highlights.md +++ b/docs/source/highlights.md @@ -16,7 +16,7 @@ The overall architecture and modules are shown in the following figure: The rest of this page provides more details for each module. * [Data I/O, processing and augmentation](#medical-image-data-i-o-processing-and-augmentation) -* [Datasets](#datasets) +* [Datasets and DataLoader](#datasets-and-dataloader) * [Loss functions](#losses) * [Optimizers](#optimizers) * [Network architectures](#network-architectures) @@ -25,7 +25,7 @@ The rest of this page provides more details for each module. * [Result writing](#result-writing) * [Workflows](#workflows) * [Research](#research) -* [GPU acceleration](#gpu-acceleration) +* [Performance optimization and GPU acceleration](#performance-optimization-and-gpu-acceleration) * [Applications](#applications) ## Medical image data I/O, processing and augmentation @@ -56,8 +56,15 @@ transformations. These currently include, for example: [2D transforms tutorial](https://github.com/Project-MONAI/tutorials/blob/master/modules/transforms_demo_2d.ipynb) shows the detailed usage of several MONAI medical image specific transforms. ![2d transform examples](../images/medical_transforms.png) -### 3. Fused spatial transforms and GPU acceleration -As medical image volumes are usually large (in multi-dimensional arrays), pre-processing performance affects the overall pipeline speed. MONAI provides affine transforms to execute fused spatial operations, supports GPU acceleration via native PyTorch for high performance. + +### 3. Transforms support both NumPy array and PyTorch Tensor (CPU or GPU accelerated) +From MONAI v0.7 we introduced PyTorch `Tensor` based computation in transforms, many transforms already support both `NumPy array` and `Tensor` as input types and computational backends. To get the supported backends of every transform, please execute: `python monai/transforms/utils.py`. + +To accelerate the transforms, a common approach is to leverage GPU parallel-computation. Users can first convert input data into GPU Tensor by `ToTensor` or `EnsureType` transform, then the following transforms can execute on GPU based on PyTorch `Tensor` APIs. +GPU transform tutorial is available at [Spleen fast training tutorial](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb). + +### 4. Fused spatial transforms +As medical image volumes are usually large (in multi-dimensional arrays), pre-processing performance affects the overall pipeline speed. MONAI provides affine transforms to execute fused spatial operations. 
For example: ```py affine = Affine( rotate_params=np.pi / 4, scale_params=(1.2, 1.2), translate_params=(200, 40), padding_mode='zeros', - device=torch.device('cuda:0') ) # convert the image using bilinear interpolation new_img = affine(image, spatial_size=(300, 400), mode='bilinear') ``` Experiments and test results are available at [Fused transforms test](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/transform_speed.ipynb). -Currently, all the geometric image transforms (Spacing, Zoom, Rotate, Resize, etc.) are designed based on the PyTorch native interfaces. [Geometric transforms tutorial](https://github.com/Project-MONAI/tutorials/blob/master/modules/3d_image_transforms.ipynb) indicates the usage of affine transforms with 3D medical images. +Currently, all the geometric image transforms (Spacing, Zoom, Rotate, Resize, etc.) are designed based on the PyTorch native interfaces, so all of them support GPU acceleration via GPU `Tensor` operations for high performance. + +[Geometric transforms tutorial](https://github.com/Project-MONAI/tutorials/blob/master/modules/3d_image_transforms.ipynb) demonstrates the usage of affine transforms with 3D medical images. ![3d transform examples](../images/affine.png) -### 4. Randomly crop out batch images based on positive/negative ratio +### 5. Randomly crop out batch images based on positive/negative ratio Medical image data volume may be too large to fit into GPU memory. A widely-used approach is to randomly draw small-size data samples during training and run a “sliding window” routine for inference. MONAI currently provides general random sampling strategies including class-balanced fixed-ratio sampling, which may help stabilize the patch-based training process. A typical example is in the [Spleen 3D segmentation tutorial](https://github.com/Project-MONAI/tutorials/blob/master/3d_segmentation/spleen_segmentation_3d.ipynb), which achieves class-balanced sampling with the `RandCropByPosNegLabel` transform. -### 5. Deterministic training for reproducibility +### 6. Deterministic training for reproducibility Deterministic training support is necessary and important for deep learning research, especially in the medical field. Users can easily set the random seed for all the random transforms in MONAI locally, without affecting other non-deterministic modules in the user's program. For example: @@ -99,16 +107,16 @@ Users can also enable/disable deterministic at the beginning of training program monai.utils.set_determinism(seed=0, additional_settings=None) ``` -### 6. Multiple transform chains +### 7. Multiple transform chains To apply different transforms on the same data and concatenate the results, MONAI provides the `CopyItems` transform to make copies of specified items in the data dictionary, the `ConcatItems` transform to combine specified items on the expected dimension, and the `DeleteItems` transform to delete unnecessary items to save memory. Typical usage is to scale the intensity of the same image into different ranges and concatenate the results together. ![multiple transform chains](../images/multi_transform_chains.png) -### 7. Debug transforms with DataStats +### 8. Debug transforms with DataStats When transforms are combined with the "compose" function, it's not easy to track the output of a specific transform.
To help debug errors in the composed transforms, MONAI provides utility transforms such as `DataStats` to print out intermediate data properties such as `data shape`, `value range`, `data value`, `Additional information`, etc. It's a self-contained transform and can be integrated into any transform chain. -### 8. Post-processing transforms for model output +### 9. Post-processing transforms for model output MONAI also provides post-processing transforms for handling the model outputs. Currently, the transforms include: - Adding an activation layer (Sigmoid, Softmax, etc.). - Converting to discrete values (Argmax, One-Hot, Threshold value, etc.), as in figure (b) below. @@ -119,12 +127,19 @@ MONAI also provides post-processing transforms for handling the model outputs. C After decollating the batch data of model output and applying the post-processing transforms, it's easier to compute metrics, save model output into files or visualize data in TensorBoard. [Postprocessing transforms tutorial](https://github.com/Project-MONAI/tutorials/blob/master/modules/postprocessing_transforms.ipynb) shows an example with several main transforms for post-processing. ![post-processing transforms](../images/postprocessing_transforms.png) -### 9. Integrate third-party transforms +### 10. Integrate third-party transforms The design of MONAI transforms emphasizes code readability and usability. It works for array data or dictionary-based data. MONAI also provides `Adaptor` tools to accommodate different data formats for 3rd party transforms. To convert the data shapes or types, utility transforms such as `ToTensor`, `ToNumpy`, `SqueezeDim` are also provided. So it's easy to enhance the transform chain by seamlessly integrating transforms from external packages, including `ITK`, `BatchGenerator`, `TorchIO` and `Rising`. For more details, please check out the tutorial: [integrate 3rd party transforms into MONAI program](https://github.com/Project-MONAI/tutorials/blob/master/modules/integrate_3rd_party_transforms.ipynb). +In digital pathology training, the CPU is preoccupied with loading the large images and cannot keep up with data preparation. This causes the pipeline to become IO-bound and results in under-utilization of the GPU. To overcome this bottleneck, [cuCIM](https://github.com/rapidsai/cucim) has implemented optimized versions of several common transforms used in the digital pathology pipeline. These transforms run natively on the GPU and act on CuPy arrays. MONAI provides the `CuCIM` and `RandCuCIM` adapters to integrate the `cuCIM` library. For instance: +```py +RandCuCIM(name="color_jitter", brightness=64.0 / 255.0, contrast=0.75, saturation=0.25, hue=0.04) +CuCIM(name="scale_intensity_range", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0) +``` +This has shown a significant speedup in training a pathology metastasis detection model. + +### 11. IO factory for medical image formats Many popular image formats exist in the medical domain, and they differ considerably, with rich metadata information. To easily handle different medical image formats in the same pipeline, [MONAI provides `LoadImage` transform](https://github.com/Project-MONAI/tutorials/blob/master/modules/load_medical_images.ipynb), which can automatically choose image readers based on the supported suffixes and in the following priority order: - User-specified reader at runtime when calling this loader.
- Registered readers from the latest to the first in the list. @@ -134,13 +149,13 @@ The `ImageReader` API is quite straightforward, users can easily extend it for t With these pre-defined image readers, MONAI can load images in formats: `NIfTI`, `DICOM`, `PNG`, `JPG`, `BMP`, `NPY/NPZ`, etc. -### 11. Save transform data into NIfTI or PNG files +### 12. Save transform data into NIfTI or PNG files To convert images into files or debug the transform chain, MONAI provides the `SaveImage` transform. Users can inject this transform into the transform chain to save the results. -### 12. Automatically ensure `channel-first` data shape +### 13. Automatically ensure `channel-first` data shape Medical images have different shape formats. They can be `channel-last`, `channel-first` or even `no-channel`. We may, for example, want to load several `no-channel` images and stack them as `channel-first` data. To improve the user experience, MONAI provides an `EnsureChannelFirst` transform to automatically detect the data shape according to the meta information and convert it to the `channel-first` format consistently. -### 13. Invert spatial transforms and test-time augmentations +### 14. Invert spatial transforms and test-time augmentations It is often desirable to invert the previously applied spatial transforms (resize, flip, rotate, zoom, crop, pad, etc.) within the deep learning workflows, for example, to return to the original imaging space after processing the image data in a normalized data space. Many spatial transforms have been enhanced with an `inverse` operation since v0.5. The [model inference tutorial](https://github.com/Project-MONAI/tutorials/blob/master/3d_segmentation/torch/unet_inference_dict.py) shows a basic example. If the pipeline includes random transformations, users may want to observe the effect that these transformations have on the output. The typical approach is to pass the same input through the transforms multiple times with different random realizations, then use the inverse transforms to move all the results to a common space and calculate the metrics. MONAI provides `TestTimeAugmentation` for this, which by default will calculate the `mode`, `mean`, `standard deviation` and `volume variation coefficient`. @@ -153,7 +168,7 @@ If the pipeline includes random transformations, users may want to observe the e (2) The TTA results of `mode`, `mean` and `standard deviation`: ![test time augmentation](../images/tta.png) -## Datasets +## Datasets and DataLoader ### 1. Cache IO and transforms data to accelerate training Users often need to train the model with many (potentially thousands of) epochs over the data to achieve the desired model quality. A native PyTorch implementation may repeatedly load data and run the same preprocessing steps for every epoch during training, which can be time-consuming and unnecessary, especially when the medical image volumes are large. @@ -221,6 +236,11 @@ The `partition_dataset` utility in MONAI can perform different types of partitio CSV tables are often used in addition to image data to incorporate adjunct information, such as patient demographics, lab results, image acquisition parameters and other non-image data. MONAI provides `CSVDataset` to load CSV files and `CSVIterableDataset` to load large CSV files with scalable data access. In addition to the regular preprocessing transform while loading, it also supports loading multiple CSV files, joining tables, selecting rows and columns, and grouping.
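For illustration, a minimal `CSVDataset` sketch might look like the following (the `demographics.csv` file and its columns are hypothetical, and argument names may differ between MONAI versions):
```py
import pandas as pd
from monai.data import CSVDataset

# hypothetical adjunct-information table; the file name and columns are made up
pd.DataFrame(
    {"subject_id": ["s1", "s2"], "age": [63, 71], "label": [0, 1]}
).to_csv("demographics.csv", index=False)

# load selected columns; `col_types` casts values and fills defaults while loading
dataset = CSVDataset(
    filename="demographics.csv",
    col_names=["subject_id", "age", "label"],
    col_types={"age": {"type": float, "default": 0.0}},
)
print(dataset[0])  # e.g. {'subject_id': 's1', 'age': 63.0, 'label': 0}
```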
[CSVDatasets tutorial](https://github.com/Project-MONAI/tutorials/blob/master/modules/csv_datasets.ipynb) shows detailed usage examples. +### 9. `ThreadDataLoader` vs. `DataLoader` +If the transforms are lightweight, especially when we cache all the data in RAM, the multiprocessing of PyTorch `DataLoader` may introduce unnecessary IPC overhead and cause GPU utilization to drop after every epoch. MONAI provides `ThreadDataLoader`, which executes the transforms in a separate thread: +![threaddataloader](../images/threaddataloader.png) +A `ThreadDataLoader` example is available in the [Spleen fast training tutorial](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb), and a combined `ThreadDataLoader` and AMP training sketch is shown at the end of this page. + ## Losses There are domain-specific loss functions in medical imaging research that are not typically used in generic computer vision tasks. As an important module of MONAI, these loss functions are implemented in PyTorch, such as `DiceLoss`, `GeneralizedDiceLoss`, `MaskedDiceLoss`, `TverskyLoss`, `FocalLoss`, `DiceCELoss` and `DiceFocalLoss`. @@ -249,7 +269,7 @@ add_module('conv1', conv_type(in_channels, out_channels, kernel_size=1, bias=Fal ``` ### 2. Implementation of generic 2D/3D networks -And there are several 1D/2D/3D-compatible implementations of intermediate blocks and generic networks, such as UNet, DynUNet, DenseNet, GAN, AHNet, VNet, SENet(and SEResNet, SEResNeXt), SegResNet, EfficientNet, Attention-based networks. All the networks can support PyTorch serialization pipeline based on `torch.jit.script`. +And there are several 1D/2D/3D-compatible implementations of intermediate blocks and generic networks, such as UNet, DynUNet, DenseNet, GAN, AHNet, VNet, SENet (and SEResNet, SEResNeXt), SegResNet, EfficientNet, and Attention-based transformer networks. All the networks support the PyTorch serialization pipeline based on `torch.jit.script`. ### 3. Network adapter to finetune final layers Instead of training from scratch, we often leverage existing models and finetune the final layers of a network for new learning tasks. MONAI provides a `NetAdapter` to easily replace the last layer of a model with a convolutional layer or a fully-connected layer. A typical usage example is to adapt [Torchvision models trained with ImageNet](https://pytorch.org/vision/stable/models.html) for other learning tasks. @@ -366,10 +386,15 @@ G. Wang, X. Liu, C. Li, Z. Xu, J. Ruan, H. Zhu, T. Meng, K. Li, N. Huang, S. Zha Wentao Zhu, Can Zhao, Wenqi Li, Holger Roth, Ziyue Xu, and Daguang Xu (2020) "LAMP: Large Deep Nets with Automated Model Parallelism for Image Segmentation." MICCAI 2020 (Early Accept, paper link: https://arxiv.org/abs/2006.12575) ![LAMP UNet](../images/unet-pipe.png) -## GPU acceleration +## Performance optimization and GPU acceleration +Typically, model training is a time-consuming step during deep learning development, especially in medical imaging applications. Volumetric medical images are usually large (as multi-dimensional arrays) and the model training process can be complex. Even with powerful hardware (e.g. CPU/GPU with large RAM), it is not easy to fully leverage these resources to achieve high performance. MONAI provides a fast training guide to achieve the best performance: https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_model_training_guide.md. + NVIDIA GPUs have been widely applied in many areas of deep learning training and evaluation, and the CUDA parallel computation shows obvious acceleration when compared to traditional computation methods.
To fully leverage GPU features, many popular mechanisms have been developed, such as automatic mixed precision (AMP) and distributed data parallelism. MONAI supports these features and provides rich examples. -### 1. Auto mixed precision(AMP) +### 1. Profiling the pipelines +First of all, MONAI provides several methods based on `DLProf`, `Nsight`, `NVTX` and `NVML` for users to analyze their programs and identify performance bottlenecks. The analyses include operation-based GPU activity and overall GPU activity during model training. They greatly help users locate computing bottlenecks and provide insights into the areas to improve for better computing efficiency. A detailed example is shown in the [performance profiling tutorial](https://github.com/Project-MONAI/tutorials/blob/master/performance_profiling/profiling_train_base_nvtx.ipynb). + +### 2. Auto mixed precision (AMP) In 2017, NVIDIA researchers developed a methodology for mixed-precision training, which combined single-precision (FP32) with half-precision (e.g. FP16) format when training a network, and it achieved the same accuracy as FP32 training using the same hyperparameters. For the PyTorch 1.6 release, developers at NVIDIA and Facebook moved mixed precision functionality into PyTorch core as the AMP package, `torch.cuda.amp`. @@ -379,16 +404,16 @@ MONAI workflows can easily set `amp=True/False` in `SupervisedTrainer` or `Super We also executed the same test program on an NVIDIA A100 GPU with the same software environment, and obtained faster results: ![amp a100 results](../images/amp_training_a100.png) More details are available in the [AMP training tutorial](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/automatic_mixed_precision.ipynb). -We also tried to combine AMP with `CacheDataset` and `Novograd` optimizer to achieve the fast training in MONAI, able to obtain approximately 12x speedup compared with a Pytorch native implementation when the training converges at a validation mean dice of 0.93. Benchmark for reference: +We also combined `AMP` with `CacheDataset`, `GPU cache`, `GPU transforms`, `ThreadDataLoader`, the `DiceCE` loss function and the `Novograd` optimizer to achieve fast training in MONAI, obtaining approximately a `200x` speedup compared with a native PyTorch implementation when the training converges at a validation mean dice of `0.95`. Benchmark for reference: ![fast training results](../images/fast_training.png) More details are available in the [Fast training tutorial](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb). -### 2. Distributed data parallel +### 3. Distributed data parallel Distributed data parallelism is an important feature of PyTorch that connects multiple GPU devices on single or multiple nodes to train or evaluate models. The distributed data parallel APIs of MONAI are compatible with the native PyTorch distributed module, the pytorch-ignite distributed module, Horovod, XLA, and the SLURM platform. MONAI provides demos for reference: train/evaluate with PyTorch DDP, train/evaluate with Horovod, train/evaluate with Ignite DDP, partition dataset and train with SmartCacheDataset, as well as a real-world training example based on Decathlon challenge Task01 - Brain Tumor segmentation. The demo contains distributed caching, training, and validation. We obtained performance benchmarks for reference (based on PyTorch 1.6, CUDA 11, NVIDIA V100 GPUs): ![distributed training results](../images/distributed_training.png) -### 3. 
C++/CUDA optimized modules +### 4. C++/CUDA optimized modules To further accelerate the domain-specific routines in the workflows, MONAI C++/CUDA implementations are introduced as extensions of the PyTorch native implementations. MONAI provides the modules using [the two ways of building C++ extensions from PyTorch](https://pytorch.org/tutorials/advanced/cpp_extension.html#custom-c-and-cuda-extensions): - via `setuptools`, for modules including `Resampler`, `Conditional random field (CRF)`, `Fast bilateral filtering using the permutohedral lattice`. @@ -396,6 +421,26 @@ MONAI provides the modules using [the two ways of building C++ extensions from P The following figure shows results of MONAI's Gaussian mixture models applied to tissue and surgical tool segmentation: ![Gaussian mixture models as a postprocessing step](../images/gmm_feature_set_comparison_s.png) +### 5. Cache IO and transforms data to GPU memory +Even with `CacheDataset`, we usually need to copy the same data to GPU memory for GPU random transforms or network computation in every epoch. An efficient approach is to cache the data in GPU memory directly, so every epoch can start with GPU computation immediately. + +For example: +```py +train_transforms = [ + LoadImaged(...), + AddChanneld(...), + Spacingd(...), + Orientationd(...), + ScaleIntensityRanged(...), + EnsureTyped(..., data_type="tensor"), + ToDeviced(..., device="cuda:0"), + RandCropByPosNegLabeld(...), +] +dataset = CacheDataset(..., transform=train_transforms) +``` +Here we convert the data to a PyTorch `Tensor` with the `EnsureTyped` transform and move it to the GPU with the `ToDeviced` transform. `CacheDataset` caches the transform results up to and including `ToDeviced`, so the cached data resides in GPU memory. In every epoch, the program then fetches the cached data from GPU memory and only executes the random transform `RandCropByPosNegLabeld` directly on the GPU. +A GPU caching example is available in the [Spleen fast training tutorial](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb). + ## Applications The research area of medical image deep learning is expanding fast. To bring the latest achievements into applications, MONAI contains many application components to build end-to-end solutions or prototypes for other similar use cases. @@ -417,3 +462,8 @@ Starting from v0.5.0, MONAI provides experimental features for building learning The following figure shows the registration of CT images acquired at different time points for a single patient using MONAI: ![3d registration](../images/3d_paired.png) + +### 4. Reproducing the state-of-the-art Kaggle competition solutions +[A reimplementation](https://github.com/Project-MONAI/tutorials/tree/master/kaggle/RANZCR/4th_place_solution) of the 4th-place solution of the RANZCR CLiP - Catheter and Line Position Challenge on Kaggle: https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification + +The original solution was produced by Team Watercooled; the authors are Dieter (https://www.kaggle.com/christofhenkel) and Psi (https://www.kaggle.com/philippsinger).
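To make the `ThreadDataLoader` and AMP descriptions above concrete, the following is a minimal end-to-end training sketch; it is an illustration of the APIs, not a tuned recipe. It assumes `pytorch-ignite` is installed (required by `monai.engines`) and uses synthetic random volumes in place of real data:
```py
import torch
from monai.data import Dataset, ThreadDataLoader
from monai.engines import SupervisedTrainer
from monai.losses import DiceCELoss
from monai.networks.nets import UNet

# synthetic image/label pairs, just to keep the sketch self-contained
data = [
    {"image": torch.rand(1, 64, 64, 64), "label": (torch.rand(1, 64, 64, 64) > 0.5).float()}
    for _ in range(4)
]
# ThreadDataLoader executes the transforms in a separate thread,
# avoiding the per-epoch IPC overhead of the multiprocessing DataLoader
loader = ThreadDataLoader(Dataset(data=data), batch_size=2, buffer_size=1, num_workers=0)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = UNet(spatial_dims=3, in_channels=1, out_channels=1, channels=(8, 16, 32), strides=(2, 2)).to(device)
trainer = SupervisedTrainer(
    device=device,
    max_epochs=2,
    train_data_loader=loader,
    network=net,
    optimizer=torch.optim.Adam(net.parameters(), lr=1e-3),
    loss_function=DiceCELoss(sigmoid=True),
    amp=torch.cuda.is_available(),  # enable automatic mixed precision on GPU
)
trainer.run()
```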
diff --git a/docs/source/installation.md b/docs/source/installation.md index 08ab109142..6936c0bf49 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -174,9 +174,9 @@ Since MONAI v0.2.0, the extras syntax such as `pip install 'monai[nibabel]'` is - The options are ``` -[nibabel, skimage, pillow, tensorboard, gdown, ignite, torchvision, itk, tqdm, lmdb, psutil, cucim, openslide, pandas, einops] +[nibabel, skimage, pillow, tensorboard, gdown, ignite, torchvision, itk, tqdm, lmdb, psutil, cucim, openslide, pandas, einops, transformers, mlflow] ``` which correspond to `nibabel`, `scikit-image`, `pillow`, `tensorboard`, -`gdown`, `pytorch-ignite`, `torchvision`, `itk`, `tqdm`, `lmdb`, `psutil`, `cucim`, `openslide-python`, `pandas` and `einops`, respectively. +`gdown`, `pytorch-ignite`, `torchvision`, `itk`, `tqdm`, `lmdb`, `psutil`, `cucim`, `openslide-python`, `pandas`, `einops`, `transformers` and `mlflow`, respectively. - `pip install 'monai[all]'` installs all the optional dependencies. diff --git a/docs/source/networks.rst b/docs/source/networks.rst index 54c2756535..36d62752d4 100644 --- a/docs/source/networks.rst +++ b/docs/source/networks.rst @@ -500,6 +500,11 @@ Nets .. autoclass:: Critic :members: +`Transchex` +~~~~~~~~~~~~~~~~ +.. autoclass:: Transchex + :members: + `NetAdapter` ~~~~~~~~~~~~ .. autoclass:: NetAdapter diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst index b8f57e0dbe..804346b290 100644 --- a/docs/source/transforms.rst +++ b/docs/source/transforms.rst @@ -53,80 +53,115 @@ Generic Interfaces .. autoclass:: Decollated :members: +`OneOf` +^^^^^^^ +.. autoclass:: OneOf + :members: + Vanilla Transforms ------------------ Crop and Pad ^^^^^^^^^^^^ +`Pad` +""""" +.. autoclass:: Pad + :members: + :special-members: __call__ + `SpatialPad` """""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/SpatialPad.png + :alt: example of SpatialPad .. autoclass:: SpatialPad :members: :special-members: __call__ `BorderPad` """"""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/BorderPad.png + :alt: example of BorderPad .. autoclass:: BorderPad :members: :special-members: __call__ `DivisiblePad` """""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/DivisiblePad.png + :alt: example of DivisiblePad .. autoclass:: DivisiblePad :members: :special-members: __call__ `SpatialCrop` """"""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/SpatialCrop.png + :alt: example of SpatialCrop .. autoclass:: SpatialCrop :members: :special-members: __call__ `CenterSpatialCrop` """"""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/CenterSpatialCrop.png + :alt: example of CenterSpatialCrop .. autoclass:: CenterSpatialCrop :members: :special-members: __call__ `RandSpatialCrop` """"""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandSpatialCrop.png + :alt: example of RandSpatialCrop .. autoclass:: RandSpatialCrop :members: :special-members: __call__ `RandSpatialCropSamples` """""""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandSpatialCropSamples.png + :alt: example of RandSpatialCropSamples .. autoclass:: RandSpatialCropSamples :members: :special-members: __call__ `CropForeground` """""""""""""""" +.. 
image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/CropForeground.png + :alt: example of CropForeground .. autoclass:: CropForeground :members: :special-members: __call__ `RandWeightedCrop` """""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandWeightedCrop.png + :alt: example of RandWeightedCrop .. autoclass:: RandWeightedCrop :members: :special-members: __call__ `RandCropByPosNegLabel` """"""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCropByPosNegLabel.png + :alt: example of RandCropByPosNegLabel .. autoclass:: RandCropByPosNegLabel :members: :special-members: __call__ `RandCropByLabelClasses` """""""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCropByLabelClasses.png + :alt: example of RandCropByLabelClasses .. autoclass:: RandCropByLabelClasses :members: :special-members: __call__ `ResizeWithPadOrCrop` """"""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ResizeWithPadOrCrop.png + :alt: example of ResizeWithPadOrCrop .. autoclass:: ResizeWithPadOrCrop :members: :special-members: __call__ @@ -139,12 +174,16 @@ Crop and Pad `RandScaleCrop` """"""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandScaleCrop.png + :alt: example of RandScaleCrop .. autoclass:: RandScaleCrop :members: :special-members: __call__ `CenterScaleCrop` """"""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/CenterScaleCrop.png + :alt: example of CenterScaleCrop .. autoclass:: CenterScaleCrop :members: :special-members: __call__ @@ -154,90 +193,120 @@ Intensity `RandGaussianNoise` """"""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGaussianNoise.png + :alt: example of RandGaussianNoise .. autoclass:: RandGaussianNoise :members: :special-members: __call__ `ShiftIntensity` """""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ShiftIntensity.png + :alt: example of ShiftIntensity .. autoclass:: ShiftIntensity :members: :special-members: __call__ `RandShiftIntensity` """""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandShiftIntensity.png + :alt: example of RandShiftIntensity .. autoclass:: RandShiftIntensity :members: :special-members: __call__ `StdShiftIntensity` """"""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/StdShiftIntensity.png + :alt: example of StdShiftIntensity .. autoclass:: StdShiftIntensity :members: :special-members: __call__ `RandStdShiftIntensity` """"""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandStdShiftIntensity.png + :alt: example of RandStdShiftIntensity .. autoclass:: RandStdShiftIntensity :members: :special-members: __call__ `RandBiasField` """"""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandBiasField.png + :alt: example of RandBiasField .. autoclass:: RandBiasField :members: :special-members: __call__ `ScaleIntensity` """""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ScaleIntensity.png + :alt: example of ScaleIntensity .. autoclass:: ScaleIntensity :members: :special-members: __call__ `RandScaleIntensity` """""""""""""""""""" +.. 
image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandScaleIntensity.png + :alt: example of RandScaleIntensity .. autoclass:: RandScaleIntensity :members: :special-members: __call__ `NormalizeIntensity` """""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/NormalizeIntensity.png + :alt: example of NormalizeIntensity .. autoclass:: NormalizeIntensity :members: :special-members: __call__ `ThresholdIntensity` """""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ThresholdIntensity.png + :alt: example of ThresholdIntensity .. autoclass:: ThresholdIntensity :members: :special-members: __call__ `ScaleIntensityRange` """"""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ScaleIntensityRange.png + :alt: example of ScaleIntensityRange .. autoclass:: ScaleIntensityRange :members: :special-members: __call__ `ScaleIntensityRangePercentiles` """""""""""""""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ScaleIntensityRangePercentiles.png + :alt: example of ScaleIntensityRangePercentiles .. autoclass:: ScaleIntensityRangePercentiles :members: :special-members: __call__ `AdjustContrast` """""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/AdjustContrast.png + :alt: example of AdjustContrast .. autoclass:: AdjustContrast :members: :special-members: __call__ `RandAdjustContrast` """""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandAdjustContrast.png + :alt: example of RandAdjustContrast .. autoclass:: RandAdjustContrast :members: :special-members: __call__ `MaskIntensity` """"""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/MaskIntensity.png + :alt: example of MaskIntensity .. autoclass:: MaskIntensity :members: :special-members: __call__ @@ -250,30 +319,40 @@ Intensity `GaussianSmooth` """""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/GaussianSmooth.png + :alt: example of GaussianSmooth .. autoclass:: GaussianSmooth :members: :special-members: __call__ `RandGaussianSmooth` """""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGaussianSmooth.png + :alt: example of RandGaussianSmooth .. autoclass:: RandGaussianSmooth :members: :special-members: __call__ `GaussianSharpen` """"""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/GaussianSharpen.png + :alt: example of GaussianSharpen .. autoclass:: GaussianSharpen :members: :special-members: __call__ `RandGaussianSharpen` """"""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGaussianSharpen.png + :alt: example of RandGaussianSharpen .. autoclass:: RandGaussianSharpen :members: :special-members: __call__ `RandHistogramShift` """""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandHistogramShift.png + :alt: example of RandHistogramShift .. autoclass:: RandHistogramShift :members: :special-members: __call__ @@ -286,43 +365,63 @@ Intensity `GibbsNoise` """""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/GibbsNoise.png + :alt: example of GibbsNoise .. autoclass:: GibbsNoise :members: :special-members: __call__ `RandGibbsNoise` """""""""""""""" +.. 
image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGibbsNoise.png + :alt: example of RandGibbsNoise .. autoclass:: RandGibbsNoise :members: :special-members: __call__ `KSpaceSpikeNoise` """""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/KSpaceSpikeNoise.png + :alt: example of KSpaceSpikeNoise .. autoclass:: KSpaceSpikeNoise :members: :special-members: __call__ `RandKSpaceSpikeNoise` """""""""""""""""""""" - .. autoclass:: RandKSpaceSpikeNoise - :members: - :special-members: __call__ +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandKSpaceSpikeNoise.png + :alt: example of RandKSpaceSpikeNoise +.. autoclass:: RandKSpaceSpikeNoise + :members: + :special-members: __call__ + +`RandCoarseTransform` +""""""""""""""""""""" +.. autoclass:: RandCoarseTransform + :members: + :special-members: __call__ `RandCoarseDropout` """"""""""""""""""" - .. autoclass:: RandCoarseDropout - :members: - :special-members: __call__ +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCoarseDropout.png + :alt: example of RandCoarseDropout +.. autoclass:: RandCoarseDropout + :members: + :special-members: __call__ + +`RandCoarseShuffle` +""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCoarseShuffle.png + :alt: example of RandCoarseShuffle +.. autoclass:: RandCoarseShuffle + :members: + :special-members: __call__ `HistogramNormalize` """""""""""""""""""" - .. autoclass:: HistogramNormalize - :members: - :special-members: __call__ - -`LocalPatchShuffling` -""""""""""""""""""""" -.. autoclass:: LocalPatchShuffling +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/HistogramNormalize.png + :alt: example of HistogramNormalize +.. autoclass:: HistogramNormalize :members: :special-members: __call__ @@ -381,18 +480,24 @@ Post-processing `AsDiscrete` """""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/AsDiscrete.png + :alt: example of AsDiscrete .. autoclass:: AsDiscrete :members: :special-members: __call__ `KeepLargestConnectedComponent` """"""""""""""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/KeepLargestConnectedComponent.png + :alt: example of KeepLargestConnectedComponent .. autoclass:: KeepLargestConnectedComponent :members: :special-members: __call__ `LabelFilter` """"""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/LabelFilter.png + :alt: example of LabelFilter .. autoclass:: LabelFilter :members: :special-members: __call__ @@ -405,6 +510,8 @@ Post-processing `LabelToContour` """""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/LabelToContour.png + :alt: example of LabelToContour .. autoclass:: LabelToContour :members: :special-members: __call__ @@ -431,42 +538,56 @@ Spatial `Spacing` """"""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Spacing.png + :alt: example of Spacing .. autoclass:: Spacing :members: :special-members: __call__ `Orientation` """"""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Orientation.png + :alt: example of Orientation .. autoclass:: Orientation :members: :special-members: __call__ `RandRotate` """""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandRotate.png + :alt: example of RandRotate .. 
autoclass:: RandRotate :members: :special-members: __call__ `RandFlip` """""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandFlip.png + :alt: example of RandFlip .. autoclass:: RandFlip :members: :special-members: __call__ `RandAxisFlip` """""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandAxisFlip.png + :alt: example of RandAxisFlip .. autoclass:: RandAxisFlip :members: :special-members: __call__ `RandZoom` """""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandZoom.png + :alt: example of RandZoom .. autoclass:: RandZoom :members: :special-members: __call__ `Affine` """""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Affine.png + :alt: example of Affine .. autoclass:: Affine :members: :special-members: __call__ @@ -479,6 +600,8 @@ Spatial `RandAffine` """""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandAffine.png + :alt: example of RandAffine .. autoclass:: RandAffine :members: :special-members: __call__ @@ -501,50 +624,80 @@ Spatial :members: :special-members: __call__ +`GridDistortion` +"""""""""""""""" +.. autoclass:: GridDistortion + :members: + :special-members: __call__ + +`RandGridDistortion` +"""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGridDistortion.png + :alt: example of RandGridDistortion +.. autoclass:: RandGridDistortion + :members: + :special-members: __call__ + `Rand2DElastic` """"""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rand2DElastic.png + :alt: example of Rand2DElastic .. autoclass:: Rand2DElastic :members: :special-members: __call__ `Rand3DElastic` """"""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rand3DElastic.png + :alt: example of Rand3DElastic .. autoclass:: Rand3DElastic :members: :special-members: __call__ `Rotate90` """""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rotate90.png + :alt: example of Rotate90 .. autoclass:: Rotate90 :members: :special-members: __call__ `RandRotate90` """""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandRotate90.png + :alt: example of RandRotate90 .. autoclass:: RandRotate90 :members: :special-members: __call__ `Flip` """""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Flip.png + :alt: example of Flip .. autoclass:: Flip :members: :special-members: __call__ `Resize` """""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Resize.png + :alt: example of Resize .. autoclass:: Resize :members: :special-members: __call__ `Rotate` """""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rotate.png + :alt: example of Rotate .. autoclass:: Rotate :members: :special-members: __call__ `Zoom` """""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Zoom.png + :alt: example of Zoom .. autoclass:: Zoom :members: :special-members: __call__ @@ -711,16 +864,28 @@ Utility `IntensityStats` """""""""""""""" - .. autoclass:: IntensityStats - :members: - :special-members: __call__ +.. autoclass:: IntensityStats + :members: + :special-members: __call__ `ToDevice` """""""""" - .. autoclass:: ToDevice +.. autoclass:: ToDevice :members: :special-members: __call__ +`CuCIM` +""""""" +.. 
autoclass:: CuCIM + :members: + :special-members: __call__ + +`RandCuCIM` +""""""""""" +.. autoclass:: RandCuCIM + :members: + :special-members: __call__ + Dictionary Transforms --------------------- @@ -730,72 +895,96 @@ Crop and Pad (Dict) `SpatialPadd` """"""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/SpatialPadd.png + :alt: example of SpatialPadd .. autoclass:: SpatialPadd :members: :special-members: __call__ `BorderPadd` """""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/BorderPadd.png + :alt: example of BorderPadd .. autoclass:: BorderPadd :members: :special-members: __call__ `DivisiblePadd` """"""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/DivisiblePadd.png + :alt: example of DivisiblePadd .. autoclass:: DivisiblePadd :members: :special-members: __call__ `SpatialCropd` """""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/SpatialCropd.png + :alt: example of SpatialCropd .. autoclass:: SpatialCropd :members: :special-members: __call__ `CenterSpatialCropd` """""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/CenterSpatialCropd.png + :alt: example of CenterSpatialCropd .. autoclass:: CenterSpatialCropd :members: :special-members: __call__ `RandSpatialCropd` """""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandSpatialCropd.png + :alt: example of RandSpatialCropd .. autoclass:: RandSpatialCropd :members: :special-members: __call__ `RandSpatialCropSamplesd` """"""""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandSpatialCropSamplesd.png + :alt: example of RandSpatialCropSamplesd .. autoclass:: RandSpatialCropSamplesd :members: :special-members: __call__ `CropForegroundd` """"""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/CropForegroundd.png + :alt: example of CropForegroundd .. autoclass:: CropForegroundd :members: :special-members: __call__ `RandWeightedCropd` """"""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandWeightedCropd.png + :alt: example of RandWeightedCropd .. autoclass:: RandWeightedCropd :members: :special-members: __call__ `RandCropByPosNegLabeld` """""""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCropByPosNegLabeld.png + :alt: example of RandCropByPosNegLabeld .. autoclass:: RandCropByPosNegLabeld :members: :special-members: __call__ `RandCropByLabelClassesd` """"""""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCropByLabelClassesd.png + :alt: example of RandCropByLabelClassesd .. autoclass:: RandCropByLabelClassesd :members: :special-members: __call__ `ResizeWithPadOrCropd` """""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ResizeWithPadOrCropd.png + :alt: example of ResizeWithPadOrCropd .. autoclass:: ResizeWithPadOrCropd :members: :special-members: __call__ @@ -808,12 +997,16 @@ Crop and Pad (Dict) `RandScaleCropd` """""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandScaleCropd.png + :alt: example of RandScaleCropd .. autoclass:: RandScaleCropd :members: :special-members: __call__ `CenterScaleCropd` """""""""""""""""" +.. 
image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/CenterScaleCropd.png + :alt: example of CenterScaleCropd .. autoclass:: CenterScaleCropd :members: :special-members: __call__ @@ -823,159 +1016,219 @@ Intensity (Dict) `RandGaussianNoised` """""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGaussianNoised.png + :alt: example of RandGaussianNoised .. autoclass:: RandGaussianNoised :members: :special-members: __call__ `ShiftIntensityd` """"""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ShiftIntensityd.png + :alt: example of ShiftIntensityd .. autoclass:: ShiftIntensityd :members: :special-members: __call__ `RandShiftIntensityd` """"""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandShiftIntensityd.png + :alt: example of RandShiftIntensityd .. autoclass:: RandShiftIntensityd :members: :special-members: __call__ `StdShiftIntensityd` """""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/StdShiftIntensityd.png + :alt: example of StdShiftIntensityd .. autoclass:: StdShiftIntensityd :members: :special-members: __call__ `RandStdShiftIntensityd` """""""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandStdShiftIntensityd.png + :alt: example of RandStdShiftIntensityd .. autoclass:: RandStdShiftIntensityd :members: :special-members: __call__ `RandBiasFieldd` """""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandBiasFieldd.png + :alt: example of RandBiasFieldd .. autoclass:: RandBiasFieldd :members: :special-members: __call__ `ScaleIntensityd` """"""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ScaleIntensityd.png + :alt: example of ScaleIntensityd .. autoclass:: ScaleIntensityd :members: :special-members: __call__ `RandScaleIntensityd` """"""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandScaleIntensityd.png + :alt: example of RandScaleIntensityd .. autoclass:: RandScaleIntensityd :members: :special-members: __call__ `NormalizeIntensityd` """"""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/NormalizeIntensityd.png + :alt: example of NormalizeIntensityd .. autoclass:: NormalizeIntensityd :members: :special-members: __call__ `ThresholdIntensityd` """"""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ThresholdIntensityd.png + :alt: example of ThresholdIntensityd .. autoclass:: ThresholdIntensityd :members: :special-members: __call__ `ScaleIntensityRanged` """""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ScaleIntensityRanged.png + :alt: example of ScaleIntensityRanged .. autoclass:: ScaleIntensityRanged :members: :special-members: __call__ `GibbsNoised` """""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/GibbsNoised.png + :alt: example of GibbsNoised .. autoclass:: GibbsNoised :members: :special-members: __call__ `RandGibbsNoised` """""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGibbsNoised.png + :alt: example of RandGibbsNoised .. autoclass:: RandGibbsNoised :members: :special-members: __call__ `KSpaceSpikeNoised` """""""""""""""""""""" +.. 
image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/KSpaceSpikeNoised.png + :alt: example of KSpaceSpikeNoised .. autoclass:: KSpaceSpikeNoised :members: :special-members: __call__ `RandKSpaceSpikeNoised` """"""""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandKSpaceSpikeNoised.png + :alt: example of RandKSpaceSpikeNoised .. autoclass:: RandKSpaceSpikeNoised :members: :special-members: __call__ `ScaleIntensityRangePercentilesd` """"""""""""""""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ScaleIntensityRangePercentilesd.png + :alt: example of ScaleIntensityRangePercentilesd .. autoclass:: ScaleIntensityRangePercentilesd :members: :special-members: __call__ `AdjustContrastd` """"""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/AdjustContrastd.png + :alt: example of AdjustContrastd .. autoclass:: AdjustContrastd :members: :special-members: __call__ `RandAdjustContrastd` """"""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandAdjustContrastd.png + :alt: example of RandAdjustContrastd .. autoclass:: RandAdjustContrastd :members: :special-members: __call__ `MaskIntensityd` """""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/MaskIntensityd.png + :alt: example of MaskIntensityd .. autoclass:: MaskIntensityd :members: :special-members: __call__ `GaussianSmoothd` """"""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/GaussianSmoothd.png + :alt: example of GaussianSmoothd .. autoclass:: GaussianSmoothd :members: :special-members: __call__ `RandGaussianSmoothd` """"""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGaussianSmoothd.png + :alt: example of RandGaussianSmoothd .. autoclass:: RandGaussianSmoothd :members: :special-members: __call__ `GaussianSharpend` """""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/GaussianSharpend.png + :alt: example of GaussianSharpend .. autoclass:: GaussianSharpend :members: :special-members: __call__ `RandGaussianSharpend` """""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGaussianSharpend.png + :alt: example of RandGaussianSharpend .. autoclass:: RandGaussianSharpend :members: :special-members: __call__ `RandHistogramShiftd` """"""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandHistogramShiftd.png + :alt: example of RandHistogramShiftd .. autoclass:: RandHistogramShiftd :members: :special-members: __call__ `RandCoarseDropoutd` """""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCoarseDropoutd.png + :alt: example of RandCoarseDropoutd .. autoclass:: RandCoarseDropoutd :members: :special-members: __call__ +`RandCoarseShuffled` +"""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCoarseShuffled.png + :alt: example of RandCoarseShuffled +.. autoclass:: RandCoarseShuffled + :members: + :special-members: __call__ + `HistogramNormalized` """"""""""""""""""""" - .. autoclass:: HistogramNormalized - :members: - :special-members: __call__ +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/HistogramNormalized.png + :alt: example of HistogramNormalized +.. 
autoclass:: HistogramNormalized + :members: + :special-members: __call__ IO (Dict) @@ -1004,18 +1257,24 @@ Post-processing (Dict) `AsDiscreted` """"""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/AsDiscreted.png + :alt: example of AsDiscreted .. autoclass:: AsDiscreted :members: :special-members: __call__ `KeepLargestConnectedComponentd` """""""""""""""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/KeepLargestConnectedComponentd.png + :alt: example of KeepLargestConnectedComponentd .. autoclass:: KeepLargestConnectedComponentd :members: :special-members: __call__ `LabelFilterd` """""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/LabelFilterd.png + :alt: example of LabelFilterd .. autoclass:: LabelFilterd :members: :special-members: __call__ @@ -1028,6 +1287,8 @@ Post-processing (Dict) `LabelToContourd` """"""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/LabelToContourd.png + :alt: example of LabelToContourd .. autoclass:: LabelToContourd :members: :special-members: __call__ @@ -1067,96 +1328,128 @@ Spatial (Dict) `Spacingd` """""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Spacingd.png + :alt: example of Spacingd .. autoclass:: Spacingd :members: :special-members: __call__ `Orientationd` """""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Orientationd.png + :alt: example of Orientationd .. autoclass:: Orientationd :members: :special-members: __call__ `Flipd` """"""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Flipd.png + :alt: example of Flipd .. autoclass:: Flipd :members: :special-members: __call__ `RandFlipd` """"""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandFlipd.png + :alt: example of RandFlipd .. autoclass:: RandFlipd :members: :special-members: __call__ `RandAxisFlipd` """"""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandAxisFlipd.png + :alt: example of RandAxisFlipd .. autoclass:: RandAxisFlipd :members: :special-members: __call__ `Rotated` """"""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rotated.png + :alt: example of Rotated .. autoclass:: Rotated :members: :special-members: __call__ `RandRotated` """"""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandRotated.png + :alt: example of RandRotated .. autoclass:: RandRotated :members: :special-members: __call__ `Zoomd` """"""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Zoomd.png + :alt: example of Zoomd .. autoclass:: Zoomd :members: :special-members: __call__ `RandZoomd` """"""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandZoomd.png + :alt: example of RandZoomd .. autoclass:: RandZoomd :members: :special-members: __call__ `RandRotate90d` """"""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandRotate90d.png + :alt: example of RandRotate90d .. autoclass:: RandRotate90d :members: :special-members: __call__ `Rotate90d` """"""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rotate90d.png + :alt: example of Rotate90d .. autoclass:: Rotate90d :members: :special-members: __call__ `Resized` """"""""" +.. 
image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Resized.png + :alt: example of Resized .. autoclass:: Resized :members: :special-members: __call__ `Affined` """"""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Affined.png + :alt: example of Affined .. autoclass:: Affined :members: :special-members: __call__ `RandAffined` """"""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandAffined.png + :alt: example of RandAffined .. autoclass:: RandAffined :members: :special-members: __call__ `Rand2DElasticd` """""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rand2DElasticd.png + :alt: example of Rand2DElasticd .. autoclass:: Rand2DElasticd :members: :special-members: __call__ `Rand3DElasticd` """""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rand3DElasticd.png + :alt: example of Rand3DElasticd .. autoclass:: Rand3DElasticd :members: :special-members: __call__ @@ -1167,6 +1460,20 @@ Spatial (Dict) :members: :special-members: __call__ +`GridDistortiond` +""""""""""""""""" +.. autoclass:: GridDistortiond + :members: + :special-members: __call__ + +`RandGridDistortiond` +""""""""""""""""""""" +.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGridDistortiond.png + :alt: example of RandGridDistortiond +.. autoclass:: RandGridDistortiond + :members: + :special-members: __call__ + Utility (Dict) ^^^^^^^^^^^^^^ @@ -1352,10 +1659,21 @@ Utility (Dict) `ToDeviced` """"""""""" - .. autoclass:: ToDeviced - :members: - :special-members: __call__ +.. autoclass:: ToDeviced + :members: + :special-members: __call__ +`CuCIMd` +"""""""" +.. autoclass:: CuCIMd + :members: + :special-members: __call__ + +`RandCuCIMd` +"""""""""""" +.. autoclass:: RandCuCIMd + :members: + :special-members: __call__ Transform Adaptors ------------------ @@ -1377,3 +1695,6 @@ Utilities --------- .. automodule:: monai.transforms.utils :members: + +.. automodule:: monai.transforms.utils_pytorch_numpy_unification + :members: diff --git a/docs/source/utils.rst b/docs/source/utils.rst index a9aea7932b..c97d16de17 100644 --- a/docs/source/utils.rst +++ b/docs/source/utils.rst @@ -43,7 +43,7 @@ Profiling Deprecated ---------- -.. automodule:: monai.utils.deprecated +.. automodule:: monai.utils.deprecate_utils :members: diff --git a/docs/source/whatsnew.rst b/docs/source/whatsnew.rst index daed871e14..e1f118cdf6 100644 --- a/docs/source/whatsnew.rst +++ b/docs/source/whatsnew.rst @@ -6,5 +6,6 @@ What's New .. 
toctree:: :maxdepth: 1 + whatsnew_0_7.md whatsnew_0_6.md whatsnew_0_5.md diff --git a/docs/source/whatsnew_0_6.md index bdc419df37..8df0503142 100644 --- a/docs/source/whatsnew_0_6.md +++ b/docs/source/whatsnew_0_6.md @@ -1,4 +1,4 @@ -# What's new in 0.6 🎉🎉 +# What's new in 0.6 - Decollating mini-batches as an essential post-processing step - Pythonic APIs to load the pretrained models from Clara Train MMARs diff --git a/docs/source/whatsnew_0_7.md new file mode 100644 index 0000000000..748729e94d --- /dev/null +++ b/docs/source/whatsnew_0_7.md @@ -0,0 +1,63 @@ +# What's new in 0.7 🎉🎉 + +- Performance enhancements with profiling and tuning guides +- Major usability improvements in `monai.transforms` +- Reimplementing state-of-the-art Kaggle solutions +- Vision-language multimodal transformer architectures + +## Performance enhancements with profiling and tuning guides + +Model training is often a time-consuming step during deep learning development, especially for medical imaging applications. Even with powerful hardware (e.g. CPU/GPU with large RAM), the workflows often require careful profiling and tuning to achieve high performance. MONAI has been focusing on performance enhancements, and in this version, a fast model training guide is provided to help build highly performant workflows, with a comprehensive overview of the profiling tools and practical strategies: https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_model_training_guide.md. + +The following figure shows the use of [Nvidia Nsight™ Systems](https://developer.nvidia.com/nsight-systems) for system-wide performance analysis during a performance enhancement study. ![nsight_vis](../images/nsight_comparison.png) + +With the performance profiling and enhancements, several typical use cases were studied to improve the training efficiency. The following figure shows that fast training using MONAI can be `200` times faster than a regular baseline ([learn more](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb)), and it is `20` times faster than the MONAI v0.6 fast training solution. ![fast_training](../images/fast_training.png) + +## Major usability improvements in `monai.transforms` for NumPy/PyTorch inputs and backends + +MONAI has started to roll out major usability enhancements for the `monai.transforms` module. Many transforms now support both NumPy and PyTorch as input types and computational backends. To get the supported backends of every transform, please execute: `python monai/transforms/utils.py`. + +One benefit of these enhancements is that users can now better leverage GPUs for preprocessing. By transferring the input data onto the GPU using `ToTensor` or `EnsureType` and applying GPU-based transforms to the data, [the tutorial of spleen segmentation](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb) shows the great potential of using these flexible modules for fast and efficient training. + +## Reimplementing state-of-the-art Kaggle solutions + +With this release, we actively evaluate and enhance the quality and flexibility of the MONAI core modules, using a public Kaggle challenge as a testbed. 
+[A reimplementation](https://github.com/Project-MONAI/tutorials/tree/master/kaggle/RANZCR/4th_place_solution) of a state-of-the-art solution at the [Kaggle RANZCR CLiP - Catheter and Line Position Challenge](https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification) is made available in this version. + +## Vision-language multimodal transformers + +In this release, MONAI adds support for training multimodal (vision + language) transformers that can handle both image and textual data. MONAI introduces the `TransCheX` model, which consists of vision, language, and mixed-modality transformer layers for processing chest X-rays and their corresponding radiological reports within a unified framework. In addition to `TransCheX`, users have the flexibility to alter the architecture by varying the number of vision, language and mixed-modality layers and customizing the classification head. Moreover, the model can be initialized from pre-trained BERT language models for fine-tuning. diff --git a/monai/__init__.py index 2c7c920162..dbe0a0aa03 100644 --- a/monai/__init__.py +++ b/monai/__init__.py @@ -26,11 +26,11 @@ __basedir__ = os.path.dirname(__file__) -if not (sys.version_info.major == PY_REQUIRED_MAJOR and sys.version_info.minor >= PY_REQUIRED_MINOR): +if sys.version_info.major != PY_REQUIRED_MAJOR or sys.version_info.minor < PY_REQUIRED_MINOR: raise RuntimeError( "MONAI requires Python {}.{} or higher. But the current Python is: {}".format( PY_REQUIRED_MAJOR, PY_REQUIRED_MINOR, sys.version - ), + ) ) from .utils.module import load_submodules  # noqa: E402 diff --git a/monai/_extensions/loader.py index 5f77480ecc..d7ebca64e3 100644 --- a/monai/_extensions/loader.py +++ b/monai/_extensions/loader.py @@ -34,7 +34,7 @@ def timeout(time, message): except KeyboardInterrupt as e: if timer is not None and timer.is_alive(): raise e  # interrupt from user? - raise TimeoutError(message) + raise TimeoutError(message) from e finally: if timer is not None: try: @@ -84,11 +84,7 @@ def load_module( # This will either run the build or return the existing .so object. name = module_name + platform_str.replace(".", "_") module = load( - name=name, - sources=source, - extra_cflags=define_args, - extra_cuda_cflags=define_args, - verbose=verbose_build, + name=name, sources=source, extra_cflags=define_args, extra_cuda_cflags=define_args, verbose=verbose_build ) return module diff --git a/monai/apps/datasets.py index c766914026..d87e635876 100644 --- a/monai/apps/datasets.py +++ b/monai/apps/datasets.py @@ -111,7 +111,7 @@ def _generate_data_list(self, dataset_dir: str) -> List[Dict]: ValueError: When ``section`` is not one of ["training", "validation", "test"]. 
""" - class_names = sorted((x for x in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, x)))) + class_names = sorted(x for x in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, x))) self.num_class = len(class_names) image_files = [ [ @@ -147,11 +147,7 @@ def _generate_data_list(self, dataset_dir: str) -> List[Dict]: ) return [ - { - "image": image_files_list[i], - "label": image_class[i], - "class_name": class_name[i], - } + {"image": image_files_list[i], "label": image_class[i], "class_name": class_name[i]} for i in section_indices ] @@ -366,13 +362,7 @@ class CrossValidation: """ - def __init__( - self, - dataset_cls, - nfolds: int = 5, - seed: int = 0, - **dataset_params, - ) -> None: + def __init__(self, dataset_cls, nfolds: int = 5, seed: int = 0, **dataset_params) -> None: if not hasattr(dataset_cls, "_split_datalist"): raise ValueError("dataset class must have _split_datalist API.") self.dataset_cls = dataset_cls diff --git a/monai/apps/deepedit/transforms.py b/monai/apps/deepedit/transforms.py index 845e7bd1d0..e0881c7158 100644 --- a/monai/apps/deepedit/transforms.py +++ b/monai/apps/deepedit/transforms.py @@ -15,12 +15,7 @@ class DiscardAddGuidanced(MapTransform): - def __init__( - self, - keys: KeysCollection, - probability: float = 1.0, - allow_missing_keys: bool = False, - ): + def __init__(self, keys: KeysCollection, probability: float = 1.0, allow_missing_keys: bool = False): """ Discard positive and negative points randomly or Add the two channels for inference time @@ -54,11 +49,7 @@ class ResizeGuidanceCustomd(Transform): Resize the guidance based on cropped vs resized image. """ - def __init__( - self, - guidance: str, - ref_image: str, - ) -> None: + def __init__(self, guidance: str, ref_image: str) -> None: self.guidance = guidance self.ref_image = ref_image diff --git a/monai/apps/deepgrow/dataset.py b/monai/apps/deepgrow/dataset.py index acaeba0bc3..1dcdc4ec25 100644 --- a/monai/apps/deepgrow/dataset.py +++ b/monai/apps/deepgrow/dataset.py @@ -97,7 +97,7 @@ def create_dataset( image = os.path.abspath(image) label = os.path.abspath(label) if label else None - logging.info("Image: {}; Label: {}".format(image, label if label else None)) + logging.info(f"Image: {image}; Label: {label if label else None}") data = transforms({image_key: image, label_key: label}) if dimension == 2: data = _save_data_2d( @@ -154,7 +154,7 @@ def _save_data_2d(vol_idx, vol_image, vol_label, dataset_dir, relative_path): if vol_label is not None and np.sum(label) == 0: continue - image_file_prefix = "vol_idx_{:0>4d}_slice_{:0>3d}".format(vol_idx, sid) + image_file_prefix = f"vol_idx_{vol_idx:0>4d}_slice_{sid:0>3d}" image_file = os.path.join(dataset_dir, "images", image_file_prefix) image_file += ".npy" @@ -165,9 +165,7 @@ def _save_data_2d(vol_idx, vol_image, vol_label, dataset_dir, relative_path): # Test Data if vol_label is None: data_list.append( - { - "image": image_file.replace(dataset_dir + os.pathsep, "") if relative_path else image_file, - } + {"image": image_file.replace(dataset_dir + os.pathsep, "") if relative_path else image_file} ) continue @@ -177,7 +175,7 @@ def _save_data_2d(vol_idx, vol_image, vol_label, dataset_dir, relative_path): unique_labels_count = max(unique_labels_count, len(unique_labels)) for idx in unique_labels: - label_file_prefix = "{}_region_{:0>2d}".format(image_file_prefix, int(idx)) + label_file_prefix = f"{image_file_prefix}_region_{int(idx):0>2d}" label_file = os.path.join(dataset_dir, "labels", label_file_prefix) 
label_file += ".npy" @@ -226,7 +224,7 @@ def _save_data_3d(vol_idx, vol_image, vol_label, dataset_dir, relative_path): label_count = 0 unique_labels_count = 0 - image_file_prefix = "vol_idx_{:0>4d}".format(vol_idx) + image_file_prefix = f"vol_idx_{vol_idx:0>4d}" image_file = os.path.join(dataset_dir, "images", image_file_prefix) image_file += ".npy" @@ -236,11 +234,7 @@ def _save_data_3d(vol_idx, vol_image, vol_label, dataset_dir, relative_path): # Test Data if vol_label is None: - data_list.append( - { - "image": image_file.replace(dataset_dir + os.pathsep, "") if relative_path else image_file, - } - ) + data_list.append({"image": image_file.replace(dataset_dir + os.pathsep, "") if relative_path else image_file}) else: # For all Labels unique_labels = np.unique(vol_label.flatten()) @@ -248,7 +242,7 @@ def _save_data_3d(vol_idx, vol_image, vol_label, dataset_dir, relative_path): unique_labels_count = max(unique_labels_count, len(unique_labels)) for idx in unique_labels: - label_file_prefix = "{}_region_{:0>2d}".format(image_file_prefix, int(idx)) + label_file_prefix = f"{image_file_prefix}_region_{int(idx):0>2d}" label_file = os.path.join(dataset_dir, "labels", label_file_prefix) label_file += ".npy" diff --git a/monai/apps/deepgrow/transforms.py b/monai/apps/deepgrow/transforms.py index db450792b0..8c9eb884dd 100644 --- a/monai/apps/deepgrow/transforms.py +++ b/monai/apps/deepgrow/transforms.py @@ -19,7 +19,7 @@ from monai.transforms import Resize, SpatialCrop from monai.transforms.transform import MapTransform, Randomizable, Transform from monai.transforms.utils import generate_spatial_bounding_box -from monai.utils import InterpolateMode, ensure_tuple, ensure_tuple_rep, min_version, optional_import +from monai.utils import InterpolateMode, deprecated_arg, ensure_tuple, ensure_tuple_rep, min_version, optional_import measure, _ = optional_import("skimage.measure", "0.14.2", min_version) distance_transform_cdt, _ = optional_import("scipy.ndimage.morphology", name="distance_transform_cdt") @@ -163,13 +163,7 @@ class AddGuidanceSignald(Transform): """ - def __init__( - self, - image: str = "image", - guidance: str = "guidance", - sigma: int = 2, - number_intensity_ch: int = 1, - ): + def __init__(self, image: str = "image", guidance: str = "guidance", sigma: int = 2, number_intensity_ch: int = 1): self.image = image self.guidance = guidance self.sigma = sigma @@ -276,12 +270,7 @@ class AddRandomGuidanced(Randomizable, Transform): """ - def __init__( - self, - guidance: str = "guidance", - discrepancy: str = "discrepancy", - probability: str = "probability", - ): + def __init__(self, guidance: str = "guidance", discrepancy: str = "discrepancy", probability: str = "probability"): self.guidance = guidance self.discrepancy = discrepancy self.probability = probability @@ -476,7 +465,7 @@ class AddGuidanceFromPointsd(Transform): background: key that represents user background (-ve) clicks. axis: axis that represents slices in 3D volume. (axis to Depth) depth_first: if depth (slices) is positioned at first dimension. - dimensions: dimensions based on model used for deepgrow (2D vs 3D). + spatial_dims: dimensions based on model used for deepgrow (2D vs 3D). slice_key: key that represents applicable slice to add guidance. meta_keys: explicitly indicate the key of the meta data dictionary of `ref_image`. for example, for data with key `image`, the metadata by default is in `image_meta_dict`. 
@@ -486,8 +475,13 @@ class AddGuidanceFromPointsd(Transform): to the key data, default is `meta_dict`, the meta data is a dictionary object. For example, to handle key `image`, read/write affine matrices from the metadata `image_meta_dict` dictionary's `affine` field. + + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. + """ + @deprecated_arg(name="dimensions", since="0.6", msg_suffix="Please use `spatial_dims` instead.") def __init__( self, ref_image, @@ -496,10 +490,11 @@ def __init__( background: str = "background", axis: int = 0, depth_first: bool = True, - dimensions: int = 2, + spatial_dims: int = 2, slice_key: str = "slice", meta_keys: Optional[str] = None, meta_key_postfix: str = "meta_dict", + dimensions: Optional[int] = None, ): self.ref_image = ref_image self.guidance = guidance @@ -507,7 +502,7 @@ def __init__( self.background = background self.axis = axis self.depth_first = depth_first - self.dimensions = dimensions + self.dimensions = spatial_dims if dimensions is None else dimensions self.slice = slice_key self.meta_keys = meta_keys self.meta_key_postfix = meta_key_postfix diff --git a/monai/apps/pathology/data/datasets.py b/monai/apps/pathology/data/datasets.py index 3694ca4144..c9521b1201 100644 --- a/monai/apps/pathology/data/datasets.py +++ b/monai/apps/pathology/data/datasets.py @@ -64,7 +64,7 @@ def __init__( self.patch_size = ensure_tuple_rep(patch_size, 2) self.image_path_list = list({x["image"] for x in self.data}) - self.image_reader_name = image_reader_name + self.image_reader_name = image_reader_name.lower() self.image_reader = WSIReader(image_reader_name) self.wsi_object_dict = None if self.image_reader_name != "openslide": @@ -190,7 +190,7 @@ def __init__( self.patch_size = ensure_tuple_rep(patch_size, 2) # set up whole slide image reader - self.image_reader_name = image_reader_name + self.image_reader_name = image_reader_name.lower() self.image_reader = WSIReader(image_reader_name) # process data and create a list of dictionaries containing all required data and metadata @@ -293,11 +293,7 @@ def _load_a_patch(self, index): location_on_image = sample["image_locations"][patch_num] location_on_mask = sample["mask_locations"][patch_num] - image, _ = self.image_reader.get_data( - img=sample["image"], - location=location_on_image, - size=self.patch_size, - ) + image, _ = self.image_reader.get_data(img=sample["image"], location=location_on_image, size=self.patch_size) processed_sample = {"image": image, "name": sample["name"], "mask_location": location_on_mask} return processed_sample diff --git a/monai/apps/pathology/handlers/prob_map_producer.py b/monai/apps/pathology/handlers/prob_map_producer.py index 7ac4a0e45b..469e9d3c25 100644 --- a/monai/apps/pathology/handlers/prob_map_producer.py +++ b/monai/apps/pathology/handlers/prob_map_producer.py @@ -62,9 +62,10 @@ def attach(self, engine: Engine) -> None: engine: Ignite Engine, it can be a trainer, validator or evaluator. """ - self.num_images = len(engine.data_loader.dataset.data) + data_loader = engine.data_loader # type: ignore + self.num_images = len(data_loader.dataset.data) - for sample in engine.data_loader.dataset.data: + for sample in data_loader.dataset.data: name = sample["name"] self.prob_map[name] = np.zeros(sample["mask_shape"], dtype=self.dtype) self.counter[name] = len(sample["mask_locations"]) @@ -84,6 +85,8 @@ def __call__(self, engine: Engine) -> None: Args: engine: Ignite Engine, it can be a trainer, validator or evaluator. 
""" + if not isinstance(engine.state.batch, dict) or not isinstance(engine.state.output, dict): + raise ValueError("engine.state.batch and engine.state.output must be dictionaries.") names = engine.state.batch["name"] locs = engine.state.batch["mask_location"] pred = engine.state.output["pred"] diff --git a/monai/apps/pathology/metrics/lesion_froc.py b/monai/apps/pathology/metrics/lesion_froc.py index 2140de0080..fa4ce422be 100644 --- a/monai/apps/pathology/metrics/lesion_froc.py +++ b/monai/apps/pathology/metrics/lesion_froc.py @@ -78,11 +78,7 @@ def __init__( self.itc_diameter = itc_diameter self.eval_thresholds = eval_thresholds self.image_reader = WSIReader(image_reader_name) - self.nms = PathologyProbNMS( - sigma=nms_sigma, - prob_threshold=nms_prob_threshold, - box_size=nms_box_size, - ) + self.nms = PathologyProbNMS(sigma=nms_sigma, prob_threshold=nms_prob_threshold, box_size=nms_box_size) def prepare_inference_result(self, sample: Dict): """ @@ -151,12 +147,7 @@ def compute_fp_tp(self): total_tp_probs.extend(tp_probs) total_num_targets += num_targets - return ( - np.array(total_fp_probs), - np.array(total_tp_probs), - total_num_targets, - num_images, - ) + return (np.array(total_fp_probs), np.array(total_tp_probs), total_num_targets, num_images) def evaluate(self): """ @@ -168,17 +159,12 @@ def evaluate(self): # compute FROC curve given the evaluation of all images fps_per_image, total_sensitivity = compute_froc_curve_data( - fp_probs=fp_probs, - tp_probs=tp_probs, - num_targets=num_targets, - num_images=num_images, + fp_probs=fp_probs, tp_probs=tp_probs, num_targets=num_targets, num_images=num_images ) # compute FROC score give specific evaluation threshold froc_score = compute_froc_score( - fps_per_image=fps_per_image, - total_sensitivity=total_sensitivity, - eval_thresholds=self.eval_thresholds, + fps_per_image=fps_per_image, total_sensitivity=total_sensitivity, eval_thresholds=self.eval_thresholds ) return froc_score diff --git a/monai/apps/pathology/transforms/spatial/array.py b/monai/apps/pathology/transforms/spatial/array.py index 53e0c63715..b47e6fd015 100644 --- a/monai/apps/pathology/transforms/spatial/array.py +++ b/monai/apps/pathology/transforms/spatial/array.py @@ -34,9 +34,7 @@ class SplitOnGrid(Transform): """ def __init__( - self, - grid_size: Union[int, Tuple[int, int]] = (2, 2), - patch_size: Optional[Union[int, Tuple[int, int]]] = None, + self, grid_size: Union[int, Tuple[int, int]] = (2, 2), patch_size: Optional[Union[int, Tuple[int, int]]] = None ): # Grid size if isinstance(grid_size, int): diff --git a/monai/apps/pathology/utils.py b/monai/apps/pathology/utils.py index 54d49f5717..30bdde91bb 100644 --- a/monai/apps/pathology/utils.py +++ b/monai/apps/pathology/utils.py @@ -62,11 +62,7 @@ class PathologyProbNMS(ProbNMS): Pathology. """ - def __call__( - self, - probs_map: Union[np.ndarray, torch.Tensor], - resolution_level: int = 0, - ): + def __call__(self, probs_map: Union[np.ndarray, torch.Tensor], resolution_level: int = 0): """ probs_map: the input probabilities map, it must have shape (H[, W, ...]). resolution_level: the level at which the probabilities map is made. 
diff --git a/monai/apps/utils.py b/monai/apps/utils.py index 36fac955fe..16bda628f4 100644 --- a/monai/apps/utils.py +++ b/monai/apps/utils.py @@ -31,12 +31,7 @@ else: tqdm, has_tqdm = optional_import("tqdm", "4.47.0", min_version, "tqdm") -__all__ = [ - "check_hash", - "download_url", - "extractall", - "download_and_extract", -] +__all__ = ["check_hash", "download_url", "extractall", "download_and_extract"] def _basename(p): @@ -69,19 +64,13 @@ def update_to(self, b: int = 1, bsize: int = 1, tsize: Optional[int] = None): self.total = tsize self.update(b * bsize - self.n) # will also set self.n = b * bsize - with TqdmUpTo( - unit="B", - unit_scale=True, - unit_divisor=1024, - miniters=1, - desc=_basename(filepath), - ) as t: + with TqdmUpTo(unit="B", unit_scale=True, unit_divisor=1024, miniters=1, desc=_basename(filepath)) as t: urlretrieve(url, filepath, reporthook=t.update_to) else: if not has_tqdm and progress: warnings.warn("tqdm is not installed, will not show the downloading progress bar.") urlretrieve(url, filepath) - except (URLError, HTTPError, ContentTooShortError, IOError) as e: + except (URLError, HTTPError, ContentTooShortError, OSError) as e: print(f"Download failed from {url} to {filepath}.") raise e @@ -128,7 +117,8 @@ def download_url( Args: url: source URL link to download file. - filepath: target filepath to save the downloaded file. If undefined, `os.path.basename(url)` will be used. + filepath: target filepath to save the downloaded file (including the filename). + If undefined, `os.path.basename(url)` will be used. hash_val: expected hash value to validate the downloaded file. if None, skip hash validation. hash_type: 'md5' or 'sha1', defaults to 'md5'. diff --git a/monai/config/deviceconfig.py b/monai/config/deviceconfig.py index 273431fc72..e542da14ab 100644 --- a/monai/config/deviceconfig.py +++ b/monai/config/deviceconfig.py @@ -73,6 +73,8 @@ def get_optional_config_values(): output["psutil"] = psutil_version output["pandas"] = get_package_version("pandas") output["einops"] = get_package_version("einops") + output["transformers"] = get_package_version("transformers") + output["mlflow"] = get_package_version("mlflow") return output @@ -121,7 +123,7 @@ def get_system_info() -> OrderedDict: elif output["System"] == "Darwin": _dict_append(output, "Mac version", lambda: platform.mac_ver()[0]) else: - with open("/etc/os-release", "r") as rel_f: + with open("/etc/os-release") as rel_f: linux_ver = re.search(r'PRETTY_NAME="(.*)"', rel_f.read()) if linux_ver: _dict_append(output, "Linux version", lambda: linux_ver.group(1)) diff --git a/monai/data/csv_saver.py b/monai/data/csv_saver.py index 62f407bfd5..f9c814679d 100644 --- a/monai/data/csv_saver.py +++ b/monai/data/csv_saver.py @@ -32,11 +32,7 @@ class CSVSaver: """ def __init__( - self, - output_dir: str = "./", - filename: str = "predictions.csv", - overwrite: bool = True, - flush: bool = False, + self, output_dir: str = "./", filename: str = "predictions.csv", overwrite: bool = True, flush: bool = False ) -> None: """ Args: diff --git a/monai/data/dataloader.py b/monai/data/dataloader.py index 2c9174e9f4..bfb6c01c4e 100644 --- a/monai/data/dataloader.py +++ b/monai/data/dataloader.py @@ -81,8 +81,4 @@ def __init__(self, dataset: Dataset, num_workers: int = 0, **kwargs) -> None: if "worker_init_fn" not in kwargs: kwargs.update({"worker_init_fn": worker_init_fn}) - super().__init__( # type: ignore[call-overload] - dataset=dataset, - num_workers=num_workers, - **kwargs, - ) + super().__init__(dataset=dataset, 
num_workers=num_workers, **kwargs) # type: ignore[call-overload] diff --git a/monai/data/dataset.py b/monai/data/dataset.py index c970e83d0d..debf9c6aa4 100644 --- a/monai/data/dataset.py +++ b/monai/data/dataset.py @@ -575,6 +575,7 @@ def __init__( cache_rate: float = 1.0, num_workers: Optional[int] = None, progress: bool = True, + copy_cache: bool = True, ) -> None: """ Args: @@ -587,11 +588,17 @@ def __init__( num_workers: the number of worker processes to use. If num_workers is None then the number returned by os.cpu_count() is used. progress: whether to display a progress bar. + copy_cache: whether to `deepcopy` the cache content before applying the random transforms, + default to `True`. if the random transforms don't modify the cached content + (for example, randomly crop from the cached image and deepcopy the crop region) + or if every cache item is only used once in a `multi-processing` environment, + may set `copy_cache=False` for better performance. """ if not isinstance(transform, Compose): transform = Compose(transform) super().__init__(data=data, transform=transform) self.progress = progress + self.copy_cache = copy_cache self.cache_num = min(int(cache_num), int(len(data) * cache_rate), len(data)) self.num_workers = num_workers if self.num_workers is not None: @@ -656,7 +663,8 @@ def _transform(self, index: int): # only need to deep copy data on first non-deterministic transform if not start_run: start_run = True - data = deepcopy(data) + if self.copy_cache: + data = deepcopy(data) data = apply_transform(_transform, data) return data @@ -722,6 +730,10 @@ class SmartCacheDataset(Randomizable, CacheDataset): shuffle: whether to shuffle the whole data list before preparing the cache content for first epoch. it will not modify the original input data sequence in-place. seed: random seed if shuffle is `True`, default to `0`. + copy_cache: whether to `deepcopy` the cache content before applying the random transforms, + default to `True`. if the random transforms don't modify the cache content + or every cache item is only used once in a `multi-processing` environment, + may set `copy_cache=False` for better performance.
""" def __init__( @@ -736,6 +748,7 @@ def __init__( progress: bool = True, shuffle: bool = True, seed: int = 0, + copy_cache: bool = True, ) -> None: if shuffle: self.set_random_state(seed=seed) @@ -743,7 +756,7 @@ def __init__( self.randomize(data) self.shuffle = shuffle - super().__init__(data, transform, cache_num, cache_rate, num_init_workers, progress) + super().__init__(data, transform, cache_num, cache_rate, num_init_workers, progress, copy_cache) if self._cache is None: self._cache = self._fill_cache() if self.cache_num >= len(data): @@ -977,7 +990,7 @@ def __init__(self, datasets: Sequence, transform: Optional[Callable] = None) -> super().__init__(list(datasets), transform=transform) def __len__(self) -> int: - return min((len(dataset) for dataset in self.data)) + return min(len(dataset) for dataset in self.data) def _transform(self, index: int): def to_list(x): @@ -1206,11 +1219,6 @@ def __init__( files = ensure_tuple(filename) dfs = [pd.read_csv(f) for f in files] data = convert_tables_to_dicts( - dfs=dfs, - row_indices=row_indices, - col_names=col_names, - col_types=col_types, - col_groups=col_groups, - **kwargs, + dfs=dfs, row_indices=row_indices, col_names=col_names, col_types=col_types, col_groups=col_groups, **kwargs ) super().__init__(data=data, transform=transform) diff --git a/monai/data/dataset_summary.py b/monai/data/dataset_summary.py index a8598eb6c8..dfc22f9bc8 100644 --- a/monai/data/dataset_summary.py +++ b/monai/data/dataset_summary.py @@ -60,7 +60,7 @@ def __init__( self.image_key = image_key self.label_key = label_key if image_key: - self.meta_key = "{}_{}".format(image_key, meta_key_postfix) + self.meta_key = f"{image_key}_{meta_key_postfix}" self.all_meta_data: List = [] def collect_meta_data(self): diff --git a/monai/data/decathlon_datalist.py b/monai/data/decathlon_datalist.py index 663b68a08e..78440fe11c 100644 --- a/monai/data/decathlon_datalist.py +++ b/monai/data/decathlon_datalist.py @@ -126,10 +126,7 @@ def load_decathlon_datalist( return _append_paths(base_dir, is_segmentation, expected_data) -def load_decathlon_properties( - data_property_file_path: str, - property_keys: Union[Sequence[str], str], -) -> Dict: +def load_decathlon_properties(data_property_file_path: str, property_keys: Union[Sequence[str], str]) -> Dict: """Load the properties from the JSON file contains data property with specified `property_keys`. 
Args: diff --git a/monai/data/grid_dataset.py b/monai/data/grid_dataset.py index 5b2a4d7abd..5c330f10e4 100644 --- a/monai/data/grid_dataset.py +++ b/monai/data/grid_dataset.py @@ -141,7 +141,7 @@ def __iter__(self): try: iter_end = len(self.dataset) # TODO: support iterable self.dataset except TypeError: - raise NotImplementedError("image dataset must implement `len()`.") + raise NotImplementedError("image dataset must implement `len()`.") from None if worker_info is not None: # split workload diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index cd1486d6d3..303091f0d8 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -25,21 +25,17 @@ from .utils import is_supported_format if TYPE_CHECKING: - import cucim import itk # type: ignore import nibabel as nib - import openslide from nibabel.nifti1 import Nifti1Image from PIL import Image as PILImage - has_itk = has_nib = has_pil = has_cim = has_osl = True + has_itk = has_nib = has_pil = True else: itk, has_itk = optional_import("itk", allow_namespace_pkg=True) nib, has_nib = optional_import("nibabel") Nifti1Image, _ = optional_import("nibabel.nifti1", name="Nifti1Image") PILImage, has_pil = optional_import("PIL.Image") - cucim, has_cim = optional_import("cucim") - openslide, has_osl = optional_import("openslide") __all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader", "WSIReader"] @@ -641,12 +637,7 @@ def _get_meta_dict(self, img) -> Dict: img: a PIL Image object loaded from an image file. """ - return { - "format": img.format, - "mode": img.mode, - "width": img.width, - "height": img.height, - } + return {"format": img.format, "mode": img.mode, "width": img.width, "height": img.height} def _get_spatial_shape(self, img): """ @@ -670,11 +661,9 @@ def __init__(self, reader_lib: str = "OpenSlide"): super().__init__() self.reader_lib = reader_lib.lower() if self.reader_lib == "openslide": - if has_osl: - self.wsi_reader = openslide.OpenSlide + self.wsi_reader, *_ = optional_import("openslide", name="OpenSlide") elif self.reader_lib == "cucim": - if has_cim: - self.wsi_reader = cucim.CuImage + self.wsi_reader, *_ = optional_import("cucim", name="CuImage") else: raise ValueError('`reader_lib` should be either "cuCIM" or "OpenSlide"') @@ -697,11 +686,6 @@ def read(self, data: Union[Sequence[str], str, np.ndarray], **kwargs): data: file name or a list of file names to read. 
""" - if (self.reader_lib == "openslide") and (not has_osl): - raise ImportError("No module named 'openslide'") - if (self.reader_lib == "cucim") and (not has_cim): - raise ImportError("No module named 'cucim'") - img_: List = [] filenames: Sequence[str] = ensure_tuple(data) @@ -740,10 +724,7 @@ def get_data( if self.reader_lib == "openslide" and size is None: # the maximum size is set to WxH - size = ( - img.shape[0] // (2 ** level) - location[0], - img.shape[1] // (2 ** level) - location[1], - ) + size = (img.shape[0] // (2 ** level) - location[0], img.shape[1] // (2 ** level) - location[1]) region = self._extract_region(img, location=location, size=size, level=level, dtype=dtype) @@ -756,10 +737,7 @@ def get_data( else: tuple_patch_size = ensure_tuple_rep(patch_size, 2) patches = self._extract_patches( - region, - patch_size=tuple_patch_size, # type: ignore - grid_shape=grid_shape, - dtype=dtype, + region, patch_size=tuple_patch_size, grid_shape=grid_shape, dtype=dtype # type: ignore ) return patches, metadata @@ -783,11 +761,7 @@ def _extract_region( region = self.convert_to_rgb_array(region, dtype) return region - def convert_to_rgb_array( - self, - raw_region, - dtype: DtypeLike = np.uint8, - ): + def convert_to_rgb_array(self, raw_region, dtype: DtypeLike = np.uint8): """Convert to RGB mode and numpy array""" if self.reader_lib == "openslide": # convert to RGB diff --git a/monai/data/nifti_writer.py b/monai/data/nifti_writer.py index c56d4c1e8d..210321daca 100644 --- a/monai/data/nifti_writer.py +++ b/monai/data/nifti_writer.py @@ -15,17 +15,19 @@ import torch from monai.config import DtypeLike +from monai.config.type_definitions import NdarrayOrTensor from monai.data.utils import compute_shape_offset, to_affine_nd from monai.networks.layers import AffineTransform from monai.utils import GridSampleMode, GridSamplePadMode, optional_import +from monai.utils.type_conversion import convert_data_type nib, _ = optional_import("nibabel") def write_nifti( - data: np.ndarray, + data: NdarrayOrTensor, file_name: str, - affine: Optional[np.ndarray] = None, + affine: Optional[NdarrayOrTensor] = None, target_affine: Optional[np.ndarray] = None, resample: bool = True, output_spatial_shape: Union[Sequence[int], np.ndarray, None] = None, @@ -96,13 +98,17 @@ def write_nifti( If None, use the data type of input data. output_dtype: data type for saving data. Defaults to ``np.float32``. 
""" + if isinstance(data, torch.Tensor): + data, *_ = convert_data_type(data, np.ndarray) + if isinstance(affine, torch.Tensor): + affine, *_ = convert_data_type(affine, np.ndarray) if not isinstance(data, np.ndarray): - raise AssertionError("input data must be numpy array.") + raise AssertionError("input data must be numpy array or torch tensor.") dtype = dtype or data.dtype sr = min(data.ndim, 3) if affine is None: affine = np.eye(4, dtype=np.float64) - affine = to_affine_nd(sr, affine) + affine = to_affine_nd(sr, affine) # type: ignore if target_affine is None: target_affine = affine @@ -122,7 +128,7 @@ def write_nifti( data = nib.orientations.apply_orientation(data, ornt_transform) _affine = affine @ nib.orientations.inv_ornt_aff(ornt_transform, data_shape) if np.allclose(_affine, target_affine, atol=1e-3) or not resample: - results_img = nib.Nifti1Image(data.astype(output_dtype), to_affine_nd(3, _affine)) + results_img = nib.Nifti1Image(data.astype(output_dtype), to_affine_nd(3, _affine)) # type: ignore nib.save(results_img, file_name) return @@ -138,7 +144,7 @@ def write_nifti( while len(output_spatial_shape_) < 3: output_spatial_shape_ = output_spatial_shape_ + [1] spatial_shape, channel_shape = data.shape[:3], data.shape[3:] - data_np = data.reshape(list(spatial_shape) + [-1]) + data_np: np.ndarray = data.reshape(list(spatial_shape) + [-1]) # type: ignore data_np = np.moveaxis(data_np, -1, 0) # channel first for pytorch data_torch = affine_xform( torch.as_tensor(np.ascontiguousarray(data_np).astype(dtype)).unsqueeze(0), diff --git a/monai/data/png_saver.py b/monai/data/png_saver.py index e6fb641cca..609cc8d7be 100644 --- a/monai/data/png_saver.py +++ b/monai/data/png_saver.py @@ -134,11 +134,7 @@ def save(self, data: Union[torch.Tensor, np.ndarray], meta_data: Optional[Dict] raise ValueError(f"Unsupported number of channels: {data.shape[0]}, available options are [1, 3, 4]") write_png( - np.asarray(data), - file_name=path, - output_spatial_shape=spatial_shape, - mode=self.mode, - scale=self.scale, + np.asarray(data), file_name=path, output_spatial_shape=spatial_shape, mode=self.mode, scale=self.scale ) if self.print_log: diff --git a/monai/data/png_writer.py b/monai/data/png_writer.py index 2baec3b872..52163e40ac 100644 --- a/monai/data/png_writer.py +++ b/monai/data/png_writer.py @@ -48,7 +48,7 @@ def write_png( """ if not isinstance(data, np.ndarray): - raise AssertionError("input data must be numpy array.") + raise ValueError("input data must be numpy array.") if len(data.shape) == 3 and data.shape[2] == 1: # PIL Image can't save image with 1 channel data = data.squeeze(2) if output_spatial_shape is not None: @@ -59,11 +59,11 @@ def write_png( _min, _max = np.min(data), np.max(data) if len(data.shape) == 3: data = np.moveaxis(data, -1, 0) # to channel first - data = xform(data) + data = xform(data) # type: ignore data = np.moveaxis(data, 0, -1) else: # (H, W) data = np.expand_dims(data, 0) # make a channel - data = xform(data)[0] # first channel + data = xform(data)[0] # type: ignore if mode != InterpolateMode.NEAREST: data = np.clip(data, _min, _max) # type: ignore diff --git a/monai/data/test_time_augmentation.py b/monai/data/test_time_augmentation.py index 33239ea924..6bb300a686 100644 --- a/monai/data/test_time_augmentation.py +++ b/monai/data/test_time_augmentation.py @@ -159,7 +159,7 @@ def __call__( raise ValueError("num_examples should be multiple of batch size.") # generate batch of data of size == batch_size, dataset and dataloader - data_in = [d] * num_examples + 
data_in = [deepcopy(d) for _ in range(num_examples)] ds = Dataset(data_in, self.transform) dl = DataLoader(ds, self.num_workers, batch_size=self.batch_size, collate_fn=pad_list_data_collate) @@ -184,9 +184,7 @@ def __call__( transform_info = batch_data[transform_key] if self.nearest_interp: transform_info = convert_inverse_interp_mode( - trans_info=deepcopy(transform_info), - mode="nearest", - align_corners=None, + trans_info=deepcopy(transform_info), mode="nearest", align_corners=None ) # create a dictionary containing the inferred batch and their transforms diff --git a/monai/data/thread_buffer.py b/monai/data/thread_buffer.py index da5847465e..8aa5c014c3 100644 --- a/monai/data/thread_buffer.py +++ b/monai/data/thread_buffer.py @@ -92,12 +92,7 @@ class ThreadDataLoader(DataLoader): """ def __init__( - self, - dataset: Dataset, - buffer_size: int = 1, - buffer_timeout: float = 0.01, - num_workers: int = 0, - **kwargs, + self, dataset: Dataset, buffer_size: int = 1, buffer_timeout: float = 0.01, num_workers: int = 0, **kwargs ): super().__init__(dataset, num_workers, **kwargs) self.buffer_size = buffer_size diff --git a/monai/data/utils.py b/monai/data/utils.py index aab23217dc..880ceed7b8 100644 --- a/monai/data/utils.py +++ b/monai/data/utils.py @@ -134,9 +134,7 @@ def iter_patch_slices( def dense_patch_slices( - image_size: Sequence[int], - patch_size: Sequence[int], - scan_interval: Sequence[int], + image_size: Sequence[int], patch_size: Sequence[int], scan_interval: Sequence[int] ) -> List[Tuple[slice, ...]]: """ Enumerate all slices defining ND patches of size `patch_size` from an `image_size` input image. @@ -283,7 +281,7 @@ def list_data_collate(batch: Sequence): + "`DataLoader` with `collate_fn=pad_list_data_collate` might solve this problem (check its " + "documentation)." ) - raise RuntimeError(re_str) + raise RuntimeError(re_str) from re except TypeError as re: re_str = str(re) if "numpy" in re_str and "Tensor" in re_str: @@ -294,7 +292,7 @@ def list_data_collate(batch: Sequence): + "creating your `DataLoader` with `collate_fn=pad_list_data_collate` might solve this problem " + "(check its documentation)." ) - raise TypeError(re_str) + raise TypeError(re_str) from re def decollate_batch(batch, detach: bool = True): @@ -1029,7 +1027,7 @@ def json_hashing(item) -> bytes: """ # TODO: Find way to hash transforms content as part of the cache cache_key = hashlib.md5(json.dumps(item, sort_keys=True).encode("utf-8")).hexdigest() - return f"{cache_key}".encode("utf-8") + return f"{cache_key}".encode() def pickle_hashing(item, protocol=pickle.HIGHEST_PROTOCOL) -> bytes: @@ -1044,7 +1042,7 @@ def pickle_hashing(item, protocol=pickle.HIGHEST_PROTOCOL) -> bytes: """ cache_key = hashlib.md5(pickle.dumps(sorted_dict(item), protocol=protocol)).hexdigest() - return f"{cache_key}".encode("utf-8") + return f"{cache_key}".encode() def sorted_dict(item, key=None, reverse=False): diff --git a/monai/engines/evaluator.py b/monai/engines/evaluator.py index 1c37da71d4..bfe9d01e1f 100644 --- a/monai/engines/evaluator.py +++ b/monai/engines/evaluator.py @@ -219,7 +219,7 @@ def __init__( self.network = network self.inferer = SimpleInferer() if inferer is None else inferer - def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]): """ callback function for the Supervised Evaluation processing logic of 1 iteration in Ignite Engine. 
Return below items in a dictionary: @@ -237,7 +237,7 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict """ if batchdata is None: raise ValueError("Must provide batch data for current iteration.") - batch = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking) + batch = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking) # type: ignore if len(batch) == 2: inputs, targets = batch args: Tuple = () @@ -246,15 +246,15 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict inputs, targets, args, kwargs = batch # put iteration outputs into engine.state - engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets} + engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets} # type: ignore # execute forward computation with self.mode(self.network): if self.amp: with torch.cuda.amp.autocast(): - engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs) + engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs) # type: ignore else: - engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs) + engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs) # type: ignore engine.fire_event(IterationEvents.FORWARD_COMPLETED) engine.fire_event(IterationEvents.MODEL_COMPLETED) @@ -349,7 +349,7 @@ def __init__( self.pred_keys = ensure_tuple(pred_keys) self.inferer = SimpleInferer() if inferer is None else inferer - def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]): """ callback function for the Supervised Evaluation processing logic of 1 iteration in Ignite Engine. 
Return below items in a dictionary: @@ -370,7 +370,7 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict """ if batchdata is None: raise ValueError("Must provide batch data for current iteration.") - batch = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking) + batch = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking) # type: ignore if len(batch) == 2: inputs, targets = batch args: Tuple = () @@ -379,17 +379,21 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict inputs, targets, args, kwargs = batch # put iteration outputs into engine.state - engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets} + engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets} # type: ignore for idx, network in enumerate(self.networks): with self.mode(network): if self.amp: with torch.cuda.amp.autocast(): + if isinstance(engine.state.output, dict): + engine.state.output.update( + {self.pred_keys[idx]: self.inferer(inputs, network, *args, **kwargs)} + ) + else: + if isinstance(engine.state.output, dict): engine.state.output.update( {self.pred_keys[idx]: self.inferer(inputs, network, *args, **kwargs)} ) - else: - engine.state.output.update({self.pred_keys[idx]: self.inferer(inputs, network, *args, **kwargs)}) engine.fire_event(IterationEvents.FORWARD_COMPLETED) engine.fire_event(IterationEvents.MODEL_COMPLETED) diff --git a/monai/engines/multi_gpu_supervised_trainer.py b/monai/engines/multi_gpu_supervised_trainer.py index 3671dbcfd1..3736d257cb 100644 --- a/monai/engines/multi_gpu_supervised_trainer.py +++ b/monai/engines/multi_gpu_supervised_trainer.py @@ -34,10 +34,7 @@ Engine, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Engine") Metric, _ = optional_import("ignite.metrics", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Metric") -__all__ = [ - "create_multigpu_supervised_trainer", - "create_multigpu_supervised_evaluator", -] +__all__ = ["create_multigpu_supervised_trainer", "create_multigpu_supervised_evaluator"] def _default_transform(_x: torch.Tensor, _y: torch.Tensor, _y_pred: torch.Tensor, loss: torch.Tensor) -> float: @@ -59,7 +56,7 @@ def create_multigpu_supervised_trainer( prepare_batch: Callable = _prepare_batch, output_transform: Callable = _default_transform, distributed: bool = False, -) -> Engine: +): """ Derived from `create_supervised_trainer` in Ignite. @@ -107,7 +104,7 @@ def create_multigpu_supervised_evaluator( prepare_batch: Callable = _prepare_batch, output_transform: Callable = _default_eval_transform, distributed: bool = False, -) -> Engine: +): """ Derived from `create_supervised_evaluator` in Ignite. 
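The `EnsembleEvaluator` hunk above now only updates `engine.state.output` when it is a dict; a minimal sketch of how `networks` pairs with `pred_keys` (the `val_loader` and the trained `net_0`/`net_1` are assumed to exist):

    import torch

    from monai.engines import EnsembleEvaluator

    evaluator = EnsembleEvaluator(
        device=torch.device("cpu"),
        val_data_loader=val_loader,
        networks=[net_0, net_1],
        pred_keys=["pred_0", "pred_1"],  # one output key per network
    )
    evaluator.run()
    # per the guarded update above, the prediction of networks[i] is stored
    # under engine.state.output[pred_keys[i]] while the output is still a dict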
diff --git a/monai/engines/trainer.py b/monai/engines/trainer.py index 44e265be1f..3f8065f7a3 100644 --- a/monai/engines/trainer.py +++ b/monai/engines/trainer.py @@ -172,7 +172,7 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]): """ if batchdata is None: raise ValueError("Must provide batch data for current iteration.") - batch = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking) + batch = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking) # type: ignore if len(batch) == 2: inputs, targets = batch args: Tuple = () @@ -180,7 +180,7 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]): else: inputs, targets, args, kwargs = batch # put iteration outputs into engine.state - engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets} + engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets} # type: ignore def _compute_pred_loss(): engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs) @@ -198,13 +198,13 @@ def _compute_pred_loss(): if self.amp and self.scaler is not None: with torch.cuda.amp.autocast(): _compute_pred_loss() - self.scaler.scale(engine.state.output[Keys.LOSS]).backward() + self.scaler.scale(engine.state.output[Keys.LOSS]).backward() # type: ignore engine.fire_event(IterationEvents.BACKWARD_COMPLETED) self.scaler.step(self.optimizer) self.scaler.update() else: _compute_pred_loss() - engine.state.output[Keys.LOSS].backward() + engine.state.output[Keys.LOSS].backward() # type: ignore engine.fire_event(IterationEvents.BACKWARD_COMPLETED) self.optimizer.step() engine.fire_event(IterationEvents.MODEL_COMPLETED) @@ -345,15 +345,18 @@ def _iteration( if batchdata is None: raise ValueError("must provide batch data for current iteration.") - d_input = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking) + d_input = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking) # type: ignore batch_size = self.data_loader.batch_size # type: ignore - g_input = self.g_prepare_batch(batch_size, self.latent_shape, engine.state.device, engine.non_blocking) + g_input = self.g_prepare_batch( + num_latents=batch_size, + latent_size=self.latent_shape, + device=engine.state.device, # type: ignore + non_blocking=engine.non_blocking, # type: ignore + ) g_output = self.g_inferer(g_input, self.g_network) # Train Discriminator - d_total_loss = torch.zeros( - 1, - ) + d_total_loss = torch.zeros(1) for _ in range(self.d_train_steps): # `set_to_none` only work from PyTorch 1.7.0 if PT_BEFORE_1_7: @@ -367,7 +370,12 @@ def _iteration( # Train Generator if self.g_update_latents: - g_input = self.g_prepare_batch(batch_size, self.latent_shape, engine.state.device, engine.non_blocking) + g_input = self.g_prepare_batch( + num_latents=batch_size, + latent_size=self.latent_shape, + device=engine.state.device, # type: ignore + non_blocking=engine.non_blocking, # type: ignore + ) g_output = self.g_inferer(g_input, self.g_network) if PT_BEFORE_1_7: self.g_optimizer.zero_grad() diff --git a/monai/engines/utils.py b/monai/engines/utils.py index c94cc16916..f075eec492 100644 --- a/monai/engines/utils.py +++ b/monai/engines/utils.py @@ -100,9 +100,7 @@ def get_devices_spec(devices: Optional[Sequence[torch.device]] = None) -> List[t def default_prepare_batch( - batchdata: Dict[str, torch.Tensor], - device: Optional[Union[str, torch.device]] = None, - non_blocking: bool = False, + batchdata: Dict[str, torch.Tensor], device: Optional[Union[str, torch.device]] = None, 
non_blocking: bool = False ) -> Union[Tuple[torch.Tensor, Optional[torch.Tensor]], torch.Tensor]: """ Default function to prepare the data for current iteration. @@ -126,10 +124,7 @@ def default_prepare_batch( def default_make_latent( - num_latents: int, - latent_size: int, - device: Optional[Union[str, torch.device]] = None, - non_blocking: bool = False, + num_latents: int, latent_size: int, device: Optional[Union[str, torch.device]] = None, non_blocking: bool = False ) -> torch.Tensor: return torch.randn(num_latents, latent_size).to(device=device, non_blocking=non_blocking) diff --git a/monai/engines/workflow.py b/monai/engines/workflow.py index ffb8ce05b3..7f0b65ac28 100644 --- a/monai/engines/workflow.py +++ b/monai/engines/workflow.py @@ -152,15 +152,15 @@ def set_sampler_epoch(engine: Engine): self.scaler: Optional[torch.cuda.amp.GradScaler] = None if event_names is None: - event_names = [IterationEvents] + event_names = [IterationEvents] # type: ignore else: if not isinstance(event_names, list): raise ValueError("event_names must be a list or string or EventEnum.") - event_names += [IterationEvents] + event_names += [IterationEvents] # type: ignore for name in event_names: if isinstance(name, str): self.register_events(name, event_to_attr=event_to_attr) - elif issubclass(name, EventEnum): + elif issubclass(name, EventEnum): # type: ignore self.register_events(*name, event_to_attr=event_to_attr) else: raise ValueError("event_names must be a list or string or EventEnum.") @@ -187,8 +187,10 @@ def _register_decollate(self): def _decollate_data(engine: Engine) -> None: # replicate the scalar values to make sure all the items have batch dimension, then decollate transform = Decollated(keys=None, detach=True) - engine.state.batch = transform(engine.state.batch) - engine.state.output = transform(engine.state.output) + if isinstance(engine.state.batch, (list, dict)): + engine.state.batch = transform(engine.state.batch) + if isinstance(engine.state.output, (list, dict)): + engine.state.output = transform(engine.state.output) def _register_postprocessing(self, posttrans: Callable): """ @@ -200,9 +202,7 @@ def _register_postprocessing(self, posttrans: Callable): def _run_postprocessing(engine: Engine) -> None: if not isinstance(engine.state.batch, list) or not isinstance(engine.state.output, list): engine.state.batch, engine.state.output = engine_apply_transform( - batch=engine.state.batch, - output=engine.state.output, - transform=posttrans, + batch=engine.state.batch, output=engine.state.output, transform=posttrans ) else: for i, (b, o) in enumerate(zip(engine.state.batch, engine.state.output)): @@ -226,12 +226,13 @@ def _register_metrics(self, k_metric: Dict, add_metrics: Optional[Dict] = None): @self.on(Events.EPOCH_COMPLETED) def _compare_metrics(engine: Engine) -> None: - if engine.state.key_metric_name is not None: - current_val_metric = engine.state.metrics[engine.state.key_metric_name] - if self.metric_cmp_fn(current_val_metric, engine.state.best_metric): - self.logger.info(f"Got new best metric of {engine.state.key_metric_name}: {current_val_metric}") - engine.state.best_metric = current_val_metric - engine.state.best_metric_epoch = engine.state.epoch + key_metric_name = engine.state.key_metric_name # type: ignore + if key_metric_name is not None: + current_val_metric = engine.state.metrics[key_metric_name] + if self.metric_cmp_fn(current_val_metric, engine.state.best_metric): # type: ignore + self.logger.info(f"Got new best metric of {key_metric_name}: {current_val_metric}") + 
engine.state.best_metric = current_val_metric # type: ignore + engine.state.best_metric_epoch = engine.state.epoch # type: ignore def _register_handlers(self, handlers: Sequence): """ diff --git a/monai/handlers/__init__.py b/monai/handlers/__init__.py index c9eecc6d46..520af0a94c 100644 --- a/monai/handlers/__init__.py +++ b/monai/handlers/__init__.py @@ -22,6 +22,7 @@ from .mean_dice import MeanDice from .metric_logger import MetricLogger, MetricLoggerKeys from .metrics_saver import MetricsSaver +from .mlflow_handler import MLFlowHandler from .nvtx_handlers import MarkHandler, RangeHandler, RangePopHandler, RangePushHandler from .parameter_scheduler import ParamSchedulerHandler from .postprocessing import PostProcessing @@ -32,13 +33,5 @@ from .stats_handler import StatsHandler from .surface_distance import SurfaceDistance from .tensorboard_handlers import TensorBoardHandler, TensorBoardImageHandler, TensorBoardStatsHandler -from .transform_inverter import TransformInverter -from .utils import ( - evenly_divisible_all_gather, - from_engine, - stopping_fn_from_loss, - stopping_fn_from_metric, - string_list_all_gather, - write_metrics_reports, -) +from .utils import from_engine, stopping_fn_from_loss, stopping_fn_from_metric, write_metrics_reports from .validation_handler import ValidationHandler diff --git a/monai/handlers/checkpoint_loader.py b/monai/handlers/checkpoint_loader.py index f1f60abf63..7c30584b13 100644 --- a/monai/handlers/checkpoint_loader.py +++ b/monai/handlers/checkpoint_loader.py @@ -126,7 +126,7 @@ def __call__(self, engine: Engine) -> None: # save current max epochs setting in the engine, don't overwrite it if larger than max_epochs in checkpoint prior_max_epochs = engine.state.max_epochs Checkpoint.load_objects(to_load=self.load_dict, checkpoint=checkpoint, strict=self.strict) - if engine.state.epoch > prior_max_epochs: + if prior_max_epochs is not None and engine.state.epoch > prior_max_epochs: raise ValueError( f"Epoch count ({engine.state.epoch}) in checkpoint is larger than " f"the `engine.state.max_epochs` ({prior_max_epochs}) of engine. 
To further train from checkpoint, " diff --git a/monai/handlers/checkpoint_saver.py b/monai/handlers/checkpoint_saver.py index f365ff73c4..d5aadadfed 100644 --- a/monai/handlers/checkpoint_saver.py +++ b/monai/handlers/checkpoint_saver.py @@ -11,7 +11,7 @@ import logging import warnings -from typing import TYPE_CHECKING, Dict, Optional +from typing import TYPE_CHECKING, Dict, Mapping, Optional from monai.config import IgniteInfo from monai.utils import min_version, optional_import @@ -126,7 +126,7 @@ def __init__(self, dirname: str, filename: Optional[str] = None): super().__init__(dirname=dirname, require_empty=False, atomic=False) self.filename = filename - def __call__(self, checkpoint: Dict, filename: str, metadata: Optional[Dict] = None) -> None: + def __call__(self, checkpoint: Mapping, filename: str, metadata: Optional[Mapping] = None) -> None: if self.filename is not None: filename = self.filename super().__call__(checkpoint=checkpoint, filename=filename, metadata=metadata) @@ -154,8 +154,8 @@ def _final_func(engine: Engine): def _score_func(engine: Engine): if isinstance(key_metric_name, str): metric_name = key_metric_name - elif hasattr(engine.state, "key_metric_name") and isinstance(engine.state.key_metric_name, str): - metric_name = engine.state.key_metric_name + elif hasattr(engine.state, "key_metric_name"): + metric_name = engine.state.key_metric_name # type: ignore else: raise ValueError( f"Incompatible values: save_key_metric=True and key_metric_name={key_metric_name}." diff --git a/monai/handlers/classification_saver.py b/monai/handlers/classification_saver.py index 815be87754..4481ae0fec 100644 --- a/monai/handlers/classification_saver.py +++ b/monai/handlers/classification_saver.py @@ -55,9 +55,15 @@ def __init__( batch_transform: a callable that is used to extract the `meta_data` dictionary of the input images from `ignite.engine.state.batch`. the purpose is to get the input filenames from the `meta_data` and store with classification results together. + `engine.state` and `batch_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. output_transform: a callable that is used to extract the model prediction data from `ignite.engine.state.output`. the first dimension of its output will be treated as the batch dimension. each item in the batch will be saved individually. + `engine.state` and `output_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. name: identifier of logging.logger to use, defaulting to `engine.logger`. save_rank: only the handler on specified rank will save to CSV file in multi-gpus validation, default to 0. diff --git a/monai/handlers/confusion_matrix.py b/monai/handlers/confusion_matrix.py index 368aacc6cb..24adeb879c 100644 --- a/monai/handlers/confusion_matrix.py +++ b/monai/handlers/confusion_matrix.py @@ -43,8 +43,9 @@ def __init__( output_transform: callable to extract `y_pred` and `y` from `ignite.engine.state.output` then construct `(y_pred, y)` pair, where `y_pred` and `y` can be `batch-first` Tensors or lists of `channel-first` Tensors. the form of `(y_pred, y)` is required by the `update()`. 
- for example: if `ignite.engine.state.output` is `{"pred": xxx, "label": xxx, "other": xxx}`, - output_transform can be `lambda x: (x["pred"], x["label"])`. + `engine.state` and `output_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. save_details: whether to save metric computation details per image, for example: TP/TN/FP/FN of every image. default to True, will save to `engine.state.metric_details` dict with the metric name as key. @@ -58,8 +59,4 @@ def __init__( reduction=MetricReduction.MEAN, ) self.metric_name = metric_name - super().__init__( - metric_fn=metric_fn, - output_transform=output_transform, - save_details=save_details, - ) + super().__init__(metric_fn=metric_fn, output_transform=output_transform, save_details=save_details) diff --git a/monai/handlers/decollate_batch.py b/monai/handlers/decollate_batch.py index 4e99fc6f04..0905ee6ebc 100644 --- a/monai/handlers/decollate_batch.py +++ b/monai/handlers/decollate_batch.py @@ -88,7 +88,7 @@ def __call__(self, engine: Engine) -> None: Args: engine: Ignite Engine, it can be a trainer, validator or evaluator. """ - if self.batch_transform is not None: + if self.batch_transform is not None and isinstance(engine.state.batch, (list, dict)): engine.state.batch = self.batch_transform(engine.state.batch) - if self.output_transform is not None: + if self.output_transform is not None and isinstance(engine.state.output, (list, dict)): engine.state.output = self.output_transform(engine.state.output) diff --git a/monai/handlers/garbage_collector.py b/monai/handlers/garbage_collector.py index fffca2a740..1eb970e795 100644 --- a/monai/handlers/garbage_collector.py +++ b/monai/handlers/garbage_collector.py @@ -42,6 +42,7 @@ class GarbageCollector: """ def __init__(self, trigger_event: str = "epoch", log_level: int = 10): + self.trigger_event: Events if isinstance(trigger_event, Events): self.trigger_event = trigger_event elif trigger_event.lower() == "epoch": diff --git a/monai/handlers/hausdorff_distance.py b/monai/handlers/hausdorff_distance.py index a25ef04383..321e840353 100644 --- a/monai/handlers/hausdorff_distance.py +++ b/monai/handlers/hausdorff_distance.py @@ -44,8 +44,9 @@ def __init__( output_transform: callable to extract `y_pred` and `y` from `ignite.engine.state.output` then construct `(y_pred, y)` pair, where `y_pred` and `y` can be `batch-first` Tensors or lists of `channel-first` Tensors. the form of `(y_pred, y)` is required by the `update()`. - for example: if `ignite.engine.state.output` is `{"pred": xxx, "label": xxx, "other": xxx}`, - output_transform can be `lambda x: (x["pred"], x["label"])`. + `engine.state` and `output_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. save_details: whether to save metric computation details per image, for example: hausdorff distance of every image. default to True, will save to `engine.state.metric_details` dict with the metric name as key. 
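These docstring updates drop the inline `lambda x: (x["pred"], x["label"])` example in favor of the ignite concepts page and the tutorial notebook; for reference, a hedged sketch of the equivalent `from_engine`-based `output_transform` (the `engine` object and the "pred"/"label" keys are assumptions about the pipeline):

    from monai.handlers import HausdorffDistance, from_engine

    # equivalent to the removed inline example, assuming the decollated
    # engine.state.output items carry "pred" and "label" entries
    hd_handler = HausdorffDistance(
        include_background=False,
        output_transform=from_engine(["pred", "label"]),
    )
    hd_handler.attach(engine, name="val_hausdorff")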
@@ -57,8 +58,4 @@ def __init__( directed=directed, reduction=MetricReduction.MEAN, ) - super().__init__( - metric_fn=metric_fn, - output_transform=output_transform, - save_details=save_details, - ) + super().__init__(metric_fn=metric_fn, output_transform=output_transform, save_details=save_details) diff --git a/monai/handlers/ignite_metric.py b/monai/handlers/ignite_metric.py index cbf84e4626..ec99e83752 100644 --- a/monai/handlers/ignite_metric.py +++ b/monai/handlers/ignite_metric.py @@ -41,18 +41,16 @@ class IgniteMetric(Metric): # type: ignore[valid-type, misc] # due to optional_ output_transform: callable to extract `y_pred` and `y` from `ignite.engine.state.output` then construct `(y_pred, y)` pair, where `y_pred` and `y` can be `batch-first` Tensors or lists of `channel-first` Tensors. the form of `(y_pred, y)` is required by the `update()`. - for example: if `ignite.engine.state.output` is `{"pred": xxx, "label": xxx, "other": xxx}`, - output_transform can be `lambda x: (x["pred"], x["label"])`. + `engine.state` and `output_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. save_details: whether to save metric computation details per image, for example: mean_dice of every image. default to True, will save to `engine.state.metric_details` dict with the metric name as key. """ def __init__( - self, - metric_fn: CumulativeIterationMetric, - output_transform: Callable = lambda x: x, - save_details: bool = True, + self, metric_fn: CumulativeIterationMetric, output_transform: Callable = lambda x: x, save_details: bool = True ) -> None: self._is_reduced: bool = False self.metric_fn = metric_fn @@ -101,7 +99,7 @@ def compute(self) -> Any: if self.save_details: if self._engine is None or self._name is None: raise RuntimeError("please call the attach() function to connect expected engine first.") - self._engine.state.metric_details[self._name] = self.metric_fn.get_buffer() + self._engine.state.metric_details[self._name] = self.metric_fn.get_buffer() # type: ignore return result.item() if isinstance(result, torch.Tensor) else result @@ -120,4 +118,4 @@ def attach(self, engine: Engine, name: str) -> None: self._engine = engine self._name = name if self.save_details and not hasattr(engine.state, "metric_details"): - engine.state.metric_details = {} + engine.state.metric_details = {} # type: ignore diff --git a/monai/handlers/mean_dice.py b/monai/handlers/mean_dice.py index ba5805fc19..6c270caa4c 100644 --- a/monai/handlers/mean_dice.py +++ b/monai/handlers/mean_dice.py @@ -22,10 +22,7 @@ class MeanDice(IgniteMetric): """ def __init__( - self, - include_background: bool = True, - output_transform: Callable = lambda x: x, - save_details: bool = True, + self, include_background: bool = True, output_transform: Callable = lambda x: x, save_details: bool = True ) -> None: """ @@ -35,8 +32,9 @@ def __init__( output_transform: callable to extract `y_pred` and `y` from `ignite.engine.state.output` then construct `(y_pred, y)` pair, where `y_pred` and `y` can be `batch-first` Tensors or lists of `channel-first` Tensors. the form of `(y_pred, y)` is required by the `update()`. - for example: if `ignite.engine.state.output` is `{"pred": xxx, "label": xxx, "other": xxx}`, - output_transform can be `lambda x: (x["pred"], x["label"])`. 
+ `engine.state` and `output_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. save_details: whether to save metric computation details per image, for example: mean dice of every image. default to True, will save to `engine.state.metric_details` dict with the metric name as key. @@ -44,8 +42,4 @@ def __init__( :py:meth:`monai.metrics.meandice.compute_meandice` """ metric_fn = DiceMetric(include_background=include_background, reduction=MetricReduction.MEAN) - super().__init__( - metric_fn=metric_fn, - output_transform=output_transform, - save_details=save_details, - ) + super().__init__(metric_fn=metric_fn, output_transform=output_transform, save_details=save_details) diff --git a/monai/handlers/metric_logger.py b/monai/handlers/metric_logger.py index 64553955b7..048f230d1a 100644 --- a/monai/handlers/metric_logger.py +++ b/monai/handlers/metric_logger.py @@ -57,6 +57,9 @@ class MetricLogger: Args: loss_transform: Converts the `output` value from the trainer's state into a loss value + `engine.state` and `loss_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. metric_transform: Converts the metric value coming from the trainer/evaluator's state into a storable value evaluator: Optional evaluator to consume metric results from at the end of its evaluation run """ diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py index 97b080b244..d6aa0c7b9f 100644 --- a/monai/handlers/metrics_saver.py +++ b/monai/handlers/metrics_saver.py @@ -50,6 +50,9 @@ class MetricsSaver: batch_transform: a callable that is used to extract the `meta_data` dictionary of the input images from `ignite.engine.state.batch` if saving metric details. the purpose is to get the input filenames from the `meta_data` and store with metric details together. + `engine.state` and `batch_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. summary_ops: expected computation operations to generate the summary report. it can be: None, "*" or list of strings, default to None. None - don't generate summary report for every expected metric_details. 
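To make the `batch_transform` and `summary_ops` options above concrete, a small configuration sketch; the output directory, metric names, and the `evaluator` object are illustrative assumptions:

    from monai.handlers import MetricsSaver, from_engine

    metrics_saver = MetricsSaver(
        save_dir="./eval_metrics",  # illustrative output directory
        metrics=["val_mean_dice"],
        metric_details=["val_mean_dice"],
        # filenames are read from the image meta data, per the docstring above
        batch_transform=from_engine(["image_meta_dict"]),
        summary_ops="*",  # generate the full summary report
    )
    metrics_saver.attach(evaluator)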
@@ -132,10 +135,12 @@ def __call__(self, engine: Engine) -> None: if self.metrics is not None and len(engine.state.metrics) > 0: _metrics = {k: v for k, v in engine.state.metrics.items() if k in self.metrics or "*" in self.metrics} _metric_details = {} - if self.metric_details is not None and len(engine.state.metric_details) > 0: - for k, v in engine.state.metric_details.items(): - if k in self.metric_details or "*" in self.metric_details: - _metric_details[k] = v + if hasattr(engine.state, "metric_details"): + details = engine.state.metric_details # type: ignore + if self.metric_details is not None and len(details) > 0: + for k, v in details.items(): + if k in self.metric_details or "*" in self.metric_details: + _metric_details[k] = v write_metrics_reports( save_dir=self.save_dir, diff --git a/monai/handlers/mlflow_handler.py b/monai/handlers/mlflow_handler.py new file mode 100644 index 0000000000..7bf6596437 --- /dev/null +++ b/monai/handlers/mlflow_handler.py @@ -0,0 +1,193 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence + +import torch + +from monai.config import IgniteInfo +from monai.utils import min_version, optional_import + +Events, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Events") +mlflow, _ = optional_import("mlflow") + +if TYPE_CHECKING: + from ignite.engine import Engine +else: + Engine, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Engine") + +DEFAULT_TAG = "Loss" + + +class MLFlowHandler: + """ + MLFlowHandler defines a set of Ignite Event-handlers for the MLFlow tracking logic. + It can be used with any Ignite Engine (trainer, validator, or evaluator). + It can track both epoch-level and iteration-level logging, and MLFlow can then store + and visualize the data. + The expected data source is Ignite ``engine.state.output`` and ``engine.state.metrics``. + + Default behaviors: + - When EPOCH_COMPLETED, track each dictionary item in + ``engine.state.metrics`` in MLFlow. + - When ITERATION_COMPLETED, track expected item in + ``self.output_transform(engine.state.output)`` in MLFlow, default to `Loss`. + + Usage example is available in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/3d_segmentation/unet_segmentation_3d_ignite.ipynb. + + Args: + tracking_uri: connects to a tracking URI. can also set the `MLFLOW_TRACKING_URI` environment + variable to have MLflow find a URI from there. in both cases, the URI can either be + an HTTP/HTTPS URI for a remote server, a database connection string, or a local path + to log data to a directory. The URI defaults to path `mlruns`. + for more details: https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.set_tracking_uri. + epoch_logger: customized callable logger for epoch level logging with MLFlow. + Must accept parameter "engine", use default logger if None.
+ iteration_logger: customized callable logger for iteration level logging with MLFlow. + Must accept parameter "engine", use default logger if None. + output_transform: a callable that is used to transform the + ``ignite.engine.state.output`` into a scalar to track, or a dictionary of {key: scalar}. + By default, this logging happens every time an iteration completes. + The default behavior is to track the loss from `output[0]`, as the output is a decollated list + and the loss value is replicated for every item of the decollated list. + `engine.state` and `output_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. + global_epoch_transform: a callable that is used to customize global epoch number. + For example, in evaluation, the evaluator engine might want to track synced epoch number + with the trainer engine. + state_attributes: expected attributes from `engine.state`, if provided, will extract and log them + when an epoch completes. + tag_name: when the iteration output is a scalar, `tag_name` is used to track it, defaults to `'Loss'`. + + For more details of MLFlow usage, please refer to: https://mlflow.org/docs/latest/index.html. + + """ + + def __init__( + self, + tracking_uri: Optional[str] = None, + epoch_logger: Optional[Callable[[Engine], Any]] = None, + iteration_logger: Optional[Callable[[Engine], Any]] = None, + output_transform: Callable = lambda x: x[0], + global_epoch_transform: Callable = lambda x: x, + state_attributes: Optional[Sequence[str]] = None, + tag_name: str = DEFAULT_TAG, + ) -> None: + if tracking_uri is not None: + mlflow.set_tracking_uri(tracking_uri) + + self.epoch_logger = epoch_logger + self.iteration_logger = iteration_logger + self.output_transform = output_transform + self.global_epoch_transform = global_epoch_transform + self.state_attributes = state_attributes + self.tag_name = tag_name + + def attach(self, engine: Engine) -> None: + """ + Register a set of Ignite Event-Handlers to a specified Ignite engine. + + Args: + engine: Ignite Engine, it can be a trainer, validator or evaluator. + + """ + if not engine.has_event_handler(self.start, Events.STARTED): + engine.add_event_handler(Events.STARTED, self.start) + if not engine.has_event_handler(self.iteration_completed, Events.ITERATION_COMPLETED): + engine.add_event_handler(Events.ITERATION_COMPLETED, self.iteration_completed) + if not engine.has_event_handler(self.epoch_completed, Events.EPOCH_COMPLETED): + engine.add_event_handler(Events.EPOCH_COMPLETED, self.epoch_completed) + + def start(self) -> None: + """ + Check the MLFlow status and start a run if none is active. + + """ + if mlflow.active_run() is None: + mlflow.start_run() + + def close(self) -> None: + """ + Stop the currently active MLFlow run. + + """ + mlflow.end_run() + + def epoch_completed(self, engine: Engine) -> None: + """ + Handler for train or validation/evaluation epoch completed Event. + Track epoch-level logs; default values are from the Ignite `engine.state.metrics` dict. + + Args: + engine: Ignite Engine, it can be a trainer, validator or evaluator. + + """ + if self.epoch_logger is not None: + self.epoch_logger(engine) + else: + self._default_epoch_log(engine) + + def iteration_completed(self, engine: Engine) -> None: + """ + Handler for train or validation/evaluation iteration completed Event. + Track iteration-level logs.
+ + Args: + engine: Ignite Engine, it can be a trainer, validator or evaluator. + + """ + if self.iteration_logger is not None: + self.iteration_logger(engine) + else: + self._default_iteration_log(engine) + + def _default_epoch_log(self, engine: Engine) -> None: + """ + Execute epoch level log operation. + Default to track the values from Ignite `engine.state.metrics` dict and + track the values of specified attributes of `engine.state`. + + Args: + engine: Ignite Engine, it can be a trainer, validator or evaluator. + + """ + log_dict = engine.state.metrics + if not log_dict: + return + + current_epoch = self.global_epoch_transform(engine.state.epoch) + mlflow.log_metrics(log_dict, step=current_epoch) + + if self.state_attributes is not None: + attrs = {attr: getattr(engine.state, attr, None) for attr in self.state_attributes} + mlflow.log_metrics(attrs, step=current_epoch) + + def _default_iteration_log(self, engine: Engine) -> None: + """ + Execute iteration log operation based on Ignite `engine.state.output` data. + Log the values from `self.output_transform(engine.state.output)`. + Since `engine.state.output` is a decollated list and the loss value is replicated for every item + of the decollated list, the default behavior is to track the loss from `output[0]`. + + Args: + engine: Ignite Engine, it can be a trainer, validator or evaluator. + + """ + loss = self.output_transform(engine.state.output) + if loss is None: + return + + if not isinstance(loss, dict): + loss = {self.tag_name: loss.item() if isinstance(loss, torch.Tensor) else loss} + + mlflow.log_metrics(loss, step=engine.state.iteration) diff --git a/monai/handlers/nvtx_handlers.py b/monai/handlers/nvtx_handlers.py index aba7a7ec0e..37bacc7f95 100644 --- a/monai/handlers/nvtx_handlers.py +++ b/monai/handlers/nvtx_handlers.py @@ -50,9 +50,7 @@ class RangeHandler: """ def __init__( - self, - events: Union[str, Tuple[Union[str, Events], Union[str, Events]]], - msg: Optional[str] = None, + self, events: Union[str, Tuple[Union[str, Events], Union[str, Events]]], msg: Optional[str] = None ) -> None: self.events = self.resolve_events(events) if msg is None: @@ -73,10 +71,7 @@ def resolve_events(self, events: Union[str, Tuple]) -> Tuple[Events, Events]: if len(events) == 1: return self.create_paired_events(events[0]) if len(events) == 2: - return ( - self.get_event(events[0]), - self.get_event(events[1]), - ) + return (self.get_event(events[0]), self.get_event(events[1])) raise ValueError(f"Exactly two Ignite events should be provided [received {len(events)}].") def create_paired_events(self, event: str) -> Tuple[Events, Events]: @@ -84,22 +79,11 @@ def create_paired_events(self, event: str) -> Tuple[Events, Events]: """ Create a pair of Ignite events from an event prefix name """ event = event.upper() - event_prefix = { - "": "", - "ENGINE": "", - "EPOCH": "EPOCH_", - "ITERATION": "ITERATION_", - "BATCH": "GET_BATCH_", - } - return ( - self.get_event(event_prefix[event] + "STARTED"), - self.get_event(event_prefix[event] + "COMPLETED"), - ) + event_prefix = {"": "", "ENGINE": "", "EPOCH": "EPOCH_", "ITERATION": "ITERATION_", "BATCH": "GET_BATCH_"} + return (self.get_event(event_prefix[event] + "STARTED"), self.get_event(event_prefix[event] + "COMPLETED")) def get_event(self, event: Union[str, Events]) -> Events: - if isinstance(event, str): - event = event.upper() - return Events[event] + return Events[event.upper()] if isinstance(event, str) else event def attach(self, engine: Engine) -> None: """ @@ -126,10 +110,8 @@ class 
RangePushHandler: msg: ASCII message to associate with range """ - def __init__(self, event: Events, msg: Optional[str] = None) -> None: - if isinstance(event, str): - event = event.upper() - self.event = Events[event] + def __init__(self, event: Union[str, Events], msg: Optional[str] = None) -> None: + self.event = Events[event.upper()] if isinstance(event, str) else event if msg is None: msg = self.event.name self.msg = msg @@ -156,10 +138,8 @@ class RangePopHandler: msg: ASCII message to associate with range """ - def __init__(self, event: Events) -> None: - if isinstance(event, str): - event = event.upper() - self.event = Events[event] + def __init__(self, event: Union[str, Events]) -> None: + self.event = Events[event.upper()] if isinstance(event, str) else event def attach(self, engine: Engine) -> None: """ @@ -181,10 +161,8 @@ class MarkHandler: msg: ASCII message to associate with range """ - def __init__(self, event: Events, msg: Optional[str] = None) -> None: - if isinstance(event, str): - event = event.upper() - self.event = Events[event] + def __init__(self, event: Union[str, Events], msg: Optional[str] = None) -> None: + self.event = Events[event.upper()] if isinstance(event, str) else event if msg is None: msg = self.event.name self.msg = msg diff --git a/monai/handlers/postprocessing.py b/monai/handlers/postprocessing.py index 05c6bd414d..29029306d2 100644 --- a/monai/handlers/postprocessing.py +++ b/monai/handlers/postprocessing.py @@ -63,9 +63,7 @@ def __call__(self, engine: Engine) -> None: """ if not isinstance(engine.state.batch, list) or not isinstance(engine.state.output, list): engine.state.batch, engine.state.output = engine_apply_transform( - batch=engine.state.batch, - output=engine.state.output, - transform=self.transform, + batch=engine.state.batch, output=engine.state.output, transform=self.transform ) else: for i, (b, o) in enumerate(zip(engine.state.batch, engine.state.output)): diff --git a/monai/handlers/regression_metrics.py b/monai/handlers/regression_metrics.py index f203439f40..9758d86bae 100644 --- a/monai/handlers/regression_metrics.py +++ b/monai/handlers/regression_metrics.py @@ -21,19 +21,16 @@ class MeanSquaredError(IgniteMetric): Computes Mean Squared Error from full size Tensor and collects average over batch, iterations. """ - def __init__( - self, - output_transform: Callable = lambda x: x, - save_details: bool = True, - ) -> None: + def __init__(self, output_transform: Callable = lambda x: x, save_details: bool = True) -> None: """ Args: output_transform: callable to extract `y_pred` and `y` from `ignite.engine.state.output` then construct `(y_pred, y)` pair, where `y_pred` and `y` can be `batch-first` Tensors or lists of `channel-first` Tensors. the form of `(y_pred, y)` is required by the `update()`. - for example: if `ignite.engine.state.output` is `{"pred": xxx, "label": xxx, "other": xxx}`, - output_transform can be `lambda x: (x["pred"], x["label"])`. + `engine.state` and `output_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. save_details: whether to save metric computation details per image, for example: mean squared error of every image. default to True, will save to `engine.state.metric_details` dict with the metric name as key. 
@@ -41,11 +38,7 @@ def __init__(
             :py:class:`monai.metrics.MSEMetric`

         """
         metric_fn = MSEMetric(reduction=MetricReduction.MEAN)
-        super().__init__(
-            metric_fn=metric_fn,
-            output_transform=output_transform,
-            save_details=save_details,
-        )
+        super().__init__(metric_fn=metric_fn, output_transform=output_transform, save_details=save_details)


 class MeanAbsoluteError(IgniteMetric):
@@ -53,27 +46,24 @@ class MeanAbsoluteError(IgniteMetric):
     Computes Mean Absolute Error from full size Tensor and collects average over batch, iterations.
     """

-    def __init__(
-        self,
-        output_transform: Callable = lambda x: x,
-        save_details: bool = True,
-    ) -> None:
+    def __init__(self, output_transform: Callable = lambda x: x, save_details: bool = True) -> None:
         """
         Args:
-            output_transform: transform the ignite.engine.state.output into [y_pred, y] pair.
-            save_details: whether to save metric computation details per image, for example: mean absolute error of every image.
+            output_transform: callable to extract `y_pred` and `y` from `ignite.engine.state.output` then
+                construct `(y_pred, y)` pair, where `y_pred` and `y` can be `batch-first` Tensors or
+                lists of `channel-first` Tensors. the form of `(y_pred, y)` is required by the `update()`.
+                `engine.state` and `output_transform` inherit from the ignite concept:
+                https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial:
+                https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb.
+            save_details: whether to save metric computation details per image, for example: mean absolute error of every image.
                 default to True, will save to `engine.state.metric_details` dict with the metric name as key.

         See also:
             :py:class:`monai.metrics.MAEMetric`
         """
         metric_fn = MAEMetric(reduction=MetricReduction.MEAN)
-        super().__init__(
-            metric_fn=metric_fn,
-            output_transform=output_transform,
-            save_details=save_details,
-        )
+        super().__init__(metric_fn=metric_fn, output_transform=output_transform, save_details=save_details)


 class RootMeanSquaredError(IgniteMetric):
@@ -81,27 +71,24 @@ class RootMeanSquaredError(IgniteMetric):
     Computes Root Mean Squared Error from full size Tensor and collects average over batch, iterations.
     """

-    def __init__(
-        self,
-        output_transform: Callable = lambda x: x,
-        save_details: bool = True,
-    ) -> None:
+    def __init__(self, output_transform: Callable = lambda x: x, save_details: bool = True) -> None:
         """
         Args:
-            output_transform: transform the ignite.engine.state.output into [y_pred, y] pair.
-            save_details: whether to save metric computation details per image, for example: root mean squared error of every image.
+            output_transform: callable to extract `y_pred` and `y` from `ignite.engine.state.output` then
+                construct `(y_pred, y)` pair, where `y_pred` and `y` can be `batch-first` Tensors or
+                lists of `channel-first` Tensors. the form of `(y_pred, y)` is required by the `update()`.
+                `engine.state` and `output_transform` inherit from the ignite concept:
+                https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial:
+                https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb.
+            save_details: whether to save metric computation details per image, for example: root mean squared error of every image.
                 default to True, will save to `engine.state.metric_details` dict with the metric name as key.
         See also:
             :py:class:`monai.metrics.RMSEMetric`
         """
         metric_fn = RMSEMetric(reduction=MetricReduction.MEAN)
-        super().__init__(
-            metric_fn=metric_fn,
-            output_transform=output_transform,
-            save_details=save_details,
-        )
+        super().__init__(metric_fn=metric_fn, output_transform=output_transform, save_details=save_details)


 class PeakSignalToNoiseRatio(IgniteMetric):
@@ -110,18 +97,20 @@ class PeakSignalToNoiseRatio(IgniteMetric):
     """

     def __init__(
-        self,
-        max_val: Union[int, float],
-        output_transform: Callable = lambda x: x,
-        save_details: bool = True,
+        self, max_val: Union[int, float], output_transform: Callable = lambda x: x, save_details: bool = True
     ) -> None:
         """
         Args:
             max_val: The dynamic range of the images/volumes (i.e., the difference between the
                 maximum and the minimum allowed values e.g. 255 for a uint8 image).
-            output_transform: transform the ignite.engine.state.output into [y_pred, y] pair.
-            save_details: whether to save metric computation details per image, for example: PSNR of every image.
+            output_transform: callable to extract `y_pred` and `y` from `ignite.engine.state.output` then
+                construct `(y_pred, y)` pair, where `y_pred` and `y` can be `batch-first` Tensors or
+                lists of `channel-first` Tensors. the form of `(y_pred, y)` is required by the `update()`.
+                `engine.state` and `output_transform` inherit from the ignite concept:
+                https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial:
+                https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb.
+            save_details: whether to save metric computation details per image, for example: PSNR of every image.
                 default to True, will save to `engine.state.metric_details` dict with the metric name as key.

             reduction: {``"none"``, ``"mean"``, ``"sum"``, ``"mean_batch"``, ``"sum_batch"``,
@@ -129,8 +118,4 @@ def __init__(
             :py:class:`monai.metrics.PSNRMetric`
         """
         metric_fn = PSNRMetric(max_val=max_val, reduction=MetricReduction.MEAN)
-        super().__init__(
-            metric_fn=metric_fn,
-            output_transform=output_transform,
-            save_details=save_details,
-        )
+        super().__init__(metric_fn=metric_fn, output_transform=output_transform, save_details=save_details)
diff --git a/monai/handlers/roc_auc.py b/monai/handlers/roc_auc.py
index 98c8c8f8bc..125a4991ea 100644
--- a/monai/handlers/roc_auc.py
+++ b/monai/handlers/roc_auc.py
@@ -36,8 +36,9 @@ class ROCAUC(IgniteMetric):  # type: ignore[valid-type, misc] # due to optional
         output_transform: callable to extract `y_pred` and `y` from `ignite.engine.state.output` then
             construct `(y_pred, y)` pair, where `y_pred` and `y` can be `batch-first` Tensors or
             lists of `channel-first` Tensors. the form of `(y_pred, y)` is required by the `update()`.
-            for example: if `ignite.engine.state.output` is `{"pred": xxx, "label": xxx, "other": xxx}`,
-            output_transform can be `lambda x: (x["pred"], x["label"])`.
+            `engine.state` and `output_transform` inherit from the ignite concept:
+            https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial:
+            https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb.

     Note:
         ROCAUC expects y to be comprised of 0's and 1's.
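As a quick sanity check of the one-hot inputs described above, the functional form `compute_roc_auc` wrapped by this handler can be called directly; the toy tensors below are illustrative only:

    import torch
    from monai.metrics import compute_roc_auc

    y_pred = torch.tensor([[0.9, 0.1], [0.8, 0.2], [0.2, 0.8], [0.1, 0.9]])
    y = torch.tensor([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0], [0.0, 1.0]])  # one-hot 0's and 1's
    print(compute_roc_auc(y_pred, y, average="macro"))  # 1.0 for this perfectly separated toy data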
@@ -45,14 +46,6 @@ class ROCAUC(IgniteMetric): # type: ignore[valid-type, misc] # due to optional """ - def __init__( - self, - average: Union[Average, str] = Average.MACRO, - output_transform: Callable = lambda x: x, - ) -> None: + def __init__(self, average: Union[Average, str] = Average.MACRO, output_transform: Callable = lambda x: x) -> None: metric_fn = ROCAUCMetric(average=Average(average)) - super().__init__( - metric_fn=metric_fn, - output_transform=output_transform, - save_details=False, - ) + super().__init__(metric_fn=metric_fn, output_transform=output_transform, save_details=False) diff --git a/monai/handlers/segmentation_saver.py b/monai/handlers/segmentation_saver.py index 535f58945b..479cc1408a 100644 --- a/monai/handlers/segmentation_saver.py +++ b/monai/handlers/segmentation_saver.py @@ -26,7 +26,7 @@ Engine, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Engine") -@deprecated(since="0.6.0", removed="0.7.0", msg_suffix="Please consider using `SaveImage[d]` transform instead.") +@deprecated(since="0.6.0", removed="0.8.0", msg_suffix="Please consider using `SaveImage[d]` transform instead.") class SegmentationSaver: """ Event handler triggered on completing every iteration to save the segmentation predictions into files. @@ -113,9 +113,15 @@ def __init__( batch_transform: a callable that is used to extract the `meta_data` dictionary of the input images from `ignite.engine.state.batch`. the purpose is to extract necessary information from the meta data: filename, affine, original_shape, etc. + `engine.state` and `batch_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. output_transform: a callable that is used to extract the model prediction data from `ignite.engine.state.output`. the first dimension of its output will be treated as the batch dimension. each item in the batch will be saved individually. + `engine.state` and `output_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. name: identifier of logging.logger to use, defaulting to `engine.logger`. """ diff --git a/monai/handlers/stats_handler.py b/monai/handlers/stats_handler.py index d5756074fc..7c88634820 100644 --- a/monai/handlers/stats_handler.py +++ b/monai/handlers/stats_handler.py @@ -11,7 +11,7 @@ import logging import warnings -from typing import TYPE_CHECKING, Any, Callable, Optional +from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence import torch @@ -31,7 +31,7 @@ class StatsHandler: """ StatsHandler defines a set of Ignite Event-handlers for all the log printing logics. - It's can be used for any Ignite Engine(trainer, validator and evaluator). + It can be used for any Ignite Engine(trainer, validator and evaluator). And it can support logging for epoch level and iteration level with pre-defined loggers. Default behaviors: @@ -39,6 +39,9 @@ class StatsHandler: - When ITERATION_COMPLETED, logs ``self.output_transform(engine.state.output)`` using ``self.logger``. + Usage example is available in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/3d_segmentation/unet_segmentation_3d_ignite.ipynb. 
+ """ def __init__( @@ -47,6 +50,7 @@ def __init__( iteration_print_logger: Optional[Callable[[Engine], Any]] = None, output_transform: Callable = lambda x: x[0], global_epoch_transform: Callable = lambda x: x, + state_attributes: Optional[Sequence[str]] = None, name: Optional[str] = None, tag_name: str = DEFAULT_TAG, key_var_format: str = DEFAULT_KEY_VAL_FORMAT, @@ -65,9 +69,14 @@ def __init__( By default this value logging happens when every iteration completed. The default behavior is to print loss from output[0] as output is a decollated list and we replicated loss value for every item of the decollated list. + `engine.state` and `output_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. global_epoch_transform: a callable that is used to customize global epoch number. For example, in evaluation, the evaluator engine might want to print synced epoch number with the trainer engine. + state_attributes: expected attributes from `engine.state`, if provided, will extract them + when epoch completed. name: identifier of logging.logger to use, defaulting to ``engine.logger``. tag_name: when iteration output is a scalar, tag_name is used to print tag_name: scalar_value to logger. Defaults to ``'Loss'``. @@ -80,6 +89,7 @@ def __init__( self.iteration_print_logger = iteration_print_logger self.output_transform = output_transform self.global_epoch_transform = global_epoch_transform + self.state_attributes = state_attributes self.logger = logging.getLogger(name) self._name = name @@ -108,7 +118,7 @@ def attach(self, engine: Engine) -> None: def epoch_completed(self, engine: Engine) -> None: """ Handler for train or validation/evaluation epoch completed Event. - Print epoch level log, default values are from Ignite state.metrics dict. + Print epoch level log, default values are from Ignite `engine.state.metrics` dict. Args: engine: Ignite Engine, it can be a trainer, validator or evaluator. @@ -122,7 +132,7 @@ def epoch_completed(self, engine: Engine) -> None: def iteration_completed(self, engine: Engine) -> None: """ Handler for train or validation/evaluation iteration completed Event. - Print iteration level log, default values are from Ignite state.logs dict. + Print iteration level log, default values are from Ignite `engine.state.output`. Args: engine: Ignite Engine, it can be a trainer, validator or evaluator. @@ -149,39 +159,46 @@ def exception_raised(self, engine: Engine, e: Exception) -> None: def _default_epoch_print(self, engine: Engine) -> None: """ - Execute epoch level log operation based on Ignite engine.state data. - print the values from Ignite state.metrics dict. + Execute epoch level log operation. + Default to print the values from Ignite `engine.state.metrics` dict and + print the values of specified attributes of `engine.state`. Args: engine: Ignite Engine, it can be a trainer, validator or evaluator. 
""" - prints_dict = engine.state.metrics - if not prints_dict: - return current_epoch = self.global_epoch_transform(engine.state.epoch) - out_str = f"Epoch[{current_epoch}] Metrics -- " - for name in sorted(prints_dict): - value = prints_dict[name] - out_str += self.key_var_format.format(name, value) - self.logger.info(out_str) + prints_dict = engine.state.metrics + if prints_dict is not None and len(prints_dict) > 0: + out_str = f"Epoch[{current_epoch}] Metrics -- " + for name in sorted(prints_dict): + value = prints_dict[name] + out_str += self.key_var_format.format(name, value) + self.logger.info(out_str) if ( hasattr(engine.state, "key_metric_name") and hasattr(engine.state, "best_metric") and hasattr(engine.state, "best_metric_epoch") ): - out_str = f"Key metric: {engine.state.key_metric_name} " - out_str += f"best value: {engine.state.best_metric} at epoch: {engine.state.best_metric_epoch}" - self.logger.info(out_str) + out_str = f"Key metric: {engine.state.key_metric_name} " # type: ignore + out_str += f"best value: {engine.state.best_metric} " # type: ignore + out_str += f"at epoch: {engine.state.best_metric_epoch}" # type: ignore + self.logger.info(out_str) + + if self.state_attributes is not None and len(self.state_attributes) > 0: + out_str = "State values: " + for attr in self.state_attributes: + out_str += f"{attr}: {getattr(engine.state, attr, None)} " + self.logger.info(out_str) def _default_iteration_print(self, engine: Engine) -> None: """ - Execute iteration log operation based on Ignite engine.state data. - Print the values from Ignite state.logs dict. - The default behavior is to print loss from output[0] as output is a decollated list and we replicated loss - value for every item of the decollated list. + Execute iteration log operation based on Ignite `engine.state.output` data. + Print the values from `self.output_transform(engine.state.output)`. + Since `engine.state.output` is a decollated list and we replicated the loss value for every item + of the decollated list, the default behavior is to print the loss from `output[0]`. Args: engine: Ignite Engine, it can be a trainer, validator or evaluator. @@ -220,7 +237,9 @@ def _default_iteration_print(self, engine: Engine) -> None: return # no value to print num_iterations = engine.state.epoch_length - current_iteration = (engine.state.iteration - 1) % num_iterations + 1 + current_iteration = engine.state.iteration - 1 + if num_iterations is not None: + current_iteration %= num_iterations + 1 current_epoch = engine.state.epoch num_epochs = engine.state.max_epochs diff --git a/monai/handlers/surface_distance.py b/monai/handlers/surface_distance.py index 4fc5b5a60a..aee1475ae7 100644 --- a/monai/handlers/surface_distance.py +++ b/monai/handlers/surface_distance.py @@ -41,8 +41,9 @@ def __init__( output_transform: callable to extract `y_pred` and `y` from `ignite.engine.state.output` then construct `(y_pred, y)` pair, where `y_pred` and `y` can be `batch-first` Tensors or lists of `channel-first` Tensors. the form of `(y_pred, y)` is required by the `update()`. - for example: if `ignite.engine.state.output` is `{"pred": xxx, "label": xxx, "other": xxx}`, - output_transform can be `lambda x: (x["pred"], x["label"])`. + `engine.state` and `output_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. 
save_details: whether to save metric computation details per image, for example: surface dice of every image. default to True, will save to `engine.state.metric_details` dict with the metric name as key. @@ -53,8 +54,4 @@ def __init__( distance_metric=distance_metric, reduction=MetricReduction.MEAN, ) - super().__init__( - metric_fn=metric_fn, - output_transform=output_transform, - save_details=save_details, - ) + super().__init__(metric_fn=metric_fn, output_transform=output_transform, save_details=save_details) diff --git a/monai/handlers/tensorboard_handlers.py b/monai/handlers/tensorboard_handlers.py index a3a0bf76b8..42bed14c83 100644 --- a/monai/handlers/tensorboard_handlers.py +++ b/monai/handlers/tensorboard_handlers.py @@ -10,7 +10,7 @@ # limitations under the License. import warnings -from typing import TYPE_CHECKING, Any, Callable, Optional +from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence import numpy as np import torch @@ -64,7 +64,7 @@ def close(self): class TensorBoardStatsHandler(TensorBoardHandler): """ TensorBoardStatsHandler defines a set of Ignite Event-handlers for all the TensorBoard logics. - It's can be used for any Ignite Engine(trainer, validator and evaluator). + It can be used for any Ignite Engine(trainer, validator and evaluator). And it can support both epoch level and iteration level with pre-defined TensorBoard event writer. The expected data source is Ignite ``engine.state.output`` and ``engine.state.metrics``. @@ -73,6 +73,10 @@ class TensorBoardStatsHandler(TensorBoardHandler): ``engine.state.metrics`` to TensorBoard. - When ITERATION_COMPLETED, write each dictionary item in ``self.output_transform(engine.state.output)`` to TensorBoard. + + Usage example is available in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/3d_segmentation/unet_segmentation_3d_ignite.ipynb. + """ def __init__( @@ -85,6 +89,7 @@ def __init__( iteration_interval: int = 1, output_transform: Callable = lambda x: x[0], global_epoch_transform: Callable = lambda x: x, + state_attributes: Optional[Sequence[str]] = None, tag_name: str = DEFAULT_TAG, ) -> None: """ @@ -104,9 +109,14 @@ def __init__( By default this value plotting happens when every iteration completed. The default behavior is to print loss from output[0] as output is a decollated list and we replicated loss value for every item of the decollated list. + `engine.state` and `output_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. global_epoch_transform: a callable that is used to customize global epoch number. For example, in evaluation, the evaluator engine might want to use trainer engines epoch number when plotting epoch vs metric curves. + state_attributes: expected attributes from `engine.state`, if provided, will extract them + when epoch completed. tag_name: when iteration output is a scalar, tag_name is used to plot, defaults to ``'Loss'``. 
""" super().__init__(summary_writer=summary_writer, log_dir=log_dir) @@ -116,6 +126,7 @@ def __init__( self.iteration_interval = iteration_interval self.output_transform = output_transform self.global_epoch_transform = global_epoch_transform + self.state_attributes = state_attributes self.tag_name = tag_name def attach(self, engine: Engine) -> None: @@ -136,7 +147,7 @@ def attach(self, engine: Engine) -> None: def epoch_completed(self, engine: Engine) -> None: """ Handler for train or validation/evaluation epoch completed Event. - Write epoch level events, default values are from Ignite state.metrics dict. + Write epoch level events, default values are from Ignite `engine.state.metrics` dict. Args: engine: Ignite Engine, it can be a trainer, validator or evaluator. @@ -150,7 +161,7 @@ def epoch_completed(self, engine: Engine) -> None: def iteration_completed(self, engine: Engine) -> None: """ Handler for train or validation/evaluation iteration completed Event. - Write iteration level events, default values are from Ignite state.logs dict. + Write iteration level events, default values are from Ignite `engine.state.output`. Args: engine: Ignite Engine, it can be a trainer, validator or evaluator. @@ -163,8 +174,9 @@ def iteration_completed(self, engine: Engine) -> None: def _default_epoch_writer(self, engine: Engine, writer: SummaryWriter) -> None: """ - Execute epoch level event write operation based on Ignite engine.state data. - Default is to write the values from Ignite state.metrics dict. + Execute epoch level event write operation. + Default to write the values from Ignite `engine.state.metrics` dict and + write the values of specified attributes of `engine.state`. Args: engine: Ignite Engine, it can be a trainer, validator or evaluator. @@ -175,13 +187,18 @@ def _default_epoch_writer(self, engine: Engine, writer: SummaryWriter) -> None: summary_dict = engine.state.metrics for name, value in summary_dict.items(): writer.add_scalar(name, value, current_epoch) + + if self.state_attributes is not None: + for attr in self.state_attributes: + writer.add_scalar(attr, getattr(engine.state, attr, None), current_epoch) writer.flush() def _default_iteration_writer(self, engine: Engine, writer: SummaryWriter) -> None: """ - Execute iteration level event write operation based on Ignite engine.state data. - The default behavior is to print loss from output[0] as output is a decollated list and we replicated loss - value for every item of the decollated list. + Execute iteration level event write operation based on Ignite `engine.state.output` data. + Extract the values from `self.output_transform(engine.state.output)`. + Since `engine.state.output` is a decollated list and we replicated the loss value for every item + of the decollated list, the default behavior is to track the loss from `output[0]`. Args: engine: Ignite Engine, it can be a trainer, validator or evaluator. @@ -239,6 +256,9 @@ class TensorBoardImageHandler(TensorBoardHandler): - Expects ``output_transform(engine.state.output)`` to return a torch tensor in format (y_pred[N, channel, ...], loss). + Usage example is available in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/3d_segmentation/unet_segmentation_3d_ignite.ipynb. + """ def __init__( @@ -266,8 +286,14 @@ def __init__( then construct `(image, label)` pair. for example: if `ignite.engine.state.batch` is `{"image": xxx, "label": xxx, "other": xxx}`, `batch_transform` can be `lambda x: (x["image"], x["label"])`. 
will use the result to plot image from `result[0][index]` and plot label from `result[1][index]`. + `engine.state` and `batch_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. output_transform: a callable that is used to extract the `predictions` data from `ignite.engine.state.output`, will use the result to plot output from `result[index]`. + `engine.state` and `output_transform` inherit from the ignite concept: + https://pytorch.org/ignite/concepts.html#state, explanation and usage example are in the tutorial: + https://github.com/Project-MONAI/tutorials/blob/master/modules/batch_output_transform.ipynb. global_iter_transform: a callable that is used to customize global step number for TensorBoard. For example, in evaluation, the evaluator engine needs to know current epoch from trainer. index: plot which element in a data batch, default is the first element. diff --git a/monai/handlers/transform_inverter.py b/monai/handlers/transform_inverter.py deleted file mode 100644 index 83b5f56396..0000000000 --- a/monai/handlers/transform_inverter.py +++ /dev/null @@ -1,144 +0,0 @@ -# Copyright 2020 - 2021 MONAI Consortium -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings -from typing import TYPE_CHECKING, Callable, Optional, Sequence, Union - -import torch - -from monai.config import IgniteInfo, KeysCollection -from monai.engines.utils import CommonKeys, IterationEvents -from monai.transforms import Invertd, InvertibleTransform -from monai.utils import deprecated, ensure_tuple, ensure_tuple_rep, min_version, optional_import - -Events, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Events") -if TYPE_CHECKING: - from ignite.engine import Engine -else: - Engine, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Engine") - - -@deprecated(since="0.6.0", removed="0.7.0", msg_suffix="Please consider using `Invertd` transform instead.") -class TransformInverter: - """ - Ignite handler to automatically invert `transforms`. - It takes `engine.state.output` as the input data and uses the transforms information from `engine.state.batch`. - Expect both `engine.state.output` and `engine.state.batch` to be list of dictionaries data. - The inverted data is in-place saved back to `engine.state.output` with key: "{output_key}". - And the inverted meta dict will be stored in `engine.state.batch` - with key: "{meta_keys}" or "{key}_{meta_key_postfix}". - - .. deprecated:: 0.6.0 - Use :class:`monai.transforms.Invertd` instead. 
- - """ - - def __init__( - self, - transform: InvertibleTransform, - output_keys: KeysCollection = CommonKeys.PRED, - batch_keys: KeysCollection = CommonKeys.IMAGE, - meta_keys: Optional[KeysCollection] = None, - batch_meta_keys: Optional[KeysCollection] = None, - meta_key_postfix: str = "meta_dict", - nearest_interp: Union[bool, Sequence[bool]] = True, - to_tensor: Union[bool, Sequence[bool]] = True, - device: Union[Union[str, torch.device], Sequence[Union[str, torch.device]]] = "cpu", - post_func: Union[Callable, Sequence[Callable]] = lambda x: x, - num_workers: Optional[int] = 0, - ) -> None: - """ - Args: - transform: a callable data transform on input data. - output_keys: the key of expected data in `ignite.engine.output`, invert transforms on it. - it also can be a list of keys, will invert transform for each of them. - Default to "pred". it's in-place operation. - batch_keys: the key of input data in `ignite.engine.batch`. will get the applied transforms - for this input data, then invert them for the expected data with `output_keys`. - It can also be a list of keys, each matches to the `output_keys` data. default to "image". - meta_keys: explicitly indicate the key for the inverted meta data dictionary. - the meta data is a dictionary object which contains: filename, original_shape, etc. - it can be a sequence of string, map to the `keys`. - if None, will try to construct meta_keys by `{key}_{meta_key_postfix}`. - batch_meta_keys: the key of the meta data of input data in `ignite.engine.batch`, - will get the `affine`, `data_shape`, etc. - the meta data is a dictionary object which contains: filename, original_shape, etc. - it can be a sequence of string, map to the `keys`. - if None, will try to construct meta_keys by `{orig_key}_{meta_key_postfix}`. - meta data will also be inverted and stored in `meta_keys`. - meta_key_postfix: if `orig_meta_keys` is None, use `{orig_key}_{meta_key_postfix}` to to fetch the - meta data from dict, if `meta_keys` is None, use `{key}_{meta_key_postfix}`. - default is `meta_dict`, the meta data is a dictionary object. - For example, to handle orig_key `image`, read/write `affine` matrices from the - metadata `image_meta_dict` dictionary's `affine` field. - the inverted meta dict will be stored with key: "{key}_{meta_key_postfix}". - nearest_interp: whether to use `nearest` interpolation mode when inverting the spatial transforms, - default to `True`. If `False`, use the same interpolation mode as the original transform. - it also can be a list of bool, each matches to the `output_keys` data. - to_tensor: whether to convert the inverted data into PyTorch Tensor first, default to `True`. - it also can be a list of bool, each matches to the `output_keys` data. - device: if converted to Tensor, move the inverted results to target device before `post_func`, - default to "cpu", it also can be a list of string or `torch.device`, - each matches to the `output_keys` data. - post_func: post processing for the inverted data, should be a callable function. - it also can be a list of callable, each matches to the `output_keys` data. 
- - """ - self.inverter = Invertd( - keys=output_keys, - transform=transform, - orig_keys=batch_keys, - meta_keys=meta_keys, - orig_meta_keys=batch_meta_keys, - meta_key_postfix=meta_key_postfix, - nearest_interp=nearest_interp, - to_tensor=to_tensor, - device=device, - post_func=post_func, - ) - self.output_keys = ensure_tuple(output_keys) - self.meta_keys = ensure_tuple_rep(None, len(self.output_keys)) if meta_keys is None else ensure_tuple(meta_keys) - if len(self.output_keys) != len(self.meta_keys): - raise ValueError("meta_keys should have the same length as output_keys.") - self.meta_key_postfix = ensure_tuple_rep(meta_key_postfix, len(self.output_keys)) - - def attach(self, engine: Engine) -> None: - """ - Args: - engine: Ignite Engine, it can be a trainer, validator or evaluator. - """ - engine.add_event_handler(IterationEvents.MODEL_COMPLETED, self) - - def __call__(self, engine: Engine) -> None: - """ - Args: - engine: Ignite Engine, it can be a trainer, validator or evaluator. - """ - if not isinstance(engine.state.batch, list) or not isinstance(engine.state.output, list): - warnings.warn("inverter requires `engine.state.batch` and `engine.state.output` to be lists.") - else: - for i, (b, o) in enumerate(zip(engine.state.batch, engine.state.output)): - # combine `batch` and `output` to temporarily act as 1 dict for postprocessing - data = dict(b) - data.update(o) - ret = self.inverter(data) - - for output_key, meta_key, meta_key_postfix in zip( - self.output_keys, self.meta_keys, self.meta_key_postfix - ): - # save the inverted data into state.output - engine.state.output[i][output_key] = ret.get(output_key) - # save the inverted meta dict into state.batch - meta_key = meta_key or f"{output_key}_{meta_key_postfix}" - if meta_key in ret: - # FIXME: we save inverted meta dict into `batch` to be compatible with `SegmentationSaver` - # will deprecate both handlers soon - engine.state.batch[i][meta_key] = ret.get(meta_key) diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py index 13f23c582a..3567dbac03 100644 --- a/monai/handlers/utils.py +++ b/monai/handlers/utils.py @@ -11,13 +11,13 @@ import os from collections import OrderedDict -from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Union +from typing import TYPE_CHECKING, Dict, Optional, Sequence, Union import numpy as np import torch from monai.config import IgniteInfo, KeysCollection -from monai.utils import deprecated, ensure_tuple, get_torch_version_tuple, look_up_option, min_version, optional_import +from monai.utils import ensure_tuple, look_up_option, min_version, optional_import idist, _ = optional_import("ignite", IgniteInfo.OPT_IMPORT_VERSION, min_version, "distributed") if TYPE_CHECKING: @@ -25,14 +25,7 @@ else: Engine, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Engine") -__all__ = [ - "stopping_fn_from_metric", - "stopping_fn_from_loss", - "evenly_divisible_all_gather", - "string_list_all_gather", - "write_metrics_reports", - "from_engine", -] +__all__ = ["stopping_fn_from_metric", "stopping_fn_from_loss", "write_metrics_reports", "from_engine"] def stopping_fn_from_metric(metric_name: str): @@ -52,83 +45,11 @@ def stopping_fn_from_loss(): """ def stopping_fn(engine: Engine): - return -engine.state.output + return -engine.state.output # type:ignore return stopping_fn -@deprecated(since="0.6.0", removed="0.7.0", msg_suffix="The API had been moved to monai.utils module.") -def evenly_divisible_all_gather(data: torch.Tensor) -> torch.Tensor: - """ - Utility 
function for distributed data parallel to pad at first dim to make it evenly divisible and all_gather. - - Args: - data: source tensor to pad and execute all_gather in distributed data parallel. - - Note: - The input data on different ranks must have exactly same `dtype`. - - .. versionchanged:: 0.6.0 - The API had been moved to `monai.utils`. - - """ - if not isinstance(data, torch.Tensor): - raise ValueError("input data must be PyTorch Tensor.") - - if idist.get_world_size() <= 1: - return data - - # make sure the data is evenly-divisible on multi-GPUs - length = data.shape[0] - all_lens = idist.all_gather(length) - max_len = max(all_lens) - if length < max_len: - size = [max_len - length] + list(data.shape[1:]) - data = torch.cat([data, data.new_full(size, 0)], dim=0) - # all gather across all processes - data = idist.all_gather(data) - # delete the padding NaN items - return torch.cat([data[i * max_len : i * max_len + l, ...] for i, l in enumerate(all_lens)], dim=0) - - -@deprecated(since="0.6.0", removed="0.7.0", msg_suffix="The API had been moved to monai.utils module.") -def string_list_all_gather(strings: List[str]) -> List[str]: - """ - Utility function for distributed data parallel to all gather a list of strings. - Note that if the item in `strings` is longer than 1024 chars, it will be truncated to 1024: - https://pytorch.org/ignite/v0.4.5/distributed.html#ignite.distributed.utils.all_gather. - - Args: - strings: a list of strings to all gather. - - .. versionchanged:: 0.6.0 - The API had been moved to `monai.utils`. - - """ - world_size = idist.get_world_size() - if world_size <= 1: - return strings - - result: List[List[str]] = [[] for _ in range(world_size)] - # get length of strings - length = len(strings) - all_lens = idist.all_gather(length) - max_len = max(all_lens) - # pad the item to make sure the same length - if length < max_len: - strings += ["" for _ in range(max_len - length)] - - if get_torch_version_tuple() <= (1, 6): - raise RuntimeError("string all_gather can not be supported in PyTorch < 1.7.0.") - - for s in strings: - gathered = idist.all_gather(s) - for i, g in enumerate(gathered): - if len(g) > 0: - result[i].append(g) - return [i for k in result for i in k] - - def write_metrics_reports( save_dir: str, images: Optional[Sequence[str]], @@ -204,12 +125,12 @@ class mean median max 5percentile 95percentile notnans if summary_ops is not None: supported_ops = OrderedDict( { - "mean": lambda x: np.nanmean(x), - "median": lambda x: np.nanmedian(x), - "max": lambda x: np.nanmax(x), - "min": lambda x: np.nanmin(x), + "mean": np.nanmean, + "median": np.nanmedian, + "max": np.nanmax, + "min": np.nanmin, "90percentile": lambda x: np.nanpercentile(x[0], x[1]), - "std": lambda x: np.nanstd(x), + "std": np.nanstd, "notnans": lambda x: (~np.isnan(x)).sum(), } ) @@ -223,7 +144,7 @@ def _compute_op(op: str, d: np.ndarray): return c_op(d) threshold = int(op.split("percentile")[0]) - return supported_ops["90percentile"]((d, threshold)) + return supported_ops["90percentile"]((d, threshold)) # type: ignore with open(os.path.join(save_dir, f"{k}_summary.csv"), "w") as f: f.write(f"class{deli}{deli.join(ops)}\n") diff --git a/monai/inferers/inferer.py b/monai/inferers/inferer.py index ecb2c2c178..25d9fd1fb0 100644 --- a/monai/inferers/inferer.py +++ b/monai/inferers/inferer.py @@ -42,13 +42,7 @@ class Inferer(ABC): """ @abstractmethod - def __call__( - self, - inputs: torch.Tensor, - network: Callable[..., torch.Tensor], - *args: Any, - **kwargs: Any, - ): + def __call__(self, 
inputs: torch.Tensor, network: Callable[..., torch.Tensor], *args: Any, **kwargs: Any): """ Run inference on `inputs` with the `network` model. @@ -75,13 +69,7 @@ class SimpleInferer(Inferer): def __init__(self) -> None: Inferer.__init__(self) - def __call__( - self, - inputs: torch.Tensor, - network: Callable[..., torch.Tensor], - *args: Any, - **kwargs: Any, - ): + def __call__(self, inputs: torch.Tensor, network: Callable[..., torch.Tensor], *args: Any, **kwargs: Any): """Unified callable function API of Inferers. Args: @@ -161,11 +149,7 @@ def __init__( self.device = device def __call__( - self, - inputs: torch.Tensor, - network: Callable[..., torch.Tensor], - *args: Any, - **kwargs: Any, + self, inputs: torch.Tensor, network: Callable[..., torch.Tensor], *args: Any, **kwargs: Any ) -> torch.Tensor: """ @@ -217,13 +201,7 @@ def __init__(self, cam_name: str, target_layers: str, class_idx: Optional[int] = self.args = args self.kwargs = kwargs - def __call__( # type: ignore - self, - inputs: torch.Tensor, - network: nn.Module, - *args: Any, - **kwargs: Any, - ): + def __call__(self, inputs: torch.Tensor, network: nn.Module, *args: Any, **kwargs: Any): # type: ignore """Unified callable function API of Inferers. Args: diff --git a/monai/losses/__init__.py b/monai/losses/__init__.py index 1221cd3041..3e307fed22 100644 --- a/monai/losses/__init__.py +++ b/monai/losses/__init__.py @@ -18,7 +18,6 @@ GeneralizedDiceLoss, GeneralizedWassersteinDiceLoss, MaskedDiceLoss, - dice, dice_ce, dice_focal, generalized_dice, diff --git a/monai/losses/deform.py b/monai/losses/deform.py index d96fa1440a..fea56010c7 100644 --- a/monai/losses/deform.py +++ b/monai/losses/deform.py @@ -52,10 +52,7 @@ class BendingEnergyLoss(_Loss): DeepReg (https://github.com/DeepRegNet/DeepReg) """ - def __init__( - self, - reduction: Union[LossReduction, str] = LossReduction.MEAN, - ) -> None: + def __init__(self, reduction: Union[LossReduction, str] = LossReduction.MEAN) -> None: """ Args: reduction: {``"none"``, ``"mean"``, ``"sum"``} @@ -65,7 +62,7 @@ def __init__( - ``"mean"``: the sum of the output will be divided by the number of elements in the output. - ``"sum"``: the output will be summed. 
""" - super(BendingEnergyLoss, self).__init__(reduction=LossReduction(reduction).value) + super().__init__(reduction=LossReduction(reduction).value) def forward(self, pred: torch.Tensor) -> torch.Tensor: """ diff --git a/monai/losses/dice.py b/monai/losses/dice.py index 325c5300ea..09f86a4ebd 100644 --- a/monai/losses/dice.py +++ b/monai/losses/dice.py @@ -419,7 +419,7 @@ def __init__( wass_loss(pred_score, grnd) # 0 """ - super(GeneralizedWassersteinDiceLoss, self).__init__(reduction=LossReduction(reduction).value) + super().__init__(reduction=LossReduction(reduction).value) if dist_matrix.shape[0] != dist_matrix.shape[1]: raise ValueError(f"dist_matrix must be C x C, got {dist_matrix.shape[0]} x {dist_matrix.shape[1]}.") @@ -536,10 +536,7 @@ def _compute_generalized_true_positive( flat_target_extended = torch.unsqueeze(flat_target, dim=1) alpha_extended = torch.gather(alpha_extended, index=flat_target_extended, dim=1) - return torch.sum( - alpha_extended * (1.0 - wasserstein_distance_map), - dim=[1, 2], - ) + return torch.sum(alpha_extended * (1.0 - wasserstein_distance_map), dim=[1, 2]) def _compute_denominator( self, alpha: torch.Tensor, flat_target: torch.Tensor, wasserstein_distance_map: torch.Tensor @@ -556,10 +553,7 @@ def _compute_denominator( flat_target_extended = torch.unsqueeze(flat_target, dim=1) alpha_extended = torch.gather(alpha_extended, index=flat_target_extended, dim=1) - return torch.sum( - alpha_extended * (2.0 - wasserstein_distance_map), - dim=[1, 2], - ) + return torch.sum(alpha_extended * (2.0 - wasserstein_distance_map), dim=[1, 2]) def _compute_alpha_generalized_true_positives(self, flat_target: torch.Tensor) -> torch.Tensor: """ @@ -657,10 +651,7 @@ def __init__( smooth_dr=smooth_dr, batch=batch, ) - self.cross_entropy = nn.CrossEntropyLoss( - weight=ce_weight, - reduction=reduction, - ) + self.cross_entropy = nn.CrossEntropyLoss(weight=ce_weight, reduction=reduction) if lambda_dice < 0.0: raise ValueError("lambda_dice should be no less than 0.0.") if lambda_ce < 0.0: @@ -819,7 +810,7 @@ def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor: return total_loss -dice = Dice = DiceLoss +Dice = DiceLoss dice_ce = DiceCELoss dice_focal = DiceFocalLoss generalized_dice = GeneralizedDiceLoss diff --git a/monai/losses/focal_loss.py b/monai/losses/focal_loss.py index b4b3698e5b..157ce9fd01 100644 --- a/monai/losses/focal_loss.py +++ b/monai/losses/focal_loss.py @@ -67,7 +67,7 @@ def __init__( fl(pred, grnd) """ - super(FocalLoss, self).__init__(reduction=LossReduction(reduction).value) + super().__init__(reduction=LossReduction(reduction).value) self.include_background = include_background self.to_onehot_y = to_onehot_y self.gamma = gamma diff --git a/monai/losses/image_dissimilarity.py b/monai/losses/image_dissimilarity.py index eed5808aa3..78f92303fc 100644 --- a/monai/losses/image_dissimilarity.py +++ b/monai/losses/image_dissimilarity.py @@ -8,14 +8,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Tuple, Union +from typing import Optional, Tuple, Union import torch from torch.nn import functional as F from torch.nn.modules.loss import _Loss from monai.networks.layers import gaussian_1d, separable_filtering -from monai.utils import LossReduction +from monai.utils import LossReduction, deprecated_arg +from monai.utils.module import look_up_option def make_rectangular_kernel(kernel_size: int) -> torch.Tensor: @@ -59,18 +60,20 @@ class LocalNormalizedCrossCorrelationLoss(_Loss): DeepReg (https://github.com/DeepRegNet/DeepReg) """ + @deprecated_arg(name="ndim", since="0.6", msg_suffix="Please use `spatial_dims` instead.") def __init__( self, - ndim: int = 3, + spatial_dims: int = 3, kernel_size: int = 3, kernel_type: str = "rectangular", reduction: Union[LossReduction, str] = LossReduction.MEAN, smooth_nr: float = 1e-5, smooth_dr: float = 1e-5, + ndim: Optional[int] = None, ) -> None: """ Args: - ndim: number of spatial ndimensions, {``1``, ``2``, ``3``}. Defaults to 3. + spatial_dims: number of spatial ndimensions, {``1``, ``2``, ``3``}. Defaults to 3. kernel_size: kernel spatial size, must be odd. kernel_type: {``"rectangular"``, ``"triangular"``, ``"gaussian"``}. Defaults to ``"rectangular"``. reduction: {``"none"``, ``"mean"``, ``"sum"``} @@ -81,22 +84,24 @@ def __init__( - ``"sum"``: the output will be summed. smooth_nr: a small constant added to the numerator to avoid nan. smooth_dr: a small constant added to the denominator to avoid nan. + + .. deprecated:: 0.6.0 + ``ndim`` is deprecated, use ``spatial_dims``. """ - super(LocalNormalizedCrossCorrelationLoss, self).__init__(reduction=LossReduction(reduction).value) + super().__init__(reduction=LossReduction(reduction).value) - self.ndim = ndim - if self.ndim not in [1, 2, 3]: + if ndim is not None: + spatial_dims = ndim + self.ndim = spatial_dims + if self.ndim not in {1, 2, 3}: raise ValueError(f"Unsupported ndim: {self.ndim}-d, only 1-d, 2-d, and 3-d inputs are supported") self.kernel_size = kernel_size if self.kernel_size % 2 == 0: raise ValueError(f"kernel_size must be odd, got {self.kernel_size}") - if kernel_type not in kernel_dict.keys(): - raise ValueError( - f'Unsupported kernel_type: {kernel_type}, available options are ["rectangular", "triangular", "gaussian"].' - ) - self.kernel = kernel_dict[kernel_type](self.kernel_size) + _kernel = look_up_option(kernel_type, kernel_dict) + self.kernel = _kernel(self.kernel_size) self.kernel_vol = self.get_kernel_vol() self.smooth_nr = float(smooth_nr) @@ -170,6 +175,7 @@ class GlobalMutualInformationLoss(_Loss): def __init__( self, + kernel_type: str = "gaussian", num_bins: int = 23, sigma_ratio: float = 0.5, reduction: Union[LossReduction, str] = LossReduction.MEAN, @@ -178,6 +184,19 @@ def __init__( ) -> None: """ Args: + kernel_type: {``"gaussian"``, ``"b-spline"``} + ``"gaussian"``: adapted from DeepReg + Reference: https://dspace.mit.edu/handle/1721.1/123142, Section 3.1, equation 3.1-3.5, Algorithm 1. + ``"b-spline"``: based on the method of Mattes et al [1,2] and adapted from ITK + References: + [1] "Nonrigid multimodality image registration" + D. Mattes, D. R. Haynor, H. Vesselle, T. Lewellen and W. Eubank + Medical Imaging 2001: Image Processing, 2001, pp. 1609-1620. + [2] "PET-CT Image Registration in the Chest Using Free-form Deformations" + D. Mattes, D. R. Haynor, H. Vesselle, T. Lewellen and W. Eubank + IEEE Transactions in Medical Imaging. Vol.22, No.1, + January 2003. pp.120-128. 
+ num_bins: number of bins for intensity sigma_ratio: a hyper param for gaussian function reduction: {``"none"``, ``"mean"``, ``"sum"``} @@ -189,25 +208,99 @@ def __init__( smooth_nr: a small constant added to the numerator to avoid nan. smooth_dr: a small constant added to the denominator to avoid nan. """ - super(GlobalMutualInformationLoss, self).__init__(reduction=LossReduction(reduction).value) + super().__init__(reduction=LossReduction(reduction).value) if num_bins <= 0: raise ValueError("num_bins must > 0, got {num_bins}") bin_centers = torch.linspace(0.0, 1.0, num_bins) # (num_bins,) sigma = torch.mean(bin_centers[1:] - bin_centers[:-1]) * sigma_ratio - self.preterm = 1 / (2 * sigma ** 2) - self.bin_centers = bin_centers[None, None, ...] + self.kernel_type = look_up_option(kernel_type, ["gaussian", "b-spline"]) + self.num_bins = num_bins + self.kernel_type = kernel_type + if self.kernel_type == "gaussian": + self.preterm = 1 / (2 * sigma ** 2) + self.bin_centers = bin_centers[None, None, ...] self.smooth_nr = float(smooth_nr) self.smooth_dr = float(smooth_dr) - def parzen_windowing(self, pred: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + def parzen_windowing( + self, pred: torch.Tensor, target: torch.Tensor + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + if self.kernel_type == "gaussian": + pred_weight, pred_probability = self.parzen_windowing_gaussian(pred) + target_weight, target_probability = self.parzen_windowing_gaussian(target) + elif self.kernel_type == "b-spline": + # a third order BSpline kernel is used for the pred image intensity PDF. + pred_weight, pred_probability = self.parzen_windowing_b_spline(pred, order=3) + # a zero order (box car) BSpline kernel is used for the target image intensity PDF. + target_weight, target_probability = self.parzen_windowing_b_spline(target, order=0) + else: + raise ValueError + return pred_weight, pred_probability, target_weight, target_probability + + def parzen_windowing_b_spline(self, img: torch.Tensor, order: int) -> Tuple[torch.Tensor, torch.Tensor]: """ + Parzen windowing with b-spline kernel (adapted from ITK) + Args: - pred: the shape should be B[NDHW]. + img: the shape should be B[NDHW]. + order: int. + """ + + # Compute binsize for the histograms. + # + # The binsize for the image intensities needs to be adjusted so that + # we can avoid dealing with boundary conditions using the cubic + # spline as the Parzen window. We do this by increasing the size + # of the bins so that the joint histogram becomes "padded" at the + # borders. Because we are changing the binsize, + # we also need to shift the minimum by the padded amount in order to + # avoid minimum values filling in our padded region. + # + # Note that there can still be non-zero bin values in the padded region, + # it's just that these bins will never be a central bin for the Parzen + # window. 
+        _max, _min = torch.max(img), torch.min(img)
+        padding = 2
+        bin_size = (_max - _min) / (self.num_bins - 2 * padding)
+        norm_min = torch.div(_min, bin_size, rounding_mode="floor") - padding
+
+        # assign bin/window index to each voxel
+        window_term = torch.div(img, bin_size) - norm_min  # B[NDHW]
+        # make sure the extreme values are in valid (non-padded) bins
+        window_term = torch.clamp(window_term, padding, self.num_bins - padding - 1)  # B[NDHW]
+        window_term = window_term.reshape(window_term.shape[0], -1, 1)  # (batch, num_sample, 1)
+        bins = torch.arange(self.num_bins, device=window_term.device).reshape(1, 1, -1)  # (1, 1, num_bins)
+        sample_bin_matrix = torch.abs(bins - window_term)  # (batch, num_sample, num_bins)
+
+        # b-spline kernel
+        # (4 - 6 * abs ** 2 + 3 * abs ** 3) / 6 when 0 <= abs < 1
+        # (2 - abs) ** 3 / 6 when 1 <= abs < 2
+        weight = torch.zeros_like(sample_bin_matrix, dtype=torch.float)  # (batch, num_sample, num_bins)
+        if order == 0:
+            weight = weight + (sample_bin_matrix < 0.5) + (sample_bin_matrix == 0.5) * 0.5
+        elif order == 3:
+            weight = (
+                weight + (4 - 6 * sample_bin_matrix ** 2 + 3 * sample_bin_matrix ** 3) * (sample_bin_matrix < 1) / 6
+            )
+            weight = weight + (2 - sample_bin_matrix) ** 3 * (sample_bin_matrix >= 1) * (sample_bin_matrix < 2) / 6
+        else:
+            raise ValueError(f"Unsupported b-spline order {order} for Parzen windowing")
+
+        weight = weight / torch.sum(weight, dim=-1, keepdim=True)  # (batch, num_sample, num_bins)
+        probability = torch.mean(weight, dim=-2, keepdim=True)  # (batch, 1, num_bins)
+        return weight, probability
+
+    def parzen_windowing_gaussian(self, img: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Parzen windowing with gaussian kernel (adapted from DeepReg implementation)
+        Note: the input is expected to range between 0 and 1
+        Args:
+            img: the shape should be B[NDHW].
""" - pred = torch.clamp(pred, 0, 1) - pred = pred.reshape(pred.shape[0], -1, 1) # (batch, num_sample, 1) + img = torch.clamp(img, 0, 1) + img = img.reshape(img.shape[0], -1, 1) # (batch, num_sample, 1) weight = torch.exp( - -self.preterm.to(pred) * (pred - self.bin_centers.to(pred)) ** 2 + -self.preterm.to(img) * (img - self.bin_centers.to(img)) ** 2 ) # (batch, num_sample, num_bin) weight = weight / torch.sum(weight, dim=-1, keepdim=True) # (batch, num_sample, num_bin) probability = torch.mean(weight, dim=-2, keepdim=True) # (batch, 1, num_bin) @@ -223,11 +316,10 @@ def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor: """ if target.shape != pred.shape: raise ValueError(f"ground truth has differing shape ({target.shape}) from pred ({pred.shape})") - wa, pa = self.parzen_windowing(pred) # (batch, num_sample, num_bin), (batch, 1, num_bin) - wb, pb = self.parzen_windowing(target) # (batch, num_sample, num_bin), (batch, 1, num_bin) - pab = torch.bmm(wa.permute(0, 2, 1), wb).div(wa.shape[1]) # (batch, num_bins, num_bins) + wa, pa, wb, pb = self.parzen_windowing(pred, target) # (batch, num_sample, num_bin), (batch, 1, num_bin) - papb = torch.bmm(pa.permute(0, 2, 1), pb) # (batch, num_bins, num_bins) + pab = torch.bmm(wa.permute(0, 2, 1), wb.to(wa)).div(wa.shape[1]) # (batch, num_bins, num_bins) + papb = torch.bmm(pa.permute(0, 2, 1), pb.to(pa)) # (batch, num_bins, num_bins) mi = torch.sum( pab * torch.log((pab + self.smooth_nr) / (papb + self.smooth_dr) + self.smooth_dr), dim=(1, 2) ) # (batch) diff --git a/monai/losses/multi_scale.py b/monai/losses/multi_scale.py index 6f9326420b..182cb2f7a6 100644 --- a/monai/losses/multi_scale.py +++ b/monai/losses/multi_scale.py @@ -21,12 +21,7 @@ def make_gaussian_kernel(sigma: int) -> torch.Tensor: if sigma <= 0: raise ValueError(f"expecting positive sigma, got sigma={sigma}") - return gaussian_1d( - sigma=torch.tensor(sigma), - truncated=3, - approx="sampled", - normalize=False, - ) + return gaussian_1d(sigma=torch.tensor(sigma), truncated=3, approx="sampled", normalize=False) def make_cauchy_kernel(sigma: int) -> torch.Tensor: @@ -39,10 +34,7 @@ def make_cauchy_kernel(sigma: int) -> torch.Tensor: return k -kernel_fn_dict = { - "gaussian": make_gaussian_kernel, - "cauchy": make_cauchy_kernel, -} +kernel_fn_dict = {"gaussian": make_gaussian_kernel, "cauchy": make_cauchy_kernel} class MultiScaleLoss(_Loss): @@ -67,7 +59,7 @@ def __init__( scales: list of scalars or None, if None, do not apply any scaling. kernel: gaussian or cauchy. 
""" - super(MultiScaleLoss, self).__init__(reduction=LossReduction(reduction).value) + super().__init__(reduction=LossReduction(reduction).value) if kernel not in kernel_fn_dict.keys(): raise ValueError(f"got unsupported kernel type: {kernel}", "only support gaussian and cauchy") self.kernel_fn = kernel_fn_dict[kernel] diff --git a/monai/metrics/confusion_matrix.py b/monai/metrics/confusion_matrix.py index 9568cf6028..0a2084c566 100644 --- a/monai/metrics/confusion_matrix.py +++ b/monai/metrics/confusion_matrix.py @@ -99,11 +99,7 @@ def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor): # type: ignor warnings.warn("As for classification task, compute_sample should be False.") self.compute_sample = False - return get_confusion_matrix( - y_pred=y_pred, - y=y, - include_background=self.include_background, - ) + return get_confusion_matrix(y_pred=y_pred, y=y, include_background=self.include_background) def aggregate(self): # type: ignore """ @@ -129,11 +125,7 @@ def aggregate(self): # type: ignore return results -def get_confusion_matrix( - y_pred: torch.Tensor, - y: torch.Tensor, - include_background: bool = True, -): +def get_confusion_matrix(y_pred: torch.Tensor, y: torch.Tensor, include_background: bool = True): """ Compute confusion matrix. A tensor with the shape [BC4] will be returned. Where, the third dimension represents the number of true positive, false positive, true negative and false negative values for @@ -153,10 +145,7 @@ def get_confusion_matrix( """ if not include_background: - y_pred, y = ignore_background( - y_pred=y_pred, - y=y, - ) + y_pred, y = ignore_background(y_pred=y_pred, y=y) y = y.float() y_pred = y_pred.float() diff --git a/monai/metrics/froc.py b/monai/metrics/froc.py index faebbbf7a6..011021f33b 100644 --- a/monai/metrics/froc.py +++ b/monai/metrics/froc.py @@ -96,7 +96,7 @@ def compute_froc_curve_data( num_images: the number of images under evaluation. """ - if type(fp_probs) is not type(tp_probs): + if not isinstance(fp_probs, type(tp_probs)): raise AssertionError("fp and tp probs should have same type.") if isinstance(fp_probs, torch.Tensor): fp_probs = fp_probs.detach().cpu().numpy() @@ -116,9 +116,7 @@ def compute_froc_curve_data( def compute_froc_score( - fps_per_image: np.ndarray, - total_sensitivity: np.ndarray, - eval_thresholds: Tuple = (0.25, 0.5, 1, 2, 4, 8), + fps_per_image: np.ndarray, total_sensitivity: np.ndarray, eval_thresholds: Tuple = (0.25, 0.5, 1, 2, 4, 8) ): """ This function is modified from the official evaluation code of diff --git a/monai/metrics/hausdorff_distance.py b/monai/metrics/hausdorff_distance.py index 12f3b49d32..ed3639e39e 100644 --- a/monai/metrics/hausdorff_distance.py +++ b/monai/metrics/hausdorff_distance.py @@ -141,10 +141,7 @@ def compute_hausdorff_distance( """ if not include_background: - y_pred, y = ignore_background( - y_pred=y_pred, - y=y, - ) + y_pred, y = ignore_background(y_pred=y_pred, y=y) if isinstance(y, torch.Tensor): y = y.float() if isinstance(y_pred, torch.Tensor): @@ -172,10 +169,7 @@ def compute_hausdorff_distance( def compute_percent_hausdorff_distance( - edges_pred: np.ndarray, - edges_gt: np.ndarray, - distance_metric: str = "euclidean", - percentile: Optional[float] = None, + edges_pred: np.ndarray, edges_gt: np.ndarray, distance_metric: str = "euclidean", percentile: Optional[float] = None ): """ This function is used to compute the directed Hausdorff distance. 
diff --git a/monai/metrics/meandice.py b/monai/metrics/meandice.py index 226c106f7e..528407cf3c 100644 --- a/monai/metrics/meandice.py +++ b/monai/metrics/meandice.py @@ -77,11 +77,7 @@ def _compute_tensor(self, y_pred: torch.Tensor, y: torch.Tensor): # type: ignor if dims < 3: raise ValueError("y_pred should have at least three dimensions.") # compute dice (BxC) for each channel for each batch - return compute_meandice( - y_pred=y_pred, - y=y, - include_background=self.include_background, - ) + return compute_meandice(y_pred=y_pred, y=y, include_background=self.include_background) def aggregate(self): # type: ignore """ @@ -97,11 +93,7 @@ def aggregate(self): # type: ignore return (f, not_nans) if self.get_not_nans else f -def compute_meandice( - y_pred: torch.Tensor, - y: torch.Tensor, - include_background: bool = True, -) -> torch.Tensor: +def compute_meandice(y_pred: torch.Tensor, y: torch.Tensor, include_background: bool = True) -> torch.Tensor: """Computes Dice score metric from full size Tensor and collects average. Args: @@ -122,10 +114,7 @@ def compute_meandice( """ if not include_background: - y_pred, y = ignore_background( - y_pred=y_pred, - y=y, - ) + y_pred, y = ignore_background(y_pred=y_pred, y=y) y = y.float() y_pred = y_pred.float() @@ -142,8 +131,4 @@ def compute_meandice( y_pred_o = torch.sum(y_pred, dim=reduce_axis) denominator = y_o + y_pred_o - return torch.where( - y_o > 0, - (2.0 * intersection) / denominator, - torch.tensor(float("nan"), device=y_o.device), - ) + return torch.where(y_o > 0, (2.0 * intersection) / denominator, torch.tensor(float("nan"), device=y_o.device)) diff --git a/monai/metrics/metric.py b/monai/metrics/metric.py index bb4aa7c343..f54bb984f0 100644 --- a/monai/metrics/metric.py +++ b/monai/metrics/metric.py @@ -120,7 +120,7 @@ class Cumulative(ABC): cum.add(a, b) cum.add(c, d) cum.aggregate() - result = cum.get_buffer() + result = cum.get_buffer() # optional cum.reset() """ @@ -197,6 +197,27 @@ class CumulativeIterationMetric(Cumulative, IterationMetric): Typically, it computes some intermediate results for every iteration, cumulates in buffers, then syncs across all the distributed ranks and aggregates for the final result when epoch completed. + For example, `MeanDice` inherits this class and the usage: + + .. code-block:: python + + dice_metric = DiceMetric(include_background=True, reduction="mean") + + for val_data in val_loader: + val_outputs = model(val_data["img"]) + val_outputs = [postprocessing_transform(i) for i in decollate_batch(val_outputs)] + # compute metric for current iteration + dice_metric(y_pred=val_outputs, y=val_data["seg"]) + + # aggregate the final mean dice result + metric = dice_metric.aggregate().item() + + # reset the status for next computation round + dice_metric.reset() + + And to load `predictions` and `labels` from files, then compute metrics with multi-processing, please refer to: + https://github.com/Project-MONAI/tutorials/blob/master/modules/compute_metric.py. 
+ """ def __call__(self, y_pred: TensorOrList, y: Optional[TensorOrList] = None): # type: ignore diff --git a/monai/metrics/regression.py b/monai/metrics/regression.py index a2a2f0853d..4b1acb4920 100644 --- a/monai/metrics/regression.py +++ b/monai/metrics/regression.py @@ -39,9 +39,7 @@ class RegressionMetric(CumulativeIterationMetric): """ def __init__( - self, - reduction: Union[MetricReduction, str] = MetricReduction.MEAN, - get_not_nans: bool = False, + self, reduction: Union[MetricReduction, str] = MetricReduction.MEAN, get_not_nans: bool = False ) -> None: super().__init__() self.reduction = reduction @@ -57,9 +55,7 @@ def aggregate(self): # type: ignore def _check_shape(self, y_pred: torch.Tensor, y: torch.Tensor) -> None: if y_pred.shape != y.shape: - raise ValueError( - "y_pred and y shapes dont match, received y_pred: [{}] and y: [{}]".format(y_pred.shape, y.shape) - ) + raise ValueError(f"y_pred and y shapes dont match, received y_pred: [{y_pred.shape}] and y: [{y.shape}]") # also check if there is atleast one non-batch dimension i.e. num_dims >= 2 if len(y_pred.shape) < 2: @@ -96,9 +92,7 @@ class MSEMetric(RegressionMetric): """ def __init__( - self, - reduction: Union[MetricReduction, str] = MetricReduction.MEAN, - get_not_nans: bool = False, + self, reduction: Union[MetricReduction, str] = MetricReduction.MEAN, get_not_nans: bool = False ) -> None: super().__init__(reduction=reduction, get_not_nans=get_not_nans) self.sq_func = partial(torch.pow, exponent=2.0) @@ -130,9 +124,7 @@ class MAEMetric(RegressionMetric): """ def __init__( - self, - reduction: Union[MetricReduction, str] = MetricReduction.MEAN, - get_not_nans: bool = False, + self, reduction: Union[MetricReduction, str] = MetricReduction.MEAN, get_not_nans: bool = False ) -> None: super().__init__(reduction=reduction, get_not_nans=get_not_nans) self.abs_func = torch.abs @@ -165,9 +157,7 @@ class RMSEMetric(RegressionMetric): """ def __init__( - self, - reduction: Union[MetricReduction, str] = MetricReduction.MEAN, - get_not_nans: bool = False, + self, reduction: Union[MetricReduction, str] = MetricReduction.MEAN, get_not_nans: bool = False ) -> None: super().__init__(reduction=reduction, get_not_nans=get_not_nans) self.sq_func = partial(torch.pow, exponent=2.0) diff --git a/monai/metrics/rocauc.py b/monai/metrics/rocauc.py index c2679cc2ea..4c71fe6374 100644 --- a/monai/metrics/rocauc.py +++ b/monai/metrics/rocauc.py @@ -93,20 +93,18 @@ def _calculate(y_pred: torch.Tensor, y: torch.Tensor) -> float: return auc / (nneg * (n - nneg)) -def compute_roc_auc( - y_pred: torch.Tensor, - y: torch.Tensor, - average: Union[Average, str] = Average.MACRO, -): +def compute_roc_auc(y_pred: torch.Tensor, y: torch.Tensor, average: Union[Average, str] = Average.MACRO): """Computes Area Under the Receiver Operating Characteristic Curve (ROC AUC). Referring to: `sklearn.metrics.roc_auc_score `_. Args: y_pred: input data to compute, typical classification model output. - it must be One-Hot format and first dim is batch, example shape: [16] or [16, 2]. - y: ground truth to compute ROC AUC metric, the first dim is batch. - example shape: [16, 1] will be converted into [16, 2] (where `2` is inferred from `y_pred`). + the first dim must be batch, if multi-classes, it must be in One-Hot format. + for example: shape `[16]` or `[16, 1]` for a binary data, shape `[16, 2]` for 2 classes data. + y: ground truth to compute ROC AUC metric, the first dim must be batch. + if multi-classes, it must be in One-Hot format. 
+ for example: shape `[16]` or `[16, 1]` for a binary data, shape `[16, 2]` for 2 classes data. average: {``"macro"``, ``"weighted"``, ``"micro"``, ``"none"``} Type of averaging performed if not binary classification. Defaults to ``"macro"``. diff --git a/monai/metrics/surface_distance.py b/monai/metrics/surface_distance.py index 6039f1b55e..2c9359ea0c 100644 --- a/monai/metrics/surface_distance.py +++ b/monai/metrics/surface_distance.py @@ -134,10 +134,7 @@ def compute_average_surface_distance( """ if not include_background: - y_pred, y = ignore_background( - y_pred=y_pred, - y=y, - ) + y_pred, y = ignore_background(y_pred=y_pred, y=y) if isinstance(y, torch.Tensor): y = y.float() diff --git a/monai/metrics/utils.py b/monai/metrics/utils.py index 84de834f74..540eda3096 100644 --- a/monai/metrics/utils.py +++ b/monai/metrics/utils.py @@ -25,10 +25,7 @@ __all__ = ["ignore_background", "do_metric_reduction", "get_mask_edges", "get_surface_distance"] -def ignore_background( - y_pred: Union[np.ndarray, torch.Tensor], - y: Union[np.ndarray, torch.Tensor], -): +def ignore_background(y_pred: Union[np.ndarray, torch.Tensor], y: Union[np.ndarray, torch.Tensor]): """ This function is used to remove background (the first channel) for `y_pred` and `y`. Args: @@ -43,10 +40,7 @@ def ignore_background( return y_pred, y -def do_metric_reduction( - f: torch.Tensor, - reduction: Union[MetricReduction, str] = MetricReduction.MEAN, -): +def do_metric_reduction(f: torch.Tensor, reduction: Union[MetricReduction, str] = MetricReduction.MEAN): """ This function is to do the metric reduction for calculated metrics of each example's each class. The function also returns `not_nans`, which counts the number of not nans for the metric. @@ -170,11 +164,7 @@ def get_mask_edges( return (edges_pred, edges_gt) -def get_surface_distance( - seg_pred: np.ndarray, - seg_gt: np.ndarray, - distance_metric: str = "euclidean", -) -> np.ndarray: +def get_surface_distance(seg_pred: np.ndarray, seg_gt: np.ndarray, distance_metric: str = "euclidean") -> np.ndarray: """ This function is used to compute the surface distances from `seg_pred` to `seg_gt`. diff --git a/monai/networks/blocks/activation.py b/monai/networks/blocks/activation.py index a380f8e757..b136eb7f1f 100644 --- a/monai/networks/blocks/activation.py +++ b/monai/networks/blocks/activation.py @@ -48,8 +48,7 @@ class Swish(nn.Module): Shape: - - Input: :math:`(N, *)` where `*` means, any number of additional - dimensions + - Input: :math:`(N, *)` where `*` means, any number of additional dimensions - Output: :math:`(N, *)`, same shape as the input @@ -123,7 +122,7 @@ class MemoryEfficientSwish(nn.Module): """ def __init__(self, inplace: bool = False): - super(MemoryEfficientSwish, self).__init__() + super().__init__() # inplace only works when using torch.nn.functional.silu self.inplace = inplace @@ -143,8 +142,7 @@ class Mish(nn.Module): this class will utilize `torch.nn.functional.mish` to do the calculation if meets the version. 
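As a sanity check of the reworded `compute_roc_auc` docstring above: in the binary case a 1-D score vector and a 1-D label vector are accepted directly. A toy example (scores invented for illustration):

.. code-block:: python

    import torch
    from monai.metrics import compute_roc_auc

    scores = torch.tensor([0.1, 0.4, 0.35, 0.8])
    labels = torch.tensor([0, 0, 1, 1])
    print(compute_roc_auc(scores, labels))  # 0.75 (3 of 4 positive/negative pairs ranked correctly)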
Shape: - - Input: :math:`(N, *)` where `*` means, any number of additional - dimensions + - Input: :math:`(N, *)` where `*` means, any number of additional dimensions - Output: :math:`(N, *)`, same shape as the input @@ -158,7 +156,7 @@ class Mish(nn.Module): """ def __init__(self, inplace: bool = False): - super(Mish, self).__init__() + super().__init__() # inplace only works when using torch.nn.functional.mish self.inplace = inplace diff --git a/monai/networks/blocks/aspp.py b/monai/networks/blocks/aspp.py index f8bf8a5ba6..9796ea8148 100644 --- a/monai/networks/blocks/aspp.py +++ b/monai/networks/blocks/aspp.py @@ -86,7 +86,7 @@ def __init__( out_channels = conv_out_channels * len(pads) # final conv. output channels self.conv_k1 = Convolution( - dimensions=spatial_dims, + spatial_dims=spatial_dims, in_channels=out_channels, out_channels=out_channels, kernel_size=1, diff --git a/monai/networks/blocks/convolutions.py b/monai/networks/blocks/convolutions.py index 39ce60e3f8..e12eb6fc8f 100644 --- a/monai/networks/blocks/convolutions.py +++ b/monai/networks/blocks/convolutions.py @@ -18,6 +18,7 @@ from monai.networks.blocks import ADN from monai.networks.layers.convutils import same_padding, stride_minus_kernel_padding from monai.networks.layers.factories import Conv +from monai.utils.deprecate_utils import deprecated_arg class Convolution(nn.Sequential): @@ -59,7 +60,7 @@ class Convolution(nn.Sequential): ) Args: - dimensions: number of spatial dimensions. + spatial_dims: number of spatial dimensions. in_channels: number of input channels. out_channels: number of output channels. strides: convolution stride. Defaults to 1. @@ -69,13 +70,13 @@ class Convolution(nn.Sequential): act: activation type and arguments. Defaults to PReLU. norm: feature normalization type and arguments. Defaults to instance norm. dropout: dropout ratio. Defaults to no dropout. - dropout_dim: determine the dimensions of dropout. Defaults to 1. + dropout_dim: determine the spatial dimensions of dropout. Defaults to 1. - When dropout_dim = 1, randomly zeroes some of the elements for each channel. - When dropout_dim = 2, Randomly zeroes out entire channels (a channel is a 2D feature map). - When dropout_dim = 3, Randomly zeroes out entire channels (a channel is a 3D feature map). - The value of dropout_dim should be no no larger than the value of `dimensions`. + The value of dropout_dim should be no larger than the value of `spatial_dims`. dilation: dilation rate. Defaults to 1. groups: controls the connections between inputs and outputs. Defaults to 1. bias: whether to have a bias term. Defaults to True. @@ -86,6 +87,9 @@ class Convolution(nn.Sequential): output_padding: controls the additional size added to one side of the output shape. Defaults to None. + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. + See also: :py:class:`monai.networks.layers.Conv` """ + @deprecated_arg( + name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead."
+ ) def __init__( self, - dimensions: int, + spatial_dims: int, in_channels: int, out_channels: int, strides: Union[Sequence[int], int] = 1, @@ -112,15 +119,16 @@ def __init__( is_transposed: bool = False, padding: Optional[Union[Sequence[int], int]] = None, output_padding: Optional[Union[Sequence[int], int]] = None, + dimensions: Optional[int] = None, ) -> None: super().__init__() - self.dimensions = dimensions + self.dimensions = spatial_dims if dimensions is None else dimensions self.in_channels = in_channels self.out_channels = out_channels self.is_transposed = is_transposed if padding is None: padding = same_padding(kernel_size, dilation) - conv_type = Conv[Conv.CONVTRANS if is_transposed else Conv.CONV, dimensions] + conv_type = Conv[Conv.CONVTRANS if is_transposed else Conv.CONV, self.dimensions] conv: nn.Module if is_transposed: @@ -159,7 +167,7 @@ def __init__( in_channels=out_channels, act=act, norm=norm, - norm_dim=dimensions, + norm_dim=self.dimensions, dropout=dropout, dropout_dim=dropout_dim, ), @@ -177,7 +185,7 @@ class ResidualUnit(nn.Module): from monai.networks.blocks import ResidualUnit convs = ResidualUnit( - dimensions=3, + spatial_dims=3, in_channels=1, out_channels=1, adn_ordering="AN", @@ -209,7 +217,7 @@ class ResidualUnit(nn.Module): ) Args: - dimensions: number of spatial dimensions. + spatial_dims: number of spatial dimensions. in_channels: number of input channels. out_channels: number of output channels. strides: convolution stride. Defaults to 1. @@ -234,15 +242,19 @@ class ResidualUnit(nn.Module): padding: controls the amount of implicit zero-paddings on both sides for padding number of points for each dimension. Defaults to None. + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. + See also: :py:class:`monai.networks.blocks.Convolution` """ + @deprecated_arg(name="dimensions", since="0.6", msg_suffix="Please use `spatial_dims` instead.") def __init__( self, - dimensions: int, + spatial_dims: int, in_channels: int, out_channels: int, strides: Union[Sequence[int], int] = 1, @@ -257,9 +269,10 @@ def __init__( bias: bool = True, last_conv_only: bool = False, padding: Optional[Union[Sequence[int], int]] = None, + dimensions: Optional[int] = None, ) -> None: super().__init__() - self.dimensions = dimensions + self.dimensions = spatial_dims if dimensions is None else dimensions self.in_channels = in_channels self.out_channels = out_channels self.conv = nn.Sequential() @@ -273,7 +286,7 @@ def __init__( for su in range(subunits): conv_only = last_conv_only and su == (subunits - 1) unit = Convolution( - dimensions, + self.dimensions, schannels, out_channels, strides=sstrides, @@ -304,7 +317,7 @@ def __init__( rkernel_size = 1 rpadding = 0 - conv_type = Conv[Conv.CONV, dimensions] + conv_type = Conv[Conv.CONV, self.dimensions] self.residual = conv_type(in_channels, out_channels, rkernel_size, strides, rpadding, bias=bias) def forward(self, x: torch.Tensor) -> torch.Tensor: diff --git a/monai/networks/blocks/crf.py b/monai/networks/blocks/crf.py index 49ff5bcd04..21da3bb74f 100644 --- a/monai/networks/blocks/crf.py +++ b/monai/networks/blocks/crf.py @@ -57,7 +57,7 @@ def __init__( compatibility_matrix: a matrix describing class compatibility, should be NxN where N is the number of classes. 
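Stepping back to the `convolutions.py` hunks above: the `deprecated_arg` decorator keeps the old `dimensions` keyword working while steering callers to `spatial_dims`. A sketch of both spellings (assuming the decorator forwards the legacy keyword, as the `spatial_dims if dimensions is None else dimensions` fallback suggests):

.. code-block:: python

    from monai.networks.blocks import Convolution, ResidualUnit

    # preferred spelling after this patch
    conv = Convolution(spatial_dims=2, in_channels=1, out_channels=8, strides=2)
    res = ResidualUnit(spatial_dims=3, in_channels=1, out_channels=4, strides=2)

    # legacy spelling; still accepted during the deprecation window,
    # but expected to emit a deprecation warning via `deprecated_arg`
    conv_legacy = Convolution(dimensions=2, in_channels=1, out_channels=8, strides=2)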
""" - super(CRF, self).__init__() + super().__init__() self.iterations = iterations self.bilateral_weight = bilateral_weight self.gaussian_weight = gaussian_weight diff --git a/monai/networks/blocks/dynunet_block.py b/monai/networks/blocks/dynunet_block.py index bb654d841c..248bc5341c 100644 --- a/monai/networks/blocks/dynunet_block.py +++ b/monai/networks/blocks/dynunet_block.py @@ -33,6 +33,7 @@ class UnetResBlock(nn.Module): kernel_size: convolution kernel size. stride: convolution stride. norm_name: feature normalization type and arguments. + dropout: dropout probability """ @@ -44,31 +45,23 @@ def __init__( kernel_size: Union[Sequence[int], int], stride: Union[Sequence[int], int], norm_name: Union[Tuple, str], + dropout: Optional[Union[Tuple, str, float]] = None, ): - super(UnetResBlock, self).__init__() + super().__init__() self.conv1 = get_conv_layer( spatial_dims, in_channels, out_channels, kernel_size=kernel_size, stride=stride, + dropout=dropout, conv_only=True, ) self.conv2 = get_conv_layer( - spatial_dims, - out_channels, - out_channels, - kernel_size=kernel_size, - stride=1, - conv_only=True, + spatial_dims, out_channels, out_channels, kernel_size=kernel_size, stride=1, dropout=dropout, conv_only=True ) self.conv3 = get_conv_layer( - spatial_dims, - in_channels, - out_channels, - kernel_size=1, - stride=stride, - conv_only=True, + spatial_dims, in_channels, out_channels, kernel_size=1, stride=stride, dropout=dropout, conv_only=True ) self.lrelu = get_act_layer(("leakyrelu", {"inplace": True, "negative_slope": 0.01})) self.norm1 = get_norm_layer(name=norm_name, spatial_dims=spatial_dims, channels=out_channels) @@ -107,6 +100,7 @@ class UnetBasicBlock(nn.Module): kernel_size: convolution kernel size. stride: convolution stride. norm_name: feature normalization type and arguments. + dropout: dropout probability """ @@ -118,23 +112,20 @@ def __init__( kernel_size: Union[Sequence[int], int], stride: Union[Sequence[int], int], norm_name: Union[Tuple, str], + dropout: Optional[Union[Tuple, str, float]] = None, ): - super(UnetBasicBlock, self).__init__() + super().__init__() self.conv1 = get_conv_layer( spatial_dims, in_channels, out_channels, kernel_size=kernel_size, stride=stride, + dropout=dropout, conv_only=True, ) self.conv2 = get_conv_layer( - spatial_dims, - out_channels, - out_channels, - kernel_size=kernel_size, - stride=1, - conv_only=True, + spatial_dims, out_channels, out_channels, kernel_size=kernel_size, stride=1, dropout=dropout, conv_only=True ) self.lrelu = get_act_layer(("leakyrelu", {"inplace": True, "negative_slope": 0.01})) self.norm1 = get_norm_layer(name=norm_name, spatial_dims=spatial_dims, channels=out_channels) @@ -164,6 +155,7 @@ class UnetUpBlock(nn.Module): stride: convolution stride. upsample_kernel_size: convolution kernel size for transposed convolution layers. norm_name: feature normalization type and arguments. 
+ dropout: dropout probability """ @@ -176,8 +168,9 @@ def __init__( stride: Union[Sequence[int], int], upsample_kernel_size: Union[Sequence[int], int], norm_name: Union[Tuple, str], + dropout: Optional[Union[Tuple, str, float]] = None, ): - super(UnetUpBlock, self).__init__() + super().__init__() upsample_stride = upsample_kernel_size self.transp_conv = get_conv_layer( spatial_dims, @@ -185,6 +178,7 @@ def __init__( out_channels, kernel_size=upsample_kernel_size, stride=upsample_stride, + dropout=dropout, conv_only=True, is_transposed=True, ) @@ -194,6 +188,7 @@ def __init__( out_channels, kernel_size=kernel_size, stride=1, + dropout=dropout, norm_name=norm_name, ) @@ -206,10 +201,12 @@ def forward(self, inp, skip): class UnetOutBlock(nn.Module): - def __init__(self, spatial_dims: int, in_channels: int, out_channels: int): - super(UnetOutBlock, self).__init__() + def __init__( + self, spatial_dims: int, in_channels: int, out_channels: int, dropout: Optional[Union[Tuple, str, float]] = None + ): + super().__init__() self.conv = get_conv_layer( - spatial_dims, in_channels, out_channels, kernel_size=1, stride=1, bias=True, conv_only=True + spatial_dims, in_channels, out_channels, kernel_size=1, stride=1, dropout=dropout, bias=True, conv_only=True ) def forward(self, inp): @@ -224,6 +221,7 @@ def get_conv_layer( stride: Union[Sequence[int], int] = 1, act: Optional[Union[Tuple, str]] = Act.PRELU, norm: Union[Tuple, str] = Norm.INSTANCE, + dropout: Optional[Union[Tuple, str, float]] = None, bias: bool = False, conv_only: bool = True, is_transposed: bool = False, @@ -240,6 +238,7 @@ def get_conv_layer( kernel_size=kernel_size, act=act, norm=norm, + dropout=dropout, bias=bias, conv_only=conv_only, is_transposed=is_transposed, @@ -249,8 +248,7 @@ def get_conv_layer( def get_padding( - kernel_size: Union[Sequence[int], int], - stride: Union[Sequence[int], int], + kernel_size: Union[Sequence[int], int], stride: Union[Sequence[int], int] ) -> Union[Tuple[int, ...], int]: kernel_size_np = np.atleast_1d(kernel_size) @@ -264,9 +262,7 @@ def get_padding( def get_output_padding( - kernel_size: Union[Sequence[int], int], - stride: Union[Sequence[int], int], - padding: Union[Sequence[int], int], + kernel_size: Union[Sequence[int], int], stride: Union[Sequence[int], int], padding: Union[Sequence[int], int] ) -> Union[Tuple[int, ...], int]: kernel_size_np = np.atleast_1d(kernel_size) stride_np = np.atleast_1d(stride) diff --git a/monai/networks/blocks/dynunet_block_v1.py b/monai/networks/blocks/dynunet_block_v1.py index d5d9bbf3dc..faf7586be9 100644 --- a/monai/networks/blocks/dynunet_block_v1.py +++ b/monai/networks/blocks/dynunet_block_v1.py @@ -32,6 +32,7 @@ def __init__( kernel_size: Union[Sequence[int], int], stride: Union[Sequence[int], int], norm_name: str, + dropout: float = 0.0, ): nn.Module.__init__(self) self.conv1 = get_conv_layer( @@ -40,23 +41,14 @@ def __init__( out_channels, kernel_size=kernel_size, stride=stride, + dropout=dropout, conv_only=True, ) self.conv2 = get_conv_layer( - spatial_dims, - out_channels, - out_channels, - kernel_size=kernel_size, - stride=1, - conv_only=True, + spatial_dims, out_channels, out_channels, kernel_size=kernel_size, stride=1, dropout=dropout, conv_only=True ) self.conv3 = get_conv_layer( - spatial_dims, - in_channels, - out_channels, - kernel_size=1, - stride=stride, - conv_only=True, + spatial_dims, in_channels, out_channels, kernel_size=1, stride=stride, dropout=dropout, conv_only=True ) self.lrelu = get_act_layer(("leakyrelu", {"inplace": True, 
"negative_slope": 0.01})) self.norm1 = _get_norm_layer(spatial_dims, out_channels, norm_name) @@ -81,6 +73,7 @@ def __init__( kernel_size: Union[Sequence[int], int], stride: Union[Sequence[int], int], norm_name: str, + dropout: float = 0.0, ): nn.Module.__init__(self) self.conv1 = get_conv_layer( @@ -89,15 +82,11 @@ def __init__( out_channels, kernel_size=kernel_size, stride=stride, + dropout=dropout, conv_only=True, ) self.conv2 = get_conv_layer( - spatial_dims, - out_channels, - out_channels, - kernel_size=kernel_size, - stride=1, - conv_only=True, + spatial_dims, out_channels, out_channels, kernel_size=kernel_size, stride=1, dropout=dropout, conv_only=True ) self.lrelu = get_act_layer(("leakyrelu", {"inplace": True, "negative_slope": 0.01})) self.norm1 = _get_norm_layer(spatial_dims, out_channels, norm_name) @@ -118,6 +107,7 @@ def __init__( stride: Union[Sequence[int], int], upsample_kernel_size: Union[Sequence[int], int], norm_name: str, + dropout: float = 0.0, ): nn.Module.__init__(self) upsample_stride = upsample_kernel_size @@ -127,6 +117,7 @@ def __init__( out_channels, kernel_size=upsample_kernel_size, stride=upsample_stride, + dropout=dropout, conv_only=True, is_transposed=True, ) @@ -137,6 +128,7 @@ def __init__( kernel_size=kernel_size, stride=1, norm_name=norm_name, + dropout=dropout, ) diff --git a/monai/networks/blocks/fcn.py b/monai/networks/blocks/fcn.py index d84e506774..09d5d4779e 100644 --- a/monai/networks/blocks/fcn.py +++ b/monai/networks/blocks/fcn.py @@ -36,7 +36,7 @@ def __init__(self, inplanes: int, planes: int, ks: int = 7): planes: number of output channels. ks: kernel size for one dimension. Defaults to 7. """ - super(GCN, self).__init__() + super().__init__() conv2d_type: Type[nn.Conv2d] = Conv[Conv.CONV, 2] self.conv_l1 = conv2d_type(in_channels=inplanes, out_channels=planes, kernel_size=(ks, 1), padding=(ks // 2, 0)) @@ -67,7 +67,7 @@ def __init__(self, planes: int): Args: planes: number of input channels. 
""" - super(Refine, self).__init__() + super().__init__() relu_type: Type[nn.ReLU] = Act[Act.RELU] conv2d_type: Type[nn.Conv2d] = Conv[Conv.CONV, 2] @@ -116,7 +116,7 @@ class FCN(nn.Module): def __init__( self, out_channels: int = 1, upsample_mode: str = "bilinear", pretrained: bool = True, progress: bool = True ): - super(FCN, self).__init__() + super().__init__() conv2d_type: Type[nn.Conv2d] = Conv[Conv.CONV, 2] @@ -154,12 +154,7 @@ def __init__( self.transformer = self.conv2d_type(in_channels=256, out_channels=64, kernel_size=1) if self.upsample_mode == "transpose": - self.up_conv = UpSample( - dimensions=2, - in_channels=self.out_channels, - scale_factor=2, - mode="deconv", - ) + self.up_conv = UpSample(spatial_dims=2, in_channels=self.out_channels, scale_factor=2, mode="deconv") def forward(self, x: torch.Tensor): """ @@ -195,14 +190,7 @@ def forward(self, x: torch.Tensor): fs2 = self.refine7(F.interpolate(fs1, fm2.size()[2:], mode=self.upsample_mode, align_corners=True) + gcfm3) fs3 = self.refine8(F.interpolate(fs2, pool_x.size()[2:], mode=self.upsample_mode, align_corners=True) + gcfm4) fs4 = self.refine9(F.interpolate(fs3, conv_x.size()[2:], mode=self.upsample_mode, align_corners=True) + gcfm5) - return self.refine10( - F.interpolate( - fs4, - org_input.size()[2:], - mode=self.upsample_mode, - align_corners=True, - ) - ) + return self.refine10(F.interpolate(fs4, org_input.size()[2:], mode=self.upsample_mode, align_corners=True)) class MCFCN(FCN): @@ -231,12 +219,12 @@ def __init__( pretrained: bool = True, progress: bool = True, ): - super(MCFCN, self).__init__( + super().__init__( out_channels=out_channels, upsample_mode=upsample_mode, pretrained=pretrained, progress=progress ) self.init_proj = Convolution( - dimensions=2, + spatial_dims=2, in_channels=in_channels, out_channels=3, kernel_size=1, @@ -251,4 +239,4 @@ def forward(self, x: torch.Tensor): x: in shape (batch, in_channels, spatial_1, spatial_2). 
""" x = self.init_proj(x) - return super(MCFCN, self).forward(x) + return super().forward(x) diff --git a/monai/networks/blocks/localnet_block.py b/monai/networks/blocks/localnet_block.py index 3997d42436..d3b81ff494 100644 --- a/monai/networks/blocks/localnet_block.py +++ b/monai/networks/blocks/localnet_block.py @@ -29,7 +29,7 @@ def get_conv_block( norm: Optional[Union[Tuple, str]] = "BATCH", ) -> nn.Module: padding = same_padding(kernel_size) - return Convolution( + mod: nn.Module = Convolution( spatial_dims, in_channels, out_channels, @@ -40,33 +40,22 @@ def get_conv_block( conv_only=False, padding=padding, ) + return mod def get_conv_layer( - spatial_dims: int, - in_channels: int, - out_channels: int, - kernel_size: Union[Sequence[int], int] = 3, + spatial_dims: int, in_channels: int, out_channels: int, kernel_size: Union[Sequence[int], int] = 3 ) -> nn.Module: padding = same_padding(kernel_size) - return Convolution( - spatial_dims, - in_channels, - out_channels, - kernel_size=kernel_size, - bias=False, - conv_only=True, - padding=padding, + mod: nn.Module = Convolution( + spatial_dims, in_channels, out_channels, kernel_size=kernel_size, bias=False, conv_only=True, padding=padding ) + return mod -def get_deconv_block( - spatial_dims: int, - in_channels: int, - out_channels: int, -) -> nn.Module: - return Convolution( - dimensions=spatial_dims, +def get_deconv_block(spatial_dims: int, in_channels: int, out_channels: int) -> nn.Module: + mod: nn.Module = Convolution( + spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels, strides=2, @@ -77,26 +66,20 @@ def get_deconv_block( padding=1, output_padding=1, ) + return mod class ResidualBlock(nn.Module): def __init__( - self, - spatial_dims: int, - in_channels: int, - out_channels: int, - kernel_size: Union[Sequence[int], int], + self, spatial_dims: int, in_channels: int, out_channels: int, kernel_size: Union[Sequence[int], int] ) -> None: - super(ResidualBlock, self).__init__() + super().__init__() if in_channels != out_channels: raise ValueError( f"expecting in_channels == out_channels, " f"got in_channels={in_channels}, out_channels={out_channels}" ) self.conv_block = get_conv_block( - spatial_dims=spatial_dims, - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, + spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size ) self.conv = get_conv_layer( spatial_dims=spatial_dims, in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size @@ -110,22 +93,13 @@ def forward(self, x) -> torch.Tensor: class LocalNetResidualBlock(nn.Module): - def __init__( - self, - spatial_dims: int, - in_channels: int, - out_channels: int, - ) -> None: - super(LocalNetResidualBlock, self).__init__() + def __init__(self, spatial_dims: int, in_channels: int, out_channels: int) -> None: + super().__init__() if in_channels != out_channels: raise ValueError( f"expecting in_channels == out_channels, " f"got in_channels={in_channels}, out_channels={out_channels}" ) - self.conv_layer = get_conv_layer( - spatial_dims=spatial_dims, - in_channels=in_channels, - out_channels=out_channels, - ) + self.conv_layer = get_conv_layer(spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels) self.norm = Norm[Norm.BATCH, spatial_dims](out_channels) self.relu = nn.ReLU() @@ -147,11 +121,7 @@ class LocalNetDownSampleBlock(nn.Module): """ def __init__( - self, - spatial_dims: int, - in_channels: int, - out_channels: int, - kernel_size: 
Union[Sequence[int], int], + self, spatial_dims: int, in_channels: int, out_channels: int, kernel_size: Union[Sequence[int], int] ) -> None: """ Args: @@ -162,16 +132,14 @@ def __init__( Raises: NotImplementedError: when ``kernel_size`` is even """ - super(LocalNetDownSampleBlock, self).__init__() + super().__init__() self.conv_block = get_conv_block( spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size ) self.residual_block = ResidualBlock( spatial_dims=spatial_dims, in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size ) - self.max_pool = Pool[Pool.MAX, spatial_dims]( - kernel_size=2, - ) + self.max_pool = Pool[Pool.MAX, spatial_dims](kernel_size=2) def forward(self, x) -> Tuple[torch.Tensor, torch.Tensor]: """ @@ -208,12 +176,7 @@ class LocalNetUpSampleBlock(nn.Module): DeepReg (https://github.com/DeepRegNet/DeepReg) """ - def __init__( - self, - spatial_dims: int, - in_channels: int, - out_channels: int, - ) -> None: + def __init__(self, spatial_dims: int, in_channels: int, out_channels: int) -> None: """ Args: spatial_dims: number of spatial dimensions. @@ -222,21 +185,13 @@ def __init__( Raises: ValueError: when ``in_channels != 2 * out_channels`` """ - super(LocalNetUpSampleBlock, self).__init__() + super().__init__() self.deconv_block = get_deconv_block( - spatial_dims=spatial_dims, - in_channels=in_channels, - out_channels=out_channels, - ) - self.conv_block = get_conv_block( - spatial_dims=spatial_dims, - in_channels=out_channels, - out_channels=out_channels, + spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels ) + self.conv_block = get_conv_block(spatial_dims=spatial_dims, in_channels=out_channels, out_channels=out_channels) self.residual_block = LocalNetResidualBlock( - spatial_dims=spatial_dims, - in_channels=out_channels, - out_channels=out_channels, + spatial_dims=spatial_dims, in_channels=out_channels, out_channels=out_channels ) if in_channels / out_channels != 2: raise ValueError( @@ -306,7 +261,7 @@ def __init__( act: activation type and arguments. Defaults to ReLU. kernel_initializer: kernel initializer. Defaults to None. """ - super(LocalNetFeatureExtractorBlock, self).__init__() + super().__init__() self.conv_block = get_conv_block( spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels, act=act, norm=None ) diff --git a/monai/networks/blocks/mlp.py b/monai/networks/blocks/mlp.py index 11b5e6fc15..9f6d12594e 100644 --- a/monai/networks/blocks/mlp.py +++ b/monai/networks/blocks/mlp.py @@ -18,12 +18,7 @@ class MLPBlock(nn.Module): An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale " """ - def __init__( - self, - hidden_size: int, - mlp_dim: int, - dropout_rate: float = 0.0, - ) -> None: + def __init__(self, hidden_size: int, mlp_dim: int, dropout_rate: float = 0.0) -> None: """ Args: hidden_size: dimension of hidden layer. 
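The recurring `mod: nn.Module = Convolution(...)` / `return mod` rewrite in `localnet_block.py` (and below in `regunet_block.py`) is a typing aid rather than a behavior change: binding the result to an annotated local lets static checkers verify the declared `nn.Module` return type even though `Convolution` subclasses `nn.Sequential`. A minimal illustration of the pattern:

.. code-block:: python

    import torch.nn as nn
    from monai.networks.blocks import Convolution

    def make_block() -> nn.Module:
        # the annotated local gives mypy an explicit type to check against
        mod: nn.Module = Convolution(spatial_dims=2, in_channels=1, out_channels=1)
        return mod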
diff --git a/monai/networks/blocks/patchembedding.py b/monai/networks/blocks/patchembedding.py index c1fcfa9af7..492e7bf236 100644 --- a/monai/networks/blocks/patchembedding.py +++ b/monai/networks/blocks/patchembedding.py @@ -62,7 +62,7 @@ def __init__( """ - super(PatchEmbeddingBlock, self).__init__() + super().__init__() if not (0 <= dropout_rate <= 1): raise ValueError("dropout_rate should be between 0 and 1.") @@ -94,8 +94,7 @@ def __init__( to_chars = f"b ({' '.join([c[0] for c in chars])}) ({' '.join([c[1] for c in chars])} c)" axes_len = {f"p{i+1}": p for i, p in enumerate(patch_size)} self.patch_embeddings = nn.Sequential( - Rearrange(f"{from_chars} -> {to_chars}", **axes_len), - nn.Linear(self.patch_dim, hidden_size), + Rearrange(f"{from_chars} -> {to_chars}", **axes_len), nn.Linear(self.patch_dim, hidden_size) ) self.position_embeddings = nn.Parameter(torch.zeros(1, self.n_patches, hidden_size)) self.cls_token = nn.Parameter(torch.zeros(1, 1, hidden_size)) diff --git a/monai/networks/blocks/regunet_block.py b/monai/networks/blocks/regunet_block.py index d2cd3518b9..b65f08a443 100644 --- a/monai/networks/blocks/regunet_block.py +++ b/monai/networks/blocks/regunet_block.py @@ -32,7 +32,7 @@ def get_conv_block( ) -> nn.Module: if padding is None: padding = same_padding(kernel_size) - conv_block = Convolution( + conv_block: nn.Module = Convolution( spatial_dims, in_channels, out_channels, @@ -59,21 +59,13 @@ def get_conv_block( def get_conv_layer( - spatial_dims: int, - in_channels: int, - out_channels: int, - kernel_size: Union[Sequence[int], int] = 3, + spatial_dims: int, in_channels: int, out_channels: int, kernel_size: Union[Sequence[int], int] = 3 ) -> nn.Module: padding = same_padding(kernel_size) - return Convolution( - spatial_dims, - in_channels, - out_channels, - kernel_size=kernel_size, - bias=False, - conv_only=True, - padding=padding, + mod: nn.Module = Convolution( + spatial_dims, in_channels, out_channels, kernel_size=kernel_size, bias=False, conv_only=True, padding=padding ) + return mod class RegistrationResidualConvBlock(nn.Module): @@ -83,12 +75,7 @@ class RegistrationResidualConvBlock(nn.Module): """ def __init__( - self, - spatial_dims: int, - in_channels: int, - out_channels: int, - num_layers: int = 2, - kernel_size: int = 3, + self, spatial_dims: int, in_channels: int, out_channels: int, num_layers: int = 2, kernel_size: int = 3 ): """ @@ -99,7 +86,7 @@ def __init__( num_layers: number of layers inside the block kernel_size: kernel_size """ - super(RegistrationResidualConvBlock, self).__init__() + super().__init__() self.num_layers = num_layers self.layers = nn.ModuleList( [ @@ -145,19 +132,14 @@ class RegistrationDownSampleBlock(nn.Module): DeepReg (https://github.com/DeepRegNet/DeepReg) """ - def __init__( - self, - spatial_dims: int, - channels: int, - pooling: bool, - ) -> None: + def __init__(self, spatial_dims: int, channels: int, pooling: bool) -> None: """ Args: spatial_dims: number of spatial dimensions. 
channels: channels pooling: use MaxPool if True, strided conv if False """ - super(RegistrationDownSampleBlock, self).__init__() + super().__init__() if pooling: self.layer = Pool[Pool.MAX, spatial_dims](kernel_size=2) else: @@ -188,13 +170,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return out -def get_deconv_block( - spatial_dims: int, - in_channels: int, - out_channels: int, -) -> nn.Module: - return Convolution( - dimensions=spatial_dims, +def get_deconv_block(spatial_dims: int, in_channels: int, out_channels: int) -> nn.Module: + mod: nn.Module = Convolution( + spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels, strides=2, @@ -205,6 +183,7 @@ def get_deconv_block( padding=1, output_padding=1, ) + return mod class RegistrationExtractionBlock(nn.Module): @@ -233,7 +212,7 @@ def __init__( kernel_initializer: kernel initializer activation: kernel activation function """ - super(RegistrationExtractionBlock, self).__init__() + super().__init__() self.extract_levels = extract_levels self.max_level = max(extract_levels) self.layers = nn.ModuleList( @@ -261,10 +240,7 @@ def forward(self, x: List[torch.Tensor], image_size: List[int]) -> torch.Tensor: Tensor of shape (batch, `out_channels`, size1, size2, size3), where (size1, size2, size3) = ``image_size`` """ feature_list = [ - F.interpolate( - layer(x[self.max_level - level]), - size=image_size, - ) + F.interpolate(layer(x[self.max_level - level]), size=image_size) for layer, level in zip(self.layers, self.extract_levels) ] out: torch.Tensor = torch.mean(torch.stack(feature_list, dim=0), dim=0) diff --git a/monai/networks/blocks/segresnet_block.py b/monai/networks/blocks/segresnet_block.py index d8f6d7b268..aad2278934 100644 --- a/monai/networks/blocks/segresnet_block.py +++ b/monai/networks/blocks/segresnet_block.py @@ -25,13 +25,7 @@ def get_conv_layer( ): return Convolution( - spatial_dims, - in_channels, - out_channels, - strides=stride, - kernel_size=kernel_size, - bias=bias, - conv_only=True, + spatial_dims, in_channels, out_channels, strides=stride, kernel_size=kernel_size, bias=bias, conv_only=True ) @@ -39,7 +33,7 @@ def get_upsample_layer( spatial_dims: int, in_channels: int, upsample_mode: Union[UpsampleMode, str] = "nontrainable", scale_factor: int = 2 ): return UpSample( - dimensions=spatial_dims, + spatial_dims=spatial_dims, in_channels=in_channels, out_channels=in_channels, scale_factor=scale_factor, @@ -56,13 +50,7 @@ class ResBlock(nn.Module): `_. """ - def __init__( - self, - spatial_dims: int, - in_channels: int, - norm: Union[Tuple, str], - kernel_size: int = 3, - ) -> None: + def __init__(self, spatial_dims: int, in_channels: int, norm: Union[Tuple, str], kernel_size: int = 3) -> None: """ Args: spatial_dims: number of spatial dimensions, could be 1, 2 or 3. diff --git a/monai/networks/blocks/selfattention.py b/monai/networks/blocks/selfattention.py index 9dc45cccc8..932475b06c 100644 --- a/monai/networks/blocks/selfattention.py +++ b/monai/networks/blocks/selfattention.py @@ -23,12 +23,7 @@ class SABlock(nn.Module): An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale " """ - def __init__( - self, - hidden_size: int, - num_heads: int, - dropout_rate: float = 0.0, - ) -> None: + def __init__(self, hidden_size: int, num_heads: int, dropout_rate: float = 0.0) -> None: """ Args: hidden_size: dimension of hidden layer. 
@@ -37,7 +32,7 @@ def __init__( """ - super(SABlock, self).__init__() + super().__init__() if not (0 <= dropout_rate <= 1): raise ValueError("dropout_rate should be between 0 and 1.") diff --git a/monai/networks/blocks/squeeze_and_excitation.py b/monai/networks/blocks/squeeze_and_excitation.py index 4db6dc30f7..46cd48d6aa 100644 --- a/monai/networks/blocks/squeeze_and_excitation.py +++ b/monai/networks/blocks/squeeze_and_excitation.py @@ -50,7 +50,7 @@ def __init__( :py:class:`monai.networks.layers.Act` """ - super(ChannelSELayer, self).__init__() + super().__init__() self.add_residual = add_residual @@ -181,21 +181,21 @@ def __init__( :py:class:`monai.networks.blocks.ChannelSELayer` """ - super(SEBlock, self).__init__() + super().__init__() if not conv_param_1: conv_param_1 = {"kernel_size": 1, "norm": Norm.BATCH, "act": ("relu", {"inplace": True})} self.conv1 = Convolution( - dimensions=spatial_dims, in_channels=in_channels, out_channels=n_chns_1, **conv_param_1 + spatial_dims=spatial_dims, in_channels=in_channels, out_channels=n_chns_1, **conv_param_1 ) if not conv_param_2: conv_param_2 = {"kernel_size": 3, "norm": Norm.BATCH, "act": ("relu", {"inplace": True})} - self.conv2 = Convolution(dimensions=spatial_dims, in_channels=n_chns_1, out_channels=n_chns_2, **conv_param_2) + self.conv2 = Convolution(spatial_dims=spatial_dims, in_channels=n_chns_1, out_channels=n_chns_2, **conv_param_2) if not conv_param_3: conv_param_3 = {"kernel_size": 1, "norm": Norm.BATCH, "act": None} - self.conv3 = Convolution(dimensions=spatial_dims, in_channels=n_chns_2, out_channels=n_chns_3, **conv_param_3) + self.conv3 = Convolution(spatial_dims=spatial_dims, in_channels=n_chns_2, out_channels=n_chns_3, **conv_param_3) self.se_layer = ChannelSELayer( spatial_dims=spatial_dims, in_channels=n_chns_3, r=r, acti_type_1=acti_type_1, acti_type_2=acti_type_2 @@ -264,7 +264,7 @@ def __init__( } conv_param_3 = {"strides": 1, "kernel_size": 1, "act": None, "norm": Norm.BATCH, "bias": False} - super(SEBottleneck, self).__init__( + super().__init__( spatial_dims=spatial_dims, in_channels=inplanes, n_chns_1=planes * 2, @@ -315,7 +315,7 @@ def __init__( } conv_param_3 = {"strides": 1, "kernel_size": 1, "act": None, "norm": Norm.BATCH, "bias": False} - super(SEResNetBottleneck, self).__init__( + super().__init__( spatial_dims=spatial_dims, in_channels=inplanes, n_chns_1=planes, diff --git a/monai/networks/blocks/transformerblock.py b/monai/networks/blocks/transformerblock.py index c7a948ed76..5ccc2090b4 100644 --- a/monai/networks/blocks/transformerblock.py +++ b/monai/networks/blocks/transformerblock.py @@ -21,13 +21,7 @@ class TransformerBlock(nn.Module): An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale " """ - def __init__( - self, - hidden_size: int, - mlp_dim: int, - num_heads: int, - dropout_rate: float = 0.0, - ) -> None: + def __init__(self, hidden_size: int, mlp_dim: int, num_heads: int, dropout_rate: float = 0.0) -> None: """ Args: hidden_size: dimension of hidden layer. 
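For the squeeze-and-excitation changes above, only the `Convolution` keyword changes; the blocks themselves remain shape-preserving. A quick check with a toy tensor:

.. code-block:: python

    import torch
    from monai.networks.blocks import ChannelSELayer

    se = ChannelSELayer(spatial_dims=2, in_channels=8, r=2)
    print(se(torch.rand(2, 8, 16, 16)).shape)  # torch.Size([2, 8, 16, 16])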
diff --git a/monai/networks/blocks/unetr_block.py b/monai/networks/blocks/unetr_block.py index a0852d05e0..ccc055e889 100644 --- a/monai/networks/blocks/unetr_block.py +++ b/monai/networks/blocks/unetr_block.py @@ -46,7 +46,7 @@ def __init__( """ - super(UnetrUpBlock, self).__init__() + super().__init__() upsample_stride = upsample_kernel_size self.transp_conv = get_conv_layer( spatial_dims, diff --git a/monai/networks/blocks/upsample.py b/monai/networks/blocks/upsample.py index 5320611ce6..a6aa13dde4 100644 --- a/monai/networks/blocks/upsample.py +++ b/monai/networks/blocks/upsample.py @@ -16,7 +16,7 @@ from monai.networks.layers.factories import Conv, Pad, Pool from monai.networks.utils import icnr_init, pixelshuffle -from monai.utils import InterpolateMode, UpsampleMode, ensure_tuple_rep, look_up_option +from monai.utils import InterpolateMode, UpsampleMode, deprecated_arg, ensure_tuple_rep, look_up_option __all__ = ["Upsample", "UpSample", "SubpixelUpsample", "Subpixelupsample", "SubpixelUpSample"] @@ -34,9 +34,12 @@ class UpSample(nn.Sequential): (often used to map the number of features from `in_channels` to `out_channels`). """ + @deprecated_arg( + name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead." + ) def __init__( self, - dimensions: int, + spatial_dims: int, in_channels: Optional[int] = None, out_channels: Optional[int] = None, scale_factor: Union[Sequence[float], float] = 2, @@ -47,10 +50,11 @@ def __init__( align_corners: Optional[bool] = True, bias: bool = True, apply_pad_pool: bool = True, + dimensions: Optional[int] = None, ) -> None: """ Args: - dimensions: number of spatial dimensions of the input image. + spatial_dims: number of spatial dimensions of the input image. in_channels: number of channels of the input image. out_channels: number of channels of the output image. Defaults to `in_channels`. scale_factor: multiplier for spatial size. Has to match input size if it is a tuple. Defaults to 2. @@ -75,16 +79,21 @@ def __init__( apply_pad_pool: if True the upsampled tensor is padded then average pooling is applied with a kernel the size of `scale_factor` with a stride of 1. See also: :py:class:`monai.networks.blocks.SubpixelUpsample`. Only used in the "pixelshuffle" mode. + + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. 
""" super().__init__() - scale_factor_ = ensure_tuple_rep(scale_factor, dimensions) + if dimensions is not None: + spatial_dims = dimensions + scale_factor_ = ensure_tuple_rep(scale_factor, spatial_dims) up_mode = look_up_option(mode, UpsampleMode) if up_mode == UpsampleMode.DECONV: if not in_channels: raise ValueError(f"in_channels needs to be specified in the '{mode}' mode.") self.add_module( "deconv", - Conv[Conv.CONVTRANS, dimensions]( + Conv[Conv.CONVTRANS, spatial_dims]( in_channels=in_channels, out_channels=out_channels or in_channels, kernel_size=scale_factor_, @@ -98,7 +107,7 @@ def __init__( raise ValueError(f"in_channels needs to be specified in the '{mode}' mode.") self.add_module( "preconv", - Conv[Conv.CONV, dimensions]( + Conv[Conv.CONV, spatial_dims]( in_channels=in_channels, out_channels=out_channels or in_channels, kernel_size=1, bias=bias ), ) @@ -112,7 +121,7 @@ def __init__( interp_mode = InterpolateMode(interp_mode) linear_mode = [InterpolateMode.LINEAR, InterpolateMode.BILINEAR, InterpolateMode.TRILINEAR] if interp_mode in linear_mode: # choose mode based on dimensions - interp_mode = linear_mode[dimensions - 1] + interp_mode = linear_mode[spatial_dims - 1] self.add_module( "upsample_non_trainable", nn.Upsample( @@ -126,7 +135,7 @@ def __init__( self.add_module( "pixelshuffle", SubpixelUpsample( - dimensions=dimensions, + spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels, scale_factor=scale_factor_[0], # isotropic @@ -164,19 +173,23 @@ class SubpixelUpsample(nn.Module): """ + @deprecated_arg( + name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead." + ) def __init__( self, - dimensions: int, + spatial_dims: int, in_channels: Optional[int], out_channels: Optional[int] = None, scale_factor: int = 2, conv_block: Optional[Union[nn.Module, str]] = "default", apply_pad_pool: bool = True, bias: bool = True, + dimensions: Optional[int] = None, ) -> None: """ Args: - dimensions: number of spatial dimensions of the input image. + spatial_dims: number of spatial dimensions of the input image. in_channels: number of channels of the input image. out_channels: optional number of channels of the output image. scale_factor: multiplier for spatial size. Defaults to 2. @@ -190,21 +203,24 @@ def __init__( size of `scale_factor` with a stride of 1. This implements the nearest neighbour resize convolution component of subpixel convolutions described in Aitken et al. bias: whether to have a bias term in the default conv_block. Defaults to True. + + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. 
""" super().__init__() if scale_factor <= 0: raise ValueError(f"The `scale_factor` multiplier must be an integer greater than 0, got {scale_factor}.") - self.dimensions = dimensions + self.dimensions = spatial_dims if dimensions is None else dimensions self.scale_factor = scale_factor if conv_block == "default": out_channels = out_channels or in_channels if not out_channels: raise ValueError("in_channels need to be specified.") - conv_out_channels = out_channels * (scale_factor ** dimensions) - self.conv_block = Conv[Conv.CONV, dimensions]( + conv_out_channels = out_channels * (scale_factor ** self.dimensions) + self.conv_block = Conv[Conv.CONV, self.dimensions]( in_channels=in_channels, out_channels=conv_out_channels, kernel_size=3, stride=1, padding=1, bias=bias ) diff --git a/monai/networks/blocks/warp.py b/monai/networks/blocks/warp.py index d916c026ff..e9cc908464 100644 --- a/monai/networks/blocks/warp.py +++ b/monai/networks/blocks/warp.py @@ -30,11 +30,7 @@ class Warp(nn.Module): Warp an image with given dense displacement field (DDF). """ - def __init__( - self, - mode=GridSampleMode.BILINEAR.value, - padding_mode=GridSamplePadMode.BORDER.value, - ): + def __init__(self, mode=GridSampleMode.BILINEAR.value, padding_mode=GridSamplePadMode.BORDER.value): """ For pytorch native APIs, the possible values are: @@ -50,7 +46,7 @@ def __init__( See also: :py:class:`monai.networks.layers.grid_pull` """ - super(Warp, self).__init__() + super().__init__() # resolves _interp_mode for different methods if USE_COMPILED: @@ -123,13 +119,7 @@ def forward(self, image: torch.Tensor, ddf: torch.Tensor): ) # using csrc resampling - return grid_pull( - image, - grid, - bound=self._padding_mode, - extrapolate=True, - interpolation=self._interp_mode, - ) + return grid_pull(image, grid, bound=self._padding_mode, extrapolate=True, interpolation=self._interp_mode) class DVF2DDF(nn.Module): @@ -143,12 +133,9 @@ class DVF2DDF(nn.Module): """ def __init__( - self, - num_steps: int = 7, - mode=GridSampleMode.BILINEAR.value, - padding_mode=GridSamplePadMode.ZEROS.value, + self, num_steps: int = 7, mode=GridSampleMode.BILINEAR.value, padding_mode=GridSamplePadMode.ZEROS.value ): - super(DVF2DDF, self).__init__() + super().__init__() if num_steps <= 0: raise ValueError(f"expecting positive num_steps, got {num_steps}") self.num_steps = num_steps diff --git a/monai/networks/layers/convutils.py b/monai/networks/layers/convutils.py index 994ca05b85..9aa11fa7d0 100644 --- a/monai/networks/layers/convutils.py +++ b/monai/networks/layers/convutils.py @@ -44,8 +44,7 @@ def same_padding( def stride_minus_kernel_padding( - kernel_size: Union[Sequence[int], int], - stride: Union[Sequence[int], int], + kernel_size: Union[Sequence[int], int], stride: Union[Sequence[int], int] ) -> Union[Tuple[int, ...], int]: kernel_size_np = np.atleast_1d(kernel_size) stride_np = np.atleast_1d(stride) diff --git a/monai/networks/layers/simplelayers.py b/monai/networks/layers/simplelayers.py index 52f19aab29..0f9f78b4be 100644 --- a/monai/networks/layers/simplelayers.py +++ b/monai/networks/layers/simplelayers.py @@ -10,6 +10,7 @@ # limitations under the License. 
import math +from copy import deepcopy from typing import List, Sequence, Union import torch @@ -24,10 +25,10 @@ ChannelMatching, InvalidPyTorchVersionError, SkipMode, - ensure_tuple_rep, look_up_option, optional_import, ) +from monai.utils.misc import issequenceiterable _C, _ = optional_import("monai._C") if not PT_BEFORE_1_7: @@ -393,13 +394,18 @@ def __init__( (for example `parameters()` iterator could be used to get the parameters); otherwise this module will fix the kernels using `sigma` as the std. """ + if issequenceiterable(sigma): + if len(sigma) != spatial_dims: # type: ignore + raise ValueError + else: + sigma = [deepcopy(sigma) for _ in range(spatial_dims)] # type: ignore super().__init__() self.sigma = [ torch.nn.Parameter( torch.as_tensor(s, dtype=torch.float, device=s.device if isinstance(s, torch.Tensor) else None), requires_grad=requires_grad, ) - for s in ensure_tuple_rep(sigma, int(spatial_dims)) + for s in sigma # type: ignore ] self.truncated = truncated self.approx = approx @@ -449,7 +455,7 @@ class LLTM(nn.Module): """ def __init__(self, input_features: int, state_size: int): - super(LLTM, self).__init__() + super().__init__() self.input_features = input_features self.state_size = state_size self.weights = nn.Parameter(torch.empty(3 * state_size, input_features + state_size)) diff --git a/monai/networks/layers/spatial_transforms.py b/monai/networks/layers/spatial_transforms.py index 511c24fcb0..6b5acb166a 100644 --- a/monai/networks/layers/spatial_transforms.py +++ b/monai/networks/layers/spatial_transforms.py @@ -46,7 +46,9 @@ def backward(ctx, grad): return None, grads[0], None, None, None -def grid_pull(input: torch.Tensor, grid: torch.Tensor, interpolation="linear", bound="zero", extrapolate: bool = True): +def grid_pull( + input: torch.Tensor, grid: torch.Tensor, interpolation="linear", bound="zero", extrapolate: bool = True +) -> torch.Tensor: """ Sample an image with respect to a deformation field. @@ -112,8 +114,9 @@ def grid_pull(input: torch.Tensor, grid: torch.Tensor, interpolation="linear", b _C.InterpolationType.__members__[i] if isinstance(i, str) else _C.InterpolationType(i) for i in ensure_tuple(interpolation) ] - - return _GridPull.apply(input, grid, interpolation, bound, extrapolate) + out: torch.Tensor + out = _GridPull.apply(input, grid, interpolation, bound, extrapolate) # type: ignore + return out class _GridPush(torch.autograd.Function): diff --git a/monai/networks/nets/__init__.py b/monai/networks/nets/__init__.py index ad1ca2418b..3b8d1dd6ec 100644 --- a/monai/networks/nets/__init__.py +++ b/monai/networks/nets/__init__.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
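The `simplelayers.py` change above replaces `ensure_tuple_rep` with explicit per-dimension handling, so a sequence `sigma` must now match `spatial_dims` exactly (a mismatch raises `ValueError`), while a scalar is copied per dimension. A toy check:

.. code-block:: python

    import torch
    from monai.networks.layers import GaussianFilter

    gf = GaussianFilter(spatial_dims=2, sigma=(1.0, 2.0))  # one sigma per spatial dimension
    print(gf(torch.rand(1, 1, 16, 16)).shape)  # torch.Size([1, 1, 16, 16])

    # GaussianFilter(spatial_dims=2, sigma=(1.0, 2.0, 3.0))  # would raise ValueError per this patch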
-from .ahnet import AHnet, Ahnet, AHNet, ahnet +from .ahnet import AHnet, Ahnet, AHNet from .autoencoder import AutoEncoder from .basic_unet import BasicUNet, BasicUnet, Basicunet, basicunet from .classifier import Classifier, Critic, Discriminator @@ -24,13 +24,12 @@ Densenet201, DenseNet264, Densenet264, - densenet, densenet121, densenet169, densenet201, densenet264, ) -from .dynunet import DynUNet, DynUnet, Dynunet, dynunet +from .dynunet import DynUNet, DynUnet, Dynunet from .efficientnet import ( BlockArgs, EfficientNet, @@ -71,7 +70,6 @@ SEResNeXt101, SEresnext101, Seresnext101, - senet, senet154, seresnet50, seresnet101, @@ -80,7 +78,8 @@ seresnext101, ) from .torchvision_fc import TorchVisionFCModel, TorchVisionFullyConvModel -from .unet import UNet, Unet, unet +from .transchex import BertAttention, BertMixedLayer, BertOutput, BertPreTrainedModel, MultiModal, Pooler, Transchex +from .unet import UNet, Unet from .unetr import UNETR from .varautoencoder import VarAutoEncoder from .vit import ViT diff --git a/monai/networks/nets/ahnet.py b/monai/networks/nets/ahnet.py index 5ca6813efe..21e3c33bf3 100644 --- a/monai/networks/nets/ahnet.py +++ b/monai/networks/nets/ahnet.py @@ -19,7 +19,7 @@ from monai.networks.blocks.fcn import FCN from monai.networks.layers.factories import Act, Conv, Norm, Pool -__all__ = ["AHnet", "Ahnet", "ahnet", "AHNet"] +__all__ = ["AHnet", "Ahnet", "AHNet"] class Bottleneck3x3x1(nn.Module): @@ -35,7 +35,7 @@ def __init__( downsample: Optional[nn.Sequential] = None, ) -> None: - super(Bottleneck3x3x1, self).__init__() + super().__init__() conv_type = Conv[Conv.CONV, spatial_dims] norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims] @@ -87,7 +87,7 @@ def forward(self, x): class Projection(nn.Sequential): def __init__(self, spatial_dims: int, num_input_features: int, num_output_features: int): - super(Projection, self).__init__() + super().__init__() conv_type = Conv[Conv.CONV, spatial_dims] norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims] @@ -108,7 +108,7 @@ def __init__( growth_rate: int, dropout_prob: float, ): - super(DenseBlock, self).__init__() + super().__init__() for i in range(num_layers): layer = Pseudo3DLayer( spatial_dims, num_input_features + i * growth_rate, growth_rate, bn_size, dropout_prob @@ -120,7 +120,7 @@ class UpTransition(nn.Sequential): def __init__( self, spatial_dims: int, num_input_features: int, num_output_features: int, upsample_mode: str = "transpose" ): - super(UpTransition, self).__init__() + super().__init__() conv_type = Conv[Conv.CONV, spatial_dims] norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims] @@ -145,7 +145,7 @@ class Final(nn.Sequential): def __init__( self, spatial_dims: int, num_input_features: int, num_output_features: int, upsample_mode: str = "transpose" ): - super(Final, self).__init__() + super().__init__() conv_type = Conv[Conv.CONV, spatial_dims] norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims] @@ -178,7 +178,7 @@ def __init__( class Pseudo3DLayer(nn.Module): def __init__(self, spatial_dims: int, num_input_features: int, growth_rate: int, bn_size: int, dropout_prob: float): - super(Pseudo3DLayer, self).__init__() + super().__init__() # 1x1x1 conv_type = Conv[Conv.CONV, spatial_dims] @@ -244,7 +244,7 @@ def forward(self, x): class PSP(nn.Module): def __init__(self, spatial_dims: int, psp_block_num: int, in_ch: int, upsample_mode: str = "transpose"): - super(PSP, 
self).__init__() + super().__init__() self.up_modules = nn.ModuleList() conv_type = Conv[Conv.CONV, spatial_dims] pool_type: Type[Union[nn.MaxPool2d, nn.MaxPool3d]] = Pool[Pool.MAX, spatial_dims] @@ -256,13 +256,7 @@ def __init__(self, spatial_dims: int, psp_block_num: int, in_ch: int, upsample_m size = (2 ** (i + 3), 2 ** (i + 3), 1)[-spatial_dims:] self.pool_modules.append(pool_type(kernel_size=size, stride=size)) self.project_modules.append( - conv_type( - in_ch, - 1, - kernel_size=(1, 1, 1)[-spatial_dims:], - stride=1, - padding=(1, 1, 0)[-spatial_dims:], - ) + conv_type(in_ch, 1, kernel_size=(1, 1, 1)[-spatial_dims:], stride=1, padding=(1, 1, 0)[-spatial_dims:]) ) self.spatial_dims = spatial_dims @@ -274,15 +268,7 @@ def __init__(self, spatial_dims: int, psp_block_num: int, in_ch: int, upsample_m for i in range(psp_block_num): size = (2 ** (i + 3), 2 ** (i + 3), 1)[-spatial_dims:] pad_size = (2 ** (i + 3), 2 ** (i + 3), 0)[-spatial_dims:] - self.up_modules.append( - conv_trans_type( - 1, - 1, - kernel_size=size, - stride=size, - padding=pad_size, - ) - ) + self.up_modules.append(conv_trans_type(1, 1, kernel_size=size, stride=size, padding=pad_size)) def forward(self, x): outputs = [] @@ -356,7 +342,7 @@ def __init__( progress: bool = True, ): self.inplanes = 64 - super(AHNet, self).__init__() + super().__init__() conv_type = Conv[Conv.CONV, spatial_dims] conv_trans_type = Conv[Conv.CONVTRANS, spatial_dims] @@ -451,13 +437,7 @@ def __init__( net2d = FCN(pretrained=True, progress=progress) self.copy_from(net2d) - def _make_layer( - self, - block: Type[Bottleneck3x3x1], - planes: int, - blocks: int, - stride: int = 1, - ) -> nn.Sequential: + def _make_layer(self, block: Type[Bottleneck3x3x1], planes: int, blocks: int, stride: int = 1) -> nn.Sequential: downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = nn.Sequential( @@ -559,4 +539,4 @@ def copy_bn_param(module2d, module3d): p3d.data[:] = p2d.data[:] # Two parameter gamma and beta -AHnet = Ahnet = ahnet = AHNet +AHnet = Ahnet = AHNet diff --git a/monai/networks/nets/autoencoder.py b/monai/networks/nets/autoencoder.py index d0a54b8148..f4a0451dc7 100644 --- a/monai/networks/nets/autoencoder.py +++ b/monai/networks/nets/autoencoder.py @@ -16,14 +16,84 @@ from monai.networks.blocks import Convolution, ResidualUnit from monai.networks.layers.factories import Act, Norm +from monai.utils import deprecated_arg __all__ = ["AutoEncoder"] class AutoEncoder(nn.Module): + """ + Simple definition of an autoencoder and base class for the architecture implementing + :py:class:`monai.networks.nets.VarAutoEncoder`. The network is composed of an encode sequence of blocks, followed + by an intermediary sequence of blocks, and finally a decode sequence of blocks. The encode and decode blocks are + default :py:class:`monai.networks.blocks.Convolution` instances with the encode blocks having the given stride + and the decode blocks having transpose convolutions with the same stride. If `num_res_units` is given residual + blocks are used instead. + + By default the intermediary sequence is empty but if `inter_channels` is given to specify the output channels of + blocks then this will be become a sequence of Convolution blocks or of residual blocks if `num_inter_units` is + given. The optional parameter `inter_dilations` can be used to specify the dilation values of the convolutions in + these blocks, this allows a network to use dilated kernels in this middle section. 
Since the intermediary section + isn't meant to change the size of the output, the strides for all these kernels are 1. + + Args: + spatial_dims: number of spatial dimensions. + in_channels: number of input channels. + out_channels: number of output channels. + channels: sequence of channels. Top block first. The length of `channels` should be no less than 2. + strides: sequence of convolution strides. The length of `strides` should be equal to `len(channels) - 1`. + kernel_size: convolution kernel size, the value(s) should be odd. If sequence, + its length should be equal to `spatial_dims`. Defaults to 3. + up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence, + its length should be equal to `spatial_dims`. Defaults to 3. + num_res_units: number of residual units. Defaults to 0. + inter_channels: sequence of channels defining the blocks in the intermediate layer between encode and decode. + inter_dilations: defines the dilation value for each block of the intermediate layer. Defaults to 1. + num_inter_units: number of residual units for each block of the intermediate layer. Defaults to 0. + act: activation type and arguments. Defaults to PReLU. + norm: feature normalization type and arguments. Defaults to instance norm. + dropout: dropout ratio. Defaults to no dropout. + bias: whether to have a bias term in convolution blocks. Defaults to True. + According to `Performance Tuning Guide <https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html>`_, + if a conv layer is directly followed by a batch norm layer, bias should be False. + + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. + + Examples:: + + from monai.networks.nets import AutoEncoder + + # 3 layers each down/up sampling their inputs by a factor 2 with no intermediate layer + net = AutoEncoder( + spatial_dims=2, + in_channels=1, + out_channels=1, + channels=(2, 4, 8), + strides=(2, 2, 2) + ) + + # 1 layer downsampling by 2, followed by a sequence of residual units with 2 convolutions defined by + # progressively increasing dilations, then final upsample layer + net = AutoEncoder( + spatial_dims=2, + in_channels=1, + out_channels=1, + channels=(4,), + strides=(2,), + inter_channels=(8, 8, 8), + inter_dilations=(1, 2, 4), + num_inter_units=2 + ) + + """ + + @deprecated_arg( + name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead." + ) def __init__( self, - dimensions: int, + spatial_dims: int, in_channels: int, out_channels: int, channels: Sequence[int], @@ -38,10 +108,11 @@ def __init__( norm: Union[Tuple, str] = Norm.INSTANCE, dropout: Optional[Union[Tuple, str, float]] = None, bias: bool = True, + dimensions: Optional[int] = None, ) -> None: super().__init__() - self.dimensions = dimensions + self.dimensions = spatial_dims if dimensions is None else dimensions self.in_channels = in_channels self.out_channels = out_channels self.channels = list(channels) @@ -71,6 +142,9 @@ def __init__( def _get_encode_module( self, in_channels: int, channels: Sequence[int], strides: Sequence[int] ) -> Tuple[nn.Sequential, int]: + """ + Returns the encode part of the network by building up a sequence of layers returned by `_get_encode_layer`. + """ encode = nn.Sequential() layer_channels = in_channels @@ -82,6 +156,10 @@ def _get_encode_module( return encode, layer_channels def _get_intermediate_module(self, in_channels: int, num_inter_units: int) -> Tuple[nn.Module, int]: + """ + Returns the intermediate block of the network which accepts input from the encoder and whose output goes + to the decoder.
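For illustration, both call styles stay valid during the deprecation window introduced above; a minimal sketch (layer sizes are arbitrary):

    from monai.networks.nets import AutoEncoder

    # preferred: the new keyword name
    net = AutoEncoder(spatial_dims=2, in_channels=1, out_channels=1, channels=(4, 8), strides=(2, 2))
    # legacy: still accepted via the `deprecated_arg` decorator, with a deprecation warning
    net_old = AutoEncoder(dimensions=2, in_channels=1, out_channels=1, channels=(4, 8), strides=(2, 2))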
+ """ # Define some types intermediate: nn.Module unit: nn.Module @@ -95,7 +173,7 @@ def _get_intermediate_module(self, in_channels: int, num_inter_units: int) -> Tu for i, (dc, di) in enumerate(zip(self.inter_channels, self.inter_dilations)): if self.num_inter_units > 0: unit = ResidualUnit( - dimensions=self.dimensions, + spatial_dims=self.dimensions, in_channels=layer_channels, out_channels=dc, strides=1, @@ -109,7 +187,7 @@ def _get_intermediate_module(self, in_channels: int, num_inter_units: int) -> Tu ) else: unit = Convolution( - dimensions=self.dimensions, + spatial_dims=self.dimensions, in_channels=layer_channels, out_channels=dc, strides=1, @@ -129,6 +207,9 @@ def _get_intermediate_module(self, in_channels: int, num_inter_units: int) -> Tu def _get_decode_module( self, in_channels: int, channels: Sequence[int], strides: Sequence[int] ) -> Tuple[nn.Sequential, int]: + """ + Returns the decode part of the network by building up a sequence of layers returned by `_get_decode_layer`. + """ decode = nn.Sequential() layer_channels = in_channels @@ -140,10 +221,13 @@ def _get_decode_module( return decode, layer_channels def _get_encode_layer(self, in_channels: int, out_channels: int, strides: int, is_last: bool) -> nn.Module: - + """ + Returns a single layer of the encoder part of the network. + """ + mod: nn.Module if self.num_res_units > 0: - return ResidualUnit( - dimensions=self.dimensions, + mod = ResidualUnit( + spatial_dims=self.dimensions, in_channels=in_channels, out_channels=out_channels, strides=strides, @@ -155,8 +239,8 @@ def _get_encode_layer(self, in_channels: int, out_channels: int, strides: int, i bias=self.bias, last_conv_only=is_last, ) - return Convolution( - dimensions=self.dimensions, + mod = Convolution( + spatial_dims=self.dimensions, in_channels=in_channels, out_channels=out_channels, strides=strides, @@ -167,13 +251,16 @@ def _get_encode_layer(self, in_channels: int, out_channels: int, strides: int, i bias=self.bias, conv_only=is_last, ) + return mod def _get_decode_layer(self, in_channels: int, out_channels: int, strides: int, is_last: bool) -> nn.Sequential: - + """ + Returns a single layer of the decoder part of the network. 
+ """ decode = nn.Sequential() conv = Convolution( - dimensions=self.dimensions, + spatial_dims=self.dimensions, in_channels=in_channels, out_channels=out_channels, strides=strides, @@ -190,7 +277,7 @@ def _get_decode_layer(self, in_channels: int, out_channels: int, strides: int, i if self.num_res_units > 0: ru = ResidualUnit( - dimensions=self.dimensions, + spatial_dims=self.dimensions, in_channels=out_channels, out_channels=out_channels, strides=1, diff --git a/monai/networks/nets/basic_unet.py b/monai/networks/nets/basic_unet.py index 63205f45ee..f96b299d2b 100644 --- a/monai/networks/nets/basic_unet.py +++ b/monai/networks/nets/basic_unet.py @@ -16,7 +16,7 @@ from monai.networks.blocks import Convolution, UpSample from monai.networks.layers.factories import Conv, Pool -from monai.utils import ensure_tuple_rep +from monai.utils import deprecated_arg, ensure_tuple_rep __all__ = ["BasicUNet", "BasicUnet", "Basicunet"] @@ -24,19 +24,21 @@ class TwoConv(nn.Sequential): """two convolutions.""" + @deprecated_arg(name="dim", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead.") def __init__( self, - dim: int, + spatial_dims: int, in_chns: int, out_chns: int, act: Union[str, tuple], norm: Union[str, tuple], bias: bool, dropout: Union[float, tuple] = 0.0, + dim: Optional[int] = None, ): """ Args: - dim: number of spatial dimensions. + spatial_dims: number of spatial dimensions. in_chns: number of input channels. out_chns: number of output channels. act: activation type and arguments. @@ -44,11 +46,17 @@ def __init__( bias: whether to have a bias term in convolution blocks. dropout: dropout ratio. Defaults to no dropout. + .. deprecated:: 0.6.0 + ``dim`` is deprecated, use ``spatial_dims`` instead. """ super().__init__() - conv_0 = Convolution(dim, in_chns, out_chns, act=act, norm=norm, dropout=dropout, bias=bias, padding=1) - conv_1 = Convolution(dim, out_chns, out_chns, act=act, norm=norm, dropout=dropout, bias=bias, padding=1) + if dim is not None: + spatial_dims = dim + conv_0 = Convolution(spatial_dims, in_chns, out_chns, act=act, norm=norm, dropout=dropout, bias=bias, padding=1) + conv_1 = Convolution( + spatial_dims, out_chns, out_chns, act=act, norm=norm, dropout=dropout, bias=bias, padding=1 + ) self.add_module("conv_0", conv_0) self.add_module("conv_1", conv_1) @@ -56,19 +64,21 @@ def __init__( class Down(nn.Sequential): """maxpooling downsampling and two convolutions.""" + @deprecated_arg(name="dim", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead.") def __init__( self, - dim: int, + spatial_dims: int, in_chns: int, out_chns: int, act: Union[str, tuple], norm: Union[str, tuple], bias: bool, dropout: Union[float, tuple] = 0.0, + dim: Optional[int] = None, ): """ Args: - dim: number of spatial dimensions. + spatial_dims: number of spatial dimensions. in_chns: number of input channels. out_chns: number of output channels. act: activation type and arguments. @@ -76,11 +86,14 @@ def __init__( bias: whether to have a bias term in convolution blocks. dropout: dropout ratio. Defaults to no dropout. + .. deprecated:: 0.6.0 + ``dim`` is deprecated, use ``spatial_dims`` instead. 
""" super().__init__() - - max_pooling = Pool["MAX", dim](kernel_size=2) - convs = TwoConv(dim, in_chns, out_chns, act, norm, bias, dropout) + if dim is not None: + spatial_dims = dim + max_pooling = Pool["MAX", spatial_dims](kernel_size=2) + convs = TwoConv(spatial_dims, in_chns, out_chns, act, norm, bias, dropout) self.add_module("max_pooling", max_pooling) self.add_module("convs", convs) @@ -88,9 +101,10 @@ def __init__( class UpCat(nn.Module): """upsampling, concatenation with the encoder feature map, two convolutions""" + @deprecated_arg(name="dim", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead.") def __init__( self, - dim: int, + spatial_dims: int, in_chns: int, cat_chns: int, out_chns: int, @@ -103,10 +117,11 @@ def __init__( interp_mode: str = "linear", align_corners: Optional[bool] = True, halves: bool = True, + dim: Optional[int] = None, ): """ Args: - dim: number of spatial dimensions. + spatial_dims: number of spatial dimensions. in_chns: number of input channels to be upsampled. cat_chns: number of channels from the decoder. out_chns: number of output channels. @@ -124,14 +139,19 @@ def __init__( Only used in the "nontrainable" mode. halves: whether to halve the number of channels during upsampling. This parameter does not work on ``nontrainable`` mode if ``pre_conv`` is `None`. + + .. deprecated:: 0.6.0 + ``dim`` is deprecated, use ``spatial_dims`` instead. """ super().__init__() + if dim is not None: + spatial_dims = dim if upsample == "nontrainable" and pre_conv is None: up_chns = in_chns else: up_chns = in_chns // 2 if halves else in_chns self.upsample = UpSample( - dim, + spatial_dims, in_chns, up_chns, 2, @@ -140,7 +160,7 @@ def __init__( interp_mode=interp_mode, align_corners=align_corners, ) - self.convs = TwoConv(dim, cat_chns + up_chns, out_chns, act, norm, bias, dropout) + self.convs = TwoConv(spatial_dims, cat_chns + up_chns, out_chns, act, norm, bias, dropout) def forward(self, x: torch.Tensor, x_e: Optional[torch.Tensor]): """ @@ -167,9 +187,12 @@ def forward(self, x: torch.Tensor, x_e: Optional[torch.Tensor]): class BasicUNet(nn.Module): + @deprecated_arg( + name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead." + ) def __init__( self, - dimensions: int = 3, + spatial_dims: int = 3, in_channels: int = 1, out_channels: int = 2, features: Sequence[int] = (32, 32, 64, 128, 256, 32), @@ -178,6 +201,7 @@ def __init__( bias: bool = True, dropout: Union[float, tuple] = 0.0, upsample: str = "deconv", + dimensions: Optional[int] = None, ): """ A UNet implementation with 1D/2D/3D supports. @@ -189,7 +213,7 @@ def __init__( http://dx.doi.org/10.1038/s41592-018-0261-2 Args: - dimensions: number of spatial dimensions. Defaults to 3 for spatial 3D inputs. + spatial_dims: number of spatial dimensions. Defaults to 3 for spatial 3D inputs. in_channels: number of input channels. Defaults to 1. out_channels: number of output channels. Defaults to 2. features: six integers as numbers of features. @@ -207,16 +231,19 @@ def __init__( upsample: upsampling mode, available options are ``"deconv"``, ``"pixelshuffle"``, ``"nontrainable"``. + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. 
+ Examples:: # for spatial 2D - >>> net = BasicUNet(dimensions=2, features=(64, 128, 256, 512, 1024, 128)) + >>> net = BasicUNet(spatial_dims=2, features=(64, 128, 256, 512, 1024, 128)) # for spatial 2D, with group norm - >>> net = BasicUNet(dimensions=2, features=(64, 128, 256, 512, 1024, 128), norm=("group", {"num_groups": 4})) + >>> net = BasicUNet(spatial_dims=2, features=(64, 128, 256, 512, 1024, 128), norm=("group", {"num_groups": 4})) # for spatial 3D - >>> net = BasicUNet(dimensions=3, features=(32, 32, 64, 128, 256, 32)) + >>> net = BasicUNet(spatial_dims=3, features=(32, 32, 64, 128, 256, 32)) See Also @@ -225,22 +252,24 @@ def __init__( """ super().__init__() + if dimensions is not None: + spatial_dims = dimensions fea = ensure_tuple_rep(features, 6) print(f"BasicUNet features: {fea}.") - self.conv_0 = TwoConv(dimensions, in_channels, features[0], act, norm, bias, dropout) - self.down_1 = Down(dimensions, fea[0], fea[1], act, norm, bias, dropout) - self.down_2 = Down(dimensions, fea[1], fea[2], act, norm, bias, dropout) - self.down_3 = Down(dimensions, fea[2], fea[3], act, norm, bias, dropout) - self.down_4 = Down(dimensions, fea[3], fea[4], act, norm, bias, dropout) + self.conv_0 = TwoConv(spatial_dims, in_channels, features[0], act, norm, bias, dropout) + self.down_1 = Down(spatial_dims, fea[0], fea[1], act, norm, bias, dropout) + self.down_2 = Down(spatial_dims, fea[1], fea[2], act, norm, bias, dropout) + self.down_3 = Down(spatial_dims, fea[2], fea[3], act, norm, bias, dropout) + self.down_4 = Down(spatial_dims, fea[3], fea[4], act, norm, bias, dropout) - self.upcat_4 = UpCat(dimensions, fea[4], fea[3], fea[3], act, norm, bias, dropout, upsample) - self.upcat_3 = UpCat(dimensions, fea[3], fea[2], fea[2], act, norm, bias, dropout, upsample) - self.upcat_2 = UpCat(dimensions, fea[2], fea[1], fea[1], act, norm, bias, dropout, upsample) - self.upcat_1 = UpCat(dimensions, fea[1], fea[0], fea[5], act, norm, bias, dropout, upsample, halves=False) + self.upcat_4 = UpCat(spatial_dims, fea[4], fea[3], fea[3], act, norm, bias, dropout, upsample) + self.upcat_3 = UpCat(spatial_dims, fea[3], fea[2], fea[2], act, norm, bias, dropout, upsample) + self.upcat_2 = UpCat(spatial_dims, fea[2], fea[1], fea[1], act, norm, bias, dropout, upsample) + self.upcat_1 = UpCat(spatial_dims, fea[1], fea[0], fea[5], act, norm, bias, dropout, upsample, halves=False) - self.final_conv = Conv["conv", dimensions](fea[5], out_channels, kernel_size=1) + self.final_conv = Conv["conv", spatial_dims](fea[5], out_channels, kernel_size=1) def forward(self, x: torch.Tensor): """ diff --git a/monai/networks/nets/classifier.py b/monai/networks/nets/classifier.py index 92fee4f566..a1f913ea23 100644 --- a/monai/networks/nets/classifier.py +++ b/monai/networks/nets/classifier.py @@ -25,6 +25,19 @@ class Classifier(Regressor): Defines a classification network from Regressor by specifying the output shape as a single dimensional tensor with size equal to the number of classes to predict. The final activation function can also be specified, eg. softmax or sigmoid. 
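A minimal sketch of the `last_act` option mentioned above (shapes and channel counts are arbitrary):

    from monai.networks.nets import Classifier

    # 3-class classifier over single-channel 64x64 inputs, with softmax appended as the final layer
    net = Classifier(in_shape=(1, 64, 64), classes=3, channels=(8, 16, 32), strides=(2, 2, 2), last_act="softmax")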
+ + Args: + in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) + classes: integer stating the dimension of the final output tensor + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (downscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + last_act: name defining the last activation layer """ def __init__( @@ -41,20 +54,6 @@ def __init__( bias: bool = True, last_act: Optional[str] = None, ) -> None: - """ - Args: - in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) - classes: integer stating the dimension of the final output tensor - channels: tuple of integers stating the output channels of each convolutional layer - strides: tuple of integers stating the stride (downscale factor) of each convolutional layer - kernel_size: integer or tuple of integers stating size of convolutional kernels - num_res_units: integer stating number of convolutions in residual units, 0 means no residual units - act: name or type defining activation layers - norm: name or type defining normalization layers - dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout - bias: boolean stating if convolution layers should have a bias component - last_act: name defining the last activation layer - """ super().__init__(in_shape, (classes,), channels, strides, kernel_size, num_res_units, act, norm, dropout, bias) if last_act is not None: @@ -68,6 +67,18 @@ class Discriminator(Classifier): """ Defines a discriminator network from Classifier with a single output value and sigmoid activation by default. This is meant for use with GANs or other applications requiring a generic discriminator network. 
+ + Args: + in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (downscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + last_act: name defining the last activation layer """ def __init__( @@ -83,19 +94,6 @@ def __init__( bias: bool = True, last_act=Act.SIGMOID, ) -> None: - """ - Args: - in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) - channels: tuple of integers stating the output channels of each convolutional layer - strides: tuple of integers stating the stride (downscale factor) of each convolutional layer - kernel_size: integer or tuple of integers stating size of convolutional kernels - num_res_units: integer stating number of convolutions in residual units, 0 means no residual units - act: name or type defining activation layers - norm: name or type defining normalization layers - dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout - bias: boolean stating if convolution layers should have a bias component - last_act: name defining the last activation layer - """ super().__init__(in_shape, 1, channels, strides, kernel_size, num_res_units, act, norm, dropout, bias, last_act) @@ -104,6 +102,17 @@ class Critic(Classifier): Defines a critic network from Classifier with a single output value and no final activation. The final layer is `nn.Flatten` instead of `nn.Linear`, the final result is computed as the mean over the first dimension. This is meant to be used with Wasserstein GANs. 
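The two subclasses differ mainly in their default final activation; a rough comparison under the defaults documented above (input sizes are arbitrary):

    import torch
    from monai.networks.nets import Critic, Discriminator

    x = torch.rand(4, 1, 64, 64)
    d = Discriminator(in_shape=(1, 64, 64), channels=(8, 16, 32), strides=(2, 2, 2))
    c = Critic(in_shape=(1, 64, 64), channels=(8, 16, 32), strides=(2, 2, 2))
    d_out = d(x)  # one sigmoid-squashed value per batch element
    c_out = c(x)  # one unbounded score per batch element, for Wasserstein-style training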
+ + Args: + in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (downscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component """ def __init__( @@ -118,18 +127,6 @@ def __init__( dropout: Optional[float] = 0.25, bias: bool = True, ) -> None: - """ - Args: - in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) - channels: tuple of integers stating the output channels of each convolutional layer - strides: tuple of integers stating the stride (downscale factor) of each convolutional layer - kernel_size: integer or tuple of integers stating size of convolutional kernels - num_res_units: integer stating number of convolutions in residual units, 0 means no residual units - act: name or type defining activation layers - norm: name or type defining normalization layers - dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout - bias: boolean stating if convolution layers should have a bias component - """ super().__init__(in_shape, 1, channels, strides, kernel_size, num_res_units, act, norm, dropout, bias, None) def _get_final_layer(self, in_shape: Sequence[int]): diff --git a/monai/networks/nets/densenet.py b/monai/networks/nets/densenet.py index e9f3b6d33e..59576f5dd4 100644 --- a/monai/networks/nets/densenet.py +++ b/monai/networks/nets/densenet.py @@ -23,7 +23,6 @@ __all__ = [ "DenseNet", - "densenet", "Densenet", "DenseNet121", "densenet121", @@ -62,7 +61,7 @@ def __init__( act: activation type and arguments. Defaults to relu. norm: feature normalization type and arguments. Defaults to batch norm. """ - super(_DenseLayer, self).__init__() + super().__init__() out_channels = bn_size * growth_rate conv_type: Callable = Conv[Conv.CONV, spatial_dims] @@ -110,7 +109,7 @@ def __init__( act: activation type and arguments. Defaults to relu. norm: feature normalization type and arguments. Defaults to batch norm. """ - super(_DenseBlock, self).__init__() + super().__init__() for i in range(layers): layer = _DenseLayer(spatial_dims, in_channels, growth_rate, bn_size, dropout_prob, act=act, norm=norm) in_channels += growth_rate @@ -134,7 +133,7 @@ def __init__( act: activation type and arguments. Defaults to relu. norm: feature normalization type and arguments. Defaults to batch norm. 
""" - super(_Transition, self).__init__() + super().__init__() conv_type: Callable = Conv[Conv.CONV, spatial_dims] pool_type: Callable = Pool[Pool.AVG, spatial_dims] @@ -178,7 +177,7 @@ def __init__( dropout_prob: float = 0.0, ) -> None: - super(DenseNet, self).__init__() + super().__init__() conv_type: Type[Union[nn.Conv1d, nn.Conv2d, nn.Conv3d]] = Conv[Conv.CONV, spatial_dims] pool_type: Type[Union[nn.MaxPool1d, nn.MaxPool2d, nn.MaxPool3d]] = Pool[Pool.MAX, spatial_dims] @@ -299,14 +298,13 @@ def __init__( progress: bool = True, **kwargs, ) -> None: - super(DenseNet121, self).__init__( - init_features=init_features, - growth_rate=growth_rate, - block_config=block_config, - **kwargs, - ) + super().__init__(init_features=init_features, growth_rate=growth_rate, block_config=block_config, **kwargs) if pretrained: - # it only worked when `spatial_dims` is 2 + if kwargs["spatial_dims"] > 2: + raise NotImplementedError( + "Parameter `spatial_dims` is > 2 ; currently PyTorch Hub does not" + "provide pretrained models for more than two spatial dimensions." + ) _load_state_dict(self, "densenet121", progress) @@ -322,14 +320,13 @@ def __init__( progress: bool = True, **kwargs, ) -> None: - super(DenseNet169, self).__init__( - init_features=init_features, - growth_rate=growth_rate, - block_config=block_config, - **kwargs, - ) + super().__init__(init_features=init_features, growth_rate=growth_rate, block_config=block_config, **kwargs) if pretrained: - # it only worked when `spatial_dims` is 2 + if kwargs["spatial_dims"] > 2: + raise NotImplementedError( + "Parameter `spatial_dims` is > 2 ; currently PyTorch Hub does not" + "provide pretrained models for more than two spatial dimensions." + ) _load_state_dict(self, "densenet169", progress) @@ -345,14 +342,13 @@ def __init__( progress: bool = True, **kwargs, ) -> None: - super(DenseNet201, self).__init__( - init_features=init_features, - growth_rate=growth_rate, - block_config=block_config, - **kwargs, - ) + super().__init__(init_features=init_features, growth_rate=growth_rate, block_config=block_config, **kwargs) if pretrained: - # it only worked when `spatial_dims` is 2 + if kwargs["spatial_dims"] > 2: + raise NotImplementedError( + "Parameter `spatial_dims` is > 2 ; currently PyTorch Hub does not" + "provide pretrained models for more than two spatial dimensions." 
+ ) _load_state_dict(self, "densenet201", progress) @@ -363,22 +359,17 @@ def __init__( self, init_features: int = 64, growth_rate: int = 32, - block_config: Sequence[int] = (6, 12, 48, 32), + block_config: Sequence[int] = (6, 12, 64, 48), pretrained: bool = False, progress: bool = True, **kwargs, ) -> None: - super(DenseNet264, self).__init__( - init_features=init_features, - growth_rate=growth_rate, - block_config=block_config, - **kwargs, - ) + super().__init__(init_features=init_features, growth_rate=growth_rate, block_config=block_config, **kwargs) if pretrained: raise NotImplementedError("Currently PyTorch Hub does not provide densenet264 pretrained models.") -Densenet = densenet = DenseNet +Densenet = DenseNet Densenet121 = densenet121 = DenseNet121 Densenet169 = densenet169 = DenseNet169 Densenet201 = densenet201 = DenseNet201 diff --git a/monai/networks/nets/dynunet.py b/monai/networks/nets/dynunet.py index 4af70b22c7..3e846b9b7b 100644 --- a/monai/networks/nets/dynunet.py +++ b/monai/networks/nets/dynunet.py @@ -18,7 +18,7 @@ from monai.networks.blocks.dynunet_block import UnetBasicBlock, UnetOutBlock, UnetResBlock, UnetUpBlock -__all__ = ["DynUNet", "DynUnet", "Dynunet", "dynunet"] +__all__ = ["DynUNet", "DynUnet", "Dynunet"] class DynUNetSkipLayer(nn.Module): @@ -78,6 +78,9 @@ class DynUNet(nn.Module): For example, if `strides=((1, 2, 4), 2, 1, 1)`, the minimal spatial size of the input is `(8, 16, 32)`, and the spatial size of the output is `(8, 8, 8)`. + A usage example with the medical segmentation decathlon dataset is available at: + https://github.com/Project-MONAI/tutorials/tree/master/modules/dynunet_pipeline. + Args: spatial_dims: number of spatial dimensions. in_channels: number of input channels. @@ -86,6 +89,7 @@ class DynUNet(nn.Module): strides: convolution strides for each blocks. upsample_kernel_size: convolution kernel size for transposed convolution layers. The values should equal to strides[1:]. + dropout: dropout ratio. Defaults to no dropout. norm_name: feature normalization type and arguments. Defaults to ``INSTANCE``. deep_supervision: whether to add deep supervision head before output. Defaults to ``False``. If ``True``, in training mode, the forward function will output not only the last feature @@ -115,12 +119,13 @@ def __init__( kernel_size: Sequence[Union[Sequence[int], int]], strides: Sequence[Union[Sequence[int], int]], upsample_kernel_size: Sequence[Union[Sequence[int], int]], + dropout: Optional[Union[Tuple, str, float]] = None, norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}), deep_supervision: bool = False, deep_supr_num: int = 1, res_block: bool = False, ): - super(DynUNet, self).__init__() + super().__init__() self.spatial_dims = spatial_dims self.in_channels = in_channels self.out_channels = out_channels @@ -128,6 +133,7 @@ def __init__( self.strides = strides self.upsample_kernel_size = upsample_kernel_size self.norm_name = norm_name + self.dropout = dropout self.conv_block = UnetResBlock if res_block else UnetBasicBlock self.filters = [min(2 ** (5 + i), 320 if spatial_dims == 3 else 512) for i in range(len(strides))] self.input_block = self.get_input_block() @@ -184,17 +190,17 @@ def create_skips(index, downsamples, upsamples, superheads, bottleneck): def check_kernel_stride(self): kernels, strides = self.kernel_size, self.strides error_msg = "length of kernel_size and strides should be the same, and no less than 3."
- if not (len(kernels) == len(strides) and len(kernels) >= 3): + if len(kernels) != len(strides) or len(kernels) < 3: raise AssertionError(error_msg) for idx, k_i in enumerate(kernels): kernel, stride = k_i, strides[idx] if not isinstance(kernel, int): - error_msg = "length of kernel_size in block {} should be the same as spatial_dims.".format(idx) + error_msg = f"length of kernel_size in block {idx} should be the same as spatial_dims." if len(kernel) != self.spatial_dims: raise AssertionError(error_msg) if not isinstance(stride, int): - error_msg = "length of stride in block {} should be the same as spatial_dims.".format(idx) + error_msg = f"length of stride in block {idx} should be the same as spatial_dims." if len(stride) != self.spatial_dims: raise AssertionError(error_msg) @@ -225,6 +231,7 @@ def get_input_block(self): self.kernel_size[0], self.strides[0], self.norm_name, + dropout=self.dropout, ) def get_bottleneck(self): @@ -235,14 +242,11 @@ def get_bottleneck(self): self.kernel_size[-1], self.strides[-1], self.norm_name, + dropout=self.dropout, ) def get_output_block(self, idx: int): - return UnetOutBlock( - self.spatial_dims, - self.filters[idx], - self.out_channels, - ) + return UnetOutBlock(self.spatial_dims, self.filters[idx], self.out_channels, dropout=self.dropout) def get_downsamples(self): inp, out = self.filters[:-2], self.filters[1:-1] @@ -276,6 +280,7 @@ def get_module_list( "kernel_size": kernel, "stride": stride, "norm_name": self.norm_name, + "dropout": self.dropout, "upsample_kernel_size": up_kernel, } layer = conv_block(**params) @@ -289,6 +294,7 @@ def get_module_list( "kernel_size": kernel, "stride": stride, "norm_name": self.norm_name, + "dropout": self.dropout, } layer = conv_block(**params) layers.append(layer) @@ -305,4 +311,4 @@ def initialize_weights(module): module.bias = nn.init.constant_(module.bias, 0) -DynUnet = Dynunet = dynunet = DynUNet +DynUnet = Dynunet = DynUNet diff --git a/monai/networks/nets/dynunet_v1.py b/monai/networks/nets/dynunet_v1.py index feb05d1762..4c910157c9 100644 --- a/monai/networks/nets/dynunet_v1.py +++ b/monai/networks/nets/dynunet_v1.py @@ -24,7 +24,7 @@ @deprecated( since="0.6.0", - removed="0.7.0", + removed="0.8.0", msg_suffix="This module is for backward compatibility purpose only. Please use `DynUNet` instead.", ) class DynUNetV1(DynUNet): @@ -38,6 +38,7 @@ class DynUNetV1(DynUNet): kernel_size: convolution kernel size. strides: convolution strides for each blocks. upsample_kernel_size: convolution kernel size for transposed convolution layers. + dropout: dropout ratio. Defaults to no dropout. norm_name: [``"batch"``, ``"instance"``, ``"group"``]. Defaults to "instance". deep_supervision: whether to add deep supervision head before output. Defaults to ``False``. deep_supr_num: number of feature maps that will output during deep supervision head. Defaults to 1. 
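With `dropout` now threaded through the input, bottleneck, up/down-sample and output blocks, it can be set once at construction; an illustrative configuration (argument values are arbitrary but satisfy `check_kernel_stride`):

    from monai.networks.nets import DynUNet

    net = DynUNet(
        spatial_dims=3,
        in_channels=1,
        out_channels=2,
        kernel_size=(3, 3, 3),
        strides=(1, 2, 2),
        upsample_kernel_size=(2, 2),  # should equal strides[1:]
        dropout=0.1,  # new argument; defaults to no dropout
    )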
@@ -57,6 +58,7 @@ def __init__( kernel_size: Sequence[Union[Sequence[int], int]], strides: Sequence[Union[Sequence[int], int]], upsample_kernel_size: Sequence[Union[Sequence[int], int]], + dropout: float = 0.0, norm_name: str = "instance", deep_supervision: bool = False, deep_supr_num: int = 1, @@ -70,6 +72,7 @@ def __init__( self.strides = strides self.upsample_kernel_size = upsample_kernel_size self.norm_name = norm_name + self.dropout = dropout self.conv_block = _UnetResBlockV1 if res_block else _UnetBasicBlockV1 # type: ignore self.filters = [min(2 ** (5 + i), 320 if spatial_dims == 3 else 512) for i in range(len(strides))] self.input_block = self.get_input_block() diff --git a/monai/networks/nets/efficientnet.py b/monai/networks/nets/efficientnet.py index 453916758a..6cd0e83e46 100644 --- a/monai/networks/nets/efficientnet.py +++ b/monai/networks/nets/efficientnet.py @@ -369,10 +369,7 @@ def __init__( ) idx += 1 # increment blocks index counter - self._blocks.add_module( - str(stack_idx), - sub_stack, - ) + self._blocks.add_module(str(stack_idx), sub_stack) # sanity check to see if len(self._blocks) equal expected num_blocks if idx != num_blocks: @@ -534,7 +531,7 @@ def __init__( weight_coeff, depth_coeff, image_size, dropout_rate, dropconnect_rate = efficientnet_params[model_name] # create model and initialize random weights - super(EfficientNetBN, self).__init__( + super().__init__( blocks_args_str=blocks_args_str, spatial_dims=spatial_dims, in_channels=in_channels, @@ -594,7 +591,7 @@ def __init__( weight_coeff, depth_coeff, image_size, dropout_rate, dropconnect_rate = efficientnet_params[model_name] # create model and initialize random weights - super(EfficientNetBNFeatures, self).__init__( + super().__init__( blocks_args_str=blocks_args_str, spatial_dims=spatial_dims, in_channels=in_channels, @@ -677,7 +674,7 @@ def drop_connect(inputs: torch.Tensor, p: float, training: bool) -> torch.Tensor output: output tensor after applying drop connection. """ if p < 0.0 or p > 1.0: - raise ValueError("p must be in range of [0, 1], found {}".format(p)) + raise ValueError(f"p must be in range of [0, 1], found {p}") # eval mode: drop_connect is switched off - so return input without modifying if not training: @@ -708,7 +705,7 @@ def _load_state_dict(model: nn.Module, arch: str, progress: bool, adv_prop: bool arch = arch.split("efficientnet-")[-1] + "-ap" model_url = look_up_option(arch, url_map, None) if model_url is None: - print("pretrained weights of {} is not provided".format(arch)) + print(f"pretrained weights of {arch} are not provided") else: # load state dict from url model_url = url_map[arch] @@ -852,7 +849,7 @@ def _calculate_output_image_size(input_image_size: List[int], stride: Union[int, if isinstance(stride, tuple): all_strides_equal = all(stride[0] == s for s in stride) if not all_strides_equal: - raise ValueError("unequal strides are not possible, got {}".format(stride)) + raise ValueError(f"unequal strides are not possible, got {stride}") stride = stride[0] diff --git a/monai/networks/nets/fullyconnectednet.py b/monai/networks/nets/fullyconnectednet.py index b906bab015..19197bd58d 100644 --- a/monai/networks/nets/fullyconnectednet.py +++ b/monai/networks/nets/fullyconnectednet.py @@ -30,9 +30,24 @@ def _get_adn_layer( class FullyConnectedNet(nn.Sequential): """ - Plain full-connected layer neural network + Simple fully-connected neural network composed of a sequence of linear layers with PReLU activation and + dropout.
The network accepts input with `in_channels` channels, has output with `out_channels` channels, and + hidden layer output channels given in `hidden_channels`. If `bias` is True then linear units have a bias term. + + Args: + in_channels: number of input channels. + out_channels: number of output channels. + hidden_channels: number of output channels for each hidden layer. + dropout: dropout ratio. Defaults to no dropout. + act: activation type and arguments. Defaults to PReLU. + bias: whether to have a bias term in linear units. Defaults to True. + adn_ordering: order of operations in :py:class:`monai.networks.blocks.ADN`. + + Examples:: + + # accepts 4 values and infers 3 values as output, has 3 hidden layers with 10, 20, 10 values as output + net = FullyConnectedNet(4, 3, [10, 20, 10], dropout=0.2) - The network uses dropout and, by default, PReLU activation """ def __init__( @@ -53,8 +68,11 @@ def __init__( self.in_channels = in_channels self.out_channels = out_channels self.hidden_channels = list(hidden_channels) + self.act = act + self.dropout = dropout + self.adn_ordering = adn_ordering + self.add_module("flatten", nn.Flatten()) - self.adn_layer = _get_adn_layer(act, dropout, adn_ordering) prev_channels = self.in_channels for i, c in enumerate(hidden_channels): @@ -64,13 +82,34 @@ def __init__( self.add_module("output", nn.Linear(prev_channels, out_channels, bias)) def _get_layer(self, in_channels: int, out_channels: int, bias: bool) -> nn.Sequential: - seq = nn.Sequential(nn.Linear(in_channels, out_channels, bias)) - seq.add_module("ADN", self.adn_layer) + seq = nn.Sequential( + nn.Linear(in_channels, out_channels, bias), _get_adn_layer(self.act, self.dropout, self.adn_ordering) + ) return seq class VarFullyConnectedNet(nn.Module): - """Variational fully-connected network.""" + """ + Variational fully-connected network. This is composed of an encode layer, reparameterization layer, and then a + decode layer. + + Args: + in_channels: number of input channels. + out_channels: number of output channels. + latent_size: number of latent variables to use. + encode_channels: number of output channels for each hidden layer of the encode half. + decode_channels: number of output channels for each hidden layer of the decode half. + dropout: dropout ratio. Defaults to no dropout. + act: activation type and arguments. Defaults to PReLU. + bias: whether to have a bias term in linear units. Defaults to True. + adn_ordering: order of operations in :py:class:`monai.networks.blocks.ADN`. + + Examples:: + + # accepts inputs with 4 values, uses a latent space of 2 variables, and produces outputs of 3 values + net = VarFullyConnectedNet(4, 3, 2, [5, 10], [10, 5]) + + """ def __init__( self, diff --git a/monai/networks/nets/generator.py b/monai/networks/nets/generator.py index 1f24944a63..90aa26cd01 100644 --- a/monai/networks/nets/generator.py +++ b/monai/networks/nets/generator.py @@ -25,13 +25,35 @@ class Generator(nn.Module): """ Defines a simple generator network accepting a latent vector and through a sequence of convolution layers constructs an output tensor of greater size and high dimensionality. The method `_get_layer` is used to - create each of these layers, override this method to define layers beyond the default Convolution or - ResidualUnit layers. + create each of these layers, override this method to define layers beyond the default + :py:class:`monai.networks.blocks.Convolution` or :py:class:`monai.networks.blocks.ResidualUnit` layers. 
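One behavioural consequence of the `FullyConnectedNet._get_layer` rewrite above is that each hidden layer now builds its own ADN block instead of re-adding a single shared instance; usage is unchanged, e.g.:

    import torch
    from monai.networks.nets import FullyConnectedNet

    net = FullyConnectedNet(in_channels=4, out_channels=3, hidden_channels=[10, 20, 10], dropout=0.2)
    out = net(torch.rand(8, 4))  # -> torch.Size([8, 3])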
+ + The layers are constructed using the values in the `channels` and `strides` arguments, the number being defined by + the length of these (which must match). Input is first passed through a :py:class:`torch.nn.Linear` layer to + convert the input vector to an image tensor with dimensions `start_shape`. This passes through the convolution + layers and is progressively upsampled if the `strides` values are greater than 1 using transpose convolutions. The + size of the final output is defined by the `start_shape` dimension and the amount of upsampling done through + strides. In the default definition the size of the output's spatial dimensions will be that of `start_shape` + multiplied by the product of `strides`, thus the example network below upsamples a starting size of (64, 8, 8) + to (1, 64, 64) since its `strides` are (2, 2, 2). + + Args: + latent_shape: tuple of integers stating the dimension of the input latent vector (minus batch dimension) + start_shape: tuple of integers stating the dimension of the tensor to pass to convolution subnetwork + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (upscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + + Examples:: + + # 3 layers, latent input vector of shape (42, 24), output volume of shape (1, 64, 64) + net = Generator((42, 24), (64, 8, 8), (32, 16, 1), (2, 2, 2)) - For example, a generator accepting a latent vector if shape (42,24) and producing an output volume of - shape (1,64,64) can be constructed as: - - gen = Generator((42, 24), (64, 8, 8), (32, 16, 1), (2, 2, 2)) """ def __init__( @@ -47,26 +69,6 @@ def __init__( dropout: Optional[float] = None, bias: bool = True, ) -> None: - """ - Construct the generator network with the number of layers defined by `channels` and `strides`. In the - forward pass a `nn.Linear` layer relates the input latent vector to a tensor of dimensions `start_shape`, - this is then fed forward through the sequence of convolutional layers. The number of layers is defined by - the length of `channels` and `strides` which must match, each layer having the number of output channels - given in `channels` and an upsample factor given in `strides` (ie. a transpose convolution with that stride - size). 
- - Args: - latent_shape: tuple of integers stating the dimension of the input latent vector (minus batch dimension) - start_shape: tuple of integers stating the dimension of the tensor to pass to convolution subnetwork - channels: tuple of integers stating the output channels of each convolutional layer - strides: tuple of integers stating the stride (upscale factor) of each convolutional layer - kernel_size: integer or tuple of integers stating size of convolutional kernels - num_res_units: integer stating number of convolutions in residual units, 0 means no residual units - act: name or type defining activation layers - norm: name or type defining normalization layers - dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout - bias: boolean stating if convolution layers should have a bias component - """ super().__init__() self.in_channels, *self.start_shape = ensure_tuple(start_shape) @@ -112,7 +114,7 @@ def _get_layer( strides=strides, is_transposed=True, conv_only=is_last or self.num_res_units > 0, - dimensions=self.dimensions, + spatial_dims=self.dimensions, out_channels=out_channels, kernel_size=self.kernel_size, act=self.act, @@ -126,7 +128,7 @@ def _get_layer( in_channels=out_channels, subunits=self.num_res_units, last_conv_only=is_last, - dimensions=self.dimensions, + spatial_dims=self.dimensions, out_channels=out_channels, kernel_size=self.kernel_size, act=self.act, diff --git a/monai/networks/nets/highresnet.py b/monai/networks/nets/highresnet.py index f644a7835a..2937cda32a 100644 --- a/monai/networks/nets/highresnet.py +++ b/monai/networks/nets/highresnet.py @@ -70,7 +70,7 @@ def __init__( ValueError: When ``channel_matching=pad`` and ``in_channels > out_channels``. Incompatible values. """ - super(HighResBlock, self).__init__() + super().__init__() self.chn_pad = ChannelPad( spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels, mode=channel_matching ) @@ -84,7 +84,7 @@ def __init__( ) layers.append( Convolution( - dimensions=spatial_dims, + spatial_dims=spatial_dims, in_channels=_in_chns, out_channels=_out_chns, kernel_size=kernel_size, @@ -146,7 +146,7 @@ def __init__( channel_matching: Union[ChannelMatching, str] = ChannelMatching.PAD, ) -> None: - super(HighResNet, self).__init__() + super().__init__() blocks = nn.ModuleList() # initial conv layer @@ -154,7 +154,7 @@ def __init__( _in_chns, _out_chns = in_channels, params["n_features"] blocks.append( Convolution( - dimensions=spatial_dims, + spatial_dims=spatial_dims, in_channels=_in_chns, out_channels=_out_chns, kernel_size=params["kernel_size"], @@ -190,7 +190,7 @@ def __init__( _in_chns, _out_chns = _out_chns, params["n_features"] blocks.append( Convolution( - dimensions=spatial_dims, + spatial_dims=spatial_dims, in_channels=_in_chns, out_channels=_out_chns, kernel_size=params["kernel_size"], @@ -206,7 +206,7 @@ def __init__( _in_chns = _out_chns blocks.append( Convolution( - dimensions=spatial_dims, + spatial_dims=spatial_dims, in_channels=_in_chns, out_channels=out_channels, kernel_size=params["kernel_size"], diff --git a/monai/networks/nets/netadapter.py b/monai/networks/nets/netadapter.py index 80288f7945..8aaf83f24d 100644 --- a/monai/networks/nets/netadapter.py +++ b/monai/networks/nets/netadapter.py @@ -37,6 +37,9 @@ class NetAdapter(torch.nn.Module): bias: the bias value when replacing the last layer. if False, the layer will not learn an additive bias, default to True. + .. 
deprecated:: 0.6.0 + ``n_classes`` is deprecated, use ``num_classes`` instead. + """ @deprecated_arg("n_classes", since="0.6") @@ -78,21 +81,12 @@ def __init__( self.fc: Union[torch.nn.Linear, torch.nn.Conv2d, torch.nn.Conv3d] if use_conv: # add 1x1 conv (it behaves like a FC layer) - self.fc = Conv[Conv.CONV, dim]( - in_channels=in_channels_, - out_channels=num_classes, - kernel_size=1, - bias=bias, - ) + self.fc = Conv[Conv.CONV, dim](in_channels=in_channels_, out_channels=num_classes, kernel_size=1, bias=bias) else: # remove the last Linear layer (fully connected) self.features = torch.nn.Sequential(*layers[:-1]) # replace the out_features of FC layer - self.fc = torch.nn.Linear( - in_features=in_channels_, - out_features=num_classes, - bias=bias, - ) + self.fc = torch.nn.Linear(in_features=in_channels_, out_features=num_classes, bias=bias) self.use_conv = use_conv def forward(self, x): diff --git a/monai/networks/nets/regressor.py b/monai/networks/nets/regressor.py index 25acb9bfa5..bc8feb7527 100644 --- a/monai/networks/nets/regressor.py +++ b/monai/networks/nets/regressor.py @@ -29,6 +29,30 @@ class Regressor(nn.Module): This defines a network for relating large-sized input tensors to small output tensors, ie. regressing large values to a prediction. An output of a single dimension can be used as value regression or multi-label classification prediction, an output of a single value can be used as a discriminator or critic prediction. + + The network is constructed as a sequence of layers, either :py:class:`monai.networks.blocks.Convolution` or + :py:class:`monai.networks.blocks.ResidualUnit`, with a final fully-connected layer resizing the output from the + blocks to the final size. Each block is defined with a stride value typically used to downsample the input using + strided convolutions. In this way each block progressively condenses information from the input into a deep + representation which the final fully-connected layer relates to the final result. + + Args: + in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) + out_shape: tuple of integers stating the dimension of the final output tensor (minus batch dimension) + channels: tuple of integers stating the output channels of each convolutional layer + strides: tuple of integers stating the stride (downscale factor) of each convolutional layer + kernel_size: integer or tuple of integers stating size of convolutional kernels + num_res_units: integer stating number of convolutions in residual units, 0 means no residual units + act: name or type defining activation layers + norm: name or type defining normalization layers + dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout + bias: boolean stating if convolution layers should have a bias component + + Examples:: + + # infers a 2-value result (eg. a 2D cartesian coordinate) from a 64x64 image + net = Regressor((1, 64, 64), (2,), (2, 4, 8), (2, 2, 2)) + """ def __init__( @@ -44,23 +68,6 @@ def __init__( dropout: Optional[float] = None, bias: bool = True, ) -> None: - """ - Construct the regressor network with the number of layers defined by `channels` and `strides`. Inputs are - first passed through the convolutional layers in the forward pass, the output from this is then pass - through a fully connected layer to relate them to the final output tensor.
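A quick shape check matching the consolidated Regressor docstring above (a sketch; sizes are arbitrary):

    import torch
    from monai.networks.nets import Regressor

    net = Regressor(in_shape=(1, 64, 64), out_shape=(2,), channels=(2, 4, 8), strides=(2, 2, 2))
    pred = net(torch.rand(4, 1, 64, 64))  # -> torch.Size([4, 2])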
- - Args: - in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension) - out_shape: tuple of integers stating the dimension of the final output tensor - channels: tuple of integers stating the output channels of each convolutional layer - strides: tuple of integers stating the stride (downscale factor) of each convolutional layer - kernel_size: integer or tuple of integers stating size of convolutional kernels - num_res_units: integer stating number of convolutions in residual units, 0 means no residual units - act: name or type defining activation layers - norm: name or type defining normalization layers - dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout - bias: boolean stating if convolution layers should have a bias component - """ super().__init__() self.in_channels, *self.in_shape = ensure_tuple(in_shape) @@ -107,7 +114,7 @@ def _get_layer( layer = ResidualUnit( subunits=self.num_res_units, last_conv_only=is_last, - dimensions=self.dimensions, + spatial_dims=self.dimensions, in_channels=in_channels, out_channels=out_channels, strides=strides, @@ -120,7 +127,7 @@ def _get_layer( else: layer = Convolution( conv_only=is_last, - dimensions=self.dimensions, + spatial_dims=self.dimensions, in_channels=in_channels, out_channels=out_channels, strides=strides, diff --git a/monai/networks/nets/regunet.py b/monai/networks/nets/regunet.py index 4cf747f650..ead12382eb 100644 --- a/monai/networks/nets/regunet.py +++ b/monai/networks/nets/regunet.py @@ -67,7 +67,7 @@ def __init__( concat_skip: when up-sampling, concatenate skipped tensor if true, otherwise use addition encode_kernel_sizes: kernel size for down-sampling """ - super(RegUNet, self).__init__() + super().__init__() if not extract_levels: extract_levels = (depth,) if max(extract_levels) != depth: @@ -106,9 +106,7 @@ def __init__( # build layers self.build_layers() - def build_layers( - self, - ): + def build_layers(self): self.build_encode_layers() self.build_decode_layers() @@ -125,23 +123,13 @@ def build_encode_layers(self): ] ) self.encode_pools = nn.ModuleList( - [ - self.build_down_sampling_block( - channels=self.num_channels[d], - ) - for d in range(self.depth) - ] + [self.build_down_sampling_block(channels=self.num_channels[d]) for d in range(self.depth)] ) self.bottom_block = self.build_bottom_block( in_channels=self.num_channels[-2], out_channels=self.num_channels[-1] ) - def build_conv_block( - self, - in_channels, - out_channels, - kernel_size, - ): + def build_conv_block(self, in_channels, out_channels, kernel_size): return nn.Sequential( get_conv_block( spatial_dims=self.spatial_dims, @@ -157,10 +145,7 @@ def build_conv_block( ), ) - def build_down_sampling_block( - self, - channels: int, - ): + def build_down_sampling_block(self, channels: int): return RegistrationDownSampleBlock(spatial_dims=self.spatial_dims, channels=channels, pooling=self.pooling) def build_bottom_block(self, in_channels: int, out_channels: int): @@ -203,11 +188,7 @@ def build_decode_layers(self): # extraction self.output_block = self.build_output_block() - def build_up_sampling_block( - self, - in_channels: int, - out_channels: int, - ) -> nn.Module: + def build_up_sampling_block(self, in_channels: int, out_channels: int) -> nn.Module: return get_deconv_block(spatial_dims=self.spatial_dims, in_channels=in_channels, out_channels=out_channels) def build_output_block(self) -> nn.Module: @@ -255,14 +236,8 @@ def forward(self, x): class AffineHead(nn.Module): - def 
__init__( - self, - spatial_dims: int, - image_size: List[int], - decode_size: List[int], - in_channels: int, - ): - super(AffineHead, self).__init__() + def __init__(self, spatial_dims: int, image_size: List[int], decode_size: List[int], in_channels: int): + super().__init__() self.spatial_dims = spatial_dims if spatial_dims == 2: in_features = in_channels * decode_size[0] * decode_size[1] @@ -365,13 +340,8 @@ def build_output_block(self): class AdditiveUpSampleBlock(nn.Module): - def __init__( - self, - spatial_dims: int, - in_channels: int, - out_channels: int, - ): - super(AdditiveUpSampleBlock, self).__init__() + def __init__(self, spatial_dims: int, in_channels: int, out_channels: int): + super().__init__() self.deconv = get_deconv_block(spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -435,17 +405,10 @@ def __init__( def build_bottom_block(self, in_channels: int, out_channels: int): kernel_size = self.encode_kernel_sizes[self.depth] return get_conv_block( - spatial_dims=self.spatial_dims, - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, + spatial_dims=self.spatial_dims, in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size ) - def build_up_sampling_block( - self, - in_channels: int, - out_channels: int, - ) -> nn.Module: + def build_up_sampling_block(self, in_channels: int, out_channels: int) -> nn.Module: if self._use_additive_upsampling: return AdditiveUpSampleBlock( spatial_dims=self.spatial_dims, in_channels=in_channels, out_channels=out_channels diff --git a/monai/networks/nets/resnet.py b/monai/networks/nets/resnet.py index a5e6b7ab81..d859365203 100644 --- a/monai/networks/nets/resnet.py +++ b/monai/networks/nets/resnet.py @@ -14,9 +14,10 @@ import torch import torch.nn as nn -import torch.nn.functional as F from monai.networks.layers.factories import Conv, Norm, Pool +from monai.networks.layers.utils import get_pool_layer +from monai.utils.module import look_up_option __all__ = ["ResNet", "resnet10", "resnet18", "resnet34", "resnet50", "resnet101", "resnet152", "resnet200"] @@ -58,7 +59,7 @@ def __init__( stride: stride to use for first conv layer. downsample: which downsample layer to use. """ - super(ResNetBlock, self).__init__() + super().__init__() conv_type: Callable = Conv[Conv.CONV, spatial_dims] norm_type: Callable = Norm[Norm.BATCH, spatial_dims] @@ -110,7 +111,7 @@ def __init__( downsample: which downsample layer to use. """ - super(ResNetBottleneck, self).__init__() + super().__init__() conv_type: Callable = Conv[Conv.CONV, spatial_dims] norm_type: Callable = Norm[Norm.BATCH, spatial_dims] @@ -162,9 +163,15 @@ class ResNet(nn.Module): conv1_t_size: size of first convolution layer, determines kernel and padding. conv1_t_stride: stride of first convolution layer. no_max_pool: bool argument to determine if to use maxpool layer. - shortcut_type: which downsample block to use. + shortcut_type: which downsample block to use. Options are 'A', 'B', default to 'B'. + - 'A': using `self._downsample_basic_block`. + - 'B': kernel_size 1 conv + norm. widen_factor: widen output for each layer. num_classes: number of output (classifications) + + .. deprecated:: 0.6.0 + ``n_classes`` is deprecated, use ``num_classes`` instead. 
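The shortcut type 'A' described in the docstring above zero-pads channels after strided subsampling rather than learning a projection; a standalone sketch of that idea (2D case, simplified from the `_downsample_basic_block` rewrite that follows):

    import torch
    import torch.nn.functional as F

    def downsample_a(x: torch.Tensor, planes: int, stride: int) -> torch.Tensor:
        out = F.avg_pool2d(x, kernel_size=1, stride=stride)  # subsample spatially
        # zero-fill the extra channels up to `planes`, matching device and dtype
        pads = torch.zeros(out.size(0), planes - out.size(1), *out.shape[2:], dtype=out.dtype, device=out.device)
        return torch.cat([out, pads], dim=1)

    y = downsample_a(torch.rand(2, 16, 32, 32), planes=32, stride=2)  # -> (2, 32, 16, 16)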
+ """ @deprecated_arg("n_classes", since="0.6") @@ -185,7 +192,7 @@ def __init__( n_classes: Optional[int] = None, ) -> None: - super(ResNet, self).__init__() + super().__init__() # in case the new num_classes is default but you still call deprecated n_classes if n_classes is not None and num_classes == 400: num_classes = n_classes @@ -198,7 +205,7 @@ def __init__( ] block_avgpool = get_avgpool() - conv1_kernel, conv1_stride, con1_padding = get_conv1(conv1_t_size, conv1_t_stride) + conv1_kernel, conv1_stride, conv1_padding = get_conv1(conv1_t_size, conv1_t_stride) block_inplanes = [int(x * widen_factor) for x in block_inplanes] self.in_planes = block_inplanes[0] @@ -209,7 +216,7 @@ def __init__( self.in_planes, kernel_size=conv1_kernel[spatial_dims], stride=conv1_stride[spatial_dims], - padding=con1_padding[spatial_dims], + padding=conv1_padding[spatial_dims], bias=False, ) self.bn1 = norm_type(self.in_planes) @@ -234,14 +241,9 @@ def __init__( nn.init.constant_(torch.as_tensor(m.bias), 0) def _downsample_basic_block(self, x: torch.Tensor, planes: int, stride: int, spatial_dims: int = 3) -> torch.Tensor: - assert spatial_dims == 3 - out: torch.Tensor = F.avg_pool3d(x, kernel_size=1, stride=stride) - zero_pads = torch.zeros(out.size(0), planes - out.size(1), out.size(2), out.size(3), out.size(4)) - if isinstance(out.data, torch.FloatTensor): - zero_pads = zero_pads.cuda() - + out: torch.Tensor = get_pool_layer(("avg", {"kernel_size": 1, "stride": stride}), spatial_dims=spatial_dims)(x) + zero_pads = torch.zeros(out.size(0), planes - out.size(1), *out.shape[2:], dtype=out.dtype, device=out.device) out = torch.cat([out.data, zero_pads], dim=1) - return out def _make_layer( @@ -259,9 +261,12 @@ def _make_layer( downsample: Union[nn.Module, partial, None] = None if stride != 1 or self.in_planes != planes * block.expansion: - if shortcut_type == "A": + if look_up_option(shortcut_type, {"A", "B"}) == "A": downsample = partial( - self._downsample_basic_block, planes=planes * block.expansion, kernel_size=1, stride=stride + self._downsample_basic_block, + planes=planes * block.expansion, + stride=stride, + spatial_dims=spatial_dims, ) else: downsample = nn.Sequential( @@ -269,12 +274,12 @@ def _make_layer( norm_type(planes * block.expansion), ) - layers = [] - layers.append( + layers = [ block( in_planes=self.in_planes, planes=planes, spatial_dims=spatial_dims, stride=stride, downsample=downsample ) - ) + ] + self.in_planes = planes * block.expansion for _i in range(1, blocks): layers.append(block(self.in_planes, planes, spatial_dims=spatial_dims)) diff --git a/monai/networks/nets/segresnet.py b/monai/networks/nets/segresnet.py index 8be562aadd..b722e5d70f 100644 --- a/monai/networks/nets/segresnet.py +++ b/monai/networks/nets/segresnet.py @@ -9,7 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Optional, Sequence, Tuple, Union +from typing import List, Optional, Sequence, Tuple, Union import numpy as np import torch @@ -99,12 +99,7 @@ def __init__( def _make_down_layers(self): down_layers = nn.ModuleList() - blocks_down, spatial_dims, filters, norm = ( - self.blocks_down, - self.spatial_dims, - self.init_filters, - self.norm, - ) + blocks_down, spatial_dims, filters, norm = (self.blocks_down, self.spatial_dims, self.init_filters, self.norm) for i in range(len(blocks_down)): layer_in_channels = filters * 2 ** i pre_conv = ( @@ -113,8 +108,7 @@ def _make_down_layers(self): else nn.Identity() ) down_layer = nn.Sequential( - pre_conv, - *[ResBlock(spatial_dims, layer_in_channels, norm=norm) for _ in range(blocks_down[i])], + pre_conv, *[ResBlock(spatial_dims, layer_in_channels, norm=norm) for _ in range(blocks_down[i])] ) down_layers.append(down_layer) return down_layers @@ -153,7 +147,7 @@ def _make_final_conv(self, out_channels: int): get_conv_layer(self.spatial_dims, self.init_filters, out_channels, kernel_size=1, bias=True), ) - def forward(self, x): + def encode(self, x: torch.Tensor) -> Tuple[torch.Tensor, List[torch.Tensor]]: x = self.convInit(x) if self.dropout_prob is not None: x = self.dropout(x) @@ -164,14 +158,23 @@ def forward(self, x): x = down(x) down_x.append(x) - down_x.reverse() + return x, down_x + def decode(self, x: torch.Tensor, down_x: List[torch.Tensor]) -> torch.Tensor: for i, (up, upl) in enumerate(zip(self.up_samples, self.up_layers)): x = up(x) + down_x[i + 1] x = upl(x) if self.use_conv_final: x = self.conv_final(x) + + return x + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x, down_x = self.encode(x) + down_x.reverse() + + x = self.decode(x, down_x) return x @@ -226,7 +229,7 @@ def __init__( blocks_up: tuple = (1, 1, 1), upsample_mode: Union[UpsampleMode, str] = UpsampleMode.NONTRAINABLE, ): - super(SegResNetVAE, self).__init__( + super().__init__( spatial_dims=spatial_dims, init_filters=init_filters, in_channels=in_channels, diff --git a/monai/networks/nets/senet.py b/monai/networks/nets/senet.py index 9b7035c259..a58c2adb51 100644 --- a/monai/networks/nets/senet.py +++ b/monai/networks/nets/senet.py @@ -17,12 +17,32 @@ import torch.nn as nn from torch.hub import load_state_dict_from_url +from monai.apps.utils import download_url from monai.networks.blocks.convolutions import Convolution from monai.networks.blocks.squeeze_and_excitation import SEBottleneck, SEResNetBottleneck, SEResNeXtBottleneck from monai.networks.layers.factories import Act, Conv, Dropout, Norm, Pool from monai.utils.module import look_up_option -__all__ = ["SENet", "SENet154", "SEResNet50", "SEResNet101", "SEResNet152", "SEResNeXt50", "SEResNext101"] +__all__ = [ + "SENet", + "SENet154", + "SEResNet50", + "SEResNet101", + "SEResNet152", + "SEResNeXt50", + "SEResNext101", + "SE_NET_MODELS", +] + + +SE_NET_MODELS = { + "senet154": "http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth", + "se_resnet50": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth", + "se_resnet101": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth", + "se_resnet152": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth", + "se_resnext50_32x4d": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth", + "se_resnext101_32x4d": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth", +} class SENet(nn.Module): @@ -87,7 +107,7 @@ def __init__( num_classes: int = 1000, ) 
-> None: - super(SENet, self).__init__() + super().__init__() relu_type: Type[nn.ReLU] = Act[Act.RELU] conv_type: Type[Union[nn.Conv1d, nn.Conv2d, nn.Conv3d]] = Conv[Conv.CONV, spatial_dims] @@ -192,7 +212,7 @@ def _make_layer( downsample = None if stride != 1 or self.inplanes != planes * block.expansion: downsample = Convolution( - dimensions=self.spatial_dims, + spatial_dims=self.spatial_dims, in_channels=self.inplanes, out_channels=planes * block.expansion, strides=stride, @@ -254,15 +274,7 @@ def _load_state_dict(model: nn.Module, arch: str, progress: bool): """ This function is used to load pretrained models. """ - model_urls = { - "senet154": "http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth", - "se_resnet50": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth", - "se_resnet101": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth", - "se_resnet152": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth", - "se_resnext50_32x4d": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth", - "se_resnext101_32x4d": "http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth", - } - model_url = look_up_option(arch, model_urls, None) + model_url = look_up_option(arch, SE_NET_MODELS, None) if model_url is None: raise ValueError( "only 'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152', 'se_resnext50_32x4d', " @@ -276,7 +288,11 @@ def _load_state_dict(model: nn.Module, arch: str, progress: bool): pattern_down_conv = re.compile(r"^(layer[1-4]\.\d\.)(?:downsample.0.)(\w*)$") pattern_down_bn = re.compile(r"^(layer[1-4]\.\d\.)(?:downsample.1.)(\w*)$") - state_dict = load_state_dict_from_url(model_url, progress=progress) + if isinstance(model_url, dict): + download_url(model_url["url"], filepath=model_url["filename"]) + state_dict = torch.load(model_url["filename"], map_location=None) + else: + state_dict = load_state_dict_from_url(model_url, progress=progress) for key in list(state_dict.keys()): new_key = None if pattern_conv.match(key): @@ -317,13 +333,7 @@ def __init__( progress: bool = True, **kwargs, ) -> None: - super(SENet154, self).__init__( - block=SEBottleneck, - layers=layers, - groups=groups, - reduction=reduction, - **kwargs, - ) + super().__init__(block=SEBottleneck, layers=layers, groups=groups, reduction=reduction, **kwargs) if pretrained: # it only worked when `spatial_dims` is 2 _load_state_dict(self, "senet154", progress) @@ -345,7 +355,7 @@ def __init__( progress: bool = True, **kwargs, ) -> None: - super(SEResNet50, self).__init__( + super().__init__( block=SEResNetBottleneck, layers=layers, groups=groups, @@ -378,7 +388,7 @@ def __init__( progress: bool = True, **kwargs, ) -> None: - super(SEResNet101, self).__init__( + super().__init__( block=SEResNetBottleneck, layers=layers, groups=groups, @@ -410,7 +420,7 @@ def __init__( progress: bool = True, **kwargs, ) -> None: - super(SEResNet152, self).__init__( + super().__init__( block=SEResNetBottleneck, layers=layers, groups=groups, @@ -443,7 +453,7 @@ def __init__( progress: bool = True, **kwargs, ) -> None: - super(SEResNext50, self).__init__( + super().__init__( block=SEResNeXtBottleneck, layers=layers, groups=groups, @@ -477,7 +487,7 @@ def __init__( progress: bool = True, **kwargs, ) -> None: - super(SEResNext101, self).__init__( + super().__init__( block=SEResNeXtBottleneck, layers=layers, groups=groups, @@ -493,7 +503,7 @@ def __init__( _load_state_dict(self, "se_resnext101_32x4d", progress) -SEnet = Senet 
= senet = SENet +SEnet = Senet = SENet SEnet154 = Senet154 = senet154 = SENet154 SEresnet50 = Seresnet50 = seresnet50 = SEResNet50 SEresnet101 = Seresnet101 = seresnet101 = SEResNet101 diff --git a/monai/networks/nets/torchvision_fc.py b/monai/networks/nets/torchvision_fc.py index 1619f877e7..d5bd6e9f57 100644 --- a/monai/networks/nets/torchvision_fc.py +++ b/monai/networks/nets/torchvision_fc.py @@ -73,7 +73,7 @@ def __init__( ) -@deprecated(since="0.6.0", removed="0.7.0", msg_suffix="Please consider using `TorchVisionFCModel` instead.") +@deprecated(since="0.6.0", removed="0.8.0", msg_suffix="Please consider using `TorchVisionFCModel` instead.") class TorchVisionFullyConvModel(TorchVisionFCModel): """ Customize TorchVision models to replace fully connected layer by convolutional layer. diff --git a/monai/networks/nets/transchex.py b/monai/networks/nets/transchex.py new file mode 100644 index 0000000000..3e57d868e9 --- /dev/null +++ b/monai/networks/nets/transchex.py @@ -0,0 +1,377 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import os +import shutil +import tarfile +import tempfile +from typing import Sequence, Tuple, Union + +import torch +from torch import nn + +from monai.utils import optional_import + +transformers = optional_import("transformers") +load_tf_weights_in_bert = optional_import("transformers", name="load_tf_weights_in_bert") +cached_path = optional_import("transformers.file_utils", name="cached_path")[0] +BertEmbeddings = optional_import("transformers.models.bert.modeling_bert", name="BertEmbeddings")[0] +BertLayer = optional_import("transformers.models.bert.modeling_bert", name="BertLayer")[0] + +__all__ = ["BertPreTrainedModel", "BertAttention", "BertOutput", "BertMixedLayer", "Pooler", "MultiModal", "Transchex"] + + +class BertPreTrainedModel(nn.Module): + """Module to load BERT pre-trained weights. 
+ Based on: + LXMERT + https://github.com/airsplay/lxmert + BERT (pytorch-transformer) + https://github.com/huggingface/transformers + """ + + def __init__(self, *inputs, **kwargs) -> None: + super().__init__() + + def init_bert_weights(self, module): + if isinstance(module, (nn.Linear, nn.Embedding)): + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + elif isinstance(module, torch.nn.LayerNorm): + module.bias.data.zero_() + module.weight.data.fill_(1.0) + if isinstance(module, nn.Linear) and module.bias is not None: + module.bias.data.zero_() + + @classmethod + def from_pretrained( + cls, + num_language_layers, + num_vision_layers, + num_mixed_layers, + bert_config, + state_dict=None, + cache_dir=None, + from_tf=False, + *inputs, + **kwargs, + ): + archive_file = "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz" + resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir) + tempdir = None + if os.path.isdir(resolved_archive_file) or from_tf: + serialization_dir = resolved_archive_file + else: + tempdir = tempfile.mkdtemp() + with tarfile.open(resolved_archive_file, "r:gz") as archive: + archive.extractall(tempdir) + serialization_dir = tempdir + model = cls(num_language_layers, num_vision_layers, num_mixed_layers, bert_config, *inputs, **kwargs) + if state_dict is None and not from_tf: + weights_path = os.path.join(serialization_dir, "pytorch_model.bin") + state_dict = torch.load(weights_path, map_location="cpu" if not torch.cuda.is_available() else None) + if tempdir: + shutil.rmtree(tempdir) + if from_tf: + weights_path = os.path.join(serialization_dir, "model.ckpt") + return load_tf_weights_in_bert(model, weights_path) + old_keys = [] + new_keys = [] + for key in state_dict.keys(): + new_key = None + if "gamma" in key: + new_key = key.replace("gamma", "weight") + if "beta" in key: + new_key = key.replace("beta", "bias") + if new_key: + old_keys.append(key) + new_keys.append(new_key) + for old_key, new_key in zip(old_keys, new_keys): + state_dict[new_key] = state_dict.pop(old_key) + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + metadata = getattr(state_dict, "_metadata", None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + def load(module, prefix=""): + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs + ) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + ".") + + start_prefix = "" + if not hasattr(model, "bert") and any(s.startswith("bert.") for s in state_dict.keys()): + start_prefix = "bert." + load(model, prefix=start_prefix) + return model + + +class BertAttention(nn.Module): + """BERT attention layer. 
+ Based on: BERT (pytorch-transformer) + https://github.com/huggingface/transformers + """ + + def __init__(self, config) -> None: + super().__init__() + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int(config.hidden_size / config.num_attention_heads) + self.all_head_size = self.num_attention_heads * self.attention_head_size + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + self.dropout = nn.Dropout(config.attention_probs_dropout_prob) + + def transpose_for_scores(self, x): + new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) + x = x.view(*new_x_shape) + return x.permute(0, 2, 1, 3) + + def forward(self, hidden_states, context): + mixed_query_layer = self.query(hidden_states) + mixed_key_layer = self.key(context) + mixed_value_layer = self.value(context) + query_layer = self.transpose_for_scores(mixed_query_layer) + key_layer = self.transpose_for_scores(mixed_key_layer) + value_layer = self.transpose_for_scores(mixed_value_layer) + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + attention_scores = attention_scores / math.sqrt(self.attention_head_size) + attention_probs = self.dropout(nn.Softmax(dim=-1)(attention_scores)) + context_layer = torch.matmul(attention_probs, value_layer) + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(*new_context_layer_shape) + return context_layer + + +class BertOutput(nn.Module): + """BERT output layer. + Based on: BERT (pytorch-transformer) + https://github.com/huggingface/transformers + """ + + def __init__(self, config) -> None: + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = torch.nn.LayerNorm(config.hidden_size, eps=1e-12) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward(self, hidden_states, input_tensor): + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class BertMixedLayer(nn.Module): + """BERT cross attention layer. + Based on: BERT (pytorch-transformer) + https://github.com/huggingface/transformers + """ + + def __init__(self, config) -> None: + super().__init__() + self.att = BertAttention(config) + self.output = BertOutput(config) + + def forward(self, x, y): + output = self.att(x, y) + return self.output(output, x) + + +class Pooler(nn.Module): + """BERT pooler layer. + Based on: BERT (pytorch-transformer) + https://github.com/huggingface/transformers + """ + + def __init__(self, hidden_size) -> None: + super().__init__() + self.dense = nn.Linear(hidden_size, hidden_size) + self.activation = nn.Tanh() + + def forward(self, hidden_states): + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class MultiModal(BertPreTrainedModel): + """ + Multimodal Transformers From Pretrained BERT Weights. + """ + + def __init__( + self, num_language_layers: int, num_vision_layers: int, num_mixed_layers: int, bert_config: dict # type: ignore + ) -> None: + """ + Args: + num_language_layers: number of language transformer layers.
+ num_vision_layers: number of vision transformer layers. + num_mixed_layers: number of mixed transformer layers. + bert_config: configuration for bert language transformer encoder. + + """ + super().__init__() + self.config = type("obj", (object,), bert_config) + self.embeddings = BertEmbeddings(self.config) + self.language_encoder = nn.ModuleList([BertLayer(self.config) for _ in range(num_language_layers)]) + self.vision_encoder = nn.ModuleList([BertLayer(self.config) for _ in range(num_vision_layers)]) + self.mixed_encoder = nn.ModuleList([BertMixedLayer(self.config) for _ in range(num_mixed_layers)]) + self.apply(self.init_bert_weights) + + def forward(self, input_ids, token_type_ids=None, vision_feats=None, attention_mask=None): + language_features = self.embeddings(input_ids, token_type_ids) + for layer in self.vision_encoder: + hidden_state_vision = layer(vision_feats, None)[0] + for layer in self.language_encoder: + hidden_state_language = layer(language_features, attention_mask)[0] + for layer in self.mixed_encoder: + hidden_state_mixed = layer(hidden_state_language, hidden_state_vision) + return hidden_state_mixed + + +class Transchex(torch.nn.Module): + """ + TransChex based on: "Hatamizadeh et al., TransCheX: Self-Supervised Pretraining of Vision-Language + Transformers for Chest X-ray Analysis" + """ + + def __init__( + self, + in_channels: int, + img_size: Union[Sequence[int], int], # type: ignore + patch_size: Union[int, Tuple[int, int]], # type: ignore + num_classes: int, + num_language_layers: int, + num_vision_layers: int, + num_mixed_layers: int, + hidden_size: int = 768, + drop_out: float = 0.0, + attention_probs_dropout_prob: float = 0.1, + gradient_checkpointing: bool = False, + hidden_act: str = "gelu", + hidden_dropout_prob: float = 0.1, + initializer_range: float = 0.02, + intermediate_size: int = 3072, + layer_norm_eps: float = 1e-12, + max_position_embeddings: int = 512, + model_type: str = "bert", + num_attention_heads: int = 12, + num_hidden_layers: int = 12, + pad_token_id: int = 0, + position_embedding_type: str = "absolute", + transformers_version: str = "4.10.2", + type_vocab_size: int = 2, + use_cache: bool = True, + vocab_size: int = 30522, + chunk_size_feed_forward: int = 0, + is_decoder: bool = False, + add_cross_attention: bool = False, + ) -> None: + """ + Args: + in_channels: dimension of input channels. + img_size: dimension of input image. + patch_size: dimension of patch size. + num_classes: number of classes if classification is used. + num_language_layers: number of language transformer layers. + num_vision_layers: number of vision transformer layers. + num_mixed_layers: number of mixed transformer layers. + drop_out: fraction of the input units to drop. + bert_config: configuration for bert language transformer encoder. + + Examples: + + ..
code-block:: python + + # for 3-channel with image size of (224,224), patch size of (32,32), 3 classes, 2 language layers, + # 2 vision layers, 2 mixed modality layers and dropout of 0.2 in the classification head + net = Transchex(in_channels=3, + img_size=(224, 224), + patch_size=(32, 32), + num_classes=3, + num_language_layers=2, + num_vision_layers=2, + num_mixed_layers=2, + drop_out=0.2) + + """ + super().__init__() + bert_config = { + "attention_probs_dropout_prob": attention_probs_dropout_prob, + "classifier_dropout": None, + "gradient_checkpointing": gradient_checkpointing, + "hidden_act": hidden_act, + "hidden_dropout_prob": hidden_dropout_prob, + "hidden_size": hidden_size, + "initializer_range": initializer_range, + "intermediate_size": intermediate_size, + "layer_norm_eps": layer_norm_eps, + "max_position_embeddings": max_position_embeddings, + "model_type": model_type, + "num_attention_heads": num_attention_heads, + "num_hidden_layers": num_hidden_layers, + "pad_token_id": pad_token_id, + "position_embedding_type": position_embedding_type, + "transformers_version": transformers_version, + "type_vocab_size": type_vocab_size, + "use_cache": use_cache, + "vocab_size": vocab_size, + "chunk_size_feed_forward": chunk_size_feed_forward, + "is_decoder": is_decoder, + "add_cross_attention": add_cross_attention, + } + if not (0 <= drop_out <= 1): + raise ValueError("drop_out should be between 0 and 1.") + + if (img_size[0] % patch_size[0] != 0) or (img_size[1] % patch_size[1] != 0): # type: ignore + raise ValueError("img_size should be divisible by patch_size.") + + self.multimodal = MultiModal.from_pretrained( + num_language_layers=num_language_layers, + num_vision_layers=num_vision_layers, + num_mixed_layers=num_mixed_layers, + bert_config=bert_config, + ) + + self.patch_size = patch_size + self.num_patches = (img_size[0] // self.patch_size[0]) * (img_size[1] // self.patch_size[1]) # type: ignore + self.vision_proj = nn.Conv2d( + in_channels=in_channels, + out_channels=hidden_size, + kernel_size=self.patch_size, # type: ignore + stride=self.patch_size, # type: ignore + ) + self.norm_vision_pos = nn.LayerNorm(hidden_size) + self.pos_embed_vis = nn.Parameter(torch.zeros(1, self.num_patches, hidden_size)) + self.pooler = Pooler(hidden_size=hidden_size) + self.drop = torch.nn.Dropout(drop_out) + self.cls_head = torch.nn.Linear(hidden_size, num_classes) + + def forward(self, input_ids, token_type_ids=None, vision_feats=None): + attention_mask = torch.ones_like(input_ids).unsqueeze(1).unsqueeze(2) + attention_mask = attention_mask.to(dtype=next(self.parameters()).dtype) + attention_mask = (1.0 - attention_mask) * -10000.0 + vision_feats = self.vision_proj(vision_feats).flatten(2).transpose(1, 2) + vision_feats = self.norm_vision_pos(vision_feats) + vision_feats = vision_feats + self.pos_embed_vis + hidden_state_mixed = self.multimodal( + input_ids=input_ids, token_type_ids=token_type_ids, vision_feats=vision_feats, attention_mask=attention_mask + ) + pooled_features = self.pooler(hidden_state_mixed) + logits = self.cls_head(self.drop(pooled_features)) + return logits diff --git a/monai/networks/nets/unet.py b/monai/networks/nets/unet.py index 70cc816fe9..c3e62776fb 100644 --- a/monai/networks/nets/unet.py +++ b/monai/networks/nets/unet.py @@ -10,7 +10,7 @@ # limitations under the License.
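The new Transchex network above can be exercised end to end with dummy inputs. The following is a sketch only, assuming `transformers` is installed and that `MultiModal.from_pretrained` can download the pretrained BERT archive; the tensor sizes are arbitrary illustrations:

    import torch
    from monai.networks.nets import Transchex

    net = Transchex(in_channels=3, img_size=(224, 224), patch_size=(32, 32), num_classes=3,
                    num_language_layers=2, num_vision_layers=2, num_mixed_layers=2, drop_out=0.2)
    input_ids = torch.randint(0, 30522, (1, 128))  # tokenized report, vocab_size defaults to 30522
    vision_feats = torch.randn(1, 3, 224, 224)     # image batch matching in_channels/img_size
    logits = net(input_ids=input_ids, vision_feats=vision_feats)
    print(logits.shape)  # torch.Size([1, 3])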
import warnings -from typing import Sequence, Tuple, Union +from typing import Optional, Sequence, Tuple, Union import torch import torch.nn as nn @@ -18,17 +18,99 @@ from monai.networks.blocks.convolutions import Convolution, ResidualUnit from monai.networks.layers.factories import Act, Norm from monai.networks.layers.simplelayers import SkipConnection -from monai.utils import alias, export +from monai.utils import alias, deprecated_arg, export -__all__ = ["UNet", "Unet", "unet"] +__all__ = ["UNet", "Unet"] @export("monai.networks.nets") @alias("Unet") class UNet(nn.Module): + """ + Enhanced version of UNet which has residual units implemented with the ResidualUnit class. + The residual part uses a convolution to change the input dimensions to match the output dimensions + if this is necessary but will use nn.Identity if not. + Refer to: https://link.springer.com/chapter/10.1007/978-3-030-12029-0_40. + + Each layer of the network has an encode and decode path with a skip connection between them. Data in the encode path + is downsampled using strided convolutions (if `strides` is given values greater than 1) and in the decode path + upsampled using strided transpose convolutions. These down or up sampling operations occur at the beginning of each + block rather than afterwards as is typical in UNet implementations. + + To further explain this, consider the first example network given below. This network has 3 layers with strides + of 2 for each of the middle layers (the last layer is the bottom connection which does not down/up sample). Input + data to this network is immediately reduced in the spatial dimensions by a factor of 2 by the first convolution of + the residual unit defining the first layer of the encode part. The last layer of the decode part will upsample its + input (data from the previous layer concatenated with data from the skip connection) in the first convolution. This + ensures the final output of the network has the same shape as the input. + + Padding values for the convolutions are chosen to ensure output sizes are even divisors/multiples of the input + sizes if the `strides` value for a layer is a factor of the input sizes. A typical case is to use `strides` values + of 2 and inputs that are multiples of powers of 2. An input can thus be downsampled evenly however many times its + dimensions can be divided by 2, so for the example network inputs would have to have dimensions that are multiples + of 4. In the second example network given below the input to the bottom layer will have shape (1, 64, 15, 15) for + an input of shape (1, 1, 240, 240) demonstrating the input being reduced in size spatially by 2**4. + + Args: + spatial_dims: number of spatial dimensions. + in_channels: number of input channels. + out_channels: number of output channels. + channels: sequence of channels. Top block first. The length of `channels` should be no less than 2. + strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`. + kernel_size: convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. + up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. + num_res_units: number of residual units. Defaults to 0. + act: activation type and arguments. Defaults to PReLU. + norm: feature normalization type and arguments. Defaults to instance norm. + dropout: dropout ratio. Defaults to no dropout.
+ bias: whether to have a bias term in convolution blocks. Defaults to True. + According to `Performance Tuning Guide `_, + if a conv layer is directly followed by a batch norm layer, bias should be False. + + Examples:: + + from monai.networks.nets import UNet + + # 3 layer network with down/upsampling by a factor of 2 at each layer with 2-convolution residual units + net = UNet( + spatial_dims=2, + in_channels=1, + out_channels=1, + channels=(4, 8, 16), + strides=(2, 2), + num_res_units=2 + ) + + # 5 layer network with simple convolution/normalization/dropout/activation blocks defining the layers + net=UNet( + spatial_dims=2, + in_channels=1, + out_channels=1, + channels=(4, 8, 16, 32, 64), + strides=(2, 2, 2, 2), + ) + + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. + + Note: The acceptable spatial size of input data depends on the parameters of the network, + to set appropriate spatial size, please check the tutorial for more details: + https://github.com/Project-MONAI/tutorials/blob/master/modules/UNet_input_size_constrains.ipynb. + Typically, when using a stride of 2 in down / up sampling, the output dimensions are either half of the + input when downsampling, or twice when upsampling. In this case with N numbers of layers in the network, + the inputs must have spatial dimensions that are all multiples of 2^N. + Usually, applying `resize`, `pad` or `crop` transforms can help adjust the spatial size of input data. + + """ + + @deprecated_arg( + name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead." + ) def __init__( self, - dimensions: int, + spatial_dims: int, in_channels: int, out_channels: int, channels: Sequence[int], @@ -40,40 +122,9 @@ def __init__( norm: Union[Tuple, str] = Norm.INSTANCE, dropout: float = 0.0, bias: bool = True, + dimensions: Optional[int] = None, ) -> None: - """ - Enhanced version of UNet which has residual units implemented with the ResidualUnit class. - The residual part uses a convolution to change the input dimensions to match the output dimensions - if this is necessary but will use nn.Identity if not. - Refer to: https://link.springer.com/chapter/10.1007/978-3-030-12029-0_40. - - Args: - dimensions: number of spatial dimensions. - in_channels: number of input channels. - out_channels: number of output channels. - channels: sequence of channels. Top block first. The length of `channels` should be no less than 2. - strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`. - kernel_size: convolution kernel size, the value(s) should be odd. If sequence, - its length should equal to dimensions. Defaults to 3. - up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence, - its length should equal to dimensions. Defaults to 3. - num_res_units: number of residual units. Defaults to 0. - act: activation type and arguments. Defaults to PReLU. - norm: feature normalization type and arguments. Defaults to instance norm. - dropout: dropout ratio. Defaults to no dropout. - bias: whether to have a bias term in convolution blocks. Defaults to True. - According to `Performance Tuning Guide `_, - if a conv layer is directly followed by a batch norm layer, bias should be False. 
- - Note: The acceptable spatial size of input data depends on the parameters of the network, - to set appropriate spatial size, please check the tutorial for more details: - https://github.com/Project-MONAI/tutorials/blob/master/modules/UNet_input_size_constrains.ipynb. - Typically, when using a stride of 2 in down / up sampling, the output dimensions are either half of the - input when downsampling, or twice when upsampling. In this case with N numbers of layers in the network, - the inputs must have spatial dimensions that are all multiples of 2^N. - Usually, applying `resize`, `pad` or `crop` transforms can help adjust the spatial size of input data. - """ super().__init__() if len(channels) < 2: @@ -83,14 +134,16 @@ def __init__( raise ValueError("the length of `strides` should equal to `len(channels) - 1`.") if delta > 0: warnings.warn(f"`len(strides) > len(channels) - 1`, the last {delta} values of strides will not be used.") + if dimensions is not None: + spatial_dims = dimensions if isinstance(kernel_size, Sequence): - if len(kernel_size) != dimensions: + if len(kernel_size) != spatial_dims: raise ValueError("the length of `kernel_size` should equal to `dimensions`.") if isinstance(up_kernel_size, Sequence): - if len(up_kernel_size) != dimensions: + if len(up_kernel_size) != spatial_dims: raise ValueError("the length of `up_kernel_size` should equal to `dimensions`.") - self.dimensions = dimensions + self.dimensions = spatial_dims self.in_channels = in_channels self.out_channels = out_channels self.channels = channels @@ -145,8 +198,10 @@ def _get_down_layer(self, in_channels: int, out_channels: int, strides: int, is_ strides: convolution stride. is_top: True if this is the top block. """ + mod: nn.Module if self.num_res_units > 0: - return ResidualUnit( + + mod = ResidualUnit( self.dimensions, in_channels, out_channels, @@ -158,7 +213,8 @@ def _get_down_layer(self, in_channels: int, out_channels: int, strides: int, is_ dropout=self.dropout, bias=self.bias, ) - return Convolution( + return mod + mod = Convolution( self.dimensions, in_channels, out_channels, @@ -169,6 +225,7 @@ def _get_down_layer(self, in_channels: int, out_channels: int, strides: int, is_ dropout=self.dropout, bias=self.bias, ) + return mod def _get_bottom_layer(self, in_channels: int, out_channels: int) -> nn.Module: """ @@ -225,4 +282,4 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return x -Unet = unet = UNet +Unet = UNet diff --git a/monai/networks/nets/unetr.py b/monai/networks/nets/unetr.py index 9990cb6643..b75bc15892 100644 --- a/monai/networks/nets/unetr.py +++ b/monai/networks/nets/unetr.py @@ -70,7 +70,7 @@ def __init__( """ - super(UNETR, self).__init__() + super().__init__() if not (0 <= dropout_rate <= 1): raise ValueError("dropout_rate should be between 0 and 1.") diff --git a/monai/networks/nets/varautoencoder.py b/monai/networks/nets/varautoencoder.py index 7f54890992..b4ef8be93d 100644 --- a/monai/networks/nets/varautoencoder.py +++ b/monai/networks/nets/varautoencoder.py @@ -19,16 +19,65 @@ from monai.networks.layers.convutils import calculate_out_shape, same_padding from monai.networks.layers.factories import Act, Norm from monai.networks.nets import AutoEncoder +from monai.utils import deprecated_arg __all__ = ["VarAutoEncoder"] class VarAutoEncoder(AutoEncoder): - """Variational Autoencoder based on the paper - https://arxiv.org/abs/1312.6114""" + """ + Variational Autoencoder based on the paper - https://arxiv.org/abs/1312.6114 + + Args: + spatial_dims: number of spatial 
dimensions. + in_shape: shape of input data starting with channel dimension. + out_channels: number of output channels. + latent_size: size of the latent variable. + channels: sequence of channels. Top block first. The length of `channels` should be no less than 2. + strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`. + kernel_size: convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. + up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence, + its length should equal to dimensions. Defaults to 3. + num_res_units: number of residual units. Defaults to 0. + inter_channels: sequence of channels defining the blocks in the intermediate layer between encode and decode. + inter_dilations: defines the dilation value for each block of the intermediate layer. Defaults to 1. + num_inter_units: number of residual units for each block of the intermediate layer. Defaults to 0. + act: activation type and arguments. Defaults to PReLU. + norm: feature normalization type and arguments. Defaults to instance norm. + dropout: dropout ratio. Defaults to no dropout. + bias: whether to have a bias term in convolution blocks. Defaults to True. + According to `Performance Tuning Guide `_, + if a conv layer is directly followed by a batch norm layer, bias should be False. + + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. + + Examples:: + + from monai.networks.nets import VarAutoEncoder + + # 3 layer network accepting images with dimensions (1, 32, 32) and using a latent vector with 2 values + model = VarAutoEncoder( + spatial_dims=2, + in_shape=(1, 32, 32), # input shape, first value is the channel dimension + out_channels=1, + latent_size=2, + channels=(16, 32, 64), + strides=(1, 2, 2), + ) + + see also: + - Variational autoencoder network with MedNIST Dataset + https://github.com/Project-MONAI/tutorials/blob/master/modules/varautoencoder_mednist.ipynb + """ + @deprecated_arg( + name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead."
+ ) def __init__( self, - dimensions: int, + spatial_dims: int, in_shape: Sequence[int], out_channels: int, latent_size: int, @@ -44,15 +93,18 @@ def __init__( norm: Union[Tuple, str] = Norm.INSTANCE, dropout: Optional[Union[Tuple, str, float]] = None, bias: bool = True, + dimensions: Optional[int] = None, ) -> None: self.in_channels, *self.in_shape = in_shape self.latent_size = latent_size self.final_size = np.asarray(self.in_shape, dtype=int) + if dimensions is not None: + spatial_dims = dimensions super().__init__( - dimensions, + spatial_dims, self.in_channels, out_channels, channels, diff --git a/monai/networks/nets/vit.py b/monai/networks/nets/vit.py index 3a5d94cc37..2707e5ad1d 100644 --- a/monai/networks/nets/vit.py +++ b/monai/networks/nets/vit.py @@ -18,6 +18,8 @@ from monai.networks.blocks.patchembedding import PatchEmbeddingBlock from monai.networks.blocks.transformerblock import TransformerBlock +__all__ = ["ViT"] + class ViT(nn.Module): """ @@ -68,7 +70,7 @@ def __init__( """ - super(ViT, self).__init__() + super().__init__() if not (0 <= dropout_rate <= 1): raise ValueError("dropout_rate should be between 0 and 1.") diff --git a/monai/networks/nets/vnet.py b/monai/networks/nets/vnet.py index 72f3290a89..1b1d3bfba7 100644 --- a/monai/networks/nets/vnet.py +++ b/monai/networks/nets/vnet.py @@ -30,11 +30,11 @@ def get_acti_layer(act: Union[Tuple[str, Dict], str], nchan: int = 0): class LUConv(nn.Module): def __init__(self, spatial_dims: int, nchan: int, act: Union[Tuple[str, Dict], str], bias: bool = False): - super(LUConv, self).__init__() + super().__init__() self.act_function = get_acti_layer(act, nchan) self.conv_block = Convolution( - dimensions=spatial_dims, + spatial_dims=spatial_dims, in_channels=nchan, out_channels=nchan, kernel_size=5, @@ -65,7 +65,7 @@ def __init__( act: Union[Tuple[str, Dict], str], bias: bool = False, ): - super(InputTransition, self).__init__() + super().__init__() if 16 % in_channels != 0: raise ValueError(f"16 should be divisible by in_channels, got in_channels={in_channels}.") @@ -74,7 +74,7 @@ def __init__( self.in_channels = in_channels self.act_function = get_acti_layer(act, 16) self.conv_block = Convolution( - dimensions=spatial_dims, + spatial_dims=spatial_dims, in_channels=in_channels, out_channels=16, kernel_size=5, @@ -102,7 +102,7 @@ def __init__( dropout_dim: int = 3, bias: bool = False, ): - super(DownTransition, self).__init__() + super().__init__() conv_type: Type[Union[nn.Conv2d, nn.Conv3d]] = Conv[Conv.CONV, spatial_dims] norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims] @@ -138,7 +138,7 @@ def __init__( dropout_prob: Optional[float] = None, dropout_dim: int = 3, ): - super(UpTransition, self).__init__() + super().__init__() conv_trans_type: Type[Union[nn.ConvTranspose2d, nn.ConvTranspose3d]] = Conv[Conv.CONVTRANS, spatial_dims] norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims] @@ -174,13 +174,13 @@ def __init__( act: Union[Tuple[str, Dict], str], bias: bool = False, ): - super(OutputTransition, self).__init__() + super().__init__() conv_type: Type[Union[nn.Conv2d, nn.Conv3d]] = Conv[Conv.CONV, spatial_dims] self.act_function1 = get_acti_layer(act, out_channels) self.conv_block = Convolution( - dimensions=spatial_dims, + spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels, kernel_size=5, diff --git a/monai/networks/utils.py b/monai/networks/utils.py index 9d20d2a83b..529dfbf977 100644 --- a/monai/networks/utils.py +++ 
b/monai/networks/utils.py @@ -20,6 +20,8 @@ import torch import torch.nn as nn +from monai.utils.deprecate_utils import deprecated_arg + __all__ = [ "one_hot", "slice_channels", @@ -225,9 +227,14 @@ def icnr_init(conv, upsample_factor, init=nn.init.kaiming_normal_): conv.weight.data.copy_(kernel) -def pixelshuffle(x: torch.Tensor, dimensions: int, scale_factor: int) -> torch.Tensor: +@deprecated_arg( + name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead." +) +def pixelshuffle( + x: torch.Tensor, spatial_dims: int, scale_factor: int, dimensions: Optional[int] = None +) -> torch.Tensor: """ - Apply pixel shuffle to the tensor `x` with spatial dimensions `dimensions` and scaling factor `scale_factor`. + Apply pixel shuffle to the tensor `x` with spatial dimensions `spatial_dims` and scaling factor `scale_factor`. See: Shi et al., 2016, "Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network." @@ -236,17 +243,21 @@ def pixelshuffle(x: torch.Tensor, dimensions: int, scale_factor: int) -> torch.T Args: x: Input tensor - dimensions: number of spatial dimensions, typically 2 or 3 for 2D or 3D + spatial_dims: number of spatial dimensions, typically 2 or 3 for 2D or 3D scale_factor: factor to rescale the spatial dimensions by, must be >=1 + .. deprecated:: 0.6.0 + ``dimensions`` is deprecated, use ``spatial_dims`` instead. + Returns: Reshuffled version of `x`. Raises: - ValueError: When input channels of `x` are not divisible by (scale_factor ** dimensions) + ValueError: When input channels of `x` are not divisible by (scale_factor ** spatial_dims) """ - - dim, factor = dimensions, scale_factor + if dimensions is not None: + spatial_dims = dimensions + dim, factor = spatial_dims, scale_factor input_size = list(x.size()) batch_size, channels = input_size[:2] scale_divisor = factor ** dim diff --git a/monai/optimizers/lr_finder.py b/monai/optimizers/lr_finder.py index 49d4427b3d..d1ab1b44e1 100644 --- a/monai/optimizers/lr_finder.py +++ b/monai/optimizers/lr_finder.py @@ -120,7 +120,7 @@ def __iter__(self): def __next__(self): self.run_counter += 1 - return super(ValDataLoaderIter, self).__next__() + return super().__next__() def default_image_extractor(x: Any) -> torch.Tensor: @@ -328,11 +328,7 @@ def range_test( print(f"Computing optimal learning rate, iteration {iteration + 1}/{num_iter}") # Train on batch and retrieve loss - loss = self._train_batch( - train_iter, - accumulation_steps, - non_blocking_transfer=non_blocking_transfer, - ) + loss = self._train_batch(train_iter, accumulation_steps, non_blocking_transfer=non_blocking_transfer) if val_loader: loss = self._validate(val_iter, non_blocking_transfer=non_blocking_transfer) @@ -429,11 +425,7 @@ def _validate(self, val_iter: ValDataLoaderIter, non_blocking_transfer: bool = T return running_loss / len(val_iter.dataset) - def get_lrs_and_losses( - self, - skip_start: int = 0, - skip_end: int = 0, - ) -> Tuple[list, list]: + def get_lrs_and_losses(self, skip_start: int = 0, skip_end: int = 0) -> Tuple[list, list]: """Get learning rates and their corresponding losses Args: @@ -454,9 +446,7 @@ def get_lrs_and_losses( return lrs, losses def get_steepest_gradient( - self, - skip_start: int = 0, - skip_end: int = 0, + self, skip_start: int = 0, skip_end: int = 0 ) -> Union[Tuple[float, float], Tuple[None, None]]: """Get learning rate which has steepest gradient and its corresponding loss @@ -476,14 +466,7 @@ def get_steepest_gradient( print("Failed
to compute the gradients, there might not be enough points.") return None, None - def plot( - self, - skip_start: int = 0, - skip_end: int = 0, - log_lr: bool = True, - ax=None, - steepest_lr: bool = True, - ): + def plot(self, skip_start: int = 0, skip_end: int = 0, log_lr: bool = True, ax=None, steepest_lr: bool = True): """Plots the learning rate range test. Args: diff --git a/monai/optimizers/lr_scheduler.py b/monai/optimizers/lr_scheduler.py index 9416b583f7..5ad52c5286 100644 --- a/monai/optimizers/lr_scheduler.py +++ b/monai/optimizers/lr_scheduler.py @@ -33,7 +33,7 @@ def __init__(self, optimizer: Optimizer, end_lr: float, num_iter: int, last_epoc """ self.end_lr = end_lr self.num_iter = num_iter - super(_LRSchedulerMONAI, self).__init__(optimizer, last_epoch) + super().__init__(optimizer, last_epoch) class LinearLR(_LRSchedulerMONAI): @@ -77,7 +77,7 @@ def __init__( self.warmup_steps = warmup_steps self.t_total = t_total self.cycles = cycles - super(WarmupCosineSchedule, self).__init__(optimizer, self.lr_lambda, last_epoch) + super().__init__(optimizer, self.lr_lambda, last_epoch) def lr_lambda(self, step): if step < self.warmup_steps: diff --git a/monai/optimizers/novograd.py b/monai/optimizers/novograd.py index 62e42cc9ab..6d14a055fb 100644 --- a/monai/optimizers/novograd.py +++ b/monai/optimizers/novograd.py @@ -45,28 +45,23 @@ def __init__( amsgrad: bool = False, ): if 0.0 > lr: - raise ValueError("Invalid learning rate: {}".format(lr)) + raise ValueError(f"Invalid learning rate: {lr}") if 0.0 > eps: - raise ValueError("Invalid epsilon value: {}".format(eps)) + raise ValueError(f"Invalid epsilon value: {eps}") if not 0.0 <= betas[0] < 1.0: - raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) + raise ValueError(f"Invalid beta parameter at index 0: {betas[0]}") if not 0.0 <= betas[1] < 1.0: - raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) + raise ValueError(f"Invalid beta parameter at index 1: {betas[1]}") if 0.0 > weight_decay: - raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) + raise ValueError(f"Invalid weight_decay value: {weight_decay}") defaults = dict( - lr=lr, - betas=betas, - eps=eps, - weight_decay=weight_decay, - grad_averaging=grad_averaging, - amsgrad=amsgrad, + lr=lr, betas=betas, eps=eps, weight_decay=weight_decay, grad_averaging=grad_averaging, amsgrad=amsgrad ) - super(Novograd, self).__init__(params, defaults) + super().__init__(params, defaults) def __setstate__(self, state): - super(Novograd, self).__setstate__(state) + super().__setstate__(state) for group in self.param_groups: group.setdefault("amsgrad", False) diff --git a/monai/optimizers/utils.py b/monai/optimizers/utils.py index c52ab07a04..08949912d7 100644 --- a/monai/optimizers/utils.py +++ b/monai/optimizers/utils.py @@ -47,7 +47,7 @@ def generate_param_groups( .. 
code-block:: python - net = Unet(dimensions=3, in_channels=1, out_channels=3, channels=[2, 2, 2], strides=[1, 1, 1]) + net = Unet(spatial_dims=3, in_channels=1, out_channels=3, channels=[2, 2, 2], strides=[1, 1, 1]) print(net) # print out network components to select expected items print(net.named_parameters()) # print out all the named parameters to filter out expected items params = generate_param_groups( diff --git a/monai/transforms/__init__.py b/monai/transforms/__init__.py index 2ea7e3aa63..1223254db5 100644 --- a/monai/transforms/__init__.py +++ b/monai/transforms/__init__.py @@ -18,6 +18,7 @@ CenterSpatialCrop, CropForeground, DivisiblePad, + Pad, RandCropByLabelClasses, RandCropByPosNegLabel, RandScaleCrop, @@ -48,7 +49,7 @@ DivisiblePadd, DivisiblePadD, DivisiblePadDict, - NumpyPadModeSequence, + PadModeSequence, RandCropByLabelClassesd, RandCropByLabelClassesD, RandCropByLabelClassesDict, @@ -85,12 +86,13 @@ GibbsNoise, HistogramNormalize, KSpaceSpikeNoise, - LocalPatchShuffling, MaskIntensity, NormalizeIntensity, RandAdjustContrast, RandBiasField, RandCoarseDropout, + RandCoarseShuffle, + RandCoarseTransform, RandGaussianNoise, RandGaussianSharpen, RandGaussianSmooth, @@ -143,6 +145,9 @@ RandCoarseDropoutd, RandCoarseDropoutD, RandCoarseDropoutDict, + RandCoarseShuffled, + RandCoarseShuffleD, + RandCoarseShuffleDict, RandGaussianNoised, RandGaussianNoiseD, RandGaussianNoiseDict, @@ -274,6 +279,7 @@ Affine, AffineGrid, Flip, + GridDistortion, Orientation, Rand2DElastic, Rand3DElastic, @@ -282,6 +288,7 @@ RandAxisFlip, RandDeformGrid, RandFlip, + RandGridDistortion, RandRotate, RandRotate90, RandZoom, @@ -302,6 +309,9 @@ Flipd, FlipD, FlipDict, + GridDistortiond, + GridDistortionD, + GridDistortionDict, Orientationd, OrientationD, OrientationDict, @@ -320,6 +330,9 @@ RandFlipd, RandFlipD, RandFlipDict, + RandGridDistortiond, + RandGridDistortionD, + RandGridDistortionDict, RandRotate90d, RandRotate90D, RandRotate90Dict, @@ -354,6 +367,7 @@ CastToType, ClassesToIndices, ConvertToMultiChannelBasedOnBratsClasses, + CuCIM, DataStats, EnsureChannelFirst, EnsureType, @@ -363,6 +377,7 @@ LabelToMask, Lambda, MapLabelValue, + RandCuCIM, RandLambda, RemoveRepeatedChannel, RepeatChannel, @@ -405,6 +420,9 @@ CopyItemsd, CopyItemsD, CopyItemsDict, + CuCIMd, + CuCIMD, + CuCIMDict, DataStatsd, DataStatsD, DataStatsDict, @@ -435,6 +453,9 @@ MapLabelValued, MapLabelValueD, MapLabelValueDict, + RandCuCIMd, + RandCuCIMD, + RandCuCIMDict, RandLambdad, RandLambdaD, RandLambdaDict, @@ -486,6 +507,7 @@ allow_missing_keys_mode, compute_divisible_spatial_size, convert_inverse_interp_mode, + convert_pad_mode, copypaste_arrays, create_control_grid, create_grid, @@ -518,4 +540,19 @@ weighted_patch_samples, zero_margins, ) -from .utils_pytorch_numpy_unification import in1d, moveaxis +from .utils_pytorch_numpy_unification import ( + any_np_pt, + clip, + concatenate, + cumsum, + floor_divide, + in1d, + isfinite, + maximum, + moveaxis, + nonzero, + percentile, + ravel, + unravel_index, + where, +) diff --git a/monai/transforms/compose.py b/monai/transforms/compose.py index 4bf175769b..2405530ef3 100644 --- a/monai/transforms/compose.py +++ b/monai/transforms/compose.py @@ -181,7 +181,7 @@ class OneOf(Compose): weights: probabilities corresponding to each callable in transforms. Probabilities are normalized to sum to one. - OneOf inherits from Compose and uses args map_items and unpack_items in + ``OneOf`` inherits from ``Compose`` and uses args ``map_items`` and ``unpack_items`` in the same way. 
""" @@ -204,14 +204,13 @@ def __init__( def _normalize_probabilities(self, weights): if len(weights) == 0: return weights - else: - weights = np.array(weights) - if np.any(weights < 0): - raise AssertionError("Probabilities must be greater than or equal to zero.") - if np.all(weights == 0): - raise AssertionError("At least one probability must be greater than zero.") - weights = weights / weights.sum() - return list(weights) + weights = np.array(weights) + if np.any(weights < 0): + raise AssertionError("Probabilities must be greater than or equal to zero.") + if np.all(weights == 0): + raise AssertionError("At least one probability must be greater than zero.") + weights = weights / weights.sum() + return list(weights) def flatten(self): transforms = [] @@ -232,16 +231,15 @@ def flatten(self): def __call__(self, data): if len(self.transforms) == 0: return data - else: - index = self.R.multinomial(1, self.weights).argmax() - _transform = self.transforms[index] - data = apply_transform(_transform, data, self.map_items, self.unpack_items) - # if the data is a mapping (dictionary), append the OneOf transform to the end - if isinstance(data, Mapping): - for key in data.keys(): - if key + InverseKeys.KEY_SUFFIX in data: - self.push_transform(data, key, extra_info={"index": index}) - return data + index = self.R.multinomial(1, self.weights).argmax() + _transform = self.transforms[index] + data = apply_transform(_transform, data, self.map_items, self.unpack_items) + # if the data is a mapping (dictionary), append the OneOf transform to the end + if isinstance(data, Mapping): + for key in data.keys(): + if key + InverseKeys.KEY_SUFFIX in data: + self.push_transform(data, key, extra_info={"index": index}) + return data def inverse(self, data): if len(self.transforms) == 0: diff --git a/monai/transforms/croppad/array.py b/monai/transforms/croppad/array.py index 74f556cc1a..cc8dd677da 100644 --- a/monai/transforms/croppad/array.py +++ b/monai/transforms/croppad/array.py @@ -22,11 +22,12 @@ from torch.nn.functional import pad as pad_pt from monai.config import IndexSelection -from monai.config.type_definitions import NdarrayTensor +from monai.config.type_definitions import NdarrayOrTensor from monai.data.utils import get_random_patch, get_valid_patch_size from monai.transforms.transform import Randomizable, Transform from monai.transforms.utils import ( compute_divisible_spatial_size, + convert_pad_mode, generate_label_classes_crop_centers, generate_pos_neg_label_crop_centers, generate_spatial_bounding_box, @@ -35,9 +36,18 @@ map_classes_to_indices, weighted_patch_samples, ) -from monai.utils import Method, NumpyPadMode, ensure_tuple, ensure_tuple_rep, fall_back_tuple, look_up_option +from monai.transforms.utils_pytorch_numpy_unification import floor_divide, maximum +from monai.utils import ( + Method, + NumpyPadMode, + PytorchPadMode, + ensure_tuple, + ensure_tuple_rep, + fall_back_tuple, + look_up_option, +) from monai.utils.enums import TransformBackends -from monai.utils.type_conversion import convert_data_type +from monai.utils.type_conversion import convert_data_type, convert_to_dst_type __all__ = [ "SpatialPad", @@ -61,16 +71,18 @@ class Pad(Transform): """ Perform padding for a given an amount of padding in each dimension. - If input is `torch.Tensor` and mode is `constant`, `torch.nn.functional.pad` will be used. - Otherwise, `np.pad` will be used (input converted to `np.ndarray` if necessary). - Uses np.pad so in practice, a mode needs to be provided. 
See numpy.lib.arraypad.pad - for additional details. + If input is `torch.Tensor`, `torch.nn.functional.pad` will be used, otherwise, `np.pad` will be used. + Args: to_pad: the amount to be padded in each dimension [(low_H, high_H), (low_W, high_W), ...]. - mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. One of the listed string values or a user supplied function. Defaults to ``"constant"``. See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html + kwargs: other arguments for the `np.pad` or `torch.pad` function. + note that `np.pad` treats channel dimension as the first dimension. """ backend = [TransformBackends.TORCH, TransformBackends.NUMPY] @@ -78,43 +90,44 @@ class Pad(Transform): def __init__( self, to_pad: List[Tuple[int, int]], - mode: Union[NumpyPadMode, str, None] = NumpyPadMode.CONSTANT, - **np_kwargs, + mode: Union[NumpyPadMode, PytorchPadMode, str] = NumpyPadMode.CONSTANT, + **kwargs, ) -> None: self.to_pad = to_pad - self.mode = mode or NumpyPadMode.CONSTANT - self.np_kwargs = np_kwargs + self.mode = mode + self.kwargs = kwargs @staticmethod - def _np_pad(img: np.ndarray, all_pad_width, mode, **np_kwargs) -> np.ndarray: - img_np, *_ = convert_data_type(img, np.ndarray) - return np.pad(img_np, all_pad_width, mode=mode, **np_kwargs) # type: ignore + def _np_pad(img: np.ndarray, all_pad_width, mode, **kwargs) -> np.ndarray: + return np.pad(img, all_pad_width, mode=mode, **kwargs) # type: ignore @staticmethod - def _pt_pad(img: torch.Tensor, all_pad_width, mode, **np_kwargs) -> torch.Tensor: - pt_pad_width = [val for sublist in all_pad_width for val in sublist[::-1]][::-1] - return pad_pt(img, pt_pad_width, mode=mode, **np_kwargs) + def _pt_pad(img: torch.Tensor, all_pad_width, mode, **kwargs) -> torch.Tensor: + pt_pad_width = [val for sublist in all_pad_width[1:] for val in sublist[::-1]][::-1] + # torch.pad expects `[B, C, H, W, [D]]` shape + return pad_pt(img.unsqueeze(0), pt_pad_width, mode=mode, **kwargs).squeeze(0) - def __call__(self, img: NdarrayTensor, mode: Optional[Union[NumpyPadMode, str]] = None) -> NdarrayTensor: + def __call__( + self, img: NdarrayOrTensor, mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None + ) -> NdarrayOrTensor: """ Args: img: data to be transformed, assuming `img` is channel-first and padding doesn't apply to the channel dim. - mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} - One of the listed string values or a user supplied function. Defaults to ``self.mode``. - See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"`` or ``"circular"``}. + One of the listed string values or a user supplied function. 
Defaults to `self.mode`. + See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html + """ if not np.asarray(self.to_pad).any(): # all zeros, skip padding return img - mode = mode or self.mode - mode = mode.value if isinstance(mode, NumpyPadMode) else mode - if isinstance(img, torch.Tensor) and mode == "constant" and not self.np_kwargs: - pad = self._pt_pad - else: - pad = self._np_pad # type: ignore - return pad(img, self.to_pad, mode, **self.np_kwargs) + mode = convert_pad_mode(dst=img, mode=mode or self.mode).value + pad = self._pt_pad if isinstance(img, torch.Tensor) else self._np_pad + return pad(img, self.to_pad, mode, **self.kwargs) # type: ignore class SpatialPad(Transform): @@ -135,12 +148,14 @@ class SpatialPad(Transform): `spatial_size=[32, 25, -1]`, the spatial size of output data will be [32, 30, 30]. method: {``"symmetric"``, ``"end"``} Pad image symmetrically on every side or only pad at the end sides. Defaults to ``"symmetric"``. - mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. One of the listed string values or a user supplied function. Defaults to ``"constant"``. See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html - np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension. - more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html + kwargs: other arguments for the `np.pad` or `torch.pad` function. + note that `np.pad` treats channel dimension as the first dimension. """ @@ -150,13 +165,13 @@ def __init__( self, spatial_size: Union[Sequence[int], int], method: Union[Method, str] = Method.SYMMETRIC, - mode: Union[NumpyPadMode, str] = NumpyPadMode.CONSTANT, - **np_kwargs, + mode: Union[NumpyPadMode, PytorchPadMode, str] = NumpyPadMode.CONSTANT, + **kwargs, ) -> None: self.spatial_size = spatial_size self.method: Method = look_up_option(method, Method) - self.mode: NumpyPadMode = look_up_option(mode, NumpyPadMode) - self.np_kwargs = np_kwargs + self.mode = mode + self.kwargs = kwargs def _determine_data_pad_width(self, data_shape: Sequence[int]) -> List[Tuple[int, int]]: spatial_size = fall_back_tuple(self.spatial_size, data_shape) @@ -168,15 +183,20 @@ def _determine_data_pad_width(self, data_shape: Sequence[int]) -> List[Tuple[int return pad_width return [(0, max(sp_i - data_shape[i], 0)) for i, sp_i in enumerate(spatial_size)] - def __call__(self, img: NdarrayTensor, mode: Optional[Union[NumpyPadMode, str]] = None) -> NdarrayTensor: + def __call__( + self, img: NdarrayOrTensor, mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None + ) -> NdarrayOrTensor: """ Args: img: data to be transformed, assuming `img` is channel-first and padding doesn't apply to the channel dim. - mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} - One of the listed string values or a user supplied function. 
Defaults to ``self.mode``. + mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. + One of the listed string values or a user supplied function. Defaults to `self.mode`. See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html + """ data_pad_width = self._determine_data_pad_width(img.shape[1:]) all_pad_width = [(0, 0)] + data_pad_width @@ -184,8 +204,7 @@ def __call__(self, img: NdarrayTensor, mode: Optional[Union[NumpyPadMode, str]] # all zeros, skip padding return img - mode = look_up_option(mode or self.mode, NumpyPadMode) - padder = Pad(all_pad_width, mode, **self.np_kwargs) + padder = Pad(all_pad_width, mode or self.mode, **self.kwargs) return padder(img) @@ -204,13 +223,14 @@ class BorderPad(Transform): for example, image shape(CHW) is [1, 4, 4], spatial_border is [1, 2, 3, 4], pad top of H dim with 1, pad bottom of H dim with 2, pad left of W dim with 3, pad right of W dim with 4. the result shape is [1, 7, 11]. - - mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. One of the listed string values or a user supplied function. Defaults to ``"constant"``. See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html - np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension. - more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html + kwargs: other arguments for the `np.pad` or `torch.pad` function. + note that `np.pad` treats channel dimension as the first dimension. """ @@ -219,22 +239,26 @@ class BorderPad(Transform): def __init__( self, spatial_border: Union[Sequence[int], int], - mode: Union[NumpyPadMode, str] = NumpyPadMode.CONSTANT, - **np_kwargs, + mode: Union[NumpyPadMode, PytorchPadMode, str] = NumpyPadMode.CONSTANT, + **kwargs, ) -> None: self.spatial_border = spatial_border - self.mode: NumpyPadMode = look_up_option(mode, NumpyPadMode) - self.np_kwargs = np_kwargs + self.mode = mode + self.kwargs = kwargs - def __call__(self, img: NdarrayTensor, mode: Optional[Union[NumpyPadMode, str]] = None) -> NdarrayTensor: + def __call__( + self, img: NdarrayOrTensor, mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None + ) -> NdarrayOrTensor: """ Args: img: data to be transformed, assuming `img` is channel-first and padding doesn't apply to the channel dim. - mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} - One of the listed string values or a user supplied function. Defaults to ``self.mode``. 
+ mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. + One of the listed string values or a user supplied function. Defaults to `self.mode`. See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html Raises: ValueError: When ``self.spatial_border`` does not contain ints. @@ -261,8 +285,7 @@ def __call__(self, img: NdarrayTensor, mode: Optional[Union[NumpyPadMode, str]] ) all_pad_width = [(0, 0)] + data_pad_width - mode = look_up_option(mode or self.mode, NumpyPadMode) - padder = Pad(all_pad_width, mode, **self.np_kwargs) + padder = Pad(all_pad_width, mode or self.mode, **self.kwargs) return padder(img) @@ -276,48 +299,50 @@ class DivisiblePad(Transform): def __init__( self, k: Union[Sequence[int], int], - mode: Union[NumpyPadMode, str] = NumpyPadMode.CONSTANT, + mode: Union[NumpyPadMode, PytorchPadMode, str] = NumpyPadMode.CONSTANT, method: Union[Method, str] = Method.SYMMETRIC, - **np_kwargs, + **kwargs, ) -> None: """ Args: k: the target k for each spatial dimension. if `k` is negative or 0, the original size is preserved. if `k` is an int, the same `k` be applied to all the input spatial dimensions. - mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. One of the listed string values or a user supplied function. Defaults to ``"constant"``. See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html method: {``"symmetric"``, ``"end"``} Pad image symmetrically on every side or only pad at the end sides. Defaults to ``"symmetric"``. - np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension. - more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + kwargs: other arguments for the `np.pad` or `torch.pad` function. + note that `np.pad` treats channel dimension as the first dimension. See also :py:class:`monai.transforms.SpatialPad` """ self.k = k self.mode: NumpyPadMode = NumpyPadMode(mode) self.method: Method = Method(method) - self.np_kwargs = np_kwargs + self.kwargs = kwargs - def __call__(self, img: NdarrayTensor, mode: Optional[Union[NumpyPadMode, str]] = None) -> NdarrayTensor: + def __call__( + self, img: NdarrayOrTensor, mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None + ) -> NdarrayOrTensor: """ Args: img: data to be transformed, assuming `img` is channel-first and padding doesn't apply to the channel dim. - mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} - One of the listed string values or a user supplied function. Defaults to ``self.mode``. 
+ mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. + One of the listed string values or a user supplied function. Defaults to `self.mode`. See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html + """ new_size = compute_divisible_spatial_size(spatial_shape=img.shape[1:], k=self.k) - spatial_pad = SpatialPad( - spatial_size=new_size, - method=self.method, - mode=mode or self.mode, - **self.np_kwargs, - ) + spatial_pad = SpatialPad(spatial_size=new_size, method=self.method, mode=mode or self.mode, **self.kwargs) return spatial_pad(img) @@ -336,13 +361,16 @@ class SpatialCrop(Transform): - the start and end coordinates of the ROI """ + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + def __init__( self, - roi_center: Union[Sequence[int], np.ndarray, None] = None, - roi_size: Union[Sequence[int], np.ndarray, None] = None, - roi_start: Union[Sequence[int], np.ndarray, None] = None, - roi_end: Union[Sequence[int], np.ndarray, None] = None, + roi_center: Union[Sequence[int], NdarrayOrTensor, None] = None, + roi_size: Union[Sequence[int], NdarrayOrTensor, None] = None, + roi_start: Union[Sequence[int], NdarrayOrTensor, None] = None, + roi_end: Union[Sequence[int], NdarrayOrTensor, None] = None, roi_slices: Optional[Sequence[slice]] = None, + allow_smaller: bool = False, ) -> None: """ Args: @@ -353,29 +381,42 @@ def __init__( roi_end: voxel coordinates for end of the crop ROI, if a coordinate is out of image, use the end coordinate of image. roi_slices: list of slices for each of the spatial dimensions. + allow_smaller: if `False`, an exception will be raised if the image is smaller than + the requested ROI in any dimension. If `True`, any smaller dimensions will remain + unchanged. 
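A minimal usage sketch of the ROI arguments above, assuming the NdarrayOrTensor typing this hunk introduces (shapes are illustrative; `allow_smaller` is left at its default):
# sketch: SpatialCrop now accepts numpy or torch inputs and returns the same type
import numpy as np
import torch
from monai.transforms import SpatialCrop
cropper = SpatialCrop(roi_center=(8, 8), roi_size=(4, 4))
print(cropper(np.zeros((1, 16, 16), dtype=np.float32)).shape)  # (1, 4, 4)
print(cropper(torch.zeros(1, 16, 16)).shape)                   # torch.Size([1, 4, 4])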
""" + roi_start_torch: torch.Tensor + if roi_slices: if not all(s.step is None or s.step == 1 for s in roi_slices): raise ValueError("Only slice steps of 1/None are currently supported") self.slices = list(roi_slices) else: if roi_center is not None and roi_size is not None: - roi_center = np.asarray(roi_center, dtype=np.int16) - roi_size = np.asarray(roi_size, dtype=np.int16) - roi_start_np = np.maximum(roi_center - np.floor_divide(roi_size, 2), 0) - roi_end_np = np.maximum(roi_start_np + roi_size, roi_start_np) + roi_center, *_ = convert_data_type( + data=roi_center, output_type=torch.Tensor, dtype=torch.int16, wrap_sequence=True + ) + roi_size, *_ = convert_to_dst_type(src=roi_size, dst=roi_center, wrap_sequence=True) + roi_start_torch = maximum( + roi_center - floor_divide(roi_size, 2), torch.zeros_like(roi_center) # type: ignore + ) + roi_end_torch = maximum(roi_start_torch + roi_size, roi_start_torch) else: if roi_start is None or roi_end is None: raise ValueError("Please specify either roi_center, roi_size or roi_start, roi_end.") - roi_start_np = np.maximum(np.asarray(roi_start, dtype=np.int16), 0) - roi_end_np = np.maximum(np.asarray(roi_end, dtype=np.int16), roi_start_np) - # Allow for 1D by converting back to np.array (since np.maximum will convert to int) - roi_start_np = roi_start_np if isinstance(roi_start_np, np.ndarray) else np.array([roi_start_np]) - roi_end_np = roi_end_np if isinstance(roi_end_np, np.ndarray) else np.array([roi_end_np]) - # convert to slices - self.slices = [slice(s, e) for s, e in zip(roi_start_np, roi_end_np)] - - def __call__(self, img: Union[np.ndarray, torch.Tensor]): + roi_start_torch, *_ = convert_data_type( # type: ignore + data=roi_start, output_type=torch.Tensor, dtype=torch.int16, wrap_sequence=True + ) + roi_start_torch = maximum(roi_start_torch, torch.zeros_like(roi_start_torch)) # type: ignore + roi_end_torch, *_ = convert_to_dst_type(src=roi_end, dst=roi_start_torch, wrap_sequence=True) + roi_end_torch = maximum(roi_end_torch, roi_start_torch) + # convert to slices (accounting for 1d) + if roi_start_torch.numel() == 1: + self.slices = [slice(int(roi_start_torch.item()), int(roi_end_torch.item()))] + else: + self.slices = [slice(int(s), int(e)) for s, e in zip(roi_start_torch.tolist(), roi_end_torch.tolist())] + + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ Apply the transform to `img`, assuming `img` is channel-first and slicing doesn't apply to the channel dim. @@ -400,10 +441,12 @@ class CenterSpatialCrop(Transform): the spatial size of output data will be [32, 40, 40]. """ + backend = SpatialCrop.backend + def __init__(self, roi_size: Union[Sequence[int], int]) -> None: self.roi_size = roi_size - def __call__(self, img: np.ndarray): + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ Apply the transform to `img`, assuming `img` is channel-first and slicing doesn't apply to the channel dim. @@ -424,10 +467,12 @@ class CenterScaleCrop(Transform): """ + backend = CenterSpatialCrop.backend + def __init__(self, roi_scale: Union[Sequence[float], float]): self.roi_scale = roi_scale - def __call__(self, img: np.ndarray): + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: img_size = img.shape[1:] ndim = len(img_size) roi_size = [ceil(r * s) for r, s in zip(ensure_tuple_rep(self.roi_scale, ndim), img_size)] @@ -459,6 +504,8 @@ class RandSpatialCrop(Randomizable, Transform): if True, the actual size is sampled from `randint(roi_size, max_roi_size + 1)`. 
""" + backend = CenterSpatialCrop.backend + def __init__( self, roi_size: Union[Sequence[int], int], @@ -479,19 +526,19 @@ def randomize(self, img_size: Sequence[int]) -> None: max_size = img_size if self.max_roi_size is None else fall_back_tuple(self.max_roi_size, img_size) if any(i > j for i, j in zip(self._size, max_size)): raise ValueError(f"min ROI size: {self._size} is bigger than max ROI size: {max_size}.") - self._size = tuple((self.R.randint(low=self._size[i], high=max_size[i] + 1) for i in range(len(img_size)))) + self._size = tuple(self.R.randint(low=self._size[i], high=max_size[i] + 1) for i in range(len(img_size))) if self.random_center: valid_size = get_valid_patch_size(img_size, self._size) self._slices = (slice(None),) + get_random_patch(img_size, valid_size, self.R) - def __call__(self, img: np.ndarray): + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ Apply the transform to `img`, assuming `img` is channel-first and slicing doesn't apply to the channel dim. """ self.randomize(img.shape[1:]) if self._size is None: - raise AssertionError + raise RuntimeError("self._size not specified.") if self.random_center: return img[self._slices] cropper = CenterSpatialCrop(self._size) @@ -530,7 +577,7 @@ def __init__( self.roi_scale = roi_scale self.max_roi_scale = max_roi_scale - def __call__(self, img: np.ndarray): + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ Apply the transform to `img`, assuming `img` is channel-first and slicing doesn't apply to the channel dim. @@ -576,6 +623,8 @@ class RandSpatialCropSamples(Randomizable, Transform): """ + backend = RandSpatialCrop.backend + def __init__( self, roi_size: Union[Sequence[int], int], @@ -591,15 +640,15 @@ def __init__( def set_random_state( self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None - ) -> "Randomizable": - super().set_random_state(seed=seed, state=state) - self.cropper.set_random_state(state=self.R) + ) -> "RandSpatialCropSamples": + super().set_random_state(seed, state) + self.cropper.set_random_state(seed, state) return self def randomize(self, data: Optional[Any] = None) -> None: pass - def __call__(self, img: np.ndarray) -> List[np.ndarray]: + def __call__(self, img: NdarrayOrTensor) -> List[NdarrayOrTensor]: """ Apply the transform to `img`, assuming `img` is channel-first and cropping doesn't change the channel dim. @@ -639,6 +688,8 @@ def threshold_at_one(x): """ + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + def __init__( self, select_fn: Callable = is_positive, @@ -646,7 +697,7 @@ def __init__( margin: Union[Sequence[int], int] = 0, return_coords: bool = False, k_divisible: Union[Sequence[int], int] = 1, - mode: Union[NumpyPadMode, str] = NumpyPadMode.CONSTANT, + mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = NumpyPadMode.CONSTANT, **np_kwargs, ) -> None: """ @@ -658,10 +709,12 @@ def __init__( return_coords: whether return the coordinates of spatial bounding box for foreground. k_divisible: make each spatial dimension to be divisible by k, default to 1. if `k_divisible` is an int, the same `k` be applied to all the input spatial dimensions. - mode: padding mode {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} - one of the listed string values or a user supplied function. Defaults to ``"constant"``. 
- see also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. + One of the listed string values or a user supplied function. Defaults to ``"constant"``. + See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension. more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html @@ -674,18 +727,18 @@ def __init__( self.mode: NumpyPadMode = look_up_option(mode, NumpyPadMode) self.np_kwargs = np_kwargs - def compute_bounding_box(self, img: np.ndarray): + def compute_bounding_box(self, img: NdarrayOrTensor): """ Compute the start points and end points of bounding box to crop. And adjust bounding box coords to be divisible by `k`. """ box_start, box_end = generate_spatial_bounding_box(img, self.select_fn, self.channel_indices, self.margin) - box_start_ = np.asarray(box_start, dtype=np.int16) - box_end_ = np.asarray(box_end, dtype=np.int16) + box_start_, *_ = convert_data_type(box_start, output_type=np.ndarray, dtype=np.int16, wrap_sequence=True) + box_end_, *_ = convert_data_type(box_end, output_type=np.ndarray, dtype=np.int16, wrap_sequence=True) orig_spatial_size = box_end_ - box_start_ # make the spatial size divisible by `k` - spatial_size = np.asarray(compute_divisible_spatial_size(spatial_shape=orig_spatial_size, k=self.k_divisible)) + spatial_size = np.asarray(compute_divisible_spatial_size(orig_spatial_size.tolist(), k=self.k_divisible)) # update box_start and box_end box_start_ = box_start_ - np.floor_divide(np.asarray(spatial_size) - orig_spatial_size, 2) box_end_ = box_start_ + spatial_size @@ -693,10 +746,10 @@ def compute_bounding_box(self, img: np.ndarray): def crop_pad( self, - img: np.ndarray, + img: NdarrayOrTensor, box_start: np.ndarray, box_end: np.ndarray, - mode: Optional[Union[NumpyPadMode, str]] = None, + mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None, ): """ Crop and pad based on the bounding box. @@ -708,7 +761,7 @@ def crop_pad( pad = list(chain(*zip(pad_to_start.tolist(), pad_to_end.tolist()))) return BorderPad(spatial_border=pad, mode=mode or self.mode, **self.np_kwargs)(cropped) - def __call__(self, img: np.ndarray, mode: Optional[Union[NumpyPadMode, str]] = None): + def __call__(self, img: NdarrayOrTensor, mode: Optional[Union[NumpyPadMode, str]] = None): """ Apply the transform to `img`, assuming `img` is channel-first and slicing doesn't change the channel dim. @@ -734,20 +787,25 @@ class RandWeightedCrop(Randomizable, Transform): It should be a single-channel array in shape, for example, `(1, spatial_dim_0, spatial_dim_1, ...)`. 
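A minimal sketch of the weighted sampling described above, assuming the NdarrayOrTensor signatures in this hunk extend to the weight map (weight values illustrative):
# sketch: RandWeightedCrop with a torch weight map; all weight on one voxel
import torch
from monai.transforms import RandWeightedCrop
img = torch.zeros(1, 32, 32)
w = torch.zeros(1, 32, 32)
w[0, 16, 16] = 1.0  # sampling concentrates around this voxel
cropper = RandWeightedCrop(spatial_size=(8, 8), num_samples=2)
cropper.set_random_state(seed=0)
patches = cropper(img, weight_map=w)
print(len(patches), tuple(patches[0].shape))  # 2 (1, 8, 8)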
""" + backend = SpatialCrop.backend + def __init__( - self, spatial_size: Union[Sequence[int], int], num_samples: int = 1, weight_map: Optional[np.ndarray] = None + self, + spatial_size: Union[Sequence[int], int], + num_samples: int = 1, + weight_map: Optional[NdarrayOrTensor] = None, ): self.spatial_size = ensure_tuple(spatial_size) self.num_samples = int(num_samples) self.weight_map = weight_map self.centers: List[np.ndarray] = [] - def randomize(self, weight_map: np.ndarray) -> None: + def randomize(self, weight_map: NdarrayOrTensor) -> None: self.centers = weighted_patch_samples( spatial_size=self.spatial_size, w=weight_map[0], n_samples=self.num_samples, r_state=self.R ) # using only the first channel as weight map - def __call__(self, img: np.ndarray, weight_map: Optional[np.ndarray] = None) -> List[np.ndarray]: + def __call__(self, img: NdarrayOrTensor, weight_map: Optional[NdarrayOrTensor] = None) -> List[NdarrayOrTensor]: """ Args: img: input image to sample patches from. assuming `img` is a channel-first array. @@ -764,9 +822,10 @@ def __call__(self, img: np.ndarray, weight_map: Optional[np.ndarray] = None) -> raise ValueError("weight map must be provided for weighted patch sampling.") if img.shape[1:] != weight_map.shape[1:]: raise ValueError(f"image and weight map spatial shape mismatch: {img.shape[1:]} vs {weight_map.shape[1:]}.") + self.randomize(weight_map) _spatial_size = fall_back_tuple(self.spatial_size, weight_map.shape[1:]) - results = [] + results: List[NdarrayOrTensor] = [] for center in self.centers: cropper = SpatialCrop(roi_center=center, roi_size=_spatial_size) results.append(cropper(img)) @@ -816,6 +875,9 @@ class RandCropByPosNegLabel(Randomizable, Transform): `image_threshold`, and randomly select crop centers based on them, need to provide `fg_indices` and `bg_indices` together, expect to be 1 dim array of spatial indices after flattening. a typical usage is to call `FgBgToIndices` transform first and cache the results. + allow_smaller: if `False`, an exception will be raised if the image is smaller than + the requested ROI in any dimension. If `True`, any smaller dimensions will be set to + match the cropped size (i.e., no cropping in that dimension). Raises: ValueError: When ``pos`` or ``neg`` are negative. 
@@ -823,17 +885,20 @@ class RandCropByPosNegLabel(Randomizable, Transform): """ + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + def __init__( self, spatial_size: Union[Sequence[int], int], - label: Optional[np.ndarray] = None, + label: Optional[NdarrayOrTensor] = None, pos: float = 1.0, neg: float = 1.0, num_samples: int = 1, - image: Optional[np.ndarray] = None, + image: Optional[NdarrayOrTensor] = None, image_threshold: float = 0.0, - fg_indices: Optional[np.ndarray] = None, - bg_indices: Optional[np.ndarray] = None, + fg_indices: Optional[NdarrayOrTensor] = None, + bg_indices: Optional[NdarrayOrTensor] = None, + allow_smaller: bool = False, ) -> None: self.spatial_size = ensure_tuple(spatial_size) self.label = label @@ -845,16 +910,17 @@ def __init__( self.num_samples = num_samples self.image = image self.image_threshold = image_threshold - self.centers: Optional[List[List[np.ndarray]]] = None + self.centers: Optional[List[List[int]]] = None self.fg_indices = fg_indices self.bg_indices = bg_indices + self.allow_smaller = allow_smaller def randomize( self, - label: np.ndarray, - fg_indices: Optional[np.ndarray] = None, - bg_indices: Optional[np.ndarray] = None, - image: Optional[np.ndarray] = None, + label: NdarrayOrTensor, + fg_indices: Optional[NdarrayOrTensor] = None, + bg_indices: Optional[NdarrayOrTensor] = None, + image: Optional[NdarrayOrTensor] = None, ) -> None: self.spatial_size = fall_back_tuple(self.spatial_size, default=label.shape[1:]) if fg_indices is None or bg_indices is None: @@ -867,17 +933,24 @@ def randomize( fg_indices_ = fg_indices bg_indices_ = bg_indices self.centers = generate_pos_neg_label_crop_centers( - self.spatial_size, self.num_samples, self.pos_ratio, label.shape[1:], fg_indices_, bg_indices_, self.R + self.spatial_size, + self.num_samples, + self.pos_ratio, + label.shape[1:], + fg_indices_, + bg_indices_, + self.R, + self.allow_smaller, ) def __call__( self, - img: np.ndarray, - label: Optional[np.ndarray] = None, - image: Optional[np.ndarray] = None, - fg_indices: Optional[np.ndarray] = None, - bg_indices: Optional[np.ndarray] = None, - ) -> List[np.ndarray]: + img: NdarrayOrTensor, + label: Optional[NdarrayOrTensor] = None, + image: Optional[NdarrayOrTensor] = None, + fg_indices: Optional[NdarrayOrTensor] = None, + bg_indices: Optional[NdarrayOrTensor] = None, + ) -> List[NdarrayOrTensor]: """ Args: img: input data to crop samples from based on the pos/neg ratio of `label` and `image`. @@ -900,10 +973,10 @@ def __call__( image = self.image self.randomize(label, fg_indices, bg_indices, image) - results: List[np.ndarray] = [] + results: List[NdarrayOrTensor] = [] if self.centers is not None: for center in self.centers: - cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size) # type: ignore + cropper = SpatialCrop(roi_center=center, roi_size=self.spatial_size) results.append(cropper(img)) return results @@ -965,19 +1038,25 @@ class RandCropByLabelClasses(Randomizable, Transform): `image_threshold`, and randomly select crop centers based on them, expect to be 1 dim array of spatial indices after flattening. a typical usage is to call `ClassesToIndices` transform first and cache the results for better performance. + allow_smaller: if `False`, an exception will be raised if the image is smaller than + the requested ROI in any dimension. If `True`, any smaller dimensions will remain + unchanged. 
""" + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + def __init__( self, spatial_size: Union[Sequence[int], int], ratios: Optional[List[Union[float, int]]] = None, - label: Optional[np.ndarray] = None, + label: Optional[NdarrayOrTensor] = None, num_classes: Optional[int] = None, num_samples: int = 1, - image: Optional[np.ndarray] = None, + image: Optional[NdarrayOrTensor] = None, image_threshold: float = 0.0, - indices: Optional[List[np.ndarray]] = None, + indices: Optional[List[NdarrayOrTensor]] = None, + allow_smaller: bool = False, ) -> None: self.spatial_size = ensure_tuple(spatial_size) self.ratios = ratios @@ -986,17 +1065,18 @@ def __init__( self.num_samples = num_samples self.image = image self.image_threshold = image_threshold - self.centers: Optional[List[List[np.ndarray]]] = None + self.centers: Optional[List[List[int]]] = None self.indices = indices + self.allow_smaller = allow_smaller def randomize( self, - label: np.ndarray, - indices: Optional[List[np.ndarray]] = None, - image: Optional[np.ndarray] = None, + label: NdarrayOrTensor, + indices: Optional[List[NdarrayOrTensor]] = None, + image: Optional[NdarrayOrTensor] = None, ) -> None: self.spatial_size = fall_back_tuple(self.spatial_size, default=label.shape[1:]) - indices_: List[np.ndarray] + indices_: Sequence[NdarrayOrTensor] if indices is None: if self.indices is not None: indices_ = self.indices @@ -1005,16 +1085,16 @@ def randomize( else: indices_ = indices self.centers = generate_label_classes_crop_centers( - self.spatial_size, self.num_samples, label.shape[1:], indices_, self.ratios, self.R + self.spatial_size, self.num_samples, label.shape[1:], indices_, self.ratios, self.R, self.allow_smaller ) def __call__( self, - img: np.ndarray, - label: Optional[np.ndarray] = None, - image: Optional[np.ndarray] = None, - indices: Optional[List[np.ndarray]] = None, - ) -> List[np.ndarray]: + img: NdarrayOrTensor, + label: Optional[NdarrayOrTensor] = None, + image: Optional[NdarrayOrTensor] = None, + indices: Optional[List[NdarrayOrTensor]] = None, + ) -> List[NdarrayOrTensor]: """ Args: img: input data to crop samples from based on the ratios of every class, assumes `img` is a @@ -1033,10 +1113,10 @@ def __call__( image = self.image self.randomize(label, indices, image) - results: List[np.ndarray] = [] + results: List[NdarrayOrTensor] = [] if self.centers is not None: for center in self.centers: - cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size) # type: ignore + cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size) results.append(cropper(img)) return results @@ -1063,6 +1143,8 @@ class ResizeWithPadOrCrop(Transform): """ + backend = list(set(SpatialPad.backend) & set(CenterSpatialCrop.backend)) + def __init__( self, spatial_size: Union[Sequence[int], int], @@ -1073,7 +1155,7 @@ def __init__( self.padder = SpatialPad(spatial_size=spatial_size, method=method, mode=mode, **np_kwargs) self.cropper = CenterSpatialCrop(roi_size=spatial_size) - def __call__(self, img: np.ndarray, mode: Optional[Union[NumpyPadMode, str]] = None) -> np.ndarray: + def __call__(self, img: NdarrayOrTensor, mode: Optional[Union[NumpyPadMode, str]] = None) -> NdarrayOrTensor: """ Args: img: data to pad or crop, assuming `img` is channel-first and @@ -1084,7 +1166,7 @@ def __call__(self, img: np.ndarray, mode: Optional[Union[NumpyPadMode, str]] = N If None, defaults to the ``mode`` in construction. 
See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html """ - return self.padder(self.cropper(img), mode=mode) + return self.padder(self.cropper(img), mode=mode) # type: ignore class BoundingRect(Transform): @@ -1118,6 +1200,7 @@ def __call__(self, img: np.ndarray) -> np.ndarray: """ See also: :py:class:`monai.transforms.utils.generate_spatial_bounding_box`. """ + img, *_ = convert_data_type(img, np.ndarray) # type: ignore bbox = [] for channel in range(img.shape[0]): diff --git a/monai/transforms/croppad/batch.py b/monai/transforms/croppad/batch.py index 956dff7881..42178ec0bc 100644 --- a/monai/transforms/croppad/batch.py +++ b/monai/transforms/croppad/batch.py @@ -26,9 +26,7 @@ from monai.transforms.utility.array import ToTensor from monai.utils.enums import InverseKeys, Method, NumpyPadMode -__all__ = [ - "PadListDataCollate", -] +__all__ = ["PadListDataCollate"] def replace_element(to_replace, batch, idx, key_or_idx): diff --git a/monai/transforms/croppad/dictionary.py b/monai/transforms/croppad/dictionary.py index 9e33ab2db1..b98973d14e 100644 --- a/monai/transforms/croppad/dictionary.py +++ b/monai/transforms/croppad/dictionary.py @@ -25,7 +25,7 @@ import numpy as np from monai.config import IndexSelection, KeysCollection -from monai.config.type_definitions import NdarrayTensor +from monai.config.type_definitions import NdarrayOrTensor from monai.data.utils import get_random_patch, get_valid_patch_size from monai.transforms.croppad.array import ( BorderPad, @@ -33,6 +33,8 @@ CenterSpatialCrop, CropForeground, DivisiblePad, + RandCropByLabelClasses, + RandCropByPosNegLabel, ResizeWithPadOrCrop, SpatialCrop, SpatialPad, @@ -49,11 +51,11 @@ weighted_patch_samples, ) from monai.utils import ImageMetaKey as Key -from monai.utils import Method, NumpyPadMode, ensure_tuple, ensure_tuple_rep, fall_back_tuple +from monai.utils import Method, NumpyPadMode, PytorchPadMode, ensure_tuple, ensure_tuple_rep, fall_back_tuple from monai.utils.enums import InverseKeys __all__ = [ - "NumpyPadModeSequence", + "PadModeSequence", "SpatialPadd", "BorderPadd", "DivisiblePadd", @@ -99,6 +101,7 @@ ] NumpyPadModeSequence = Union[Sequence[Union[NumpyPadMode, str]], NumpyPadMode, str] +PadModeSequence = Union[Sequence[Union[NumpyPadMode, PytorchPadMode, str]], NumpyPadMode, PytorchPadMode, str] class SpatialPadd(MapTransform, InvertibleTransform): @@ -114,9 +117,9 @@ def __init__( keys: KeysCollection, spatial_size: Union[Sequence[int], int], method: Union[Method, str] = Method.SYMMETRIC, - mode: NumpyPadModeSequence = NumpyPadMode.CONSTANT, + mode: PadModeSequence = NumpyPadMode.CONSTANT, allow_missing_keys: bool = False, - **np_kwargs, + **kwargs, ) -> None: """ Args: @@ -129,28 +132,30 @@ def __init__( the spatial size of output data will be [32, 30, 30]. method: {``"symmetric"``, ``"end"``} Pad image symmetrically on every side or only pad at the end sides. Defaults to ``"symmetric"``. - mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. One of the listed string values or a user supplied function. Defaults to ``"constant"``. 
See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html It also can be a sequence of string, each element corresponds to a key in ``keys``. allow_missing_keys: don't raise exception if key is missing. - np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension. - more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + kwargs: other arguments for the `np.pad` or `torch.pad` function. + note that `np.pad` treats channel dimension as the first dimension. """ super().__init__(keys, allow_missing_keys) self.mode = ensure_tuple_rep(mode, len(self.keys)) - self.padder = SpatialPad(spatial_size, method, **np_kwargs) + self.padder = SpatialPad(spatial_size, method, **kwargs) - def __call__(self, data: Mapping[Hashable, NdarrayTensor]) -> Dict[Hashable, NdarrayTensor]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key, m in self.key_iterator(d, self.mode): self.push_transform(d, key, extra_info={"mode": m.value if isinstance(m, Enum) else m}) d[key] = self.padder(d[key], mode=m) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) @@ -183,9 +188,9 @@ def __init__( self, keys: KeysCollection, spatial_border: Union[Sequence[int], int], - mode: NumpyPadModeSequence = NumpyPadMode.CONSTANT, + mode: PadModeSequence = NumpyPadMode.CONSTANT, allow_missing_keys: bool = False, - **np_kwargs, + **kwargs, ) -> None: """ Args: @@ -202,28 +207,30 @@ def __init__( pad bottom of H dim with 2, pad left of W dim with 3, pad right of W dim with 4. the result shape is [1, 7, 11]. - mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. One of the listed string values or a user supplied function. Defaults to ``"constant"``. See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html It also can be a sequence of string, each element corresponds to a key in ``keys``. allow_missing_keys: don't raise exception if key is missing. - np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension. - more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + kwargs: other arguments for the `np.pad` or `torch.pad` function. + note that `np.pad` treats channel dimension as the first dimension. 
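A minimal sketch of `BorderPadd` with the renamed `kwargs` plumbing above (mode and shapes illustrative):
# sketch: dictionary-style border padding of a torch tensor, then inverting it
import torch
from monai.transforms import BorderPadd
pad = BorderPadd(keys="image", spatial_border=2, mode="constant")
out = pad({"image": torch.zeros(1, 8, 8)})
print(tuple(out["image"].shape))       # (1, 12, 12)
restored = pad.inverse(out)            # InvertibleTransform: crops the border back off
print(tuple(restored["image"].shape))  # (1, 8, 8)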
""" super().__init__(keys, allow_missing_keys) self.mode = ensure_tuple_rep(mode, len(self.keys)) - self.padder = BorderPad(spatial_border=spatial_border, **np_kwargs) + self.padder = BorderPad(spatial_border=spatial_border, **kwargs) - def __call__(self, data: Mapping[Hashable, NdarrayTensor]) -> Dict[Hashable, NdarrayTensor]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key, m in self.key_iterator(d, self.mode): self.push_transform(d, key, extra_info={"mode": m.value if isinstance(m, Enum) else m}) d[key] = self.padder(d[key], mode=m) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): @@ -260,10 +267,10 @@ def __init__( self, keys: KeysCollection, k: Union[Sequence[int], int], - mode: NumpyPadModeSequence = NumpyPadMode.CONSTANT, + mode: PadModeSequence = NumpyPadMode.CONSTANT, method: Union[Method, str] = Method.SYMMETRIC, allow_missing_keys: bool = False, - **np_kwargs, + **kwargs, ) -> None: """ Args: @@ -272,32 +279,34 @@ def __init__( k: the target k for each spatial dimension. if `k` is negative or 0, the original size is preserved. if `k` is an int, the same `k` be applied to all the input spatial dimensions. - mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. One of the listed string values or a user supplied function. Defaults to ``"constant"``. See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html It also can be a sequence of string, each element corresponds to a key in ``keys``. method: {``"symmetric"``, ``"end"``} Pad image symmetrically on every side or only pad at the end sides. Defaults to ``"symmetric"``. allow_missing_keys: don't raise exception if key is missing. - np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension. - more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + kwargs: other arguments for the `np.pad` or `torch.pad` function. + note that `np.pad` treats channel dimension as the first dimension. 
See also :py:class:`monai.transforms.SpatialPad` """ super().__init__(keys, allow_missing_keys) self.mode = ensure_tuple_rep(mode, len(self.keys)) - self.padder = DivisiblePad(k=k, method=method, **np_kwargs) + self.padder = DivisiblePad(k=k, method=method, **kwargs) - def __call__(self, data: Mapping[Hashable, NdarrayTensor]) -> Dict[Hashable, NdarrayTensor]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key, m in self.key_iterator(d, self.mode): self.push_transform(d, key, extra_info={"mode": m.value if isinstance(m, Enum) else m}) d[key] = self.padder(d[key], mode=m) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): @@ -331,6 +340,8 @@ class SpatialCropd(MapTransform, InvertibleTransform): - the start and end coordinates of the ROI """ + backend = SpatialCrop.backend + def __init__( self, keys: KeysCollection, @@ -357,14 +368,14 @@ def __init__( super().__init__(keys, allow_missing_keys) self.cropper = SpatialCrop(roi_center, roi_size, roi_start, roi_end, roi_slices) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): self.push_transform(d, key) d[key] = self.cropper(d[key]) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): @@ -404,13 +415,15 @@ class CenterSpatialCropd(MapTransform, InvertibleTransform): allow_missing_keys: don't raise exception if key is missing. """ + backend = CenterSpatialCrop.backend + def __init__( self, keys: KeysCollection, roi_size: Union[Sequence[int], int], allow_missing_keys: bool = False ) -> None: super().__init__(keys, allow_missing_keys) self.cropper = CenterSpatialCrop(roi_size) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): orig_size = d[key].shape[1:] @@ -418,7 +431,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda self.push_transform(d, key, orig_size=orig_size) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): @@ -454,13 +467,15 @@ class CenterScaleCropd(MapTransform, InvertibleTransform): allow_missing_keys: don't raise exception if key is missing. 
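A minimal sketch of `CenterScaleCropd` with the dictionary API (scale and shapes illustrative):
# sketch: center-crop half the spatial extent of a torch image
import torch
from monai.transforms import CenterScaleCropd
crop = CenterScaleCropd(keys="image", roi_scale=0.5)
out = crop({"image": torch.zeros(1, 16, 16)})
print(tuple(out["image"].shape))  # (1, 8, 8)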
""" + backend = CenterSpatialCrop.backend + def __init__( self, keys: KeysCollection, roi_scale: Union[Sequence[float], float], allow_missing_keys: bool = False ) -> None: super().__init__(keys, allow_missing_keys=allow_missing_keys) self.roi_scale = roi_scale - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) # use the spatial size of first image to scale, expect all images have the same spatial size img_size = data[self.keys[0]].shape[1:] @@ -473,7 +488,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): @@ -525,6 +540,8 @@ class RandSpatialCropd(Randomizable, MapTransform, InvertibleTransform): allow_missing_keys: don't raise exception if key is missing. """ + backend = CenterSpatialCrop.backend + def __init__( self, keys: KeysCollection, @@ -553,11 +570,11 @@ def randomize(self, img_size: Sequence[int]) -> None: valid_size = get_valid_patch_size(img_size, self._size) self._slices = (slice(None),) + get_random_patch(img_size, valid_size, self.R) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) self.randomize(d[self.keys[0]].shape[1:]) # image shape from the first data key if self._size is None: - raise AssertionError + raise RuntimeError("self._size not specified.") for key in self.key_iterator(d): if self.random_center: self.push_transform(d, key, {"slices": [(i.start, i.stop) for i in self._slices[1:]]}) # type: ignore @@ -568,7 +585,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda d[key] = cropper(d[key]) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): @@ -626,6 +643,8 @@ class RandScaleCropd(RandSpatialCropd): allow_missing_keys: don't raise exception if key is missing. 
""" + backend = RandSpatialCropd.backend + def __init__( self, keys: KeysCollection, @@ -643,11 +662,10 @@ def __init__( random_size=random_size, allow_missing_keys=allow_missing_keys, ) - MapTransform.__init__(self, keys, allow_missing_keys) self.roi_scale = roi_scale self.max_roi_scale = max_roi_scale - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: img_size = data[self.keys[0]].shape[1:] ndim = len(img_size) self.roi_size = [ceil(r * s) for r, s in zip(ensure_tuple_rep(self.roi_scale, ndim), img_size)] @@ -711,6 +729,8 @@ class RandSpatialCropSamplesd(Randomizable, MapTransform, InvertibleTransform): """ + backend = RandSpatialCropd.backend + def __init__( self, keys: KeysCollection, @@ -735,15 +755,15 @@ def __init__( def set_random_state( self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None - ) -> "Randomizable": - super().set_random_state(seed=seed, state=state) - self.cropper.set_random_state(state=self.R) + ) -> "RandSpatialCropSamplesd": + super().set_random_state(seed, state) + self.cropper.set_random_state(seed, state) return self def randomize(self, data: Optional[Any] = None) -> None: pass - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, np.ndarray]]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> List[Dict[Hashable, NdarrayOrTensor]]: ret = [] for i in range(self.num_samples): d = dict(data) @@ -753,18 +773,18 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n cropped = self.cropper(d) # self.cropper will have added RandSpatialCropd to the list. Change to RandSpatialCropSamplesd for key in self.key_iterator(cropped): - cropped[str(key) + InverseKeys.KEY_SUFFIX][-1][InverseKeys.CLASS_NAME] = self.__class__.__name__ - cropped[str(key) + InverseKeys.KEY_SUFFIX][-1][InverseKeys.ID] = id(self) + cropped[str(key) + InverseKeys.KEY_SUFFIX][-1][InverseKeys.CLASS_NAME] = self.__class__.__name__ # type: ignore + cropped[str(key) + InverseKeys.KEY_SUFFIX][-1][InverseKeys.ID] = id(self) # type: ignore # add `patch_index` to the meta data for key, meta_key, meta_key_postfix in self.key_iterator(d, self.meta_keys, self.meta_key_postfix): meta_key = meta_key or f"{key}_{meta_key_postfix}" if meta_key not in cropped: cropped[meta_key] = {} # type: ignore - cropped[meta_key][Key.PATCH_INDEX] = i + cropped[meta_key][Key.PATCH_INDEX] = i # type: ignore ret.append(cropped) return ret - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, Any]: d = deepcopy(dict(data)) # We changed the transform name from RandSpatialCropd to RandSpatialCropSamplesd # Need to revert that since we're calling RandSpatialCropd's inverse @@ -789,6 +809,8 @@ class CropForegroundd(MapTransform, InvertibleTransform): channels. And it can also add margin to every dim of the bounding box of foreground object. 
""" + backend = CropForeground.backend + def __init__( self, keys: KeysCollection, @@ -797,7 +819,7 @@ def __init__( channel_indices: Optional[IndexSelection] = None, margin: Union[Sequence[int], int] = 0, k_divisible: Union[Sequence[int], int] = 1, - mode: NumpyPadModeSequence = NumpyPadMode.CONSTANT, + mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = NumpyPadMode.CONSTANT, start_coord_key: str = "foreground_start_coord", end_coord_key: str = "foreground_end_coord", allow_missing_keys: bool = False, @@ -814,10 +836,12 @@ def __init__( margin: add margin value to spatial dims of the bounding box, if only 1 value provided, use it for all dims. k_divisible: make each spatial dimension to be divisible by k, default to 1. if `k_divisible` is an int, the same `k` be applied to all the input spatial dimensions. - mode: padding mode {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``, - ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} - one of the listed string values or a user supplied function. Defaults to ``"constant"``. - see also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. + One of the listed string values or a user supplied function. Defaults to ``"constant"``. + See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html it also can be a sequence of string, each element corresponds to a key in ``keys``. start_coord_key: key to record the start coordinate of spatial bounding box for foreground. end_coord_key: key to record the end coordinate of spatial bounding box for foreground. 
@@ -831,15 +855,11 @@ def __init__( self.start_coord_key = start_coord_key self.end_coord_key = end_coord_key self.cropper = CropForeground( - select_fn=select_fn, - channel_indices=channel_indices, - margin=margin, - k_divisible=k_divisible, - **np_kwargs, + select_fn=select_fn, channel_indices=channel_indices, margin=margin, k_divisible=k_divisible, **np_kwargs ) self.mode = ensure_tuple_rep(mode, len(self.keys)) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) box_start, box_end = self.cropper.compute_bounding_box(img=d[self.source_key]) d[self.start_coord_key] = box_start @@ -849,7 +869,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda d[key] = self.cropper.crop_pad(img=d[key], box_start=box_start, box_end=box_end, mode=m) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) @@ -906,6 +926,8 @@ class RandWeightedCropd(Randomizable, MapTransform, InvertibleTransform): :py:class:`monai.transforms.RandWeightedCrop` """ + backend = SpatialCrop.backend + def __init__( self, keys: KeysCollection, @@ -928,18 +950,18 @@ def __init__( self.meta_key_postfix = ensure_tuple_rep(meta_key_postfix, len(self.keys)) self.centers: List[np.ndarray] = [] - def randomize(self, weight_map: np.ndarray) -> None: + def randomize(self, weight_map: NdarrayOrTensor) -> None: self.centers = weighted_patch_samples( spatial_size=self.spatial_size, w=weight_map[0], n_samples=self.num_samples, r_state=self.R ) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, np.ndarray]]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> List[Dict[Hashable, NdarrayOrTensor]]: d = dict(data) self.randomize(d[self.w_key]) _spatial_size = fall_back_tuple(self.spatial_size, d[self.w_key].shape[1:]) # initialize returned list with shallow copy to preserve key ordering - results: List[Dict[Hashable, np.ndarray]] = [dict(data) for _ in range(self.num_samples)] + results: List[Dict[Hashable, NdarrayOrTensor]] = [dict(data) for _ in range(self.num_samples)] # fill in the extra keys with unmodified data for i in range(self.num_samples): for key in set(data.keys()).difference(set(self.keys)): @@ -965,11 +987,11 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n meta_key = meta_key or f"{key}_{meta_key_postfix}" if meta_key not in results[i]: results[i][meta_key] = {} # type: ignore - results[i][meta_key][Key.PATCH_INDEX] = i + results[i][meta_key][Key.PATCH_INDEX] = i # type: ignore return results - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) @@ -977,7 +999,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar orig_size = np.asarray(transform[InverseKeys.ORIG_SIZE]) current_size = np.asarray(d[key].shape[1:]) center = transform[InverseKeys.EXTRA_INFO]["center"] - cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size) + cropper = 
SpatialCrop(roi_center=center, roi_size=self.spatial_size) # get required pad to start and end pad_to_start = np.array([s.indices(o)[0] for s, o in zip(cropper.slices, orig_size)]) pad_to_end = orig_size - current_size - pad_to_start @@ -1040,6 +1062,9 @@ class RandCropByPosNegLabeld(Randomizable, MapTransform, InvertibleTransform): meta_key_postfix: if meta_keys is None, use `key_{postfix}` to to fetch the meta data according to the key data, default is `meta_dict`, the meta data is a dictionary object. used to add `patch_index` to the meta dict. + allow_smaller: if `False`, an exception will be raised if the image is smaller than + the requested ROI in any dimension. If `True`, any smaller dimensions will be set to + match the cropped size (i.e., no cropping in that dimension). allow_missing_keys: don't raise exception if key is missing. Raises: @@ -1048,6 +1073,8 @@ class RandCropByPosNegLabeld(Randomizable, MapTransform, InvertibleTransform): """ + backend = RandCropByPosNegLabel.backend + def __init__( self, keys: KeysCollection, @@ -1062,6 +1089,7 @@ def __init__( bg_indices_key: Optional[str] = None, meta_keys: Optional[KeysCollection] = None, meta_key_postfix: str = "meta_dict", + allow_smaller: bool = False, allow_missing_keys: bool = False, ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) @@ -1081,14 +1109,15 @@ def __init__( if len(self.keys) != len(self.meta_keys): raise ValueError("meta_keys should have the same length as keys.") self.meta_key_postfix = ensure_tuple_rep(meta_key_postfix, len(self.keys)) - self.centers: Optional[List[List[np.ndarray]]] = None + self.centers: Optional[List[List[int]]] = None + self.allow_smaller = allow_smaller def randomize( self, - label: np.ndarray, - fg_indices: Optional[np.ndarray] = None, - bg_indices: Optional[np.ndarray] = None, - image: Optional[np.ndarray] = None, + label: NdarrayOrTensor, + fg_indices: Optional[NdarrayOrTensor] = None, + bg_indices: Optional[NdarrayOrTensor] = None, + image: Optional[NdarrayOrTensor] = None, ) -> None: self.spatial_size = fall_back_tuple(self.spatial_size, default=label.shape[1:]) if fg_indices is None or bg_indices is None: @@ -1097,10 +1126,17 @@ def randomize( fg_indices_ = fg_indices bg_indices_ = bg_indices self.centers = generate_pos_neg_label_crop_centers( - self.spatial_size, self.num_samples, self.pos_ratio, label.shape[1:], fg_indices_, bg_indices_, self.R + self.spatial_size, + self.num_samples, + self.pos_ratio, + label.shape[1:], + fg_indices_, + bg_indices_, + self.R, + self.allow_smaller, ) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, np.ndarray]]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> List[Dict[Hashable, NdarrayOrTensor]]: d = dict(data) label = d[self.label_key] image = d[self.image_key] if self.image_key else None @@ -1114,7 +1150,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n raise ValueError("no available ROI centers to crop.") # initialize returned list with shallow copy to preserve key ordering - results: List[Dict[Hashable, np.ndarray]] = [dict(d) for _ in range(self.num_samples)] + results: List[Dict[Hashable, NdarrayOrTensor]] = [dict(d) for _ in range(self.num_samples)] for i, center in enumerate(self.centers): # fill in the extra keys with unmodified data @@ -1122,7 +1158,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n results[i][key] = deepcopy(d[key]) for key in self.key_iterator(d): img = d[key] - cropper = 
SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size) # type: ignore + cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size) orig_size = img.shape[1:] results[i][key] = cropper(img) self.push_transform(results[i], key, extra_info={"center": center}, orig_size=orig_size) @@ -1131,11 +1167,11 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n meta_key = meta_key or f"{key}_{meta_key_postfix}" if meta_key not in results[i]: results[i][meta_key] = {} # type: ignore - results[i][meta_key][Key.PATCH_INDEX] = i + results[i][meta_key][Key.PATCH_INDEX] = i # type: ignore return results - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) @@ -1230,10 +1266,15 @@ class RandCropByLabelClassesd(Randomizable, MapTransform, InvertibleTransform): meta_key_postfix: if meta_keys is None, use `key_{postfix}` to to fetch the meta data according to the key data, default is `meta_dict`, the meta data is a dictionary object. used to add `patch_index` to the meta dict. + allow_smaller: if `False`, an exception will be raised if the image is smaller than + the requested ROI in any dimension. If `True`, any smaller dimensions will remain + unchanged. allow_missing_keys: don't raise exception if key is missing. """ + backend = RandCropByLabelClasses.backend + def __init__( self, keys: KeysCollection, @@ -1247,6 +1288,7 @@ def __init__( indices_key: Optional[str] = None, meta_keys: Optional[KeysCollection] = None, meta_key_postfix: str = "meta_dict", + allow_smaller: bool = False, allow_missing_keys: bool = False, ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) @@ -1262,25 +1304,25 @@ def __init__( if len(self.keys) != len(self.meta_keys): raise ValueError("meta_keys should have the same length as keys.") self.meta_key_postfix = ensure_tuple_rep(meta_key_postfix, len(self.keys)) - self.centers: Optional[List[List[np.ndarray]]] = None + self.centers: Optional[List[List[int]]] = None + self.allow_smaller = allow_smaller def randomize( self, - label: np.ndarray, - indices: Optional[List[np.ndarray]] = None, - image: Optional[np.ndarray] = None, + label: NdarrayOrTensor, + indices: Optional[List[NdarrayOrTensor]] = None, + image: Optional[NdarrayOrTensor] = None, ) -> None: self.spatial_size = fall_back_tuple(self.spatial_size, default=label.shape[1:]) - indices_: List[np.ndarray] if indices is None: indices_ = map_classes_to_indices(label, self.num_classes, image, self.image_threshold) else: indices_ = indices self.centers = generate_label_classes_crop_centers( - self.spatial_size, self.num_samples, label.shape[1:], indices_, self.ratios, self.R + self.spatial_size, self.num_samples, label.shape[1:], indices_, self.ratios, self.R, self.allow_smaller ) - def __call__(self, data: Mapping[Hashable, Any]) -> List[Dict[Hashable, np.ndarray]]: + def __call__(self, data: Mapping[Hashable, Any]) -> List[Dict[Hashable, NdarrayOrTensor]]: d = dict(data) label = d[self.label_key] image = d[self.image_key] if self.image_key else None @@ -1293,7 +1335,7 @@ def __call__(self, data: Mapping[Hashable, Any]) -> List[Dict[Hashable, np.ndarr raise ValueError("no available ROI centers to crop.") # initialize returned list with shallow copy to preserve key ordering - results: List[Dict[Hashable, np.ndarray]] = [dict(d) 
for _ in range(self.num_samples)] + results: List[Dict[Hashable, NdarrayOrTensor]] = [dict(d) for _ in range(self.num_samples)] for i, center in enumerate(self.centers): # fill in the extra keys with unmodified data @@ -1301,7 +1343,7 @@ def __call__(self, data: Mapping[Hashable, Any]) -> List[Dict[Hashable, np.ndarr results[i][key] = deepcopy(d[key]) for key in self.key_iterator(d): img = d[key] - cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size) # type: ignore + cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size) orig_size = img.shape[1:] results[i][key] = cropper(img) self.push_transform(results[i], key, extra_info={"center": center}, orig_size=orig_size) @@ -1310,11 +1352,11 @@ def __call__(self, data: Mapping[Hashable, Any]) -> List[Dict[Hashable, np.ndarr meta_key = meta_key or f"{key}_{meta_key_postfix}" if meta_key not in results[i]: results[i][meta_key] = {} # type: ignore - results[i][meta_key][Key.PATCH_INDEX] = i + results[i][meta_key][Key.PATCH_INDEX] = i # type: ignore return results - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) @@ -1359,6 +1401,8 @@ class ResizeWithPadOrCropd(MapTransform, InvertibleTransform): """ + backend = ResizeWithPadOrCrop.backend + def __init__( self, keys: KeysCollection, @@ -1372,22 +1416,15 @@ def __init__( self.mode = ensure_tuple_rep(mode, len(self.keys)) self.padcropper = ResizeWithPadOrCrop(spatial_size=spatial_size, method=method, **np_kwargs) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key, m in self.key_iterator(d, self.mode): orig_size = d[key].shape[1:] d[key] = self.padcropper(d[key], mode=m) - self.push_transform( - d, - key, - orig_size=orig_size, - extra_info={ - "mode": m.value if isinstance(m, Enum) else m, - }, - ) + self.push_transform(d, key, orig_size=orig_size, extra_info={"mode": m.value if isinstance(m, Enum) else m}) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) diff --git a/monai/transforms/intensity/array.py b/monai/transforms/intensity/array.py index 20d306be04..6aa45a9f1d 100644 --- a/monai/transforms/intensity/array.py +++ b/monai/transforms/intensity/array.py @@ -13,7 +13,7 @@ https://github.com/Project-MONAI/MONAI/wiki/MONAI_Design """ -import copy +from abc import abstractmethod from collections.abc import Iterable from functools import partial from typing import Any, Callable, List, Optional, Sequence, Tuple, Union @@ -28,6 +28,7 @@ from monai.networks.layers import GaussianFilter, HilbertTransform, SavitzkyGolayFilter from monai.transforms.transform import RandomizableTransform, Transform from monai.transforms.utils import Fourier, equalize_hist, is_positive, rescale_array +from monai.transforms.utils_pytorch_numpy_unification import clip, percentile, where from monai.utils import ( PT_BEFORE_1_7, InvalidPyTorchVersionError, @@ -38,6 +39,7 @@ ensure_tuple_size, fall_back_tuple, ) +from 
monai.utils.deprecate_utils import deprecated_arg from monai.utils.enums import TransformBackends from monai.utils.type_conversion import convert_to_tensor, get_equivalent_dtype @@ -69,9 +71,10 @@ "RandGibbsNoise", "KSpaceSpikeNoise", "RandKSpaceSpikeNoise", + "RandCoarseTransform", "RandCoarseDropout", + "RandCoarseShuffle", "HistogramNormalize", - "LocalPatchShuffling", ] @@ -83,30 +86,37 @@ class RandGaussianNoise(RandomizableTransform): prob: Probability to add Gaussian noise. mean: Mean or “centre” of the distribution. std: Standard deviation (spread) of distribution. + """ backend = [TransformBackends.TORCH, TransformBackends.NUMPY] - def __init__(self, prob: float = 0.1, mean: Union[Sequence[float], float] = 0.0, std: float = 0.1) -> None: + def __init__(self, prob: float = 0.1, mean: float = 0.0, std: float = 0.1) -> None: RandomizableTransform.__init__(self, prob) self.mean = mean self.std = std - self._noise: np.ndarray + self.noise: Optional[np.ndarray] = None - def randomize(self, im_shape: Sequence[int]) -> None: + def randomize(self, img: NdarrayOrTensor, mean: Optional[float] = None) -> None: super().randomize(None) - self._noise = self.R.normal(self.mean, self.R.uniform(0, self.std), size=im_shape) + if not self._do_transform: + return None + rand_std = self.R.uniform(0, self.std) + self.noise = self.R.normal(self.mean if mean is None else mean, rand_std, size=img.shape) - def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: + def __call__(self, img: NdarrayOrTensor, mean: Optional[float] = None, randomize: bool = True) -> NdarrayOrTensor: """ Apply the transform to `img`. """ - self.randomize(img.shape) - if self._noise is None: - raise RuntimeError("randomized factor should not be None.") + if randomize: + self.randomize(img=img, mean=self.mean if mean is None else mean) + if not self._do_transform: return img - noise, *_ = convert_to_dst_type(self._noise, img) + + if self.noise is None: + raise RuntimeError("please call the `randomize()` function first.") + noise, *_ = convert_to_dst_type(self.noise, img) return img + noise @@ -114,8 +124,8 @@ class RandRicianNoise(RandomizableTransform): """ Add Rician noise to image. Rician noise in MRI is the result of performing a magnitude operation on complex - data with Gaussian noise of the same variance in both channels, as described in `Noise in Magnitude Magnetic Resonance Images - `_. This transform is adapted from + data with Gaussian noise of the same variance in both channels, as described in `Noise in Magnitude + Magnetic Resonance Images `_. This transform is adapted from `DIPY`_. See also: `The rician distribution of noisy mri data `_. @@ -167,13 +177,16 @@ def _add_noise(self, img: NdarrayTensor, mean: float, std: float): return np.sqrt((img + self._noise1) ** 2 + self._noise2 ** 2) - def __call__(self, img: NdarrayTensor) -> NdarrayTensor: + def __call__(self, img: NdarrayTensor, randomize: bool = True) -> NdarrayTensor: """ Apply the transform to `img`. 
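Example (an illustrative sketch of the new ``randomize`` flag; assumes a channel-first array ``img`` is in scope; set ``randomize=False`` to skip re-drawing the transform's random application state)::

            t = RandRicianNoise(prob=1.0, std=0.05, channel_wise=True)
            out = t(img)                    # draws `_do_transform`, then applies
            out2 = t(img, randomize=False)  # keeps the previous `_do_transform` decision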
""" - super().randomize(None) + if randomize: + super().randomize(None) + if not self._do_transform: return img + if self.channel_wise: _mean = ensure_tuple_rep(self.mean, len(img)) _std = ensure_tuple_rep(self.std, len(img)) @@ -211,9 +224,9 @@ def __call__(self, img: NdarrayOrTensor, offset: Optional[float] = None) -> Ndar offset = self.offset if offset is None else offset out = img + offset - if isinstance(out, torch.Tensor): - return out.type(img.dtype) - return out.astype(img.dtype) # type: ignore + out, *_ = convert_data_type(data=out, dtype=img.dtype) + + return out class RandShiftIntensity(RandomizableTransform): @@ -241,10 +254,12 @@ def __init__(self, offsets: Union[Tuple[float, float], float], prob: float = 0.1 self._shfiter = ShiftIntensity(self._offset) def randomize(self, data: Optional[Any] = None) -> None: - self._offset = self.R.uniform(low=self.offsets[0], high=self.offsets[1]) super().randomize(None) + if not self._do_transform: + return None + self._offset = self.R.uniform(low=self.offsets[0], high=self.offsets[1]) - def __call__(self, img: NdarrayOrTensor, factor: Optional[float] = None) -> NdarrayOrTensor: + def __call__(self, img: NdarrayOrTensor, factor: Optional[float] = None, randomize: bool = True) -> NdarrayOrTensor: """ Apply the transform to `img`. @@ -254,9 +269,12 @@ def __call__(self, img: NdarrayOrTensor, factor: Optional[float] = None) -> Ndar can be some image specific value at runtime, like: max(img), etc. """ - self.randomize() + if randomize: + self.randomize() + if not self._do_transform: return img + return self._shfiter(img, self._offset if factor is None else self._offset * factor) @@ -353,20 +371,25 @@ def __init__( self.dtype = dtype def randomize(self, data: Optional[Any] = None) -> None: - self.factor = self.R.uniform(low=self.factors[0], high=self.factors[1]) super().randomize(None) + if not self._do_transform: + return None + self.factor = self.R.uniform(low=self.factors[0], high=self.factors[1]) - def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: + def __call__(self, img: NdarrayOrTensor, randomize: bool = True) -> NdarrayOrTensor: """ Apply the transform to `img`. """ - self.randomize() + if randomize: + self.randomize() + if not self._do_transform: return img + shifter = StdShiftIntensity( factor=self.factor, nonzero=self.nonzero, channel_wise=self.channel_wise, dtype=self.dtype ) - return shifter(img) + return shifter(img=img) class ScaleIntensity(Transform): @@ -378,7 +401,12 @@ class ScaleIntensity(Transform): backend = [TransformBackends.TORCH, TransformBackends.NUMPY] def __init__( - self, minv: Optional[float] = 0.0, maxv: Optional[float] = 1.0, factor: Optional[float] = None + self, + minv: Optional[float] = 0.0, + maxv: Optional[float] = 1.0, + factor: Optional[float] = None, + channel_wise: bool = False, + dtype: DtypeLike = np.float32, ) -> None: """ Args: @@ -386,10 +414,15 @@ def __init__( maxv: maximum value of output data. factor: factor scale by ``v = v * (1 + factor)``. In order to use this parameter, please set `minv` and `maxv` into None. + channel_wise: if True, scale on each channel separately. Please ensure + that the first dimension represents the channel of the image if True. + dtype: output data type, defaults to float32. 
""" self.minv = minv self.maxv = maxv self.factor = factor + self.channel_wise = channel_wise + self.dtype = dtype def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ @@ -400,11 +433,14 @@ def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ if self.minv is not None and self.maxv is not None: - return rescale_array(img, self.minv, self.maxv, img.dtype) + if self.channel_wise: + out = [rescale_array(d, self.minv, self.maxv, dtype=self.dtype) for d in img] + return torch.stack(out) if isinstance(img, torch.Tensor) else np.stack(out) # type: ignore + return rescale_array(img, self.minv, self.maxv, dtype=self.dtype) if self.factor is not None: - out = img * (1 + self.factor) - out, *_ = convert_data_type(out, dtype=img.dtype) - return out + ret = img * (1 + self.factor) + ret, *_ = convert_data_type(ret, dtype=self.dtype) + return ret raise ValueError("Incompatible values: minv=None or maxv=None and factor=None.") @@ -416,12 +452,15 @@ class RandScaleIntensity(RandomizableTransform): backend = ScaleIntensity.backend - def __init__(self, factors: Union[Tuple[float, float], float], prob: float = 0.1) -> None: + def __init__( + self, factors: Union[Tuple[float, float], float], prob: float = 0.1, dtype: DtypeLike = np.float32 + ) -> None: """ Args: factors: factor range to randomly scale by ``v = v * (1 + factor)``. if single number, factor value is picked from (-factors, factors). prob: probability of scale. + dtype: output data type, defaults to float32. """ RandomizableTransform.__init__(self, prob) @@ -432,20 +471,25 @@ def __init__(self, factors: Union[Tuple[float, float], float], prob: float = 0.1 else: self.factors = (min(factors), max(factors)) self.factor = self.factors[0] + self.dtype = dtype def randomize(self, data: Optional[Any] = None) -> None: - self.factor = self.R.uniform(low=self.factors[0], high=self.factors[1]) super().randomize(None) + if not self._do_transform: + return None + self.factor = self.R.uniform(low=self.factors[0], high=self.factors[1]) - def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: + def __call__(self, img: NdarrayOrTensor, randomize: bool = True) -> NdarrayOrTensor: """ Apply the transform to `img`. 
""" - self.randomize() + if randomize: + self.randomize() + if not self._do_transform: return img - scaler = ScaleIntensity(minv=None, maxv=None, factor=self.factor) - return scaler(img) + + return ScaleIntensity(minv=None, maxv=None, factor=self.factor, dtype=self.dtype)(img) class RandBiasField(RandomizableTransform): @@ -468,12 +512,14 @@ class RandBiasField(RandomizableTransform): """ + backend = [TransformBackends.NUMPY] + def __init__( self, degree: int = 3, coeff_range: Tuple[float, float] = (0.0, 0.1), dtype: DtypeLike = np.float32, - prob: float = 1.0, + prob: float = 0.1, ) -> None: RandomizableTransform.__init__(self, prob) if degree < 1: @@ -507,18 +553,23 @@ def _generate_random_field(self, spatial_shape: Sequence[int], degree: int, coef return np.polynomial.legendre.leggrid3d(coords[0], coords[1], coords[2], coeff_mat) raise NotImplementedError("only supports 2D or 3D fields") - def randomize(self, data: np.ndarray) -> None: + def randomize(self, img_size: Sequence[int]) -> None: super().randomize(None) - n_coeff = int(np.prod([(self.degree + k) / k for k in range(1, len(data.shape[1:]) + 1)])) + if not self._do_transform: + return None + n_coeff = int(np.prod([(self.degree + k) / k for k in range(1, len(img_size) + 1)])) self._coeff = self.R.uniform(*self.coeff_range, n_coeff).tolist() - def __call__(self, img: np.ndarray): + def __call__(self, img: NdarrayOrTensor, randomize: bool = True) -> NdarrayOrTensor: """ Apply the transform to `img`. """ - self.randomize(data=img) + if randomize: + self.randomize(img_size=img.shape[1:]) + if not self._do_transform: return img + num_channels, *spatial_shape = img.shape _bias_fields = np.stack( [ @@ -527,7 +578,10 @@ def __call__(self, img: np.ndarray): ], axis=0, ) - return (img * np.exp(_bias_fields)).astype(self.dtype) + img_np, *_ = convert_data_type(img, np.ndarray) + out = img_np * np.exp(_bias_fields) + out, *_ = convert_to_dst_type(src=out, dst=img, dtype=self.dtype) + return out class NormalizeIntensity(Transform): @@ -641,6 +695,8 @@ class ThresholdIntensity(Transform): cval: value to fill the remaining parts of the image, default is 0. """ + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + def __init__(self, threshold: float, above: bool = True, cval: float = 0.0) -> None: if not isinstance(threshold, (int, float)): raise ValueError("threshold must be a float or int number.") @@ -648,13 +704,14 @@ def __init__(self, threshold: float, above: bool = True, cval: float = 0.0) -> N self.above = above self.cval = cval - def __call__(self, img: np.ndarray) -> np.ndarray: + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ Apply the transform to `img`. """ - return np.asarray( - np.where(img > self.threshold if self.above else img < self.threshold, img, self.cval), dtype=img.dtype - ) + mask = img > self.threshold if self.above else img < self.threshold + res = where(mask, img, self.cval) + res, *_ = convert_data_type(res, dtype=img.dtype) + return res class ScaleIntensityRange(Transform): @@ -670,6 +727,8 @@ class ScaleIntensityRange(Transform): clip: whether to perform clip after scaling. 
""" + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + def __init__(self, a_min: float, a_max: float, b_min: float, b_max: float, clip: bool = False) -> None: self.a_min = a_min self.a_max = a_max @@ -677,7 +736,7 @@ def __init__(self, a_min: float, a_max: float, b_min: float, b_max: float, clip: self.b_max = b_max self.clip = clip - def __call__(self, img: np.ndarray): + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ Apply the transform to `img`. """ @@ -688,7 +747,7 @@ def __call__(self, img: np.ndarray): img = (img - self.a_min) / (self.a_max - self.a_min) img = img * (self.b_max - self.b_min) + self.b_min if self.clip: - img = np.asarray(np.clip(img, self.b_min, self.b_max)) + img = clip(img, self.b_min, self.b_max) return img @@ -702,19 +761,22 @@ class AdjustContrast(Transform): gamma: gamma value to adjust the contrast as function. """ + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + def __init__(self, gamma: float) -> None: if not isinstance(gamma, (int, float)): raise ValueError("gamma must be a float or int number.") self.gamma = gamma - def __call__(self, img: np.ndarray): + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ Apply the transform to `img`. """ epsilon = 1e-7 img_min = img.min() img_range = img.max() - img_min - return np.power(((img - img_min) / float(img_range + epsilon)), self.gamma) * img_range + img_min + ret: NdarrayOrTensor = ((img - img_min) / float(img_range + epsilon)) ** self.gamma * img_range + img_min + return ret class RandAdjustContrast(RandomizableTransform): @@ -729,6 +791,8 @@ class RandAdjustContrast(RandomizableTransform): If single number, value is picked from (0.5, gamma), default is (0.5, 4.5). """ + backend = AdjustContrast.backend + def __init__(self, prob: float = 0.1, gamma: Union[Sequence[float], float] = (0.5, 4.5)) -> None: RandomizableTransform.__init__(self, prob) @@ -743,23 +807,27 @@ def __init__(self, prob: float = 0.1, gamma: Union[Sequence[float], float] = (0. else: self.gamma = (min(gamma), max(gamma)) - self.gamma_value: float + self.gamma_value: Optional[float] = None def randomize(self, data: Optional[Any] = None) -> None: super().randomize(None) + if not self._do_transform: + return None self.gamma_value = self.R.uniform(low=self.gamma[0], high=self.gamma[1]) - def __call__(self, img: np.ndarray): + def __call__(self, img: NdarrayOrTensor, randomize: bool = True) -> NdarrayOrTensor: """ Apply the transform to `img`. """ - self.randomize() - if self.gamma_value is None: - raise ValueError("gamma_value is not set.") + if randomize: + self.randomize() + if not self._do_transform: return img - adjuster = AdjustContrast(self.gamma_value) - return adjuster(img) + + if self.gamma_value is None: + raise RuntimeError("gamma_value is not set, please call `randomize` function first.") + return AdjustContrast(self.gamma_value)(img) class ScaleIntensityRangePercentiles(Transform): @@ -817,6 +885,8 @@ class ScaleIntensityRangePercentiles(Transform): relative: whether to scale to the corresponding percentiles of [b_min, b_max]. """ + backend = ScaleIntensityRange.backend + def __init__( self, lower: float, upper: float, b_min: float, b_max: float, clip: bool = False, relative: bool = False ) -> None: @@ -831,12 +901,12 @@ def __init__( self.clip = clip self.relative = relative - def __call__(self, img: np.ndarray): + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ Apply the transform to `img`. 
""" - a_min = np.percentile(img, self.lower) - a_max = np.percentile(img, self.upper) + a_min: float = percentile(img, self.lower) # type: ignore + a_max: float = percentile(img, self.upper) # type: ignore b_min = self.b_min b_max = self.b_max @@ -848,7 +918,7 @@ def __call__(self, img: np.ndarray): img = scalar(img) if self.clip: - img = np.asarray(np.clip(img, self.b_min, self.b_max)) + img = clip(img, self.b_min, self.b_max) return img @@ -871,11 +941,13 @@ class MaskIntensity(Transform): """ - def __init__(self, mask_data: Optional[np.ndarray] = None, select_fn: Callable = is_positive) -> None: + backend = [TransformBackends.NUMPY] + + def __init__(self, mask_data: Optional[NdarrayOrTensor] = None, select_fn: Callable = is_positive) -> None: self.mask_data = mask_data self.select_fn = select_fn - def __call__(self, img: np.ndarray, mask_data: Optional[np.ndarray] = None) -> np.ndarray: + def __call__(self, img: NdarrayOrTensor, mask_data: Optional[NdarrayOrTensor] = None) -> NdarrayOrTensor: """ Args: mask_data: if mask data is single channel, apply to every channel @@ -892,14 +964,16 @@ def __call__(self, img: np.ndarray, mask_data: Optional[np.ndarray] = None) -> n if mask_data is None: raise ValueError("must provide the mask_data when initializing the transform or at runtime.") - mask_data = np.asarray(self.select_fn(mask_data)) - if mask_data.shape[0] != 1 and mask_data.shape[0] != img.shape[0]: + mask_data_, *_ = convert_to_dst_type(src=mask_data, dst=img) + + mask_data_ = self.select_fn(mask_data_) + if mask_data_.shape[0] != 1 and mask_data_.shape[0] != img.shape[0]: raise ValueError( "When mask_data is not single channel, mask_data channels must match img, " - f"got img channels={img.shape[0]} mask_data channels={mask_data.shape[0]}." + f"got img channels={img.shape[0]} mask_data channels={mask_data_.shape[0]}." ) - return np.asarray(img * mask_data) + return img * mask_data_ class SavitzkyGolaySmooth(Transform): @@ -914,7 +988,7 @@ class SavitzkyGolaySmooth(Transform): or ``'circular'``. Default: ``'zeros'``. See ``torch.nn.Conv1d()`` for more information. """ - backend = [TransformBackends.NUMPY] + backend = [TransformBackends.TORCH] def __init__(self, window_length: int, order: int, axis: int = 1, mode: str = "zeros"): @@ -927,7 +1001,7 @@ def __init__(self, window_length: int, order: int, axis: int = 1, mode: str = "z self.mode = mode self.img_t: torch.Tensor = torch.tensor(0.0) - def __call__(self, img: NdarrayOrTensor) -> torch.Tensor: + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ Args: img: array containing input data. Must be real and in shape [channels, spatial1, spatial2, ...]. 
@@ -941,7 +1015,9 @@ def __call__(self, img: NdarrayOrTensor) -> torch.Tensor: # add one to transform axis because a batch axis will be added at dimension 0 savgol_filter = SavitzkyGolayFilter(self.window_length, self.order, self.axis + 1, self.mode) # convert to Tensor and add Batch axis expected by HilbertTransform - out: torch.Tensor = savgol_filter(self.img_t.unsqueeze(0)).squeeze(0) + smoothed = savgol_filter(self.img_t.unsqueeze(0)).squeeze(0) + out, *_ = convert_to_dst_type(smoothed, dst=img) + return out @@ -957,6 +1033,8 @@ class DetectEnvelope(Transform): """ + backend = [TransformBackends.TORCH] + def __init__(self, axis: int = 1, n: Union[int, None] = None) -> None: if PT_BEFORE_1_7: @@ -968,7 +1046,7 @@ def __init__(self, axis: int = 1, n: Union[int, None] = None) -> None: self.axis = axis self.n = n - def __call__(self, img: np.ndarray): + def __call__(self, img: NdarrayOrTensor): """ Args: @@ -978,11 +1056,15 @@ def __call__(self, img: np.ndarray): np.ndarray containing envelope of data in img along the specified axis. """ + img_t: torch.Tensor + img_t, *_ = convert_data_type(img, torch.Tensor) # type: ignore # add one to transform axis because a batch axis will be added at dimension 0 hilbert_transform = HilbertTransform(self.axis + 1, self.n) # convert to Tensor and add Batch axis expected by HilbertTransform - input_data = torch.as_tensor(np.ascontiguousarray(img)).unsqueeze(0) - return np.abs(hilbert_transform(input_data).squeeze(0).numpy()) + out = hilbert_transform(img_t.unsqueeze(0)).squeeze(0).abs() + out, *_ = convert_to_dst_type(src=out, dst=img) + + return out class GaussianSmooth(Transform): @@ -999,14 +1081,25 @@ class GaussianSmooth(Transform): """ + backend = [TransformBackends.TORCH] + def __init__(self, sigma: Union[Sequence[float], float] = 1.0, approx: str = "erf") -> None: self.sigma = sigma self.approx = approx - def __call__(self, img: np.ndarray): - gaussian_filter = GaussianFilter(img.ndim - 1, self.sigma, approx=self.approx) - input_data = torch.as_tensor(np.ascontiguousarray(img), dtype=torch.float).unsqueeze(0) - return gaussian_filter(input_data).squeeze(0).detach().numpy() + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: + img_t: torch.Tensor + img_t, *_ = convert_data_type(img, torch.Tensor, dtype=torch.float) # type: ignore + sigma: Union[Sequence[torch.Tensor], torch.Tensor] + if isinstance(self.sigma, Sequence): + sigma = [torch.as_tensor(s, device=img_t.device) for s in self.sigma] + else: + sigma = torch.as_tensor(self.sigma, device=img_t.device) + gaussian_filter = GaussianFilter(img_t.ndim - 1, sigma, approx=self.approx) + out_t: torch.Tensor = gaussian_filter(img_t.unsqueeze(0)).squeeze(0) + out, *_ = convert_data_type(out_t, type(img), device=img.device if isinstance(img, torch.Tensor) else None) + + return out class RandGaussianSmooth(RandomizableTransform): @@ -1023,6 +1116,8 @@ class RandGaussianSmooth(RandomizableTransform): """ + backend = GaussianSmooth.backend + def __init__( self, sigma_x: Tuple[float, float] = (0.25, 1.5), @@ -1043,14 +1138,19 @@ def __init__( def randomize(self, data: Optional[Any] = None) -> None: super().randomize(None) + if not self._do_transform: + return None self.x = self.R.uniform(low=self.sigma_x[0], high=self.sigma_x[1]) self.y = self.R.uniform(low=self.sigma_y[0], high=self.sigma_y[1]) self.z = self.R.uniform(low=self.sigma_z[0], high=self.sigma_z[1]) - def __call__(self, img: np.ndarray): - self.randomize() + def __call__(self, img: NdarrayOrTensor, randomize: bool = True) -> 
NdarrayOrTensor: + if randomize: + self.randomize() + if not self._do_transform: return img + sigma = ensure_tuple_size(tup=(self.x, self.y, self.z), dim=img.ndim - 1) return GaussianSmooth(sigma=sigma, approx=self.approx)(img) @@ -1082,6 +1182,8 @@ class GaussianSharpen(Transform): """ + backend = [TransformBackends.TORCH] + def __init__( self, sigma1: Union[Sequence[float], float] = 3.0, @@ -1094,13 +1196,19 @@ def __init__( self.alpha = alpha self.approx = approx - def __call__(self, img: np.ndarray): - gaussian_filter1 = GaussianFilter(img.ndim - 1, self.sigma1, approx=self.approx) - gaussian_filter2 = GaussianFilter(img.ndim - 1, self.sigma2, approx=self.approx) - input_data = torch.as_tensor(np.ascontiguousarray(img), dtype=torch.float).unsqueeze(0) - blurred_f = gaussian_filter1(input_data) - filter_blurred_f = gaussian_filter2(blurred_f) - return (blurred_f + self.alpha * (blurred_f - filter_blurred_f)).squeeze(0).detach().numpy() + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: + img_t: torch.Tensor + img_t, *_ = convert_data_type(img, torch.Tensor, dtype=torch.float32) # type: ignore + + gf1, gf2 = ( + GaussianFilter(img_t.ndim - 1, sigma, approx=self.approx).to(img_t.device) + for sigma in (self.sigma1, self.sigma2) + ) + blurred_f = gf1(img_t.unsqueeze(0)) + filter_blurred_f = gf2(blurred_f) + out_t: torch.Tensor = (blurred_f + self.alpha * (blurred_f - filter_blurred_f)).squeeze(0) + out, *_ = convert_data_type(out_t, type(img), device=img.device if isinstance(img, torch.Tensor) else None) + return out class RandGaussianSharpen(RandomizableTransform): @@ -1125,6 +1233,8 @@ class RandGaussianSharpen(RandomizableTransform): """ + backend = GaussianSharpen.backend + def __init__( self, sigma1_x: Tuple[float, float] = (0.5, 1.0), @@ -1146,9 +1256,18 @@ def __init__( self.sigma2_z = sigma2_z self.alpha = alpha self.approx = approx + self.x1: Optional[float] = None + self.y1: Optional[float] = None + self.z1: Optional[float] = None + self.x2: Optional[float] = None + self.y2: Optional[float] = None + self.z2: Optional[float] = None + self.a: Optional[float] = None def randomize(self, data: Optional[Any] = None) -> None: super().randomize(None) + if not self._do_transform: + return None self.x1 = self.R.uniform(low=self.sigma1_x[0], high=self.sigma1_x[1]) self.y1 = self.R.uniform(low=self.sigma1_y[0], high=self.sigma1_y[1]) self.z1 = self.R.uniform(low=self.sigma1_z[0], high=self.sigma1_z[1]) @@ -1160,10 +1279,15 @@ def randomize(self, data: Optional[Any] = None) -> None: self.z2 = self.R.uniform(low=sigma2_z[0], high=sigma2_z[1]) self.a = self.R.uniform(low=self.alpha[0], high=self.alpha[1]) - def __call__(self, img: np.ndarray): - self.randomize() + def __call__(self, img: NdarrayOrTensor, randomize: bool = True) -> NdarrayOrTensor: + if randomize: + self.randomize() + if not self._do_transform: return img + + if self.x2 is None or self.y2 is None or self.z2 is None or self.a is None: + raise RuntimeError("please call the `randomize()` function first.") sigma1 = ensure_tuple_size(tup=(self.x1, self.y1, self.z1), dim=img.ndim - 1) sigma2 = ensure_tuple_size(tup=(self.x2, self.y2, self.z2), dim=img.ndim - 1) return GaussianSharpen(sigma1=sigma1, sigma2=sigma2, alpha=self.a, approx=self.approx)(img) @@ -1180,6 +1304,8 @@ class RandHistogramShift(RandomizableTransform): prob: probability of histogram shift. 
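Example (illustrative; assumes an array ``img`` is in scope)::

            shifter = RandHistogramShift(num_control_points=(5, 10), prob=1.0)
            out = shifter(img)  # intensities remapped through a random monotone piecewise-linear curve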
""" + backend = [TransformBackends.NUMPY] + def __init__(self, num_control_points: Union[Tuple[int, int], int] = 10, prob: float = 0.1) -> None: RandomizableTransform.__init__(self, prob) @@ -1193,9 +1319,13 @@ def __init__(self, num_control_points: Union[Tuple[int, int], int] = 10, prob: f if min(num_control_points) <= 2: raise ValueError("num_control_points should be greater than or equal to 3") self.num_control_points = (min(num_control_points), max(num_control_points)) + self.reference_control_points: np.ndarray + self.floating_control_points: np.ndarray def randomize(self, data: Optional[Any] = None) -> None: super().randomize(None) + if not self._do_transform: + return None num_control_point = self.R.randint(self.num_control_points[0], self.num_control_points[1] + 1) self.reference_control_points = np.linspace(0, 1, num_control_point) self.floating_control_points = np.copy(self.reference_control_points) @@ -1204,79 +1334,26 @@ def randomize(self, data: Optional[Any] = None) -> None: self.floating_control_points[i - 1], self.floating_control_points[i + 1] ) - def __call__(self, img: np.ndarray) -> np.ndarray: - self.randomize() + def __call__(self, img: NdarrayOrTensor, randomize: bool = True) -> NdarrayOrTensor: + if randomize: + self.randomize() + if not self._do_transform: return img - img_min, img_max = img.min(), img.max() + + if self.reference_control_points is None or self.floating_control_points is None: + raise RuntimeError("please call the `randomize()` function first.") + img_np: np.ndarray + img_np, *_ = convert_data_type(img, np.ndarray) # type: ignore + img_min, img_max = img_np.min(), img_np.max() reference_control_points_scaled = self.reference_control_points * (img_max - img_min) + img_min floating_control_points_scaled = self.floating_control_points * (img_max - img_min) + img_min - return np.asarray( - np.interp(img, reference_control_points_scaled, floating_control_points_scaled), dtype=img.dtype + img_np = np.asarray( + np.interp(img_np, reference_control_points_scaled, floating_control_points_scaled), dtype=img_np.dtype ) - - -class RandGibbsNoise(RandomizableTransform): - """ - Naturalistic image augmentation via Gibbs artifacts. The transform - randomly applies Gibbs noise to 2D/3D MRI images. Gibbs artifacts - are one of the common type of type artifacts appearing in MRI scans. - - The transform is applied to all the channels in the data. - - For general information on Gibbs artifacts, please refer to: - https://pubs.rsna.org/doi/full/10.1148/rg.313105115 - https://pubs.rsna.org/doi/full/10.1148/radiographics.22.4.g02jl14949 - - - Args: - prob (float): probability of applying the transform. - alpha (float, Sequence(float)): Parametrizes the intensity of the Gibbs noise filter applied. Takes - values in the interval [0,1] with alpha = 0 acting as the identity mapping. - If a length-2 list is given as [a,b] then the value of alpha will be - sampled uniformly from the interval [a,b]. 0 <= a <= b <= 1. - as_tensor_output: if true return torch.Tensor, else return np.array. default: True. 
- """ - - def __init__(self, prob: float = 0.1, alpha: Sequence[float] = (0.0, 1.0), as_tensor_output: bool = True) -> None: - - if len(alpha) != 2: - raise ValueError("alpha length must be 2.") - if alpha[1] > 1 or alpha[0] < 0: - raise ValueError("alpha must take values in the interval [0,1]") - if alpha[0] > alpha[1]: - raise ValueError("When alpha = [a,b] we need a < b.") - - self.alpha = alpha - self.sampled_alpha = -1.0 # stores last alpha sampled by randomize() - self.as_tensor_output = as_tensor_output - - RandomizableTransform.__init__(self, prob=prob) - - def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor, np.ndarray]: - - # randomize application and possibly alpha - self._randomize(None) - - if self._do_transform: - # apply transform - transform = GibbsNoise(self.sampled_alpha, self.as_tensor_output) - img = transform(img) - else: - if isinstance(img, np.ndarray) and self.as_tensor_output: - img = torch.Tensor(img) - elif isinstance(img, torch.Tensor) and not self.as_tensor_output: - img = img.detach().cpu().numpy() + img, *_ = convert_to_dst_type(img_np, dst=img) return img - def _randomize(self, _: Any) -> None: - """ - (1) Set random variable to apply the transform. - (2) Get alpha from uniform distribution. - """ - super().randomize(None) - self.sampled_alpha = self.R.uniform(self.alpha[0], self.alpha[1]) - class GibbsNoise(Transform, Fourier): """ @@ -1296,21 +1373,20 @@ class GibbsNoise(Transform, Fourier): Args: alpha: Parametrizes the intensity of the Gibbs noise filter applied. Takes values in the interval [0,1] with alpha = 0 acting as the identity mapping. - as_tensor_output: if true return torch.Tensor, else return np.array. Default: True. """ - def __init__(self, alpha: float = 0.5, as_tensor_output: bool = True) -> None: + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + + @deprecated_arg(name="as_tensor_output", since="0.6") + def __init__(self, alpha: float = 0.1, as_tensor_output: bool = True) -> None: if alpha > 1 or alpha < 0: - raise ValueError("alpha must take values in the interval [0,1].") + raise ValueError("alpha must take values in the interval [0, 1].") self.alpha = alpha - self.as_tensor_output = as_tensor_output - def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor, np.ndarray]: + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: n_dims = len(img.shape[1:]) - if isinstance(img, np.ndarray): - img = torch.Tensor(img) # FT k = self.shift_fourier(img, n_dims) # build and apply mask @@ -1318,13 +1394,13 @@ def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor, # map back img = self.inv_shift_fourier(k, n_dims) - return img if self.as_tensor_output else img.cpu().detach().numpy() + return img - def _apply_mask(self, k: torch.Tensor) -> torch.Tensor: + def _apply_mask(self, k: NdarrayOrTensor) -> NdarrayOrTensor: """Builds and applies a mask on the spatial dimensions. Args: - k (np.ndarray): k-space version of the image. + k: k-space version of the image. Returns: masked version of the k-space image. 
""" @@ -1345,11 +1421,73 @@ def _apply_mask(self, k: torch.Tensor) -> torch.Tensor: # add channel dimension into mask mask = np.repeat(mask[None], k.shape[0], axis=0) + if isinstance(k, torch.Tensor): + mask, *_ = convert_data_type(mask, torch.Tensor, device=k.device) + # apply binary mask - k_masked = k * torch.tensor(mask, device=k.device) + k_masked: NdarrayOrTensor + k_masked = k * mask return k_masked +class RandGibbsNoise(RandomizableTransform): + """ + Naturalistic image augmentation via Gibbs artifacts. The transform + randomly applies Gibbs noise to 2D/3D MRI images. Gibbs artifacts + are one of the common type of type artifacts appearing in MRI scans. + + The transform is applied to all the channels in the data. + + For general information on Gibbs artifacts, please refer to: + https://pubs.rsna.org/doi/full/10.1148/rg.313105115 + https://pubs.rsna.org/doi/full/10.1148/radiographics.22.4.g02jl14949 + + + Args: + prob (float): probability of applying the transform. + alpha (Sequence(float)): Parametrizes the intensity of the Gibbs noise filter applied. Takes + values in the interval [0,1] with alpha = 0 acting as the identity mapping. + If a length-2 list is given as [a,b] then the value of alpha will be + sampled uniformly from the interval [a,b]. 0 <= a <= b <= 1. + """ + + backend = GibbsNoise.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") + def __init__(self, prob: float = 0.1, alpha: Sequence[float] = (0.0, 1.0), as_tensor_output: bool = True) -> None: + if len(alpha) != 2: + raise ValueError("alpha length must be 2.") + if alpha[1] > 1 or alpha[0] < 0: + raise ValueError("alpha must take values in the interval [0, 1]") + if alpha[0] > alpha[1]: + raise ValueError("When alpha = [a,b] we need a < b.") + + self.alpha = alpha + self.sampled_alpha = -1.0 # stores last alpha sampled by randomize() + + RandomizableTransform.__init__(self, prob=prob) + + def randomize(self, data: Any) -> None: + """ + (1) Set random variable to apply the transform. + (2) Get alpha from uniform distribution. + """ + super().randomize(None) + if not self._do_transform: + return None + self.sampled_alpha = self.R.uniform(self.alpha[0], self.alpha[1]) + + def __call__(self, img: NdarrayOrTensor, randomize: bool = True): + if randomize: + # randomize application and possibly alpha + self.randomize(None) + + if not self._do_transform: + return img + + return GibbsNoise(self.sampled_alpha)(img) + + class KSpaceSpikeNoise(Transform, Fourier): """ Apply localized spikes in `k`-space at the given locations and intensities. @@ -1377,8 +1515,6 @@ class KSpaceSpikeNoise(Transform, Fourier): receive a sequence of intensities. This value should be tested as it is data-dependent. The default values are the 2.5 the mean of the log-intensity for each channel. - as_tensor_output: if ``True`` return torch.Tensor, else return np.array. - Default: ``True``. Example: When working with 4D data, ``KSpaceSpikeNoise(loc = ((3,60,64,32), (64,60,32)), k_intensity = (13,14))`` @@ -1387,6 +1523,9 @@ class KSpaceSpikeNoise(Transform, Fourier): with `log-intensity = 14`. 
""" + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, loc: Union[Tuple, Sequence[Tuple]], @@ -1395,7 +1534,6 @@ def __init__( ): self.loc = ensure_tuple(loc) - self.as_tensor_output = as_tensor_output self.k_intensity = k_intensity # assert one-to-one relationship between factors and locations @@ -1409,7 +1547,7 @@ def __init__( if isinstance(self.loc[0], Sequence) and k_intensity is not None and not isinstance(self.k_intensity, Sequence): raise ValueError("There must be one intensity_factor value for each tuple of indices in loc.") - def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor, np.ndarray]: + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ Args: img: image with dimensions (C, H, W) or (C, H, W, D) @@ -1421,22 +1559,21 @@ def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor, raise RuntimeError("Image needs a channel direction.") if isinstance(self.loc[0], int) and len(img.shape) == 4 and len(self.loc) == 2: raise RuntimeError("Input images of dimension 4 need location tuple to be length 3 or 4") - if isinstance(self.loc[0], Sequence) and len(img.shape) == 4 and min(map(lambda x: len(x), self.loc)) == 2: + if isinstance(self.loc[0], Sequence) and len(img.shape) == 4 and min(map(len, self.loc)) == 2: raise RuntimeError("Input images of dimension 4 need location tuple to be length 3 or 4") n_dims = len(img.shape[1:]) - if isinstance(img, np.ndarray): - img = torch.Tensor(img) # FT k = self.shift_fourier(img, n_dims) - log_abs = torch.log(torch.absolute(k) + 1e-10) - phase = torch.angle(k) + lib = np if isinstance(k, np.ndarray) else torch + log_abs = lib.log(lib.abs(k) + 1e-10) # type: ignore + phase = lib.angle(k) # type: ignore k_intensity = self.k_intensity # default log intensity if k_intensity is None: - k_intensity = tuple(torch.mean(log_abs, dim=tuple(range(-n_dims, 0))) * 2.5) + k_intensity = tuple(lib.mean(log_abs, axis=tuple(range(-n_dims, 0))) * 2.5) # type: ignore # highlight if isinstance(self.loc[0], Sequence): @@ -1445,10 +1582,10 @@ def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor, else: self._set_spike(log_abs, self.loc, k_intensity) # map back - k = torch.exp(log_abs) * torch.exp(1j * phase) - img = self.inv_shift_fourier(k, n_dims) + k = lib.exp(log_abs) * lib.exp(1j * phase) # type: ignore + img, *_ = convert_to_dst_type(self.inv_shift_fourier(k, n_dims), dst=img) - return img if self.as_tensor_output else img.cpu().detach().numpy() + return img def _check_indices(self, img) -> None: """Helper method to check consistency of self.loc and input image. @@ -1468,7 +1605,7 @@ def _check_indices(self, img) -> None: f"The index value at position {i} of one of the tuples in loc = {self.loc} is out of bounds for current image." ) - def _set_spike(self, k: torch.Tensor, idx: Tuple, val: Union[Sequence[float], float]): + def _set_spike(self, k: NdarrayOrTensor, idx: Tuple, val: Union[Sequence[float], float]): """ Helper function to introduce a given intensity at given location. @@ -1504,18 +1641,14 @@ class RandKSpaceSpikeNoise(RandomizableTransform, Fourier): Args: prob: probability of applying the transform, either on all channels at once, or channel-wise if ``channel_wise = True``. 
- intensity_range: pass a tuple - (a, b) to sample the log-intensity from the interval (a, b) + intensity_range: pass a tuple (a, b) to sample the log-intensity from the interval (a, b) uniformly for all channels. Or pass sequence of intervals ((a0, b0), (a1, b1), ...) to sample for each respective channel. - In the second case, the number of 2-tuples must match the number of - channels. + In the second case, the number of 2-tuples must match the number of channels. Default range is `(0.95x, 1.10x)` where `x` is the mean log-intensity for each channel. channel_wise: treat each channel independently. True by default. - as_tensor_output: if True return torch.Tensor, else - return np.array. default: True. Example: To apply `k`-space spikes randomly with probability 0.5, and log-intensity sampled from the interval (11, 12) for each channel, one uses ``RandKSpaceSpikeNoise(prob=0.5, intensity_range=(11, 12), channel_wise=True)`` """ + backend = KSpaceSpikeNoise.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, prob: float = 0.1, intensity_range: Optional[Sequence[Union[Sequence[float], float]]] = None, - channel_wise=True, + channel_wise: bool = True, as_tensor_output: bool = True, ): self.intensity_range = intensity_range self.channel_wise = channel_wise - self.as_tensor_output = as_tensor_output self.sampled_k_intensity: List = [] self.sampled_locs: List[Tuple] = [] @@ -1543,13 +1678,14 @@ def __init__( super().__init__(prob) - def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor, np.ndarray]: + def __call__(self, img: NdarrayOrTensor, randomize: bool = True): """ Apply transform to `img`. Assumes data is in channel-first form. Args: img: image with dimensions (C, H, W) or (C, H, W, D) """ + if ( self.intensity_range is not None and isinstance(self.intensity_range[0], Sequence) @@ -1562,20 +1698,16 @@ def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor, self.sampled_k_intensity = [] self.sampled_locs = [] - if not isinstance(img, torch.Tensor): - img = torch.Tensor(img) - - intensity_range = self._make_sequence(img) - self._randomize(img, intensity_range) + if randomize: + intensity_range = self._make_sequence(img) + self.randomize(img, intensity_range) - # build/appy transform only if there are spike locations - if self.sampled_locs: - transform = KSpaceSpikeNoise(self.sampled_locs, self.sampled_k_intensity, self.as_tensor_output) - return transform(img) + if not self._do_transform: + return img - return img if self.as_tensor_output else img.detach().numpy() + return KSpaceSpikeNoise(self.sampled_locs, self.sampled_k_intensity)(img) - def _randomize(self, img: torch.Tensor, intensity_range: Sequence[Sequence[float]]) -> None: + def randomize(self, img: NdarrayOrTensor, intensity_range: Sequence[Sequence[float]]) -> None: # type: ignore """ Helper method to sample both the location and intensity of the spikes. When not working channel wise (channel_wise=False) it uses the random @@ -1585,25 +1717,24 @@ def _randomize(self, img: torch.Tensor, intensity_range: Sequence[Sequence[float When working channel wise, the method randomly samples a location and intensity for each channel depending on ``self._do_transform``.
""" - # randomizing per channel + super().randomize(None) + if not self._do_transform: + return None if self.channel_wise: + # randomizing per channel for i, chan in enumerate(img): - super().randomize(None) - if self._do_transform: - self.sampled_locs.append((i,) + tuple(self.R.randint(0, k) for k in chan.shape)) - self.sampled_k_intensity.append(self.R.uniform(intensity_range[i][0], intensity_range[i][1])) - # working with all channels together + self.sampled_locs.append((i,) + tuple(self.R.randint(0, k) for k in chan.shape)) + self.sampled_k_intensity.append(self.R.uniform(intensity_range[i][0], intensity_range[i][1])) else: - super().randomize(None) - if self._do_transform: - spatial = tuple(self.R.randint(0, k) for k in img.shape[1:]) - self.sampled_locs = [(i,) + spatial for i in range(img.shape[0])] - if isinstance(intensity_range[0], Sequence): - self.sampled_k_intensity = [self.R.uniform(p[0], p[1]) for p in intensity_range] - else: - self.sampled_k_intensity = [self.R.uniform(intensity_range[0], intensity_range[1])] * len(img) + # working with all channels together + spatial = tuple(self.R.randint(0, k) for k in img.shape[1:]) + self.sampled_locs = [(i,) + spatial for i in range(img.shape[0])] + if isinstance(intensity_range[0], Sequence): + self.sampled_k_intensity = [self.R.uniform(p[0], p[1]) for p in intensity_range] + else: + self.sampled_k_intensity = [self.R.uniform(intensity_range[0], intensity_range[1])] * len(img) - def _make_sequence(self, x: torch.Tensor) -> Sequence[Sequence[float]]: + def _make_sequence(self, x: NdarrayOrTensor) -> Sequence[Sequence[float]]: """ Formats the sequence of intensities ranges to Sequence[Sequence[float]]. """ @@ -1615,7 +1746,7 @@ def _make_sequence(self, x: torch.Tensor) -> Sequence[Sequence[float]]: return (ensure_tuple(self.intensity_range),) * x.shape[0] return ensure_tuple(self.intensity_range) - def _set_default_range(self, img: torch.Tensor) -> Sequence[Sequence[float]]: + def _set_default_range(self, img: NdarrayOrTensor) -> Sequence[Sequence[float]]: """ Sets default intensity ranges to be sampled. @@ -1625,18 +1756,17 @@ def _set_default_range(self, img: torch.Tensor) -> Sequence[Sequence[float]]: n_dims = len(img.shape[1:]) k = self.shift_fourier(img, n_dims) - log_abs = torch.log(torch.absolute(k) + 1e-10) - shifted_means = torch.mean(log_abs, dim=tuple(range(-n_dims, 0))) * 2.5 + mod = torch if isinstance(k, torch.Tensor) else np + log_abs = mod.log(mod.absolute(k) + 1e-10) # type: ignore + shifted_means = mod.mean(log_abs, dim=tuple(range(-n_dims, 0))) * 2.5 # type: ignore return tuple((i * 0.95, i * 1.1) for i in shifted_means) -class RandCoarseDropout(RandomizableTransform): +class RandCoarseTransform(RandomizableTransform): """ - Randomly coarse dropout regions in the image, then fill in the rectangular regions with specified value. - Or keep the rectangular regions and fill in the other areas with specified value. - Refer to papers: https://arxiv.org/abs/1708.04552, https://arxiv.org/pdf/1604.07379 - And other implementation: https://albumentations.ai/docs/api_reference/augmentations/transforms/ - #albumentations.augmentations.transforms.CoarseDropout. + Randomly select coarse regions in the image, then execute transform operations for the regions. + It's the base class of all kinds of region transforms. 
+ Refer to papers: https://arxiv.org/abs/1708.04552 Args: holes: number of regions to dropout, if `max_holes` is not None, use this arg as the minimum number to @@ -1646,12 +1776,6 @@ class RandCoarseDropout(RandomizableTransform): if some components of the `spatial_size` are non-positive values, the transform will use the corresponding components of input img size. For example, `spatial_size=(32, -1)` will be adapted to `(32, 64)` if the second spatial dimension size of img is `64`. - dropout_holes: if `True`, dropout the regions of holes and fill value, if `False`, keep the holes and - dropout the outside and fill value. default to `True`. - fill_value: target value to fill the dropout regions, if providing a number, will use it as constant - value to fill all the regions. if providing a tuple for the `min` and `max`, will randomly select - value for every pixel / voxel from the range `[min, max)`. if None, will compute the `min` and `max` - value of input image then randomly select value to fill, default to None. max_holes: if not None, define the maximum number to randomly select the expected number of regions. max_spatial_size: if not None, define the maximum spatial size to randomly select size for every region. if some components of the `max_spatial_size` are non-positive values, the transform will use the @@ -1661,12 +1785,12 @@ class RandCoarseDropout(RandomizableTransform): """ + backend = [TransformBackends.NUMPY] + def __init__( self, holes: int, spatial_size: Union[Sequence[int], int], - dropout_holes: bool = True, - fill_value: Optional[Union[Tuple[float, float], float]] = None, max_holes: Optional[int] = None, max_spatial_size: Optional[Union[Sequence[int], int]] = None, prob: float = 0.1, @@ -1676,17 +1800,14 @@ def __init__( raise ValueError("number of holes must be greater than 0.") self.holes = holes self.spatial_size = spatial_size - self.dropout_holes = dropout_holes - if isinstance(fill_value, (tuple, list)): - if len(fill_value) != 2: - raise ValueError("fill value should contain 2 numbers if providing the `min` and `max`.") - self.fill_value = fill_value self.max_holes = max_holes self.max_spatial_size = max_spatial_size self.hole_coords: List = [] def randomize(self, img_size: Sequence[int]) -> None: super().randomize(None) + if not self._do_transform: + return None size = fall_back_tuple(self.spatial_size, img_size) self.hole_coords = [] # clear previously computed coords num_holes = self.holes if self.max_holes is None else self.R.randint(self.holes, self.max_holes + 1) @@ -1697,28 +1818,143 @@ def randomize(self, img_size: Sequence[int]) -> None: valid_size = get_valid_patch_size(img_size, size) self.hole_coords.append((slice(None),) + get_random_patch(img_size, valid_size, self.R)) - def __call__(self, img: np.ndarray): - self.randomize(img.shape[1:]) - ret = img - if self._do_transform: - fill_value = (img.min(), img.max()) if self.fill_value is None else self.fill_value - - if self.dropout_holes: - for h in self.hole_coords: - if isinstance(fill_value, (tuple, list)): - ret[h] = self.R.uniform(fill_value[0], fill_value[1], size=img[h].shape) - else: - ret[h] = fill_value - else: + @abstractmethod + def _transform_holes(self, img: np.ndarray) -> np.ndarray: + """ + Transform the randomly selected `self.hole_coords` in input images. 
+ + """ + raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") + + def __call__(self, img: NdarrayOrTensor, randomize: bool = True) -> NdarrayOrTensor: + if randomize: + self.randomize(img.shape[1:]) + + if not self._do_transform: + return img + + img_np: np.ndarray + img_np, *_ = convert_data_type(img, np.ndarray) # type: ignore + out = self._transform_holes(img=img_np) + ret, *_ = convert_to_dst_type(src=out, dst=img) + return ret + + +class RandCoarseDropout(RandCoarseTransform): + """ + Randomly coarse dropout regions in the image, then fill in the rectangular regions with specified value. + Or keep the rectangular regions and fill in the other areas with specified value. + Refer to papers: https://arxiv.org/abs/1708.04552, https://arxiv.org/pdf/1604.07379 + And other implementation: https://albumentations.ai/docs/api_reference/augmentations/transforms/ + #albumentations.augmentations.transforms.CoarseDropout. + + Args: + holes: number of regions to dropout, if `max_holes` is not None, use this arg as the minimum number to + randomly select the expected number of regions. + spatial_size: spatial size of the regions to dropout, if `max_spatial_size` is not None, use this arg + as the minimum spatial size to randomly select size for every region. + if some components of the `spatial_size` are non-positive values, the transform will use the + corresponding components of input img size. For example, `spatial_size=(32, -1)` will be adapted + to `(32, 64)` if the second spatial dimension size of img is `64`. + dropout_holes: if `True`, dropout the regions of holes and fill value, if `False`, keep the holes and + dropout the outside and fill value. default to `True`. + fill_value: target value to fill the dropout regions, if providing a number, will use it as constant + value to fill all the regions. if providing a tuple for the `min` and `max`, will randomly select + value for every pixel / voxel from the range `[min, max)`. if None, will compute the `min` and `max` + value of input image then randomly select value to fill, default to None. + max_holes: if not None, define the maximum number to randomly select the expected number of regions. + max_spatial_size: if not None, define the maximum spatial size to randomly select size for every region. + if some components of the `max_spatial_size` are non-positive values, the transform will use the + corresponding components of input img size. For example, `max_spatial_size=(32, -1)` will be adapted + to `(32, 64)` if the second spatial dimension size of img is `64`. + prob: probability of applying the transform. + + """ + + def __init__( + self, + holes: int, + spatial_size: Union[Sequence[int], int], + dropout_holes: bool = True, + fill_value: Optional[Union[Tuple[float, float], float]] = None, + max_holes: Optional[int] = None, + max_spatial_size: Optional[Union[Sequence[int], int]] = None, + prob: float = 0.1, + ) -> None: + super().__init__( + holes=holes, spatial_size=spatial_size, max_holes=max_holes, max_spatial_size=max_spatial_size, prob=prob + ) + self.dropout_holes = dropout_holes + if isinstance(fill_value, (tuple, list)): + if len(fill_value) != 2: + raise ValueError("fill value should contain 2 numbers if providing the `min` and `max`.") + self.fill_value = fill_value + + def _transform_holes(self, img: np.ndarray): + """ + Fill the randomly selected `self.hole_coords` in input images. + Please note that we usually only use `self.R` in `randomize()` method, here is a special case. 
+ + """ + fill_value = (img.min(), img.max()) if self.fill_value is None else self.fill_value + + if self.dropout_holes: + for h in self.hole_coords: if isinstance(fill_value, (tuple, list)): - ret = self.R.uniform(fill_value[0], fill_value[1], size=img.shape).astype(img.dtype) + img[h] = self.R.uniform(fill_value[0], fill_value[1], size=img[h].shape) else: - ret = np.full_like(img, fill_value) - for h in self.hole_coords: - ret[h] = img[h] + img[h] = fill_value + ret = img + else: + if isinstance(fill_value, (tuple, list)): + ret = self.R.uniform(fill_value[0], fill_value[1], size=img.shape).astype(img.dtype) + else: + ret = np.full_like(img, fill_value) + for h in self.hole_coords: + ret[h] = img[h] return ret +class RandCoarseShuffle(RandCoarseTransform): + """ + Randomly select regions in the image, then shuffle the pixels within every region. + It shuffles every channel separately. + Refer to paper: + Kang, Guoliang, et al. "Patchshuffle regularization." arXiv preprint arXiv:1707.07103 (2017). + https://arxiv.org/abs/1707.07103 + + Args: + holes: number of regions to dropout, if `max_holes` is not None, use this arg as the minimum number to + randomly select the expected number of regions. + spatial_size: spatial size of the regions to dropout, if `max_spatial_size` is not None, use this arg + as the minimum spatial size to randomly select size for every region. + if some components of the `spatial_size` are non-positive values, the transform will use the + corresponding components of input img size. For example, `spatial_size=(32, -1)` will be adapted + to `(32, 64)` if the second spatial dimension size of img is `64`. + max_holes: if not None, define the maximum number to randomly select the expected number of regions. + max_spatial_size: if not None, define the maximum spatial size to randomly select size for every region. + if some components of the `max_spatial_size` are non-positive values, the transform will use the + corresponding components of input img size. For example, `max_spatial_size=(32, -1)` will be adapted + to `(32, 64)` if the second spatial dimension size of img is `64`. + prob: probability of applying the transform. + + """ + + def _transform_holes(self, img: np.ndarray): + """ + Shuffle the content of randomly selected `self.hole_coords` in input images. + Please note that we usually only use `self.R` in `randomize()` method, here is a special case. + + """ + for h in self.hole_coords: + # shuffle every channel separately + for i, c in enumerate(img[h]): + patch_channel = c.flatten() + self.R.shuffle(patch_channel) + img[h][i] = patch_channel.reshape(c.shape) + return img + + class HistogramNormalize(Transform): """ Apply the histogram normalization to input image. 
@@ -1736,12 +1972,14 @@ class HistogramNormalize(Transform): """ + backend = [TransformBackends.NUMPY] + def __init__( self, num_bins: int = 256, min: int = 0, max: int = 255, - mask: Optional[np.ndarray] = None, + mask: Optional[NdarrayOrTensor] = None, dtype: DtypeLike = np.float32, ) -> None: self.num_bins = num_bins @@ -1750,7 +1988,7 @@ def __init__( self.mask = mask self.dtype = dtype - def __call__(self, img: np.ndarray, mask: Optional[np.ndarray] = None) -> np.ndarray: + def __call__(self, img: NdarrayOrTensor, mask: Optional[NdarrayOrTensor] = None) -> np.ndarray: return equalize_hist( img=img, mask=mask if mask is not None else self.mask, @@ -1759,95 +1997,3 @@ def __call__(self, img: np.ndarray, mask: Optional[np.ndarray] = None) -> np.nda max=self.max, dtype=self.dtype, ) - - -class LocalPatchShuffling(RandomizableTransform): - """ - Takes a 3D image and based on input of the local patch size, shuffles the pixels of the local patch within it. - This process is repeated a for N number of times where every time a different random block is selected for local - pixel shuffling. - - Kang, Guoliang, et al. "Patchshuffle regularization." arXiv preprint arXiv:1707.07103 (2017). - """ - - def __init__( - self, - prob: float = 1.0, - number_blocks: int = 1000, - blocksize_ratio: int = 10, - channel_wise: bool = True, - device: Optional[torch.device] = None, - image_only: bool = False, - ) -> None: - """ - Args: - prob: The chance of this transform occuring on the given volume. - number_blocks: Total number of time a random 3D block will be selected for local shuffling of pixels/voxels - contained in the block. - blocksize_ratio: This ratio can be used to estimate the local 3D block sizes that will be selected. - channel_wise: If True, treats each channel of the image separately. - device: device on which the tensor will be allocated. - image_only: if True return only the image volume, otherwise return (image, affine). 
- """ - RandomizableTransform.__init__(self, prob) - self.prob = prob - self.number_blocks = number_blocks - self.blocksize_ratio = blocksize_ratio - self.channel_wise = channel_wise - - def _local_patch_shuffle(self, img: Union[torch.Tensor, np.ndarray], number_blocks: int, blocksize_ratio: int): - im_shape = img.shape - img_copy = copy.deepcopy(img) - for _each_block in range(number_blocks): - - block_size_x = self.R.randint(1, im_shape[0] // blocksize_ratio) - block_size_y = self.R.randint(1, im_shape[1] // blocksize_ratio) - block_size_z = self.R.randint(1, im_shape[2] // blocksize_ratio) - - noise_x = self.R.randint(0, im_shape[0] - block_size_x) - noise_y = self.R.randint(0, im_shape[1] - block_size_y) - noise_z = self.R.randint(0, im_shape[2] - block_size_z) - - local_patch = img[ - noise_x : noise_x + block_size_x, - noise_y : noise_y + block_size_y, - noise_z : noise_z + block_size_z, - ] - - local_patch = local_patch.flatten() - self.R.shuffle(local_patch) - local_patch = local_patch.reshape((block_size_x, block_size_y, block_size_z)) - - img_copy[ - noise_x : noise_x + block_size_x, noise_y : noise_y + block_size_y, noise_z : noise_z + block_size_z - ] = local_patch - - shuffled_image = img_copy - return shuffled_image - - def __call__( - self, - img: Union[np.ndarray, torch.Tensor], - # spatial_size: Optional[Union[Sequence[int], int]] = None, - # mode: Optional[Union[GridSampleMode, str]] = None, - # padding_mode: Optional[Union[GridSamplePadMode, str]] = None, - ): - """ - Args: - img: shape must be (num_channels, H, W[, D]), - - """ - - super().randomize(None) - if not self._do_transform: - return img - - if self.channel_wise: - # img = self._local_patch_shuffle(img=img) - for i, _d in enumerate(img): - img[i] = self._local_patch_shuffle( - img=img[i], blocksize_ratio=self.blocksize_ratio, number_blocks=self.number_blocks - ) - else: - raise AssertionError("If channel_wise is False, the image needs to be set to channel first") - return img diff --git a/monai/transforms/intensity/dictionary.py b/monai/transforms/intensity/dictionary.py index bc53fb6b7b..fb68608b2f 100644 --- a/monai/transforms/intensity/dictionary.py +++ b/monai/transforms/intensity/dictionary.py @@ -15,11 +15,9 @@ Class names are ended with 'd' to denote dictionary-based transforms. 
""" -from collections.abc import Iterable -from typing import Any, Callable, Dict, Hashable, List, Mapping, Optional, Sequence, Tuple, Union +from typing import Callable, Dict, Hashable, Mapping, Optional, Sequence, Tuple, Union import numpy as np -import torch from monai.config import DtypeLike, KeysCollection, NdarrayTensor from monai.config.type_definitions import NdarrayOrTensor @@ -32,11 +30,20 @@ KSpaceSpikeNoise, MaskIntensity, NormalizeIntensity, + RandAdjustContrast, RandBiasField, RandCoarseDropout, + RandCoarseShuffle, RandGaussianNoise, + RandGaussianSharpen, + RandGaussianSmooth, + RandGibbsNoise, + RandHistogramShift, RandKSpaceSpikeNoise, RandRicianNoise, + RandScaleIntensity, + RandShiftIntensity, + RandStdShiftIntensity, ScaleIntensity, ScaleIntensityRange, ScaleIntensityRangePercentiles, @@ -44,9 +51,10 @@ StdShiftIntensity, ThresholdIntensity, ) -from monai.transforms.transform import MapTransform, Randomizable, RandomizableTransform +from monai.transforms.transform import MapTransform, RandomizableTransform from monai.transforms.utils import is_positive -from monai.utils import convert_to_dst_type, ensure_tuple, ensure_tuple_rep, ensure_tuple_size +from monai.utils import ensure_tuple, ensure_tuple_rep +from monai.utils.deprecate_utils import deprecated_arg __all__ = [ "RandGaussianNoised", @@ -75,6 +83,7 @@ "RandKSpaceSpikeNoised", "RandHistogramShiftd", "RandCoarseDropoutd", + "RandCoarseShuffled", "HistogramNormalized", "RandGaussianNoiseD", "RandGaussianNoiseDict", @@ -126,6 +135,8 @@ "RandRicianNoiseDict", "RandCoarseDropoutD", "RandCoarseDropoutDict", + "RandCoarseShuffleD", + "RandCoarseShuffleDict", "HistogramNormalizeD", "HistogramNormalizeDict", ] @@ -152,34 +163,31 @@ def __init__( self, keys: KeysCollection, prob: float = 0.1, - mean: Union[Sequence[float], float] = 0.0, + mean: float = 0.0, std: float = 0.1, allow_missing_keys: bool = False, ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) - self.mean = ensure_tuple_rep(mean, len(self.keys)) - self.std = std - self._noise: List[np.ndarray] = [] + self.rand_gaussian_noise = RandGaussianNoise(mean=mean, std=std, prob=1.0) - def randomize(self, im_shape: Sequence[int]) -> None: - super().randomize(None) - self._noise.clear() - for m in self.mean: - self._noise.append(self.R.normal(m, self.R.uniform(0, self.std), size=im_shape)) + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandGaussianNoised": + super().set_random_state(seed, state) + self.rand_gaussian_noise.set_random_state(seed, state) + return self - def __call__(self, data: Mapping[Hashable, NdarrayTensor]) -> Dict[Hashable, NdarrayTensor]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) - - image_shape = d[self.keys[0]].shape # image shape from the first data key - self.randomize(image_shape) - if len(self._noise) != len(self.keys): - raise RuntimeError("inconsistent noise items and keys.") + self.randomize(None) if not self._do_transform: return d - for key, noise in self.key_iterator(d, self._noise): - noise, *_ = convert_to_dst_type(noise, d[key]) - d[key] = d[key] + noise + + # all the keys share the same random noise + self.rand_gaussian_noise.randomize(d[self.keys[0]]) + for key in self.key_iterator(d): + d[key] = self.rand_gaussian_noise(img=d[key], randomize=False) return d @@ -214,7 +222,7 @@ def __init__( self, keys: KeysCollection, global_prob: float = 
0.1, - prob: float = 1.0, + prob: float = 0.1, mean: Union[Sequence[float], float] = 0.0, std: Union[Sequence[float], float] = 1.0, channel_wise: bool = False, @@ -224,19 +232,25 @@ def __init__( ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, global_prob) - self.rand_rician_noise = RandRicianNoise(prob, mean, std, channel_wise, relative, sample_std) + self.rand_rician_noise = RandRicianNoise( + prob=1.0, mean=mean, std=std, channel_wise=channel_wise, relative=relative, sample_std=sample_std + ) - def set_random_state(self, seed=None, state=None): + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandRicianNoised": super().set_random_state(seed, state) self.rand_rician_noise.set_random_state(seed, state) + return self def __call__(self, data: Mapping[Hashable, NdarrayTensor]) -> Dict[Hashable, NdarrayTensor]: d = dict(data) - super().randomize(None) + self.randomize(None) if not self._do_transform: return d + for key in self.key_iterator(d): - d[key] = self.rand_rician_noise(d[key]) + d[key] = self.rand_rician_noise(d[key], randomize=True) return d @@ -302,7 +316,7 @@ class RandShiftIntensityd(RandomizableTransform, MapTransform): Dictionary-based version :py:class:`monai.transforms.RandShiftIntensity`. """ - backend = ShiftIntensity.backend + backend = RandShiftIntensity.backend def __init__( self, @@ -341,36 +355,34 @@ def __init__( MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) - if isinstance(offsets, (int, float)): - self.offsets = (min(-offsets, offsets), max(-offsets, offsets)) - else: - if len(offsets) != 2: - raise ValueError("offsets should be a number or pair of numbers.") - self.offsets = (min(offsets), max(offsets)) - self._offset = self.offsets[0] self.factor_key = ensure_tuple_rep(factor_key, len(self.keys)) self.meta_keys = ensure_tuple_rep(None, len(self.keys)) if meta_keys is None else ensure_tuple(meta_keys) if len(self.keys) != len(self.meta_keys): raise ValueError("meta_keys should have the same length as keys.") self.meta_key_postfix = ensure_tuple_rep(meta_key_postfix, len(self.keys)) - self.shifter = ShiftIntensity(self._offset) + self.shifter = RandShiftIntensity(offsets=offsets, prob=1.0) - def randomize(self, data: Optional[Any] = None) -> None: - self._offset = self.R.uniform(low=self.offsets[0], high=self.offsets[1]) - super().randomize(None) + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandShiftIntensityd": + super().set_random_state(seed, state) + self.shifter.set_random_state(seed, state) + return self def __call__(self, data) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) - self.randomize() + self.randomize(None) if not self._do_transform: return d + + # all the keys share the same random shift factor + self.shifter.randomize(None) for key, factor_key, meta_key, meta_key_postfix in self.key_iterator( d, self.factor_key, self.meta_keys, self.meta_key_postfix ): meta_key = meta_key or f"{key}_{meta_key_postfix}" factor: Optional[float] = d[meta_key].get(factor_key) if meta_key in d else None - offset = self._offset if factor is None else self._offset * factor - d[key] = self.shifter(d[key], offset=offset) + d[key] = self.shifter(d[key], factor=factor, randomize=False) return d @@ -416,7 +428,7 @@ class RandStdShiftIntensityd(RandomizableTransform, MapTransform): Dictionary-based version 
:py:class:`monai.transforms.RandStdShiftIntensity`. """ - backend = StdShiftIntensity.backend + backend = RandStdShiftIntensity.backend def __init__( self, @@ -442,30 +454,27 @@ def __init__( """ MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) + self.shifter = RandStdShiftIntensity( + factors=factors, nonzero=nonzero, channel_wise=channel_wise, dtype=dtype, prob=1.0 + ) - if isinstance(factors, (int, float)): - self.factors = (min(-factors, factors), max(-factors, factors)) - elif len(factors) != 2: - raise ValueError("factors should be a number or pair of numbers.") - else: - self.factors = (min(factors), max(factors)) - self.factor = self.factors[0] - self.nonzero = nonzero - self.channel_wise = channel_wise - self.dtype = dtype - - def randomize(self, data: Optional[Any] = None) -> None: - self.factor = self.R.uniform(low=self.factors[0], high=self.factors[1]) - super().randomize(None) + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandStdShiftIntensityd": + super().set_random_state(seed, state) + self.shifter.set_random_state(seed, state) + return self def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) - self.randomize() + self.randomize(None) if not self._do_transform: return d - shifter = StdShiftIntensity(self.factor, self.nonzero, self.channel_wise, self.dtype) + + # all the keys share the same random shift factor + self.shifter.randomize(None) for key in self.key_iterator(d): - d[key] = shifter(d[key]) + d[key] = self.shifter(d[key], randomize=False) return d @@ -484,6 +493,8 @@ def __init__( minv: Optional[float] = 0.0, maxv: Optional[float] = 1.0, factor: Optional[float] = None, + channel_wise: bool = False, + dtype: DtypeLike = np.float32, allow_missing_keys: bool = False, ) -> None: """ @@ -494,11 +505,14 @@ def __init__( maxv: maximum value of output data. factor: factor scale by ``v = v * (1 + factor)``. In order to use this parameter, please set `minv` and `maxv` into None. + channel_wise: if True, scale on each channel separately. Please ensure + that the first dimension represents the channel of the image if True. + dtype: output data type, defaults to float32. allow_missing_keys: don't raise exception if key is missing. """ super().__init__(keys, allow_missing_keys) - self.scaler = ScaleIntensity(minv, maxv, factor) + self.scaler = ScaleIntensity(minv, maxv, factor, channel_wise, dtype) def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) @@ -512,13 +526,14 @@ class RandScaleIntensityd(RandomizableTransform, MapTransform): Dictionary-based version :py:class:`monai.transforms.RandScaleIntensity`. """ - backend = ScaleIntensity.backend + backend = RandScaleIntensity.backend def __init__( self, keys: KeysCollection, factors: Union[Tuple[float, float], float], prob: float = 0.1, + dtype: DtypeLike = np.float32, allow_missing_keys: bool = False, ) -> None: """ @@ -529,32 +544,31 @@ def __init__( if single number, factor value is picked from (-factors, factors). prob: probability of rotating. (Default 0.1, with 10% probability it returns a rotated array.) + dtype: output data type, defaults to float32. allow_missing_keys: don't raise exception if key is missing. 
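The dictionary-transform refactors in this hunk all follow one recipe: keep the probability gate in the wrapper, hold the array transform with `prob=1.0`, chain `set_random_state` into it, and sample the random parameters once so that every key shares them. A condensed sketch of that pattern (the class name `RandNoiseSketchd` is hypothetical; `RandGaussianNoise` stands in for any of the wrapped array transforms):

from typing import Optional

import numpy as np

from monai.transforms import RandGaussianNoise
from monai.transforms.transform import MapTransform, RandomizableTransform


class RandNoiseSketchd(RandomizableTransform, MapTransform):
    def __init__(self, keys, prob: float = 0.1, allow_missing_keys: bool = False) -> None:
        MapTransform.__init__(self, keys, allow_missing_keys)
        RandomizableTransform.__init__(self, prob)
        self.t = RandGaussianNoise(prob=1.0)  # inner transform always fires; the outer prob decides

    def set_random_state(self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None):
        super().set_random_state(seed, state)
        self.t.set_random_state(seed, state)  # keep the wrapped transform's RNG in sync
        return self

    def __call__(self, data):
        d = dict(data)
        self.randomize(None)  # outer probability gate only
        if not self._do_transform:
            return d
        self.t.randomize(d[self.keys[0]])  # sample the noise once from the first key ...
        for key in self.key_iterator(d):
            d[key] = self.t(d[key], randomize=False)  # ... and reuse it for every key
        return d

Seeding the wrapper then seeds the inner transform too, which is what makes the refactored dictionary transforms reproducible without touching their internals.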
""" MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) + self.scaler = RandScaleIntensity(factors=factors, dtype=dtype, prob=1.0) - if isinstance(factors, (int, float)): - self.factors = (min(-factors, factors), max(-factors, factors)) - elif len(factors) != 2: - raise ValueError("factors should be a number or pair of numbers.") - else: - self.factors = (min(factors), max(factors)) - self.factor = self.factors[0] - - def randomize(self, data: Optional[Any] = None) -> None: - self.factor = self.R.uniform(low=self.factors[0], high=self.factors[1]) - super().randomize(None) + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandScaleIntensityd": + super().set_random_state(seed, state) + self.scaler.set_random_state(seed, state) + return self def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) - self.randomize() + self.randomize(None) if not self._do_transform: return d - scaler = ScaleIntensity(minv=None, maxv=None, factor=self.factor) + + # all the keys share the same random scale factor + self.scaler.randomize(None) for key in self.key_iterator(d): - d[key] = scaler(d[key]) + d[key] = self.scaler(d[key], randomize=False) return d @@ -563,13 +577,15 @@ class RandBiasFieldd(RandomizableTransform, MapTransform): Dictionary-based version :py:class:`monai.transforms.RandBiasField`. """ + backend = RandBiasField.backend + def __init__( self, keys: KeysCollection, degree: int = 3, coeff_range: Tuple[float, float] = (0.0, 0.1), dtype: DtypeLike = np.float32, - prob: float = 1.0, + prob: float = 0.1, allow_missing_keys: bool = False, ) -> None: """ @@ -587,18 +603,25 @@ def __init__( MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) - self.rand_bias_field = RandBiasField(degree, coeff_range, dtype, prob) + self.rand_bias_field = RandBiasField(degree=degree, coeff_range=coeff_range, dtype=dtype, prob=1.0) - def randomize(self, data: Optional[Any] = None) -> None: - super().randomize(None) + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandBiasFieldd": + super().set_random_state(seed, state) + self.rand_bias_field.set_random_state(seed, state) + return self - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) - self.randomize() + self.randomize(None) if not self._do_transform: return d + + # all the keys share the same random bias factor + self.rand_bias_field.randomize(img_size=d[self.keys[0]].shape[1:]) for key in self.key_iterator(d): - d[key] = self.rand_bias_field(d[key]) + d[key] = self.rand_bias_field(d[key], randomize=False) return d @@ -655,6 +678,8 @@ class ThresholdIntensityd(MapTransform): allow_missing_keys: don't raise exception if key is missing. 
""" + backend = ThresholdIntensity.backend + def __init__( self, keys: KeysCollection, @@ -666,7 +691,7 @@ def __init__( super().__init__(keys, allow_missing_keys) self.filter = ThresholdIntensity(threshold, above, cval) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): d[key] = self.filter(d[key]) @@ -688,6 +713,8 @@ class ScaleIntensityRanged(MapTransform): allow_missing_keys: don't raise exception if key is missing. """ + backend = ScaleIntensityRange.backend + def __init__( self, keys: KeysCollection, @@ -701,7 +728,7 @@ def __init__( super().__init__(keys, allow_missing_keys) self.scaler = ScaleIntensityRange(a_min, a_max, b_min, b_max, clip) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): d[key] = self.scaler(d[key]) @@ -722,11 +749,13 @@ class AdjustContrastd(MapTransform): allow_missing_keys: don't raise exception if key is missing. """ + backend = AdjustContrast.backend + def __init__(self, keys: KeysCollection, gamma: float, allow_missing_keys: bool = False) -> None: super().__init__(keys, allow_missing_keys) self.adjuster = AdjustContrast(gamma) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): d[key] = self.adjuster(d[key]) @@ -749,6 +778,8 @@ class RandAdjustContrastd(RandomizableTransform, MapTransform): allow_missing_keys: don't raise exception if key is missing. 
""" + backend = RandAdjustContrast.backend + def __init__( self, keys: KeysCollection, @@ -758,34 +789,25 @@ def __init__( ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) + self.adjuster = RandAdjustContrast(gamma=gamma, prob=1.0) - if isinstance(gamma, (int, float)): - if gamma <= 0.5: - raise ValueError( - "if gamma is single number, must greater than 0.5 and value is picked from (0.5, gamma)" - ) - self.gamma = (0.5, gamma) - elif len(gamma) != 2: - raise ValueError("gamma should be a number or pair of numbers.") - else: - self.gamma = (min(gamma), max(gamma)) - - self.gamma_value: Optional[float] = None - - def randomize(self, data: Optional[Any] = None) -> None: - super().randomize(None) - self.gamma_value = self.R.uniform(low=self.gamma[0], high=self.gamma[1]) + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandAdjustContrastd": + super().set_random_state(seed, state) + self.adjuster.set_random_state(seed, state) + return self - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) - self.randomize() - if self.gamma_value is None: - raise RuntimeError("gamma_value is not set.") + self.randomize(None) if not self._do_transform: return d - adjuster = AdjustContrast(self.gamma_value) + + # all the keys share the same random gamma value + self.adjuster.randomize(None) for key in self.key_iterator(d): - d[key] = adjuster(d[key]) + d[key] = self.adjuster(d[key], randomize=False) return d @@ -805,6 +827,8 @@ class ScaleIntensityRangePercentilesd(MapTransform): allow_missing_keys: don't raise exception if key is missing. 
""" + backend = ScaleIntensityRangePercentiles.backend + def __init__( self, keys: KeysCollection, @@ -819,7 +843,7 @@ def __init__( super().__init__(keys, allow_missing_keys) self.scaler = ScaleIntensityRangePercentiles(lower, upper, b_min, b_max, clip, relative) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): d[key] = self.scaler(d[key]) @@ -847,10 +871,12 @@ class MaskIntensityd(MapTransform): """ + backend = MaskIntensity.backend + def __init__( self, keys: KeysCollection, - mask_data: Optional[np.ndarray] = None, + mask_data: Optional[NdarrayOrTensor] = None, mask_key: Optional[str] = None, select_fn: Callable = is_positive, allow_missing_keys: bool = False, @@ -859,7 +885,7 @@ def __init__( self.converter = MaskIntensity(mask_data=mask_data, select_fn=select_fn) self.mask_key = mask_key if mask_data is None else None - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): d[key] = self.converter(d[key], d[self.mask_key]) if self.mask_key is not None else self.converter(d[key]) @@ -882,6 +908,8 @@ class GaussianSmoothd(MapTransform): """ + backend = GaussianSmooth.backend + def __init__( self, keys: KeysCollection, @@ -892,7 +920,7 @@ def __init__( super().__init__(keys, allow_missing_keys) self.converter = GaussianSmooth(sigma, approx=approx) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): d[key] = self.converter(d[key]) @@ -916,6 +944,8 @@ class RandGaussianSmoothd(RandomizableTransform, MapTransform): """ + backend = RandGaussianSmooth.backend + def __init__( self, keys: KeysCollection, @@ -928,25 +958,27 @@ def __init__( ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) - self.sigma_x, self.sigma_y, self.sigma_z = sigma_x, sigma_y, sigma_z - self.approx = approx - - self.x, self.y, self.z = self.sigma_x[0], self.sigma_y[0], self.sigma_z[0] + self.rand_smooth = RandGaussianSmooth( + sigma_x=sigma_x, sigma_y=sigma_y, sigma_z=sigma_z, approx=approx, prob=1.0 + ) - def randomize(self, data: Optional[Any] = None) -> None: - super().randomize(None) - self.x = self.R.uniform(low=self.sigma_x[0], high=self.sigma_x[1]) - self.y = self.R.uniform(low=self.sigma_y[0], high=self.sigma_y[1]) - self.z = self.R.uniform(low=self.sigma_z[0], high=self.sigma_z[1]) + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandGaussianSmoothd": + super().set_random_state(seed, state) + self.rand_smooth.set_random_state(seed, state) + return self - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) - self.randomize() + self.randomize(None) if not self._do_transform: return d + + # all the keys share the same random sigma + self.rand_smooth.randomize(None) for key in self.key_iterator(d): - sigma = ensure_tuple_size(tup=(self.x, self.y, self.z), dim=d[key].ndim - 1) - d[key] = 
GaussianSmooth(sigma=sigma, approx=self.approx)(d[key]) + d[key] = self.rand_smooth(d[key], randomize=False) return d @@ -970,6 +1002,8 @@ class GaussianSharpend(MapTransform): """ + backend = GaussianSharpen.backend + def __init__( self, keys: KeysCollection, @@ -982,7 +1016,7 @@ def __init__( super().__init__(keys, allow_missing_keys) self.converter = GaussianSharpen(sigma1, sigma2, alpha, approx=approx) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): d[key] = self.converter(d[key]) @@ -1013,6 +1047,8 @@ class RandGaussianSharpend(RandomizableTransform, MapTransform): """ + backend = RandGaussianSharpen.backend + def __init__( self, keys: KeysCollection, @@ -1029,37 +1065,35 @@ def __init__( ): MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) - self.sigma1_x = sigma1_x - self.sigma1_y = sigma1_y - self.sigma1_z = sigma1_z - self.sigma2_x = sigma2_x - self.sigma2_y = sigma2_y - self.sigma2_z = sigma2_z - self.alpha = alpha - self.approx = approx - - def randomize(self, data: Optional[Any] = None) -> None: - super().randomize(None) - self.x1 = self.R.uniform(low=self.sigma1_x[0], high=self.sigma1_x[1]) - self.y1 = self.R.uniform(low=self.sigma1_y[0], high=self.sigma1_y[1]) - self.z1 = self.R.uniform(low=self.sigma1_z[0], high=self.sigma1_z[1]) - sigma2_x = (self.sigma2_x, self.x1) if not isinstance(self.sigma2_x, Iterable) else self.sigma2_x - sigma2_y = (self.sigma2_y, self.y1) if not isinstance(self.sigma2_y, Iterable) else self.sigma2_y - sigma2_z = (self.sigma2_z, self.z1) if not isinstance(self.sigma2_z, Iterable) else self.sigma2_z - self.x2 = self.R.uniform(low=sigma2_x[0], high=sigma2_x[1]) - self.y2 = self.R.uniform(low=sigma2_y[0], high=sigma2_y[1]) - self.z2 = self.R.uniform(low=sigma2_z[0], high=sigma2_z[1]) - self.a = self.R.uniform(low=self.alpha[0], high=self.alpha[1]) + self.rand_sharpen = RandGaussianSharpen( + sigma1_x=sigma1_x, + sigma1_y=sigma1_y, + sigma1_z=sigma1_z, + sigma2_x=sigma2_x, + sigma2_y=sigma2_y, + sigma2_z=sigma2_z, + alpha=alpha, + approx=approx, + prob=1.0, + ) - def __call__(self, data): + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandGaussianSharpend": + super().set_random_state(seed, state) + self.rand_sharpen.set_random_state(seed, state) + return self + + def __call__(self, data: Dict[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) - self.randomize() + self.randomize(None) if not self._do_transform: return d + + # all the keys share the same random sigma1, sigma2, etc. + self.rand_sharpen.randomize(None) for key in self.key_iterator(d): - sigma1 = ensure_tuple_size(tup=(self.x1, self.y1, self.z1), dim=d[key].ndim - 1) - sigma2 = ensure_tuple_size(tup=(self.x2, self.y2, self.z2), dim=d[key].ndim - 1) - d[key] = GaussianSharpen(sigma1=sigma1, sigma2=sigma2, alpha=self.a, approx=self.approx)(d[key]) + d[key] = self.rand_sharpen(d[key], randomize=False) return d @@ -1078,6 +1112,8 @@ class RandHistogramShiftd(RandomizableTransform, MapTransform): allow_missing_keys: don't raise exception if key is missing. 
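The smoothing and sharpening wrappers above behave the same way: one sigma set (or sigma1/sigma2/alpha set) is sampled per call and reused for all keys. For instance, assuming the `RandGaussianSmoothd` signature in this hunk:

import numpy as np
from monai.transforms import RandGaussianSmoothd

data = {"image": np.random.rand(1, 64, 64, 32).astype(np.float32)}
t = RandGaussianSmoothd(keys="image", sigma_x=(0.5, 1.5), sigma_y=(0.5, 1.5), sigma_z=(0.5, 1.5), prob=1.0)
out = t(data)  # the sampled (sigma_x, sigma_y, sigma_z) triple is shared by every key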
""" + backend = RandHistogramShift.backend + def __init__( self, keys: KeysCollection, @@ -1087,38 +1123,25 @@ def __init__( ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) - if isinstance(num_control_points, int): - if num_control_points <= 2: - raise ValueError("num_control_points should be greater than or equal to 3") - self.num_control_points = (num_control_points, num_control_points) - else: - if len(num_control_points) != 2: - raise ValueError("num_control points should be a number or a pair of numbers") - if min(num_control_points) <= 2: - raise ValueError("num_control_points should be greater than or equal to 3") - self.num_control_points = (min(num_control_points), max(num_control_points)) - - def randomize(self, data: Optional[Any] = None) -> None: - super().randomize(None) - num_control_point = self.R.randint(self.num_control_points[0], self.num_control_points[1] + 1) - self.reference_control_points = np.linspace(0, 1, num_control_point) - self.floating_control_points = np.copy(self.reference_control_points) - for i in range(1, num_control_point - 1): - self.floating_control_points[i] = self.R.uniform( - self.floating_control_points[i - 1], self.floating_control_points[i + 1] - ) - - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + self.shifter = RandHistogramShift(num_control_points=num_control_points, prob=1.0) + + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandHistogramShiftd": + super().set_random_state(seed, state) + self.shifter.set_random_state(seed, state) + return self + + def __call__(self, data: Dict[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) - self.randomize() + self.randomize(None) if not self._do_transform: return d + + # all the keys share the same random shift params + self.shifter.randomize(None) for key in self.key_iterator(d): - img_min, img_max = d[key].min(), d[key].max() - reference_control_points_scaled = self.reference_control_points * (img_max - img_min) + img_min - floating_control_points_scaled = self.floating_control_points * (img_max - img_min) + img_min - dtype = d[key].dtype - d[key] = np.interp(d[key], reference_control_points_scaled, floating_control_points_scaled).astype(dtype) + d[key] = self.shifter(d[key], randomize=False) return d @@ -1144,56 +1167,43 @@ class RandGibbsNoised(RandomizableTransform, MapTransform): values in the interval [0,1] with alpha = 0 acting as the identity mapping. If a length-2 list is given as [a,b] then the value of alpha will be sampled uniformly from the interval [a,b]. - as_tensor_output: if true return torch.Tensor, else return np.array. default: True. allow_missing_keys: do not raise exception if key is missing. 
""" + backend = RandGibbsNoise.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, keys: KeysCollection, prob: float = 0.1, alpha: Sequence[float] = (0.0, 1.0), - as_tensor_output: bool = True, allow_missing_keys: bool = False, + as_tensor_output: bool = True, ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob=prob) - self.alpha = alpha - self.sampled_alpha = -1.0 # stores last alpha sampled by randomize() - self.as_tensor_output = as_tensor_output + self.rand_gibbs_noise = RandGibbsNoise(alpha=alpha, prob=1.0) - def __call__( - self, data: Mapping[Hashable, Union[torch.Tensor, np.ndarray]] - ) -> Dict[Hashable, Union[torch.Tensor, np.ndarray]]: + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandGibbsNoised": + super().set_random_state(seed, state) + self.rand_gibbs_noise.set_random_state(seed, state) + return self + def __call__(self, data: Dict[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) - self._randomize(None) - - for i, key in enumerate(self.key_iterator(d)): - if self._do_transform: - if i == 0: - transform = GibbsNoise(self.sampled_alpha, self.as_tensor_output) - d[key] = transform(d[key]) - else: - if isinstance(d[key], np.ndarray) and self.as_tensor_output: - d[key] = torch.Tensor(d[key]) - elif isinstance(d[key], torch.Tensor) and not self.as_tensor_output: - d[key] = self._to_numpy(d[key]) - return d - - def _randomize(self, _: Any) -> None: - """ - (1) Set random variable to apply the transform. - (2) Get alpha from uniform distribution. - """ - super().randomize(None) - self.sampled_alpha = self.R.uniform(self.alpha[0], self.alpha[1]) + self.randomize(None) + if not self._do_transform: + return d - def _to_numpy(self, d: Union[torch.Tensor, np.ndarray]) -> np.ndarray: - if isinstance(d, torch.Tensor): - d_numpy: np.ndarray = d.cpu().detach().numpy() - return d_numpy + # all the keys share the same random noise params + self.rand_gibbs_noise.randomize(None) + for key in self.key_iterator(d): + d[key] = self.rand_gibbs_noise(d[key], randomize=False) + return d class GibbsNoised(MapTransform): @@ -1212,20 +1222,20 @@ class GibbsNoised(MapTransform): you need to transform. alpha (float): Parametrizes the intensity of the Gibbs noise filter applied. Takes values in the interval [0,1] with alpha = 0 acting as the identity mapping. - as_tensor_output: if true return torch.Tensor, else return np.array. default: True. allow_missing_keys: do not raise exception if key is missing. """ + backend = GibbsNoise.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( - self, keys: KeysCollection, alpha: float = 0.5, as_tensor_output: bool = True, allow_missing_keys: bool = False + self, keys: KeysCollection, alpha: float = 0.5, allow_missing_keys: bool = False, as_tensor_output: bool = True ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) - self.transform = GibbsNoise(alpha, as_tensor_output) + self.transform = GibbsNoise(alpha) - def __call__( - self, data: Mapping[Hashable, Union[torch.Tensor, np.ndarray]] - ) -> Dict[Hashable, Union[torch.Tensor, np.ndarray]]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): @@ -1264,8 +1274,6 @@ class KSpaceSpikeNoised(MapTransform): receive a sequence of intensities. This value should be tested as it is data-dependent. 
The default values are the 2.5 the mean of the log-intensity for each channel. - as_tensor_output: if ``True`` return torch.Tensor, else return np.array. - Default: ``True``. allow_missing_keys: do not raise exception if key is missing. Example: @@ -1276,21 +1284,22 @@ class KSpaceSpikeNoised(MapTransform): with `log-intensity = 14`. """ + backend = KSpaceSpikeNoise.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, keys: KeysCollection, loc: Union[Tuple, Sequence[Tuple]], k_intensity: Optional[Union[Sequence[float], float]] = None, - as_tensor_output: bool = True, allow_missing_keys: bool = False, + as_tensor_output: bool = True, ) -> None: super().__init__(keys, allow_missing_keys) - self.transform = KSpaceSpikeNoise(loc, k_intensity, as_tensor_output) + self.transform = KSpaceSpikeNoise(loc, k_intensity) - def __call__( - self, data: Mapping[Hashable, Union[torch.Tensor, np.ndarray]] - ) -> Dict[Hashable, Union[torch.Tensor, np.ndarray]]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: """ Args: data: Expects image/label to have dimensions (C, H, W) or @@ -1320,110 +1329,66 @@ class RandKSpaceSpikeNoised(RandomizableTransform, MapTransform): Args: keys: "image", "label", or ["image", "label"] depending on which data you need to transform. - global_prob: probability of applying transform to the dictionary. prob: probability to add spike artifact to each item in the dictionary provided it is realized that the noise will be applied to the dictionary. - intensity_ranges: Dictionary with intensity - ranges to sample for each key. Given a dictionary value of `(a, b)` the - transform will sample the log-intensity from the interval `(a, b)` uniformly for all - channels of the respective key. If a sequence of intevals `((a0, b0), (a1, b1), ...)` - is given, then the transform will sample from each interval for each - respective channel. In the second case, the number of 2-tuples must - match the number of channels. Default ranges is `(0.95x, 1.10x)` - where `x` is the mean log-intensity for each channel. - channel_wise: treat each channel independently. True by - default. - common_sampling: If ``True`` same values for location and log-intensity - will be sampled for the image and label. - common_seed: Seed to be used in case ``common_sampling = True``. - as_tensor_output: if ``True`` return torch.Tensor, else return - np.array. Default: ``True``. + intensity_range: pass a tuple (a, b) to sample the log-intensity from the interval (a, b) + uniformly for all channels, or pass a sequence of intervals + ((a0, b0), (a1, b1), ...) to sample from each respective interval for each channel. + In the second case, the number of 2-tuples must match the number of channels. + The default range is `(0.95x, 1.10x)` where `x` is the mean + log-intensity for each channel. + channel_wise: treat each channel independently. True by default. allow_missing_keys: do not raise exception if key is missing. Example: To apply `k`-space spikes randomly on the image only, with probability 0.5, and log-intensity sampled from the interval [13, 15] for each channel independently, one uses - ``RandKSpaceSpikeNoised("image", prob=0.5, intensity_ranges={"image":(13,15)}, channel_wise=True)``. + ``RandKSpaceSpikeNoised("image", prob=0.5, intensity_range=(13, 15), channel_wise=True)``.
""" + backend = RandKSpaceSpikeNoise.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") + @deprecated_arg(name="common_sampling", since="0.6") + @deprecated_arg(name="common_seed", since="0.6") + @deprecated_arg(name="global_prob", since="0.6") def __init__( self, keys: KeysCollection, global_prob: float = 1.0, prob: float = 0.1, - intensity_ranges: Optional[Mapping[Hashable, Sequence[Union[Sequence[float], float]]]] = None, + intensity_range: Optional[Sequence[Union[Sequence[float], float]]] = None, channel_wise: bool = True, common_sampling: bool = False, common_seed: int = 42, - as_tensor_output: bool = True, allow_missing_keys: bool = False, + as_tensor_output: bool = True, ): - MapTransform.__init__(self, keys, allow_missing_keys) - RandomizableTransform.__init__(self, global_prob) - - self.common_sampling = common_sampling - self.common_seed = common_seed - self.as_tensor_output = as_tensor_output - # the spikes artifact is amplitude dependent so we instantiate one per key - self.transforms = {} - if isinstance(intensity_ranges, Mapping): - for k in self.keys: - self.transforms[k] = RandKSpaceSpikeNoise( - prob, intensity_ranges[k], channel_wise, self.as_tensor_output - ) - else: - for k in self.keys: - self.transforms[k] = RandKSpaceSpikeNoise(prob, None, channel_wise, self.as_tensor_output) - - def __call__( - self, data: Mapping[Hashable, Union[torch.Tensor, np.ndarray]] - ) -> Dict[Hashable, Union[torch.Tensor, np.ndarray]]: - """ - Args: - data: Expects image/label to have dimensions (C, H, W) or - (C, H, W, D), where C is the channel. - """ - d = dict(data) - super().randomize(None) - - # In case the same spikes are desired for both image and label. - if self.common_sampling: - for k in self.keys: - self.transforms[k].set_random_state(self.common_seed) - - for key, t in self.key_iterator(d, self.transforms): - if self._do_transform: - d[key] = self.transforms[t](d[key]) - else: - if isinstance(d[key], np.ndarray) and self.as_tensor_output: - d[key] = torch.Tensor(d[key]) - elif isinstance(d[key], torch.Tensor) and not self.as_tensor_output: - d[key] = self._to_numpy(d[key]) - return d - - def set_rand_state(self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None) -> None: - """ - Set the random state locally to control the randomness. - User should use this method instead of ``set_random_state``. + RandomizableTransform.__init__(self, prob=prob) + self.rand_noise = RandKSpaceSpikeNoise(prob=1.0, intensity_range=intensity_range, channel_wise=channel_wise) - Args: - seed: set the random state with an integer seed. 
- state: set the random state with a `np.random.RandomState` object.""" + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandKSpaceSpikeNoised": + super().set_random_state(seed, state) + self.rand_noise.set_random_state(seed, state) + return self - self.set_random_state(seed, state) - for key in self.keys: - self.transforms[key].set_random_state(seed, state) + def __call__(self, data: Dict[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: + d = dict(data) + self.randomize(None) + if not self._do_transform: + return d - def _to_numpy(self, d: Union[torch.Tensor, np.ndarray]) -> np.ndarray: - if isinstance(d, torch.Tensor): - d_numpy: np.ndarray = d.cpu().detach().numpy() - return d_numpy + for key in self.key_iterator(d): + d[key] = self.rand_noise(d[key], randomize=True) + return d -class RandCoarseDropoutd(Randomizable, MapTransform): +class RandCoarseDropoutd(RandomizableTransform, MapTransform): """ Dictionary-based wrapper of :py:class:`monai.transforms.RandCoarseDropout`. Expect all the data specified by `keys` have same spatial shape and will randomly dropout the same regions @@ -1455,6 +1420,8 @@ class RandCoarseDropoutd(Randomizable, MapTransform): """ + backend = RandCoarseDropout.backend + def __init__( self, keys: KeysCollection, @@ -1468,6 +1435,7 @@ def __init__( allow_missing_keys: bool = False, ): MapTransform.__init__(self, keys, allow_missing_keys) + RandomizableTransform.__init__(self, prob=prob) self.dropper = RandCoarseDropout( holes=holes, spatial_size=spatial_size, @@ -1475,19 +1443,91 @@ def __init__( fill_value=fill_value, max_holes=max_holes, max_spatial_size=max_spatial_size, - prob=prob, + prob=1.0, ) - def randomize(self, img_size: Sequence[int]) -> None: - self.dropper.randomize(img_size=img_size) + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandCoarseDropoutd": + super().set_random_state(seed, state) + self.dropper.set_random_state(seed, state) + return self def __call__(self, data): d = dict(data) - # expect all the specified keys have same spatial shape - self.randomize(d[self.keys[0]].shape[1:]) - if self.dropper._do_transform: - for key in self.key_iterator(d): - d[key] = self.dropper(img=d[key]) + self.randomize(None) + if not self._do_transform: + return d + + # expect all the specified keys to have the same spatial shape and share the same random holes + self.dropper.randomize(d[self.keys[0]].shape[1:]) + for key in self.key_iterator(d): + d[key] = self.dropper(img=d[key], randomize=False) + + return d + + +class RandCoarseShuffled(RandomizableTransform, MapTransform): + """ + Dictionary-based wrapper of :py:class:`monai.transforms.RandCoarseShuffle`. + Expects all the data specified by `keys` to have the same spatial shape; the same regions are randomly + shuffled for every key. If you want to shuffle different regions for every key, please use this transform + separately for each key. + + Args: + keys: keys of the corresponding items to be transformed. + See also: :py:class:`monai.transforms.compose.MapTransform` + holes: number of regions to shuffle; if `max_holes` is not None, this arg is used as the minimum number + when randomly selecting the expected number of regions. + spatial_size: spatial size of the regions to shuffle; if `max_spatial_size` is not None, this arg is used + as the minimum spatial size when randomly selecting the size of every region.
+ if some components of the `spatial_size` are non-positive values, the transform will use the + corresponding components of input img size. For example, `spatial_size=(32, -1)` will be adapted + to `(32, 64)` if the second spatial dimension size of img is `64`. + max_holes: if not None, define the maximum number to randomly select the expected number of regions. + max_spatial_size: if not None, define the maximum spatial size to randomly select size for every region. + if some components of the `max_spatial_size` are non-positive values, the transform will use the + corresponding components of input img size. For example, `max_spatial_size=(32, -1)` will be adapted + to `(32, 64)` if the second spatial dimension size of img is `64`. + prob: probability of applying the transform. + allow_missing_keys: don't raise exception if key is missing. + + """ + + backend = RandCoarseShuffle.backend + + def __init__( + self, + keys: KeysCollection, + holes: int, + spatial_size: Union[Sequence[int], int], + max_holes: Optional[int] = None, + max_spatial_size: Optional[Union[Sequence[int], int]] = None, + prob: float = 0.1, + allow_missing_keys: bool = False, + ): + MapTransform.__init__(self, keys, allow_missing_keys) + RandomizableTransform.__init__(self, prob=prob) + self.shuffle = RandCoarseShuffle( + holes=holes, spatial_size=spatial_size, max_holes=max_holes, max_spatial_size=max_spatial_size, prob=1.0 + ) + + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandCoarseShuffled": + super().set_random_state(seed, state) + self.shuffle.set_random_state(seed, state) + return self + + def __call__(self, data): + d = dict(data) + self.randomize(None) + if not self._do_transform: + return d + + # expect all the specified keys have same spatial shape and share same random holes + self.shuffle.randomize(d[self.keys[0]].shape[1:]) + for key in self.key_iterator(d): + d[key] = self.shuffle(img=d[key], randomize=False) return d @@ -1512,13 +1552,15 @@ class HistogramNormalized(MapTransform): """ + backend = HistogramNormalize.backend + def __init__( self, keys: KeysCollection, num_bins: int = 256, min: int = 0, max: int = 255, - mask: Optional[np.ndarray] = None, + mask: Optional[NdarrayOrTensor] = None, mask_key: Optional[str] = None, dtype: DtypeLike = np.float32, allow_missing_keys: bool = False, @@ -1527,7 +1569,7 @@ def __init__( self.transform = HistogramNormalize(num_bins=num_bins, min=min, max=max, mask=mask, dtype=dtype) self.mask_key = mask_key if mask is None else None - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): d[key] = self.transform(d[key], d[self.mask_key]) if self.mask_key is not None else self.transform(d[key]) @@ -1562,3 +1604,4 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda RandKSpaceSpikeNoiseD = RandKSpaceSpikeNoiseDict = RandKSpaceSpikeNoised RandCoarseDropoutD = RandCoarseDropoutDict = RandCoarseDropoutd HistogramNormalizeD = HistogramNormalizeDict = HistogramNormalized +RandCoarseShuffleD = RandCoarseShuffleDict = RandCoarseShuffled diff --git a/monai/transforms/inverse.py b/monai/transforms/inverse.py index 58f3526086..57a443241b 100644 --- a/monai/transforms/inverse.py +++ b/monai/transforms/inverse.py @@ -64,18 +64,11 @@ class InvertibleTransform(Transform): """ def push_transform( - self, - 
data: dict, - key: Hashable, - extra_info: Optional[dict] = None, - orig_size: Optional[Tuple] = None, + self, data: dict, key: Hashable, extra_info: Optional[dict] = None, orig_size: Optional[Tuple] = None ) -> None: """Append to list of applied transforms for that key.""" key_transform = str(key) + InverseKeys.KEY_SUFFIX - info = { - InverseKeys.CLASS_NAME: self.__class__.__name__, - InverseKeys.ID: id(self), - } + info = {InverseKeys.CLASS_NAME: self.__class__.__name__, InverseKeys.ID: id(self)} if orig_size is not None: info[InverseKeys.ORIG_SIZE] = orig_size elif hasattr(data[key], "shape"): diff --git a/monai/transforms/inverse_batch_transform.py b/monai/transforms/inverse_batch_transform.py index d9c6790840..e220d5f350 100644 --- a/monai/transforms/inverse_batch_transform.py +++ b/monai/transforms/inverse_batch_transform.py @@ -27,12 +27,7 @@ class _BatchInverseDataset(Dataset): - def __init__( - self, - data: Sequence[Any], - transform: InvertibleTransform, - pad_collation_used: bool, - ) -> None: + def __init__(self, data: Sequence[Any], transform: InvertibleTransform, pad_collation_used: bool) -> None: self.data = data self.invertible_transform = transform self.pad_collation_used = pad_collation_used @@ -99,7 +94,7 @@ def __call__(self, data: Dict[str, Any]) -> Any: re_str = str(re) if "equal size" in re_str: re_str += "\nMONAI hint: try creating `BatchInverseTransform` with `collate_fn=lambda x: x`." - raise RuntimeError(re_str) + raise RuntimeError(re_str) from re class Decollated(MapTransform): @@ -119,10 +114,7 @@ class Decollated(MapTransform): """ def __init__( - self, - keys: Optional[KeysCollection] = None, - detach: bool = True, - allow_missing_keys: bool = False, + self, keys: Optional[KeysCollection] = None, detach: bool = True, allow_missing_keys: bool = False ) -> None: super().__init__(keys, allow_missing_keys) self.detach = detach diff --git a/monai/transforms/io/dictionary.py b/monai/transforms/io/dictionary.py index 764e20f838..f714dd2831 100644 --- a/monai/transforms/io/dictionary.py +++ b/monai/transforms/io/dictionary.py @@ -26,14 +26,7 @@ from monai.transforms.transform import MapTransform from monai.utils import GridSampleMode, GridSamplePadMode, InterpolateMode, ensure_tuple, ensure_tuple_rep -__all__ = [ - "LoadImaged", - "LoadImageD", - "LoadImageDict", - "SaveImaged", - "SaveImageD", - "SaveImageDict", -] +__all__ = ["LoadImaged", "LoadImageD", "LoadImageDict", "SaveImaged", "SaveImageD", "SaveImageDict"] class LoadImaged(MapTransform): diff --git a/monai/transforms/post/array.py b/monai/transforms/post/array.py index 631947025c..f0648f6a83 100644 --- a/monai/transforms/post/array.py +++ b/monai/transforms/post/array.py @@ -25,7 +25,7 @@ from monai.networks.layers import GaussianFilter from monai.transforms.transform import Transform from monai.transforms.utils import fill_holes, get_largest_connected_component_mask -from monai.utils import deprecated_arg, ensure_tuple, look_up_option +from monai.utils import TransformBackends, deprecated_arg, ensure_tuple, look_up_option __all__ = [ "Activations", @@ -57,6 +57,8 @@ class Activations(Transform): """ + backend = [TransformBackends.TORCH] + def __init__(self, sigmoid: bool = False, softmax: bool = False, other: Optional[Callable] = None) -> None: self.sigmoid = sigmoid self.softmax = softmax @@ -129,8 +131,13 @@ class AsDiscrete(Transform): rounding: if not None, round the data according to the specified option, available options: ["torchrounding"]. + .. 
deprecated:: 0.6.0 + ``n_classes`` is deprecated, use ``num_classes`` instead. + """ + backend = [TransformBackends.TORCH] + @deprecated_arg("n_classes", since="0.6") def __init__( self, @@ -181,6 +188,9 @@ def __call__( rounding: if not None, round the data according to the specified option, available options: ["torchrounding"]. + .. deprecated:: 0.6.0 + ``n_classes`` is deprecated, use ``num_classes`` instead. + """ # in case the new num_classes is default but you still call deprecated n_classes if n_classes is not None and num_classes is None: @@ -199,7 +209,7 @@ def __call__( rounding = self.rounding if rounding is None else rounding if rounding is not None: - rounding = look_up_option(rounding, ["torchrounding"]) + look_up_option(rounding, ["torchrounding"]) img = torch.round(img) return img.float() @@ -637,10 +647,7 @@ def __init__( self.box_lower_bd = self.box_size // 2 self.box_upper_bd = self.box_size - self.box_lower_bd - def __call__( - self, - prob_map: Union[np.ndarray, torch.Tensor], - ): + def __call__(self, prob_map: Union[np.ndarray, torch.Tensor]): """ prob_map: the input probabilities map, it must have shape (H[, W, ...]). """ @@ -649,9 +656,8 @@ def __call__( prob_map = torch.as_tensor(prob_map, dtype=torch.float) self.filter.to(prob_map) prob_map = self.filter(prob_map) - else: - if not isinstance(prob_map, torch.Tensor): - prob_map = prob_map.copy() + elif not isinstance(prob_map, torch.Tensor): + prob_map = prob_map.copy() if isinstance(prob_map, torch.Tensor): prob_map = prob_map.detach().cpu().numpy() diff --git a/monai/transforms/post/dictionary.py b/monai/transforms/post/dictionary.py index 2fc3993e3e..a3ebbb6cbe 100644 --- a/monai/transforms/post/dictionary.py +++ b/monai/transforms/post/dictionary.py @@ -86,6 +86,8 @@ class Activationsd(MapTransform): Add activation layers to the input data specified by `keys`. """ + backend = Activations.backend + def __init__( self, keys: KeysCollection, @@ -126,6 +128,8 @@ class AsDiscreted(MapTransform): Dictionary-based wrapper of :py:class:`monai.transforms.AsDiscrete`. """ + backend = AsDiscrete.backend + @deprecated_arg("n_classes", since="0.6") def __init__( self, @@ -158,6 +162,9 @@ def __init__( each element corresponds to a key in ``keys``. allow_missing_keys: don't raise exception if key is missing. + .. deprecated:: 0.6.0 + ``n_classes`` is deprecated, use ``num_classes`` instead. 
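With `n_classes` deprecated in favour of `num_classes`, a typical post-processing call looks like the following sketch (assuming the 0.6-era flags documented above, where per-key settings may be passed as sequences):

import torch
from monai.transforms import AsDiscreted

data = {"pred": torch.randn(2, 64, 64), "label": torch.randint(0, 2, (1, 64, 64))}
# argmax the 2-channel prediction, then one-hot both outputs with num_classes=2
t = AsDiscreted(keys=["pred", "label"], argmax=[True, False], to_onehot=True, num_classes=2)
out = t(data)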
+ """ # in case the new num_classes is default but you still call deprecated n_classes if n_classes is not None and num_classes is None: @@ -176,15 +183,7 @@ def __call__(self, data: Mapping[Hashable, torch.Tensor]) -> Dict[Hashable, torc for key, argmax, to_onehot, num_classes, threshold_values, logit_thresh, rounding in self.key_iterator( d, self.argmax, self.to_onehot, self.num_classes, self.threshold_values, self.logit_thresh, self.rounding ): - d[key] = self.converter( - d[key], - argmax, - to_onehot, - num_classes, - threshold_values, - logit_thresh, - rounding, - ) + d[key] = self.converter(d[key], argmax, to_onehot, num_classes, threshold_values, logit_thresh, rounding) return d @@ -233,10 +232,7 @@ class LabelFilterd(MapTransform): """ def __init__( - self, - keys: KeysCollection, - applied_labels: Union[Sequence[int], int], - allow_missing_keys: bool = False, + self, keys: KeysCollection, applied_labels: Union[Sequence[int], int], allow_missing_keys: bool = False ) -> None: """ Args: @@ -459,10 +455,7 @@ def __init__( ) -> None: super().__init__(keys, allow_missing_keys) self.prob_nms = ProbNMS( - spatial_dims=spatial_dims, - sigma=sigma, - prob_threshold=prob_threshold, - box_size=box_size, + spatial_dims=spatial_dims, sigma=sigma, prob_threshold=prob_threshold, box_size=box_size ) def __call__(self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor]]): @@ -597,19 +590,14 @@ def __call__(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, Any]: transform_info = d[transform_key] if nearest_interp: transform_info = convert_inverse_interp_mode( - trans_info=deepcopy(transform_info), - mode="nearest", - align_corners=None, + trans_info=deepcopy(transform_info), mode="nearest", align_corners=None ) input = d[key] if isinstance(input, torch.Tensor): input = input.detach() # construct the input dict data for BatchInverseTransform - input_dict = { - orig_key: input, - transform_key: transform_info, - } + input_dict = {orig_key: input, transform_key: transform_info} orig_meta_key = orig_meta_key or f"{orig_key}_{meta_key_postfix}" meta_key = meta_key or f"{key}_{meta_key_postfix}" if orig_meta_key in d: diff --git a/monai/transforms/spatial/array.py b/monai/transforms/spatial/array.py index c3bd4a3433..dcdc5923c5 100644 --- a/monai/transforms/spatial/array.py +++ b/monai/transforms/spatial/array.py @@ -22,7 +22,7 @@ from monai.config.type_definitions import NdarrayOrTensor from monai.data.utils import compute_shape_offset, to_affine_nd, zoom_affine from monai.networks.layers import AffineTransform, GaussianFilter, grid_pull -from monai.transforms.croppad.array import CenterSpatialCrop +from monai.transforms.croppad.array import CenterSpatialCrop, Pad from monai.transforms.transform import Randomizable, RandomizableTransform, ThreadUnsafe, Transform from monai.transforms.utils import ( create_control_grid, @@ -33,11 +33,13 @@ create_translate, map_spatial_axes, ) +from monai.transforms.utils_pytorch_numpy_unification import concatenate from monai.utils import ( GridSampleMode, GridSamplePadMode, InterpolateMode, NumpyPadMode, + PytorchPadMode, ensure_tuple, ensure_tuple_rep, ensure_tuple_size, @@ -45,8 +47,10 @@ issequenceiterable, optional_import, ) +from monai.utils.deprecate_utils import deprecated_arg from monai.utils.enums import TransformBackends from monai.utils.module import look_up_option +from monai.utils.type_conversion import convert_data_type, convert_to_dst_type nib, _ = optional_import("nibabel") @@ -54,6 +58,7 @@ "Spacing", "Orientation", "Flip", + "GridDistortion", 
"Resize", "Rotate", "Zoom", @@ -61,6 +66,7 @@ "RandRotate90", "RandRotate", "RandFlip", + "RandGridDistortion", "RandAxisFlip", "RandZoom", "AffineGrid", @@ -82,6 +88,8 @@ class Spacing(Transform): Resample input image into the specified `pixdim`. """ + backend = [TransformBackends.TORCH] + def __init__( self, pixdim: Union[Sequence[float], float], @@ -90,6 +98,7 @@ def __init__( padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.BORDER, align_corners: bool = False, dtype: DtypeLike = np.float64, + image_only: bool = False, ) -> None: """ Args: @@ -122,6 +131,7 @@ def __init__( dtype: data type for resampling computation. Defaults to ``np.float64`` for best precision. If None, use the data type of input data. To be compatible with other modules, the output data type is always ``np.float32``. + image_only: return just the image or the image, the old affine and new affine. Default is `False`. """ self.pixdim = np.array(ensure_tuple(pixdim), dtype=np.float64) @@ -130,17 +140,18 @@ def __init__( self.padding_mode: GridSamplePadMode = look_up_option(padding_mode, GridSamplePadMode) self.align_corners = align_corners self.dtype = dtype + self.image_only = image_only def __call__( self, - data_array: np.ndarray, - affine: Optional[np.ndarray] = None, + data_array: NdarrayOrTensor, + affine: Optional[NdarrayOrTensor] = None, mode: Optional[Union[GridSampleMode, str]] = None, padding_mode: Optional[Union[GridSamplePadMode, str]] = None, align_corners: Optional[bool] = None, dtype: DtypeLike = None, output_spatial_shape: Optional[np.ndarray] = None, - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + ) -> Union[NdarrayOrTensor, Tuple[NdarrayOrTensor, NdarrayOrTensor, NdarrayOrTensor]]: """ Args: data_array: in shape (num_channels, H[, W, ...]). @@ -169,15 +180,16 @@ def __call__( """ _dtype = dtype or self.dtype or data_array.dtype - sr = data_array.ndim - 1 + sr = int(data_array.ndim - 1) if sr <= 0: raise ValueError("data_array must have at least one spatial dimension.") if affine is None: # default to identity - affine = np.eye(sr + 1, dtype=np.float64) + affine_np = affine = np.eye(sr + 1, dtype=np.float64) affine_ = np.eye(sr + 1, dtype=np.float64) else: - affine_ = to_affine_nd(sr, affine) + affine_np, *_ = convert_data_type(affine, np.ndarray) # type: ignore + affine_ = to_affine_nd(sr, affine_np) out_d = self.pixdim[:sr] if out_d.size < sr: @@ -193,27 +205,31 @@ def __call__( # no resampling if it's identity transform if np.allclose(transform, np.diag(np.ones(len(transform))), atol=1e-3): - output_data = data_array.copy().astype(np.float32) - new_affine = to_affine_nd(affine, new_affine) - return output_data, affine, new_affine - - # resample - affine_xform = AffineTransform( - normalized=False, - mode=look_up_option(mode or self.mode, GridSampleMode), - padding_mode=look_up_option(padding_mode or self.padding_mode, GridSamplePadMode), - align_corners=self.align_corners if align_corners is None else align_corners, - reverse_indexing=True, - ) - output_data = affine_xform( - # AffineTransform requires a batch dim - torch.as_tensor(np.ascontiguousarray(data_array).astype(_dtype)).unsqueeze(0), - torch.as_tensor(np.ascontiguousarray(transform).astype(_dtype)), - spatial_size=output_shape if output_spatial_shape is None else output_spatial_shape, - ) - output_data = np.asarray(output_data.squeeze(0).detach().cpu().numpy(), dtype=np.float32) # type: ignore - new_affine = to_affine_nd(affine, new_affine) - + output_data = data_array + else: + # resample + affine_xform = 
AffineTransform( + normalized=False, + mode=look_up_option(mode or self.mode, GridSampleMode), + padding_mode=look_up_option(padding_mode or self.padding_mode, GridSamplePadMode), + align_corners=self.align_corners if align_corners is None else align_corners, + reverse_indexing=True, + ) + data_array_t: torch.Tensor + data_array_t, *_ = convert_data_type(data_array, torch.Tensor, dtype=_dtype) # type: ignore + output_data = affine_xform( + # AffineTransform requires a batch dim + data_array_t.unsqueeze(0), + convert_data_type(transform, torch.Tensor, data_array_t.device, dtype=_dtype)[0], + spatial_size=output_shape if output_spatial_shape is None else output_spatial_shape, + ).squeeze(0) + + output_data, *_ = convert_to_dst_type(output_data, data_array, dtype=torch.float32) + new_affine = to_affine_nd(affine_np, new_affine) # type: ignore + new_affine, *_ = convert_to_dst_type(src=new_affine, dst=affine, dtype=torch.float32) + + if self.image_only: + return output_data return output_data, affine, new_affine @@ -222,11 +238,14 @@ class Orientation(Transform): Change the input image's orientation into the specified based on `axcodes`. """ + backend = [TransformBackends.NUMPY] + def __init__( self, axcodes: Optional[str] = None, as_closest_canonical: bool = False, labels: Optional[Sequence[Tuple[str, str]]] = tuple(zip("LPI", "RAS")), + image_only: bool = False, ) -> None: """ Args: @@ -239,6 +258,7 @@ def __init__( labels: optional, None or sequence of (2,) sequences (2,) sequences are labels for (beginning, end) of output axis. Defaults to ``(('L', 'R'), ('P', 'A'), ('I', 'S'))``. + image_only: if True return only the image volume, otherwise return (image, affine, new_affine). Raises: ValueError: When ``axcodes=None`` and ``as_closest_canonical=True``. Incompatible values. @@ -253,10 +273,11 @@ def __init__( self.axcodes = axcodes self.as_closest_canonical = as_closest_canonical self.labels = labels + self.image_only = image_only def __call__( - self, data_array: np.ndarray, affine: Optional[np.ndarray] = None - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + self, data_array: NdarrayOrTensor, affine: Optional[NdarrayOrTensor] = None + ) -> Union[NdarrayOrTensor, Tuple[NdarrayOrTensor, NdarrayOrTensor, NdarrayOrTensor]]: """ original orientation of `data_array` is defined by `affine`. @@ -269,17 +290,22 @@ def __call__( ValueError: When ``axcodes`` spatiality differs from ``data_array``. Returns: - data_array (reoriented in `self.axcodes`), original axcodes, current axcodes. + data_array [reoriented in `self.axcodes`] if `self.image_only`, else + (data_array [reoriented in `self.axcodes`], original axcodes, current axcodes). 
""" - sr = data_array.ndim - 1 + data_array_np, *_ = convert_data_type(data_array, np.ndarray) # type: ignore + sr = data_array_np.ndim - 1 if sr <= 0: raise ValueError("data_array must have at least one spatial dimension.") if affine is None: - affine = np.eye(sr + 1, dtype=np.float64) + # default to identity + affine_np = affine = np.eye(sr + 1, dtype=np.float64) affine_ = np.eye(sr + 1, dtype=np.float64) else: - affine_ = to_affine_nd(sr, affine) + affine_np, *_ = convert_data_type(affine, np.ndarray) # type: ignore + affine_ = to_affine_nd(sr, affine_np) + src = nib.io_orientation(affine_) if self.as_closest_canonical: spatial_ornt = src @@ -295,12 +321,16 @@ def __call__( ornt = spatial_ornt.copy() ornt[:, 0] += 1 # skip channel dim ornt = np.concatenate([np.array([[0, 1]]), ornt]) - shape = data_array.shape[1:] - data_array = np.ascontiguousarray(nib.orientations.apply_orientation(data_array, ornt)) + shape = data_array_np.shape[1:] + data_array_np = np.ascontiguousarray(nib.orientations.apply_orientation(data_array_np, ornt)) new_affine = affine_ @ nib.orientations.inv_ornt_aff(spatial_ornt, shape) - new_affine = to_affine_nd(affine, new_affine) + new_affine = to_affine_nd(affine_np, new_affine) + out, *_ = convert_to_dst_type(src=data_array_np, dst=data_array) + new_affine, *_ = convert_to_dst_type(src=new_affine, dst=affine, dtype=torch.float32) - return data_array, affine, new_affine + if self.image_only: + return out + return out, affine, new_affine class Flip(Transform): @@ -330,8 +360,7 @@ def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ if isinstance(img, np.ndarray): return np.ascontiguousarray(np.flip(img, map_spatial_axes(img.ndim, self.spatial_axis))) - else: - return torch.flip(img, map_spatial_axes(img.ndim, self.spatial_axis)) + return torch.flip(img, map_spatial_axes(img.ndim, self.spatial_axis)) class Resize(Transform): @@ -357,6 +386,8 @@ class Resize(Transform): See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate """ + backend = [TransformBackends.TORCH] + def __init__( self, spatial_size: Union[Sequence[int], int], @@ -371,10 +402,10 @@ def __init__( def __call__( self, - img: np.ndarray, + img: NdarrayOrTensor, mode: Optional[Union[InterpolateMode, str]] = None, align_corners: Optional[bool] = None, - ) -> np.ndarray: + ) -> NdarrayOrTensor: """ Args: img: channel first array, must have shape: (num_channels, H[, W, ..., ]). @@ -389,32 +420,33 @@ def __call__( ValueError: When ``self.spatial_size`` length is less than ``img`` spatial dimensions. """ + img_, *_ = convert_data_type(img, torch.Tensor, dtype=torch.float) # type: ignore if self.size_mode == "all": - input_ndim = img.ndim - 1 # spatial ndim + input_ndim = img_.ndim - 1 # spatial ndim output_ndim = len(ensure_tuple(self.spatial_size)) if output_ndim > input_ndim: - input_shape = ensure_tuple_size(img.shape, output_ndim + 1, 1) - img = img.reshape(input_shape) + input_shape = ensure_tuple_size(img_.shape, output_ndim + 1, 1) + img_ = img_.reshape(input_shape) elif output_ndim < input_ndim: raise ValueError( "len(spatial_size) must be greater or equal to img spatial dimensions, " f"got spatial_size={output_ndim} img={input_ndim}." 
) - spatial_size_ = fall_back_tuple(self.spatial_size, img.shape[1:]) + spatial_size_ = fall_back_tuple(self.spatial_size, img_.shape[1:]) else: # for the "longest" mode - img_size = img.shape[1:] + img_size = img_.shape[1:] if not isinstance(self.spatial_size, int): raise ValueError("spatial_size must be an int number if size_mode is 'longest'.") scale = self.spatial_size / max(img_size) spatial_size_ = tuple(int(round(s * scale)) for s in img_size) resized = torch.nn.functional.interpolate( # type: ignore - input=torch.as_tensor(np.ascontiguousarray(img), dtype=torch.float).unsqueeze(0), + input=img_.unsqueeze(0), # type: ignore size=spatial_size_, mode=look_up_option(self.mode if mode is None else mode, InterpolateMode).value, align_corners=self.align_corners if align_corners is None else align_corners, ) - resized = resized.squeeze(0).detach().cpu().numpy() - return np.asarray(resized) + out, *_ = convert_to_dst_type(resized.squeeze(0), img) + return out class Rotate(Transform, ThreadUnsafe): @@ -439,6 +471,8 @@ class Rotate(Transform, ThreadUnsafe): the output data type is always ``np.float32``. """ + backend = [TransformBackends.TORCH] + def __init__( self, angle: Union[Sequence[float], float], @@ -446,7 +480,7 @@ def __init__( mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR, padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.BORDER, align_corners: bool = False, - dtype: DtypeLike = np.float64, + dtype: Union[DtypeLike, torch.dtype] = np.float64, ) -> None: self.angle = angle self.keep_size = keep_size @@ -454,16 +488,16 @@ def __init__( self.padding_mode: GridSamplePadMode = look_up_option(padding_mode, GridSamplePadMode) self.align_corners = align_corners self.dtype = dtype - self._rotation_matrix: Optional[np.ndarray] = None + self._rotation_matrix: Optional[NdarrayOrTensor] = None def __call__( self, - img: np.ndarray, + img: NdarrayOrTensor, mode: Optional[Union[GridSampleMode, str]] = None, padding_mode: Optional[Union[GridSamplePadMode, str]] = None, align_corners: Optional[bool] = None, - dtype: DtypeLike = None, - ) -> np.ndarray: + dtype: Union[DtypeLike, torch.dtype] = None, + ) -> NdarrayOrTensor: """ Args: img: channel first array, must have shape: [chns, H, W] or [chns, H, W, D]. 
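The hunks above all follow one pattern when a transform gains `NdarrayOrTensor` support: convert the input to a torch tensor with `convert_data_type`, run the computation in torch, then restore the caller's original array type (and device, for tensors) with `convert_to_dst_type`. A minimal sketch of that round trip, assuming `monai.utils.type_conversion` from this branch is importable; `flip_last_axis` is a hypothetical stand-in for the real resampling work:

import numpy as np
import torch
from monai.utils.type_conversion import convert_data_type, convert_to_dst_type

def flip_last_axis(img):
    # accept np.ndarray or torch.Tensor (channel-first); compute in torch
    img_t, *_ = convert_data_type(img, torch.Tensor, dtype=torch.float32)
    out_t = torch.flip(img_t, dims=[-1])  # placeholder for the actual transform math
    # hand back whatever container type the caller passed in
    out, *_ = convert_to_dst_type(out_t, dst=img)
    return out

print(type(flip_last_axis(np.zeros((1, 3, 3)))))   # <class 'numpy.ndarray'>
print(type(flip_last_axis(torch.zeros(1, 3, 3))))  # <class 'torch.Tensor'>
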
@@ -486,7 +520,11 @@ def __call__( """ _dtype = dtype or self.dtype or img.dtype - im_shape = np.asarray(img.shape[1:]) # spatial dimensions + + img_t: torch.Tensor + img_t, *_ = convert_data_type(img, torch.Tensor, dtype=_dtype) # type: ignore + + im_shape = np.asarray(img_t.shape[1:]) # spatial dimensions input_ndim = len(im_shape) if input_ndim not in (2, 3): raise ValueError(f"Unsupported img dimension: {input_ndim}, available options are [2, 3].") @@ -499,11 +537,14 @@ def __call__( corners = np.asarray(np.meshgrid(*[(0, dim) for dim in im_shape], indexing="ij")).reshape( (len(im_shape), -1) ) - corners = transform[:-1, :-1] @ corners + corners = transform[:-1, :-1] @ corners # type: ignore output_shape = np.asarray(corners.ptp(axis=1) + 0.5, dtype=int) shift_1 = create_translate(input_ndim, (-(output_shape - 1) / 2).tolist()) transform = shift @ transform @ shift_1 + transform_t: torch.Tensor + transform_t, *_ = convert_to_dst_type(transform, img_t) # type: ignore + xform = AffineTransform( normalized=False, mode=look_up_option(mode or self.mode, GridSampleMode), @@ -511,15 +552,13 @@ def __call__( align_corners=self.align_corners if align_corners is None else align_corners, reverse_indexing=True, ) - output = xform( - torch.as_tensor(np.ascontiguousarray(img).astype(_dtype)).unsqueeze(0), - torch.as_tensor(np.ascontiguousarray(transform).astype(_dtype)), - spatial_size=output_shape, - ) + output: torch.Tensor = xform(img_t.unsqueeze(0), transform_t, spatial_size=output_shape).float().squeeze(0) self._rotation_matrix = transform - return np.asarray(output.squeeze(0).detach().cpu().numpy(), dtype=np.float32) + out: NdarrayOrTensor + out, *_ = convert_to_dst_type(output, dst=img, dtype=output.dtype) + return out - def get_rotation_matrix(self) -> Optional[np.ndarray]: + def get_rotation_matrix(self) -> Optional[NdarrayOrTensor]: """ Get the most recently applied rotation matrix This is not thread-safe. @@ -542,82 +581,96 @@ class Zoom(Transform): mode: {``"nearest"``, ``"linear"``, ``"bilinear"``, ``"bicubic"``, ``"trilinear"``, ``"area"``} The interpolation mode. Defaults to ``"area"``. See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate - padding_mode: {``"constant"``, ``"edge``", ``"linear_ramp``", ``"maximum``", ``"mean``", `"median``", - ``"minimum``", `"reflect``", ``"symmetric``", ``"wrap``", ``"empty``", ``"``"} + padding_mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. + One of the listed string values or a user supplied function. Defaults to ``"constant"``. The mode to pad data after zooming. - See also: https://numpy.org/doc/stable/reference/generated/numpy.pad.html + See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html align_corners: This only has an effect when mode is 'linear', 'bilinear', 'bicubic' or 'trilinear'. Default: None. See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate keep_size: Should keep original size (padding/slicing if needed), default is True. - np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension. 
- more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + kwargs: other arguments for the `np.pad` or `torch.pad` function. + note that `np.pad` treats channel dimension as the first dimension. """ + backend = [TransformBackends.TORCH] + def __init__( self, zoom: Union[Sequence[float], float], mode: Union[InterpolateMode, str] = InterpolateMode.AREA, - padding_mode: Union[NumpyPadMode, str] = NumpyPadMode.EDGE, + padding_mode: Union[NumpyPadMode, PytorchPadMode, str] = NumpyPadMode.EDGE, align_corners: Optional[bool] = None, keep_size: bool = True, - **np_kwargs, + **kwargs, ) -> None: self.zoom = zoom self.mode: InterpolateMode = InterpolateMode(mode) - self.padding_mode: NumpyPadMode = NumpyPadMode(padding_mode) + self.padding_mode = padding_mode self.align_corners = align_corners self.keep_size = keep_size - self.np_kwargs = np_kwargs + self.kwargs = kwargs def __call__( self, - img: np.ndarray, + img: NdarrayOrTensor, mode: Optional[Union[InterpolateMode, str]] = None, - padding_mode: Optional[Union[NumpyPadMode, str]] = None, + padding_mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None, align_corners: Optional[bool] = None, - ): + ) -> NdarrayOrTensor: """ Args: img: channel first array, must have shape: (num_channels, H[, W, ..., ]). mode: {``"nearest"``, ``"linear"``, ``"bilinear"``, ``"bicubic"``, ``"trilinear"``, ``"area"``} The interpolation mode. Defaults to ``self.mode``. See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate - padding_mode: {``"constant"``, ``"edge``", ``"linear_ramp``", ``"maximum``", ``"mean``", `"median``", - ``"minimum``", `"reflect``", ``"symmetric``", ``"wrap``", ``"empty``", ``"``"} - The mode to pad data after zooming, default to ``self.padding_mode``. - See also: https://numpy.org/doc/stable/reference/generated/numpy.pad.html + padding_mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. + One of the listed string values or a user supplied function. Defaults to ``"constant"``. + The mode to pad data after zooming. + See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html align_corners: This only has an effect when mode is 'linear', 'bilinear', 'bicubic' or 'trilinear'. Defaults to ``self.align_corners``. 
See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate """ + img_t: torch.Tensor + img_t, *_ = convert_data_type(img, torch.Tensor, dtype=torch.float32) # type: ignore + _zoom = ensure_tuple_rep(self.zoom, img.ndim - 1) # match the spatial image dim - zoomed = torch.nn.functional.interpolate( # type: ignore + zoomed: NdarrayOrTensor = torch.nn.functional.interpolate( # type: ignore recompute_scale_factor=True, - input=torch.as_tensor(np.ascontiguousarray(img), dtype=torch.float).unsqueeze(0), + input=img_t.unsqueeze(0), scale_factor=list(_zoom), mode=look_up_option(self.mode if mode is None else mode, InterpolateMode).value, align_corners=self.align_corners if align_corners is None else align_corners, ) - zoomed = zoomed.squeeze(0).detach().cpu().numpy() - if not self.keep_size or np.allclose(img.shape, zoomed.shape): - return zoomed + zoomed = zoomed.squeeze(0) + + if self.keep_size and not np.allclose(img_t.shape, zoomed.shape): - pad_vec = [[0, 0]] * len(img.shape) - slice_vec = [slice(None)] * len(img.shape) - for idx, (od, zd) in enumerate(zip(img.shape, zoomed.shape)): - diff = od - zd - half = abs(diff) // 2 - if diff > 0: # need padding - pad_vec[idx] = [half, diff - half] - elif diff < 0: # need slicing - slice_vec[idx] = slice(half, half + od) + pad_vec = [(0, 0)] * len(img_t.shape) + slice_vec = [slice(None)] * len(img_t.shape) + for idx, (od, zd) in enumerate(zip(img_t.shape, zoomed.shape)): + diff = od - zd + half = abs(diff) // 2 + if diff > 0: # need padding + pad_vec[idx] = (half, diff - half) + elif diff < 0: # need slicing + slice_vec[idx] = slice(half, half + od) - padding_mode = look_up_option(self.padding_mode if padding_mode is None else padding_mode, NumpyPadMode) - zoomed = np.pad(zoomed, pad_vec, mode=padding_mode.value, **self.np_kwargs) # type: ignore - return zoomed[tuple(slice_vec)] + padder = Pad(pad_vec, padding_mode or self.padding_mode) + zoomed = padder(zoomed) + zoomed = zoomed[tuple(slice_vec)] + + out, *_ = convert_to_dst_type(zoomed, dst=img) + return out class Rotate90(Transform): @@ -628,6 +681,8 @@ class Rotate90(Transform): """ + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + def __init__(self, k: int = 1, spatial_axes: Tuple[int, int] = (0, 1)) -> None: """ Args: @@ -642,14 +697,15 @@ def __init__(self, k: int = 1, spatial_axes: Tuple[int, int] = (0, 1)) -> None: raise ValueError("spatial_axes must be 2 int numbers to indicate the axes to rotate 90 degrees.") self.spatial_axes = spatial_axes_ - def __call__(self, img: np.ndarray) -> np.ndarray: + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ Args: img: channel first array, must have shape: (num_channels, H[, W, ..., ]), """ - - result: np.ndarray = np.rot90(img, self.k, map_spatial_axes(img.ndim, self.spatial_axes)) - return result.astype(img.dtype) + rot90 = torch.rot90 if isinstance(img, torch.Tensor) else np.rot90 + out: NdarrayOrTensor = rot90(img, self.k, map_spatial_axes(img.ndim, self.spatial_axes)) + out, *_ = convert_data_type(out, dtype=img.dtype) + return out class RandRotate90(RandomizableTransform): @@ -658,6 +714,8 @@ class RandRotate90(RandomizableTransform): in the plane specified by `spatial_axes`. 
""" + backend = Rotate90.backend + def __init__(self, prob: float = 0.1, max_k: int = 3, spatial_axes: Tuple[int, int] = (0, 1)) -> None: """ Args: @@ -674,19 +732,24 @@ def __init__(self, prob: float = 0.1, max_k: int = 3, spatial_axes: Tuple[int, i self._rand_k = 0 def randomize(self, data: Optional[Any] = None) -> None: - self._rand_k = self.R.randint(self.max_k) + 1 super().randomize(None) + if not self._do_transform: + return None + self._rand_k = self.R.randint(self.max_k) + 1 - def __call__(self, img: np.ndarray) -> np.ndarray: + def __call__(self, img: NdarrayOrTensor, randomize: bool = True) -> NdarrayOrTensor: """ Args: img: channel first array, must have shape: (num_channels, H[, W, ..., ]), + randomize: whether to execute `randomize()` function first, default to True. """ - self.randomize() + if randomize: + self.randomize() + if not self._do_transform: return img - rotator = Rotate90(self._rand_k, self.spatial_axes) - return rotator(img) + + return Rotate90(self._rand_k, self.spatial_axes)(img) class RandRotate(RandomizableTransform): @@ -717,6 +780,8 @@ class RandRotate(RandomizableTransform): the output data type is always ``np.float32``. """ + backend = Rotate.backend + def __init__( self, range_x: Union[Tuple[float, float], float] = 0.0, @@ -727,7 +792,7 @@ def __init__( mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR, padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.BORDER, align_corners: bool = False, - dtype: DtypeLike = np.float64, + dtype: Union[DtypeLike, torch.dtype] = np.float64, ) -> None: RandomizableTransform.__init__(self, prob) self.range_x = ensure_tuple(range_x) @@ -752,18 +817,22 @@ def __init__( def randomize(self, data: Optional[Any] = None) -> None: super().randomize(None) + if not self._do_transform: + return None self.x = self.R.uniform(low=self.range_x[0], high=self.range_x[1]) self.y = self.R.uniform(low=self.range_y[0], high=self.range_y[1]) self.z = self.R.uniform(low=self.range_z[0], high=self.range_z[1]) def __call__( self, - img: np.ndarray, + img: NdarrayOrTensor, mode: Optional[Union[GridSampleMode, str]] = None, padding_mode: Optional[Union[GridSamplePadMode, str]] = None, align_corners: Optional[bool] = None, - dtype: DtypeLike = None, - ) -> np.ndarray: + dtype: Union[DtypeLike, torch.dtype] = None, + randomize: bool = True, + get_matrix: bool = False, + ): """ Args: img: channel first array, must have shape 2D: (nchannels, H, W), or 3D: (nchannels, H, W, D). @@ -778,10 +847,15 @@ def __call__( dtype: data type for resampling computation. Defaults to ``self.dtype``. If None, use the data type of input data. To be compatible with other modules, the output data type is always ``np.float32``. + randomize: whether to execute `randomize()` function first, default to True. + get_matrix: wheter to return the rotated image and rotate matrix together, default to False. 
""" - self.randomize() + if randomize: + self.randomize() + if not self._do_transform: return img + rotator = Rotate( angle=self.x if img.ndim == 3 else (self.x, self.y, self.z), keep_size=self.keep_size, @@ -790,7 +864,8 @@ def __call__( align_corners=self.align_corners if align_corners is None else align_corners, dtype=dtype or self.dtype or img.dtype, ) - return np.array(rotator(img)) + img = rotator(img) + return (img, rotator.get_rotation_matrix()) if get_matrix else img class RandFlip(RandomizableTransform): @@ -810,14 +885,18 @@ def __init__(self, prob: float = 0.1, spatial_axis: Optional[Union[Sequence[int] RandomizableTransform.__init__(self, prob) self.flipper = Flip(spatial_axis=spatial_axis) - def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: + def __call__(self, img: NdarrayOrTensor, randomize: bool = True) -> NdarrayOrTensor: """ Args: img: channel first array, must have shape: (num_channels, H[, W, ..., ]), + randomize: whether to execute `randomize()` function first, default to True. """ - self.randomize(None) + if randomize: + self.randomize(None) + if not self._do_transform: return img + return self.flipper(img) @@ -840,18 +919,23 @@ def __init__(self, prob: float = 0.1) -> None: def randomize(self, data: NdarrayOrTensor) -> None: super().randomize(None) + if not self._do_transform: + return None self._axis = self.R.randint(data.ndim - 1) - def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: + def __call__(self, img: NdarrayOrTensor, randomize: bool = True) -> NdarrayOrTensor: """ Args: img: channel first array, must have shape: (num_channels, H[, W, ..., ]), + randomize: whether to execute `randomize()` function first, default to True. """ - self.randomize(data=img) + if randomize: + self.randomize(data=img) + if not self._do_transform: return img - flipper = Flip(spatial_axis=self._axis) - return flipper(img) + + return Flip(spatial_axis=self._axis)(img) class RandZoom(RandomizableTransform): @@ -873,29 +957,34 @@ class RandZoom(RandomizableTransform): mode: {``"nearest"``, ``"linear"``, ``"bilinear"``, ``"bicubic"``, ``"trilinear"``, ``"area"``} The interpolation mode. Defaults to ``"area"``. See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate - padding_mode: {``"constant"``, ``"edge``", ``"linear_ramp``", ``"maximum``", ``"mean``", `"median``", - ``"minimum``", `"reflect``", ``"symmetric``", ``"wrap``", ``"empty``", ``"``"} + padding_mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. + One of the listed string values or a user supplied function. Defaults to ``"constant"``. The mode to pad data after zooming. - See also: https://numpy.org/doc/stable/reference/generated/numpy.pad.html + See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html align_corners: This only has an effect when mode is 'linear', 'bilinear', 'bicubic' or 'trilinear'. Default: None. See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate keep_size: Should keep original size (pad if needed), default is True. - np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension. 
- more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + kwargs: other arguments for the `np.pad` or `torch.pad` function. + note that `np.pad` treats channel dimension as the first dimension. """ + backend = Zoom.backend + def __init__( self, prob: float = 0.1, min_zoom: Union[Sequence[float], float] = 0.9, max_zoom: Union[Sequence[float], float] = 1.1, mode: Union[InterpolateMode, str] = InterpolateMode.AREA, - padding_mode: Union[NumpyPadMode, str] = NumpyPadMode.EDGE, + padding_mode: Union[NumpyPadMode, PytorchPadMode, str] = NumpyPadMode.EDGE, align_corners: Optional[bool] = None, keep_size: bool = True, - **np_kwargs, + **kwargs, ) -> None: RandomizableTransform.__init__(self, prob) self.min_zoom = ensure_tuple(min_zoom) @@ -903,59 +992,67 @@ def __init__( if len(self.min_zoom) != len(self.max_zoom): raise AssertionError("min_zoom and max_zoom must have same length.") self.mode: InterpolateMode = look_up_option(mode, InterpolateMode) - self.padding_mode: NumpyPadMode = look_up_option(padding_mode, NumpyPadMode) + self.padding_mode = padding_mode self.align_corners = align_corners self.keep_size = keep_size - self.np_kwargs = np_kwargs + self.kwargs = kwargs self._zoom: Sequence[float] = [1.0] - def randomize(self, data: Optional[Any] = None) -> None: + def randomize(self, img: NdarrayOrTensor) -> None: super().randomize(None) + if not self._do_transform: + return None self._zoom = [self.R.uniform(l, h) for l, h in zip(self.min_zoom, self.max_zoom)] + if len(self._zoom) == 1: + # to keep the spatial shape ratio, use same random zoom factor for all dims + self._zoom = ensure_tuple_rep(self._zoom[0], img.ndim - 1) + elif len(self._zoom) == 2 and img.ndim > 3: + # if 2 zoom factors provided for 3D data, use the first factor for H and W dims, second factor for D dim + self._zoom = ensure_tuple_rep(self._zoom[0], img.ndim - 2) + ensure_tuple(self._zoom[-1]) def __call__( self, - img: np.ndarray, + img: NdarrayOrTensor, mode: Optional[Union[InterpolateMode, str]] = None, - padding_mode: Optional[Union[NumpyPadMode, str]] = None, + padding_mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None, align_corners: Optional[bool] = None, - ) -> np.ndarray: + randomize: bool = True, + ) -> NdarrayOrTensor: """ Args: img: channel first array, must have shape 2D: (nchannels, H, W), or 3D: (nchannels, H, W, D). mode: {``"nearest"``, ``"linear"``, ``"bilinear"``, ``"bicubic"``, ``"trilinear"``, ``"area"``} The interpolation mode. Defaults to ``self.mode``. See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate - padding_mode: {``"constant"``, ``"edge``", ``"linear_ramp``", ``"maximum``", ``"mean``", `"median``", - ``"minimum``", `"reflect``", ``"symmetric``", ``"wrap``", ``"empty``", ``"``"} - The mode to pad data after zooming, default to ``self.padding_mode``. - See also: https://numpy.org/doc/stable/reference/generated/numpy.pad.html + padding_mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. + One of the listed string values or a user supplied function. Defaults to ``"constant"``. + The mode to pad data after zooming. 
+ See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html align_corners: This only has an effect when mode is 'linear', 'bilinear', 'bicubic' or 'trilinear'. Defaults to ``self.align_corners``. See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate + randomize: whether to execute `randomize()` function first, default to True. + """ # match the spatial image dim - self.randomize() - _dtype = np.float32 + if randomize: + self.randomize(img=img) + if not self._do_transform: - return img.astype(_dtype) - if len(self._zoom) == 1: - # to keep the spatial shape ratio, use same random zoom factor for all dims - self._zoom = ensure_tuple_rep(self._zoom[0], img.ndim - 1) - elif len(self._zoom) == 2 and img.ndim > 3: - # if 2 zoom factors provided for 3D data, use the first factor for H and W dims, second factor for D dim - self._zoom = ensure_tuple_rep(self._zoom[0], img.ndim - 2) + ensure_tuple(self._zoom[-1]) - zoomer = Zoom(self._zoom, keep_size=self.keep_size, **self.np_kwargs) - return np.asarray( - zoomer( - img, - mode=look_up_option(mode or self.mode, InterpolateMode), - padding_mode=look_up_option(padding_mode or self.padding_mode, NumpyPadMode), - align_corners=self.align_corners if align_corners is None else align_corners, - ), - dtype=_dtype, - ) + return img + + return Zoom( + self._zoom, + keep_size=self.keep_size, + mode=look_up_option(mode or self.mode, InterpolateMode), + padding_mode=padding_mode or self.padding_mode, + align_corners=align_corners or self.align_corners, + **self.kwargs, + )(img) class AffineGrid(Transform): @@ -979,14 +1076,18 @@ class AffineGrid(Transform): pixel/voxel relative to the center of the input image. Defaults to no translation. scale_params: scale factor for every spatial dims. a tuple of 2 floats for 2D, a tuple of 3 floats for 3D. Defaults to `1.0`. - as_tensor_output: whether to output tensor instead of numpy array, defaults to True. - device: device to store the output grid data. affine: If applied, ignore the params (`rotate_params`, etc.) and use the supplied matrix. Should be square with each side = num of image spatial dimensions + 1. + .. deprecated:: 0.6.0 + ``as_tensor_output`` is deprecated. + """ + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, rotate_params: Optional[Union[Sequence[float], float]] = None, @@ -995,24 +1096,23 @@ def __init__( scale_params: Optional[Union[Sequence[float], float]] = None, as_tensor_output: bool = True, device: Optional[torch.device] = None, - affine: Optional[Union[np.ndarray, torch.Tensor]] = None, + affine: Optional[NdarrayOrTensor] = None, ) -> None: self.rotate_params = rotate_params self.shear_params = shear_params self.translate_params = translate_params self.scale_params = scale_params - - self.as_tensor_output = as_tensor_output self.device = device - self.affine = affine def __call__( - self, - spatial_size: Optional[Sequence[int]] = None, - grid: Optional[Union[np.ndarray, torch.Tensor]] = None, - ) -> Tuple[Union[np.ndarray, torch.Tensor], Union[np.ndarray, torch.Tensor]]: + self, spatial_size: Optional[Sequence[int]] = None, grid: Optional[NdarrayOrTensor] = None + ) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]: """ + The grid can be initialized with a `spatial_size` parameter, or provided directly as `grid`. + Therefore, either `spatial_size` or `grid` must be provided. 
+ When initialising from `spatial_size`, the backend "torch" will be used. + Args: spatial_size: output grid size. grid: grid to be transformed. Shape must be (3, H, W) for 2D or (4, H, W, D) for 3D. @@ -1023,36 +1123,36 @@ def __call__( """ if grid is None: if spatial_size is not None: - grid = create_grid(spatial_size) + grid = create_grid(spatial_size, device=self.device, backend="torch") else: raise ValueError("Incompatible values: grid=None and spatial_size=None.") - affine: Union[torch.Tensor, np.ndarray] + _b = TransformBackends.TORCH if isinstance(grid, torch.Tensor) else TransformBackends.NUMPY + _device = grid.device if isinstance(grid, torch.Tensor) else self.device + affine: NdarrayOrTensor if self.affine is None: spatial_dims = len(grid.shape) - 1 - affine = np.eye(spatial_dims + 1) + affine = ( + torch.eye(spatial_dims + 1, device=_device) + if _b == TransformBackends.TORCH + else np.eye(spatial_dims + 1) + ) if self.rotate_params: - affine = affine @ create_rotate(spatial_dims, self.rotate_params) + affine = affine @ create_rotate(spatial_dims, self.rotate_params, device=_device, backend=_b) if self.shear_params: - affine = affine @ create_shear(spatial_dims, self.shear_params) + affine = affine @ create_shear(spatial_dims, self.shear_params, device=_device, backend=_b) if self.translate_params: - affine = affine @ create_translate(spatial_dims, self.translate_params) + affine = affine @ create_translate(spatial_dims, self.translate_params, device=_device, backend=_b) if self.scale_params: - affine = affine @ create_scale(spatial_dims, self.scale_params) + affine = affine @ create_scale(spatial_dims, self.scale_params, device=_device, backend=_b) else: affine = self.affine - if isinstance(affine, np.ndarray): - affine = torch.as_tensor(np.ascontiguousarray(affine)) + grid, *_ = convert_data_type(grid, torch.Tensor, device=_device, dtype=float) + affine, *_ = convert_to_dst_type(affine, grid) - grid = torch.tensor(grid) if not isinstance(grid, torch.Tensor) else grid.detach().clone() - if self.device: - affine = affine.to(self.device) - grid = grid.to(self.device) - grid = (affine.float() @ grid.reshape((grid.shape[0], -1)).float()).reshape([-1] + list(grid.shape[1:])) - if grid is None or not isinstance(grid, torch.Tensor): - raise ValueError("Unknown grid.") - return grid if self.as_tensor_output else np.asarray(grid.cpu().numpy()), affine + grid = (affine @ grid.reshape((grid.shape[0], -1))).reshape([-1] + list(grid.shape[1:])) + return grid, affine class RandAffineGrid(Randomizable, Transform): @@ -1061,6 +1161,9 @@ class RandAffineGrid(Randomizable, Transform): """ + backend = AffineGrid.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, rotate_range: RandRange = None, @@ -1094,8 +1197,6 @@ def __init__( scale_range: scaling range with format matching `rotate_range`. it defines the range to randomly select the scale factor to translate for every spatial dims. A value of 1.0 is added to the result. This allows 0 to correspond to no change (i.e., a scaling of 1.0). - as_tensor_output: whether to output tensor instead of numpy array. - defaults to True. device: device to store the output grid data. See also: @@ -1103,6 +1204,10 @@ def __init__( - :py:meth:`monai.transforms.utils.create_shear` - :py:meth:`monai.transforms.utils.create_translate` - :py:meth:`monai.transforms.utils.create_scale` + + .. deprecated:: 0.6.0 + ``as_tensor_output`` is deprecated. 
+ """ self.rotate_range = ensure_tuple(rotate_range) self.shear_range = ensure_tuple(shear_range) @@ -1114,9 +1219,8 @@ def __init__( self.translate_params: Optional[List[float]] = None self.scale_params: Optional[List[float]] = None - self.as_tensor_output = as_tensor_output self.device = device - self.affine: Optional[Union[np.ndarray, torch.Tensor]] = None + self.affine: Optional[NdarrayOrTensor] = None def _get_rand_param(self, param_range, add_scalar: float = 0.0): out_param = [] @@ -1136,10 +1240,8 @@ def randomize(self, data: Optional[Any] = None) -> None: self.scale_params = self._get_rand_param(self.scale_range, 1.0) def __call__( - self, - spatial_size: Optional[Sequence[int]] = None, - grid: Optional[Union[np.ndarray, torch.Tensor]] = None, - ) -> Union[np.ndarray, torch.Tensor]: + self, spatial_size: Optional[Sequence[int]] = None, grid: Optional[NdarrayOrTensor] = None + ) -> NdarrayOrTensor: """ Args: spatial_size: output grid size. @@ -1154,13 +1256,13 @@ def __call__( shear_params=self.shear_params, translate_params=self.translate_params, scale_params=self.scale_params, - as_tensor_output=self.as_tensor_output, device=self.device, ) - grid, self.affine = affine_grid(spatial_size, grid) - return grid + _grid: NdarrayOrTensor + _grid, self.affine = affine_grid(spatial_size, grid) + return _grid - def get_transformation_matrix(self) -> Optional[Union[np.ndarray, torch.Tensor]]: + def get_transformation_matrix(self) -> Optional[NdarrayOrTensor]: """Get the most recently applied transformation matrix""" return self.affine @@ -1170,6 +1272,8 @@ class RandDeformGrid(Randomizable, Transform): Generate random deformation grid. """ + backend = [TransformBackends.TORCH] + def __init__( self, spacing: Union[Sequence[float], float], @@ -1207,20 +1311,25 @@ def __call__(self, spatial_size: Sequence[int]): spatial_size: spatial size of the grid. """ self.spacing = fall_back_tuple(self.spacing, (1.0,) * len(spatial_size)) - control_grid = create_control_grid(spatial_size, self.spacing) + control_grid = create_control_grid(spatial_size, self.spacing, device=self.device, backend="torch") self.randomize(control_grid.shape[1:]) - control_grid[: len(spatial_size)] += self.rand_mag * self.random_offset - if self.as_tensor_output: - control_grid = torch.as_tensor(np.ascontiguousarray(control_grid), device=self.device) + _offset, *_ = convert_to_dst_type(self.rand_mag * self.random_offset, control_grid) + control_grid[: len(spatial_size)] += _offset + if not self.as_tensor_output: + control_grid, *_ = convert_data_type(control_grid, output_type=np.ndarray, dtype=np.float32) return control_grid class Resample(Transform): + + backend = [TransformBackends.TORCH] + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR, padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.BORDER, - as_tensor_output: bool = False, + as_tensor_output: bool = True, device: Optional[torch.device] = None, ) -> None: """ @@ -1234,21 +1343,23 @@ def __init__( padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} Padding mode for outside grid values. Defaults to ``"border"``. See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample - as_tensor_output: whether to return a torch tensor. Defaults to False. device: device on which the tensor will be allocated. + + .. deprecated:: 0.6.0 + ``as_tensor_output`` is deprecated. 
+ """ self.mode: GridSampleMode = look_up_option(mode, GridSampleMode) self.padding_mode: GridSamplePadMode = look_up_option(padding_mode, GridSamplePadMode) - self.as_tensor_output = as_tensor_output self.device = device def __call__( self, - img: Union[np.ndarray, torch.Tensor], - grid: Optional[Union[np.ndarray, torch.Tensor]] = None, + img: NdarrayOrTensor, + grid: Optional[NdarrayOrTensor] = None, mode: Optional[Union[GridSampleMode, str]] = None, padding_mode: Optional[Union[GridSamplePadMode, str]] = None, - ) -> Union[np.ndarray, torch.Tensor]: + ) -> NdarrayOrTensor: """ Args: img: shape must be (num_channels, H, W[, D]). @@ -1260,18 +1371,15 @@ def __call__( Padding mode for outside grid values. Defaults to ``self.padding_mode``. See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample """ - - if not isinstance(img, torch.Tensor): - img = torch.as_tensor(np.ascontiguousarray(img)) if grid is None: - raise AssertionError("Error, grid argument must be supplied as an ndarray or tensor ") - grid = torch.tensor(grid) if not isinstance(grid, torch.Tensor) else grid.detach().clone() - if self.device: - img = img.to(self.device) - grid = grid.to(self.device) + raise ValueError("Unknown grid.") + _device = img.device if isinstance(img, torch.Tensor) else self.device + img_t: torch.Tensor + img_t, *_ = convert_data_type(img, torch.Tensor, device=_device, dtype=torch.float32) # type: ignore + grid, *_ = convert_to_dst_type(grid, img_t) if USE_COMPILED: - for i, dim in enumerate(img.shape[1:]): + for i, dim in enumerate(img_t.shape[1:]): grid[i] += (dim - 1.0) / 2.0 grid = grid[:-1] / grid[-1:] grid = grid.permute(list(range(grid.ndimension()))[1:] + [0]) @@ -1286,29 +1394,29 @@ def __call__( bound = 1 _interp_mode = look_up_option(self.mode if mode is None else mode, GridSampleMode).value out = grid_pull( - img.unsqueeze(0).float(), - grid.unsqueeze(0).float(), + img_t.unsqueeze(0), + grid.unsqueeze(0), bound=bound, extrapolate=True, interpolation=1 if _interp_mode == "bilinear" else _interp_mode, )[0] else: - for i, dim in enumerate(img.shape[1:]): + for i, dim in enumerate(img_t.shape[1:]): grid[i] = 2.0 * grid[i] / (dim - 1.0) grid = grid[:-1] / grid[-1:] - index_ordering: List[int] = list(range(img.ndimension() - 2, -1, -1)) + index_ordering: List[int] = list(range(img_t.ndimension() - 2, -1, -1)) grid = grid[index_ordering] grid = grid.permute(list(range(grid.ndimension()))[1:] + [0]) out = torch.nn.functional.grid_sample( - img.unsqueeze(0).float(), - grid.unsqueeze(0).float(), + img_t.unsqueeze(0), + grid.unsqueeze(0), mode=self.mode.value if mode is None else GridSampleMode(mode).value, padding_mode=self.padding_mode.value if padding_mode is None else GridSamplePadMode(padding_mode).value, align_corners=True, )[0] - if self.as_tensor_output: - return torch.as_tensor(out) - return np.asarray(out.cpu().numpy()) + out_val: NdarrayOrTensor + out_val, *_ = convert_to_dst_type(out, dst=img, dtype=out.dtype) + return out_val class Affine(Transform): @@ -1318,6 +1426,9 @@ class Affine(Transform): """ + backend = list(set(AffineGrid.backend) & set(Resample.backend)) + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, rotate_params: Optional[Union[Sequence[float], float]] = None, @@ -1327,7 +1438,7 @@ def __init__( spatial_size: Optional[Union[Sequence[int], int]] = None, mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR, padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.REFLECTION, - as_tensor_output: bool = False, + 
as_tensor_output: bool = True, device: Optional[torch.device] = None, image_only: bool = False, ) -> None: @@ -1363,32 +1474,33 @@ def __init__( padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} Padding mode for outside grid values. Defaults to ``"reflection"``. See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample - as_tensor_output: the computation is implemented using pytorch tensors, this option specifies - whether to convert it back to numpy arrays. device: device on which the tensor will be allocated. image_only: if True return only the image volume, otherwise return (image, affine). + + .. deprecated:: 0.6.0 + ``as_tensor_output`` is deprecated. + """ self.affine_grid = AffineGrid( rotate_params=rotate_params, shear_params=shear_params, translate_params=translate_params, scale_params=scale_params, - as_tensor_output=True, device=device, ) self.image_only = image_only - self.resampler = Resample(as_tensor_output=as_tensor_output, device=device) + self.resampler = Resample(device=device) self.spatial_size = spatial_size self.mode: GridSampleMode = look_up_option(mode, GridSampleMode) self.padding_mode: GridSamplePadMode = look_up_option(padding_mode, GridSamplePadMode) def __call__( self, - img: Union[np.ndarray, torch.Tensor], + img: NdarrayOrTensor, spatial_size: Optional[Union[Sequence[int], int]] = None, mode: Optional[Union[GridSampleMode, str]] = None, padding_mode: Optional[Union[GridSamplePadMode, str]] = None, - ): + ) -> Union[NdarrayOrTensor, Tuple[NdarrayOrTensor, NdarrayOrTensor]]: """ Args: img: shape must be (num_channels, H, W[, D]), @@ -1418,6 +1530,9 @@ class RandAffine(RandomizableTransform): """ + backend = Affine.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, prob: float = 0.1, @@ -1473,13 +1588,15 @@ def __init__( cache_grid: whether to cache the identity sampling grid. If the spatial size is not dynamically defined by input image, enabling this option could accelerate the transform. - as_tensor_output: the computation is implemented using pytorch tensors, this option specifies - whether to convert it back to numpy arrays. device: device on which the tensor will be allocated. See also: - :py:class:`RandAffineGrid` for the random affine parameters configurations. - :py:class:`Affine` for the affine transformation parameters configurations. + + .. deprecated:: 0.6.0 + ``as_tensor_output`` is deprecated. + """ RandomizableTransform.__init__(self, prob) @@ -1488,10 +1605,9 @@ def __init__( shear_range=shear_range, translate_range=translate_range, scale_range=scale_range, - as_tensor_output=True, device=device, ) - self.resampler = Resample(as_tensor_output=as_tensor_output, device=device) + self.resampler = Resample(device=device) self.spatial_size = spatial_size self.cache_grid = cache_grid @@ -1519,7 +1635,7 @@ def _init_identity_cache(self): f"'spatial_size={self.spatial_size}', please specify 'spatial_size'." 
) return None - return torch.tensor(create_grid(spatial_size=_sp_size)).to(self.rand_affine_grid.device) + return create_grid(spatial_size=_sp_size, device=self.rand_affine_grid.device, backend="torch") def get_identity_grid(self, spatial_size: Sequence[int]): """ @@ -1533,7 +1649,11 @@ def get_identity_grid(self, spatial_size: Sequence[int]): spatial_size, [2] * ndim ): raise RuntimeError(f"spatial_size should not be dynamic, got {spatial_size}.") - return create_grid(spatial_size=spatial_size) if self._cached_grid is None else self._cached_grid + return ( + create_grid(spatial_size=spatial_size, device=self.rand_affine_grid.device, backend="torch") + if self._cached_grid is None + else self._cached_grid + ) def set_random_state( self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None @@ -1544,15 +1664,18 @@ def set_random_state( def randomize(self, data: Optional[Any] = None) -> None: super().randomize(None) + if not self._do_transform: + return None self.rand_affine_grid.randomize() def __call__( self, - img: Union[np.ndarray, torch.Tensor], + img: NdarrayOrTensor, spatial_size: Optional[Union[Sequence[int], int]] = None, mode: Optional[Union[GridSampleMode, str]] = None, padding_mode: Optional[Union[GridSamplePadMode, str]] = None, - ) -> Union[np.ndarray, torch.Tensor]: + randomize: bool = True, + ) -> NdarrayOrTensor: """ Args: img: shape must be (num_channels, H, W[, D]), @@ -1567,21 +1690,25 @@ def __call__( padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} Padding mode for outside grid values. Defaults to ``self.padding_mode``. See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + randomize: whether to execute `randomize()` function first, default to True. + """ - self.randomize() + if randomize: + self.randomize() + # if not doing transform and spatial size doesn't change, nothing to do - # except convert to float and convert numpy/torch + # except convert to float and device sp_size = fall_back_tuple(spatial_size or self.spatial_size, img.shape[1:]) do_resampling = self._do_transform or (sp_size != ensure_tuple(img.shape[1:])) if not do_resampling: - img = img.float() if isinstance(img, torch.Tensor) else img.astype("float32") - return torch.Tensor(img) if self.resampler.as_tensor_output else np.array(img) + img, *_ = convert_data_type(img, dtype=torch.float32, device=self.resampler.device) grid = self.get_identity_grid(sp_size) if self._do_transform: grid = self.rand_affine_grid(grid=grid) - return self.resampler( + out: NdarrayOrTensor = self.resampler( img=img, grid=grid, mode=mode or self.mode, padding_mode=padding_mode or self.padding_mode ) + return out class Rand2DElastic(RandomizableTransform): @@ -1591,6 +1718,9 @@ class Rand2DElastic(RandomizableTransform): """ + backend = Resample.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, spacing: Union[Tuple[float, float], float], @@ -1645,13 +1775,15 @@ def __init__( padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} Padding mode for outside grid values. Defaults to ``"reflection"``. See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample - as_tensor_output: the computation is implemented using pytorch tensors, this option specifies - whether to convert it back to numpy arrays. device: device on which the tensor will be allocated. See also: - :py:class:`RandAffineGrid` for the random affine parameters configurations. - :py:class:`Affine` for the affine transformation parameters configurations. + + .. 
deprecated:: 0.6.0 + ``as_tensor_output`` is deprecated. + """ RandomizableTransform.__init__(self, prob) self.deform_grid = RandDeformGrid( @@ -1662,11 +1794,11 @@ def __init__( shear_range=shear_range, translate_range=translate_range, scale_range=scale_range, - as_tensor_output=True, device=device, ) - self.resampler = Resample(as_tensor_output=as_tensor_output, device=device) + self.resampler = Resample(device=device) + self.device = device self.spatial_size = spatial_size self.mode: GridSampleMode = look_up_option(mode, GridSampleMode) self.padding_mode: GridSamplePadMode = look_up_option(padding_mode, GridSamplePadMode) @@ -1681,16 +1813,19 @@ def set_random_state( def randomize(self, spatial_size: Sequence[int]) -> None: super().randomize(None) + if not self._do_transform: + return None self.deform_grid.randomize(spatial_size) self.rand_affine_grid.randomize() def __call__( self, - img: Union[np.ndarray, torch.Tensor], + img: NdarrayOrTensor, spatial_size: Optional[Union[Tuple[int, int], int]] = None, mode: Optional[Union[GridSampleMode, str]] = None, padding_mode: Optional[Union[GridSamplePadMode, str]] = None, - ) -> Union[np.ndarray, torch.Tensor]: + randomize: bool = True, + ) -> NdarrayOrTensor: """ Args: img: shape must be (num_channels, H, W), @@ -1703,23 +1838,30 @@ def __call__( padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} Padding mode for outside grid values. Defaults to ``self.padding_mode``. See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + randomize: whether to execute `randomize()` function first, default to True. """ sp_size = fall_back_tuple(spatial_size or self.spatial_size, img.shape[1:]) - self.randomize(spatial_size=sp_size) + if randomize: + self.randomize(spatial_size=sp_size) + if self._do_transform: grid = self.deform_grid(spatial_size=sp_size) grid = self.rand_affine_grid(grid=grid) grid = torch.nn.functional.interpolate( # type: ignore recompute_scale_factor=True, - input=torch.as_tensor(grid).unsqueeze(0), + input=grid.unsqueeze(0), scale_factor=list(ensure_tuple(self.deform_grid.spacing)), mode=InterpolateMode.BICUBIC.value, align_corners=False, ) grid = CenterSpatialCrop(roi_size=sp_size)(grid[0]) else: - grid = create_grid(spatial_size=sp_size) - return self.resampler(img, grid, mode=mode or self.mode, padding_mode=padding_mode or self.padding_mode) + _device = img.device if isinstance(img, torch.Tensor) else self.device + grid = create_grid(spatial_size=sp_size, device=_device, backend="torch") + out: NdarrayOrTensor = self.resampler( + img, grid, mode=mode or self.mode, padding_mode=padding_mode or self.padding_mode + ) + return out class Rand3DElastic(RandomizableTransform): @@ -1729,6 +1871,9 @@ class Rand3DElastic(RandomizableTransform): """ + backend = Resample.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, sigma_range: Tuple[float, float], @@ -1786,17 +1931,25 @@ def __init__( padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} Padding mode for outside grid values. Defaults to ``"reflection"``. See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample - as_tensor_output: the computation is implemented using pytorch tensors, this option specifies - whether to convert it back to numpy arrays. device: device on which the tensor will be allocated. See also: - :py:class:`RandAffineGrid` for the random affine parameters configurations. - :py:class:`Affine` for the affine transformation parameters configurations. + + .. 
deprecated:: 0.6.0 + ``as_tensor_output`` is deprecated. + """ RandomizableTransform.__init__(self, prob) - self.rand_affine_grid = RandAffineGrid(rotate_range, shear_range, translate_range, scale_range, True, device) - self.resampler = Resample(as_tensor_output=as_tensor_output, device=device) + self.rand_affine_grid = RandAffineGrid( + rotate_range=rotate_range, + shear_range=shear_range, + translate_range=translate_range, + scale_range=scale_range, + device=device, + ) + self.resampler = Resample(device=device) self.sigma_range = sigma_range self.magnitude_range = magnitude_range @@ -1818,19 +1971,21 @@ def set_random_state( def randomize(self, grid_size: Sequence[int]) -> None: super().randomize(None) - if self._do_transform: - self.rand_offset = self.R.uniform(-1.0, 1.0, [3] + list(grid_size)).astype(np.float32) + if not self._do_transform: + return None + self.rand_offset = self.R.uniform(-1.0, 1.0, [3] + list(grid_size)).astype(np.float32) self.magnitude = self.R.uniform(self.magnitude_range[0], self.magnitude_range[1]) self.sigma = self.R.uniform(self.sigma_range[0], self.sigma_range[1]) self.rand_affine_grid.randomize() def __call__( self, - img: Union[np.ndarray, torch.Tensor], + img: NdarrayOrTensor, spatial_size: Optional[Union[Tuple[int, int, int], int]] = None, mode: Optional[Union[GridSampleMode, str]] = None, padding_mode: Optional[Union[GridSamplePadMode, str]] = None, - ) -> Union[np.ndarray, torch.Tensor]: + randomize: bool = True, + ) -> NdarrayOrTensor: """ Args: img: shape must be (num_channels, H, W, D), @@ -1843,19 +1998,25 @@ def __call__( padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} Padding mode for outside grid values. Defaults to ``self.padding_mode``. See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + randomize: whether to execute `randomize()` function first, default to True. """ sp_size = fall_back_tuple(spatial_size or self.spatial_size, img.shape[1:]) - self.randomize(grid_size=sp_size) - grid = create_grid(spatial_size=sp_size) + if randomize: + self.randomize(grid_size=sp_size) + + _device = img.device if isinstance(img, torch.Tensor) else self.device + grid = create_grid(spatial_size=sp_size, device=_device, backend="torch") if self._do_transform: if self.rand_offset is None: - raise AssertionError - grid = torch.as_tensor(np.ascontiguousarray(grid), device=self.device) - gaussian = GaussianFilter(3, self.sigma, 3.0).to(device=self.device) - offset = torch.as_tensor(self.rand_offset, device=self.device).unsqueeze(0) + raise RuntimeError("rand_offset is not initialized.") + gaussian = GaussianFilter(3, self.sigma, 3.0).to(device=_device) + offset = torch.as_tensor(self.rand_offset, device=_device).unsqueeze(0) grid[:3] += gaussian(offset)[0] * self.magnitude grid = self.rand_affine_grid(grid=grid) - return self.resampler(img, grid, mode=mode or self.mode, padding_mode=padding_mode or self.padding_mode) + out: NdarrayOrTensor = self.resampler( + img, grid, mode=mode or self.mode, padding_mode=padding_mode or self.padding_mode + ) + return out class AddCoordinateChannels(Transform): @@ -1868,10 +2029,9 @@ class AddCoordinateChannels(Transform): Liu, R. et al. An Intriguing Failing of Convolutional Neural Networks and the CoordConv Solution, NeurIPS 2018. 
""" - def __init__( - self, - spatial_channels: Sequence[int], - ) -> None: + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + + def __init__(self, spatial_channels: Sequence[int]) -> None: """ Args: spatial_channels: the spatial dimensions that are to have their coordinates encoded in a channel and @@ -1880,7 +2040,7 @@ def __init__( """ self.spatial_channels = spatial_channels - def __call__(self, img: Union[np.ndarray, torch.Tensor]): + def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: """ Args: img: data to be transformed, assuming `img` is channel first. @@ -1895,7 +2055,171 @@ def __call__(self, img: Union[np.ndarray, torch.Tensor]): spatial_dims = img.shape[1:] coord_channels = np.array(np.meshgrid(*tuple(np.linspace(-0.5, 0.5, s) for s in spatial_dims), indexing="ij")) + coord_channels, *_ = convert_to_dst_type(coord_channels, img) # type: ignore # only keep required dimensions. need to subtract 1 since im will be 0-based # but user input is 1-based (because channel dim is 0) coord_channels = coord_channels[[s - 1 for s in self.spatial_channels]] - return np.concatenate((img, coord_channels), axis=0) + return concatenate((img, coord_channels), axis=0) + + +class GridDistortion(Transform): + + backend = [TransformBackends.TORCH] + + def __init__( + self, + num_cells: Union[Tuple[int], int], + distort_steps: Sequence[Sequence[float]], + mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR, + padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.BORDER, + device: Optional[torch.device] = None, + ) -> None: + """ + Grid distortion transform. Refer to: + https://github.com/albumentations-team/albumentations/blob/master/albumentations/augmentations/transforms.py + + Args: + num_cells: number of grid cells on each dimension. + distort_steps: This argument is a list of tuples, where each tuple contains the distort steps of the + corresponding dimensions (in the order of H, W[, D]). The length of each tuple equals to `num_cells + 1`. + Each value in the tuple represents the distort step of the related cell. + mode: {``"bilinear"``, ``"nearest"``} + Interpolation mode to calculate output values. Defaults to ``"bilinear"``. + See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} + Padding mode for outside grid values. Defaults to ``"border"``. + See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + device: device on which the tensor will be allocated. + + """ + self.resampler = Resample(mode=mode, padding_mode=padding_mode, device=device) + self.num_cells = num_cells + self.distort_steps = distort_steps + self.device = device + + def __call__( + self, + img: NdarrayOrTensor, + distort_steps: Optional[Sequence[Sequence]] = None, + mode: Optional[Union[GridSampleMode, str]] = None, + padding_mode: Optional[Union[GridSamplePadMode, str]] = None, + ) -> NdarrayOrTensor: + """ + Args: + img: shape must be (num_channels, H, W[, D]). + distort_steps: This argument is a list of tuples, where each tuple contains the distort steps of the + corresponding dimensions (in the order of H, W[, D]). The length of each tuple equals to `num_cells + 1`. + Each value in the tuple represents the distort step of the related cell. + mode: {``"bilinear"``, ``"nearest"``} + Interpolation mode to calculate output values. Defaults to ``"bilinear"``. 
+ See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} + Padding mode for outside grid values. Defaults to ``"border"``. + See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + + """ + distort_steps = self.distort_steps if distort_steps is None else distort_steps + if len(img.shape) != len(distort_steps) + 1: + raise ValueError("the spatial size of `img` does not match with the length of `distort_steps`") + + all_ranges = [] + num_cells = ensure_tuple_rep(self.num_cells, len(img.shape) - 1) + for dim_idx, dim_size in enumerate(img.shape[1:]): + dim_distort_steps = distort_steps[dim_idx] + ranges = torch.zeros(dim_size, dtype=torch.float32) + cell_size = dim_size // num_cells[dim_idx] + prev = 0 + for idx in range(num_cells[dim_idx] + 1): + start = int(idx * cell_size) + end = start + cell_size + if end > dim_size: + end = dim_size + cur = dim_size + else: + cur = prev + cell_size * dim_distort_steps[idx] + ranges[start:end] = torch.linspace(prev, cur, end - start) + prev = cur + ranges = ranges - (dim_size - 1.0) / 2.0 + all_ranges.append(ranges) + + coords = torch.meshgrid(*all_ranges) + grid = torch.stack([*coords, torch.ones_like(coords[0])]) + + return self.resampler(img, grid=grid, mode=mode, padding_mode=padding_mode) # type: ignore + + +class RandGridDistortion(RandomizableTransform): + + backend = [TransformBackends.TORCH] + + def __init__( + self, + num_cells: Union[Tuple[int], int] = 5, + prob: float = 0.1, + distort_limit: Union[Tuple[float, float], float] = (-0.03, 0.03), + mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR, + padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.BORDER, + device: Optional[torch.device] = None, + ) -> None: + """ + Random grid distortion transform. Refer to: + https://github.com/albumentations-team/albumentations/blob/master/albumentations/augmentations/transforms.py + + Args: + num_cells: number of grid cells on each dimension. + prob: probability of returning a randomized grid distortion transform. Defaults to 0.1. + distort_limit: range to randomly distort. + If single number, distort_limit is picked from (-distort_limit, distort_limit). + Defaults to (-0.03, 0.03). + mode: {``"bilinear"``, ``"nearest"``} + Interpolation mode to calculate output values. Defaults to ``"bilinear"``. + See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} + Padding mode for outside grid values. Defaults to ``"border"``. + See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + device: device on which the tensor will be allocated. 
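To make the `distort_steps` contract concrete, a minimal sketch based on the signatures above (array sizes and step values are arbitrary):

    import numpy as np
    from monai.transforms import GridDistortion

    img = np.random.rand(1, 32, 32).astype(np.float32)  # (num_channels, H, W)
    num_cells = 2
    # one tuple per spatial dim, each of length num_cells + 1
    distort_steps = [(1.0, 0.97, 1.03), (1.02, 1.0, 0.98)]
    out = GridDistortion(num_cells=num_cells, distort_steps=distort_steps)(img)
    print(out.shape)  # (1, 32, 32)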
+ + """ + RandomizableTransform.__init__(self, prob) + self.num_cells = num_cells + if isinstance(distort_limit, (int, float)): + self.distort_limit = (min(-distort_limit, distort_limit), max(-distort_limit, distort_limit)) + else: + self.distort_limit = (min(distort_limit), max(distort_limit)) + self.distort_steps: Sequence[Sequence[float]] = ((1.0,),) + self.grid_distortion = GridDistortion( + num_cells=num_cells, distort_steps=self.distort_steps, mode=mode, padding_mode=padding_mode, device=device + ) + + def randomize(self, spatial_shape: Sequence[int]) -> None: + super().randomize(None) + if not self._do_transform: + return + self.distort_steps = tuple( + tuple(1.0 + self.R.uniform(low=self.distort_limit[0], high=self.distort_limit[1], size=n_cells + 1)) + for n_cells in ensure_tuple_rep(self.num_cells, len(spatial_shape)) + ) + + def __call__( + self, + img: NdarrayOrTensor, + mode: Optional[Union[GridSampleMode, str]] = None, + padding_mode: Optional[Union[GridSamplePadMode, str]] = None, + randomize: bool = True, + ) -> NdarrayOrTensor: + """ + Args: + img: shape must be (num_channels, H, W[, D]). + mode: {``"bilinear"``, ``"nearest"``} + Interpolation mode to calculate output values. Defaults to ``"bilinear"``. + See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} + Padding mode for outside grid values. Defaults to ``"border"``. + See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + randomize: whether to shuffle the random factors using `randomize()`, default to True. + """ + if randomize: + self.randomize(img.shape[1:]) + if not self._do_transform: + return img + return self.grid_distortion(img, distort_steps=self.distort_steps, mode=mode, padding_mode=padding_mode) diff --git a/monai/transforms/spatial/dictionary.py b/monai/transforms/spatial/dictionary.py index b0558a6556..3e1e2a9d48 100644 --- a/monai/transforms/spatial/dictionary.py +++ b/monai/transforms/spatial/dictionary.py @@ -17,7 +17,7 @@ from copy import deepcopy from enum import Enum -from typing import Any, Dict, Hashable, Mapping, Optional, Sequence, Tuple, Union +from typing import Any, Dict, Hashable, List, Mapping, Optional, Sequence, Tuple, Union import numpy as np import torch @@ -33,10 +33,16 @@ Affine, AffineGrid, Flip, + GridDistortion, Orientation, Rand2DElastic, Rand3DElastic, RandAffine, + RandAxisFlip, + RandFlip, + RandGridDistortion, + RandRotate, + RandZoom, Resize, Rotate, Rotate90, @@ -50,12 +56,15 @@ GridSamplePadMode, InterpolateMode, NumpyPadMode, + PytorchPadMode, ensure_tuple, ensure_tuple_rep, fall_back_tuple, ) +from monai.utils.deprecate_utils import deprecated_arg from monai.utils.enums import InverseKeys from monai.utils.module import optional_import +from monai.utils.type_conversion import convert_data_type, convert_to_dst_type nib, _ = optional_import("nibabel") @@ -71,6 +80,8 @@ "Rand3DElasticd", "Flipd", "RandFlipd", + "GridDistortiond", + "RandGridDistortiond", "RandAxisFlipd", "Rotated", "RandRotated", @@ -98,6 +109,10 @@ "FlipDict", "RandFlipD", "RandFlipDict", + "GridDistortionD", + "GridDistortionDict", + "RandGridDistortionD", + "RandGridDistortionDict", "RandAxisFlipD", "RandAxisFlipDict", "RotateD", @@ -115,7 +130,7 @@ GridSampleModeSequence = Union[Sequence[Union[GridSampleMode, str]], GridSampleMode, str] GridSamplePadModeSequence = Union[Sequence[Union[GridSamplePadMode, str]], GridSamplePadMode, str] InterpolateModeSequence = Union[Sequence[Union[InterpolateMode, str]], 
InterpolateMode, str] -NumpyPadModeSequence = Union[Sequence[Union[NumpyPadMode, str]], NumpyPadMode, str] +PadModeSequence = Union[Sequence[Union[NumpyPadMode, PytorchPadMode, str]], NumpyPadMode, PytorchPadMode, str] class Spacingd(MapTransform, InvertibleTransform): @@ -132,6 +147,8 @@ class Spacingd(MapTransform, InvertibleTransform): :py:class:`monai.transforms.Spacing` """ + backend = Spacing.backend + def __init__( self, keys: KeysCollection, @@ -208,8 +225,8 @@ def __init__( self.meta_key_postfix = ensure_tuple_rep(meta_key_postfix, len(self.keys)) def __call__( - self, data: Mapping[Union[Hashable, str], Dict[str, np.ndarray]] - ) -> Dict[Union[Hashable, str], Union[np.ndarray, Dict[str, np.ndarray]]]: + self, data: Mapping[Union[Hashable, str], Dict[str, NdarrayOrTensor]] + ) -> Dict[Hashable, NdarrayOrTensor]: d: Dict = dict(data) for key, mode, padding_mode, align_corners, dtype, meta_key, meta_key_postfix in self.key_iterator( d, self.mode, self.padding_mode, self.align_corners, self.dtype, self.meta_keys, self.meta_key_postfix @@ -223,7 +240,7 @@ def __call__( # using affine fetched from d[affine_key] original_spatial_shape = d[key].shape[1:] d[key], old_affine, new_affine = self.spacing_transform( - data_array=np.asarray(d[key]), + data_array=d[key], affine=meta_data["affine"], mode=mode, padding_mode=padding_mode, @@ -246,7 +263,7 @@ def __call__( meta_data["affine"] = new_affine return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key, dtype in self.key_iterator(d, self.dtype): transform = self.get_most_recent_transform(d, key) @@ -266,15 +283,15 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar inverse_transform = Spacing(orig_pixdim, diagonal=self.spacing_transform.diagonal) # Apply inverse d[key], _, new_affine = inverse_transform( - data_array=np.asarray(d[key]), - affine=meta_data["affine"], + data_array=d[key], + affine=meta_data["affine"], # type: ignore mode=mode, padding_mode=padding_mode, align_corners=False if align_corners == "none" else align_corners, dtype=dtype, output_spatial_shape=orig_size, ) - meta_data["affine"] = new_affine + meta_data["affine"] = new_affine # type: ignore # Remove the applied transform self.pop_transform(d, key) @@ -292,6 +309,8 @@ class Orientationd(MapTransform, InvertibleTransform): to the `affine` field of metadata which is formed by ``key_{meta_key_postfix}``. 
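A minimal sketch of the metadata convention described above (hypothetical affine; requires nibabel):

    import numpy as np
    from monai.transforms import Orientationd

    data = {
        "image": np.zeros((1, 8, 8, 8), dtype=np.float32),
        # located via the ``key_{meta_key_postfix}`` convention, i.e. "image" + "_meta_dict"
        "image_meta_dict": {"affine": np.diag([-1.0, -1.0, 1.0, 1.0])},
    }
    out = Orientationd(keys="image", axcodes="RAS")(data)
    print(out["image_meta_dict"]["affine"])  # the reoriented affine is written back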
""" + backend = Orientation.backend + def __init__( self, keys: KeysCollection, @@ -341,8 +360,8 @@ def __init__( self.meta_key_postfix = ensure_tuple_rep(meta_key_postfix, len(self.keys)) def __call__( - self, data: Mapping[Union[Hashable, str], Dict[str, np.ndarray]] - ) -> Dict[Union[Hashable, str], Union[np.ndarray, Dict[str, np.ndarray]]]: + self, data: Mapping[Union[Hashable, str], Dict[str, NdarrayOrTensor]] + ) -> Dict[Hashable, NdarrayOrTensor]: d: Dict = dict(data) for key, meta_key, meta_key_postfix in self.key_iterator(d, self.meta_keys, self.meta_key_postfix): meta_key = meta_key or f"{key}_{meta_key_postfix}" @@ -355,18 +374,16 @@ def __call__( d[meta_key]["affine"] = new_affine return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) # Create inverse transform - meta_data = d[transform[InverseKeys.EXTRA_INFO]["meta_key"]] + meta_data: Dict = d[transform[InverseKeys.EXTRA_INFO]["meta_key"]] # type: ignore orig_affine = transform[InverseKeys.EXTRA_INFO]["old_affine"] orig_axcodes = nib.orientations.aff2axcodes(orig_affine) inverse_transform = Orientation( - axcodes=orig_axcodes, - as_closest_canonical=False, - labels=self.ornt_transform.labels, + axcodes=orig_axcodes, as_closest_canonical=False, labels=self.ornt_transform.labels ) # Apply inverse d[key], _, new_affine = inverse_transform(d[key], affine=meta_data["affine"]) @@ -382,6 +399,8 @@ class Rotate90d(MapTransform, InvertibleTransform): Dictionary-based wrapper of :py:class:`monai.transforms.Rotate90`. """ + backend = Rotate90.backend + def __init__( self, keys: KeysCollection, k: int = 1, spatial_axes: Tuple[int, int] = (0, 1), allow_missing_keys: bool = False ) -> None: @@ -395,14 +414,14 @@ def __init__( super().__init__(keys, allow_missing_keys) self.rotator = Rotate90(k, spatial_axes) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): self.push_transform(d, key) d[key] = self.rotator(d[key]) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): _ = self.get_most_recent_transform(d, key) @@ -411,9 +430,6 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar num_times_rotated = self.rotator.k num_times_to_rotate = 4 - num_times_rotated inverse_transform = Rotate90(num_times_to_rotate, spatial_axes) - # Might need to convert to numpy - if isinstance(d[key], torch.Tensor): - d[key] = torch.Tensor(d[key]).cpu().numpy() # Apply inverse d[key] = inverse_transform(d[key]) # Remove the applied transform @@ -429,6 +445,8 @@ class RandRotate90d(RandomizableTransform, MapTransform, InvertibleTransform): in the plane specified by `spatial_axes`. 
""" + backend = Rotate90.backend + def __init__( self, keys: KeysCollection, @@ -461,10 +479,12 @@ def randomize(self, data: Optional[Any] = None) -> None: self._rand_k = self.R.randint(self.max_k) + 1 super().randomize(None) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Mapping[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Mapping[Hashable, NdarrayOrTensor]: self.randomize() d = dict(data) + # FIXME: here we didn't use array version `RandRotate90` transform as others, because we need + # to be compatible with the random status of some previous integration tests rotator = Rotate90(self._rand_k, self.spatial_axes) for key in self.key_iterator(d): if self._do_transform: @@ -472,7 +492,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Mapping[Hashable, np. self.push_transform(d, key, extra_info={"rand_k": self._rand_k}) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) @@ -482,9 +502,6 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar num_times_rotated = transform[InverseKeys.EXTRA_INFO]["rand_k"] num_times_to_rotate = 4 - num_times_rotated inverse_transform = Rotate90(num_times_to_rotate, self.spatial_axes) - # Might need to convert to numpy - if isinstance(d[key], torch.Tensor): - d[key] = torch.Tensor(d[key]).cpu().numpy() # Apply inverse d[key] = inverse_transform(d[key]) # Remove the applied transform @@ -520,6 +537,8 @@ class Resized(MapTransform, InvertibleTransform): allow_missing_keys: don't raise exception if key is missing. """ + backend = Resize.backend + def __init__( self, keys: KeysCollection, @@ -534,7 +553,7 @@ def __init__( self.align_corners = ensure_tuple_rep(align_corners, len(self.keys)) self.resizer = Resize(spatial_size=spatial_size, size_mode=size_mode) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key, mode, align_corners in self.key_iterator(d, self.mode, self.align_corners): self.push_transform( @@ -548,7 +567,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda d[key] = self.resizer(d[key], mode=mode, align_corners=align_corners) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) @@ -557,9 +576,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar align_corners = transform[InverseKeys.EXTRA_INFO]["align_corners"] # Create inverse transform inverse_transform = Resize( - spatial_size=orig_size, - mode=mode, - align_corners=None if align_corners == "none" else align_corners, + spatial_size=orig_size, mode=mode, align_corners=None if align_corners == "none" else align_corners ) # Apply inverse transform d[key] = inverse_transform(d[key]) @@ -574,6 +591,9 @@ class Affined(MapTransform, InvertibleTransform): Dictionary-based wrapper of :py:class:`monai.transforms.Affine`. 
""" + backend = Affine.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, keys: KeysCollection, @@ -584,7 +604,7 @@ def __init__( spatial_size: Optional[Union[Sequence[int], int]] = None, mode: GridSampleModeSequence = GridSampleMode.BILINEAR, padding_mode: GridSamplePadModeSequence = GridSamplePadMode.REFLECTION, - as_tensor_output: bool = False, + as_tensor_output: bool = True, device: Optional[torch.device] = None, allow_missing_keys: bool = False, ) -> None: @@ -621,14 +641,16 @@ def __init__( Padding mode for outside grid values. Defaults to ``"reflection"``. See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample It also can be a sequence of string, each element corresponds to a key in ``keys``. - as_tensor_output: the computation is implemented using pytorch tensors, this option specifies - whether to convert it back to numpy arrays. device: device on which the tensor will be allocated. allow_missing_keys: don't raise exception if key is missing. See also: - :py:class:`monai.transforms.compose.MapTransform` - :py:class:`RandAffineGrid` for the random affine parameters configurations. + + .. deprecated:: 0.6.0 + ``as_tensor_output`` is deprecated. + """ MapTransform.__init__(self, keys, allow_missing_keys) self.affine = Affine( @@ -637,15 +659,12 @@ def __init__( translate_params=translate_params, scale_params=scale_params, spatial_size=spatial_size, - as_tensor_output=as_tensor_output, device=device, ) self.mode = ensure_tuple_rep(mode, len(self.keys)) self.padding_mode = ensure_tuple_rep(padding_mode, len(self.keys)) - def __call__( - self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor]] - ) -> Dict[Hashable, Union[np.ndarray, torch.Tensor]]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key, mode, padding_mode in self.key_iterator(d, self.mode, self.padding_mode): orig_size = d[key].shape[1:] @@ -662,7 +681,7 @@ def __call__( ) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): @@ -678,10 +697,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar grid, _ = affine_grid(orig_size) # type: ignore # Apply inverse transform - out = self.affine.resampler(d[key], grid, mode, padding_mode) - - # Convert to numpy - d[key] = out if isinstance(out, np.ndarray) else out.cpu().numpy() + d[key] = self.affine.resampler(d[key], grid, mode, padding_mode) # Remove the applied transform self.pop_transform(d, key) @@ -694,6 +710,9 @@ class RandAffined(RandomizableTransform, MapTransform, InvertibleTransform): Dictionary-based wrapper of :py:class:`monai.transforms.RandAffine`. """ + backend = RandAffine.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, keys: KeysCollection, @@ -754,14 +773,16 @@ def __init__( cache_grid: whether to cache the identity sampling grid. If the spatial size is not dynamically defined by input image, enabling this option could accelerate the transform. - as_tensor_output: the computation is implemented using pytorch tensors, this option specifies - whether to convert it back to numpy arrays. device: device on which the tensor will be allocated. allow_missing_keys: don't raise exception if key is missing. 
See also: - :py:class:`monai.transforms.compose.MapTransform` - :py:class:`RandAffineGrid` for the random affine parameters configurations. + + .. deprecated:: 0.6.0 + ``as_tensor_output`` is deprecated. + """ MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) @@ -773,7 +794,6 @@ def __init__( scale_range=scale_range, spatial_size=spatial_size, cache_grid=cache_grid, - as_tensor_output=as_tensor_output, device=device, ) self.mode = ensure_tuple_rep(mode, len(self.keys)) @@ -786,22 +806,19 @@ def set_random_state( super().set_random_state(seed, state) return self - def randomize(self, data: Optional[Any] = None) -> None: - super().randomize(None) + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: + d = dict(data) + self.randomize(None) + # all the keys share the same random Affine factor self.rand_affine.randomize() - def __call__( - self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor]] - ) -> Dict[Hashable, Union[np.ndarray, torch.Tensor]]: - d = dict(data) - self.randomize() + device = self.rand_affine.resampler.device sp_size = fall_back_tuple(self.rand_affine.spatial_size, data[self.keys[0]].shape[1:]) # change image size or do random transform do_resampling = self._do_transform or (sp_size != ensure_tuple(data[self.keys[0]].shape[1:])) - - # to be consistent with the self._do_transform case (dtype and device) - affine = torch.as_tensor(np.eye(len(sp_size) + 1), device=self.rand_affine.rand_affine_grid.device) + affine: torch.Tensor = torch.eye(len(sp_size) + 1, dtype=torch.float64, device=device) + # converting affine to tensor because the resampler currently only support torch backend grid = None if do_resampling: # need to prepare grid grid = self.rand_affine.get_identity_grid(sp_size) @@ -822,24 +839,16 @@ def __call__( # do the transform if do_resampling: d[key] = self.rand_affine.resampler(d[key], grid, mode=mode, padding_mode=padding_mode) - # if not doing transform and and spatial size is unchanged, only need to do numpy/torch conversion - else: - if self.rand_affine.resampler.as_tensor_output and not isinstance(d[key], torch.Tensor): - d[key] = torch.Tensor(d[key]) - elif not self.rand_affine.resampler.as_tensor_output and isinstance(d[key], torch.Tensor): - d[key] = d[key].detach().cpu().numpy() # type: ignore[union-attr] return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) # if transform was not performed and spatial size is None, nothing to do. 
- if not transform[InverseKeys.DO_TRANSFORM] and self.rand_affine.spatial_size is None: - out: Union[np.ndarray, torch.Tensor] = d[key] - else: + if transform[InverseKeys.DO_TRANSFORM] or self.rand_affine.spatial_size is not None: orig_size = transform[InverseKeys.ORIG_SIZE] # Create inverse transform fwd_affine = transform[InverseKeys.EXTRA_INFO]["affine"] @@ -851,10 +860,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar grid, _ = affine_grid(orig_size) # type: ignore # Apply inverse transform - out = self.rand_affine.resampler(d[key], grid, mode, padding_mode) - - # Convert to numpy - d[key] = out if isinstance(out, np.ndarray) else out.cpu().numpy() + d[key] = self.rand_affine.resampler(d[key], grid, mode, padding_mode) # Remove the applied transform self.pop_transform(d, key) @@ -867,6 +873,9 @@ class Rand2DElasticd(RandomizableTransform, MapTransform): Dictionary-based wrapper of :py:class:`monai.transforms.Rand2DElastic`. """ + backend = Rand2DElastic.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, keys: KeysCollection, @@ -927,14 +936,16 @@ def __init__( Padding mode for outside grid values. Defaults to ``"reflection"``. See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample It also can be a sequence of string, each element corresponds to a key in ``keys``. - as_tensor_output: the computation is implemented using pytorch tensors, this option specifies - whether to convert it back to numpy arrays. device: device on which the tensor will be allocated. allow_missing_keys: don't raise exception if key is missing. See also: - :py:class:`RandAffineGrid` for the random affine parameters configurations. - :py:class:`Affine` for the affine transformation parameters configurations. + + .. deprecated:: 0.6.0 + ``as_tensor_output`` is deprecated. 
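A minimal sketch of the class as wired above (parameter values are arbitrary):

    import numpy as np
    from monai.transforms import Rand2DElasticd

    elastic = Rand2DElasticd(
        keys="image",
        prob=1.0,
        spacing=(16, 16),        # control-point spacing of the deformation grid
        magnitude_range=(1, 2),  # offset magnitudes sampled at the control points
        spatial_size=(64, 64),
    )
    out = elastic({"image": np.random.rand(1, 64, 64).astype(np.float32)})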
+ """ MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) @@ -947,7 +958,6 @@ def __init__( translate_range=translate_range, scale_range=scale_range, spatial_size=spatial_size, - as_tensor_output=as_tensor_output, device=device, ) self.mode = ensure_tuple_rep(mode, len(self.keys)) @@ -960,17 +970,13 @@ def set_random_state( super().set_random_state(seed, state) return self - def randomize(self, spatial_size: Sequence[int]) -> None: - super().randomize(None) - self.rand_2d_elastic.randomize(spatial_size) - - def __call__( - self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor]] - ) -> Dict[Hashable, Union[np.ndarray, torch.Tensor]]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) + self.randomize(None) sp_size = fall_back_tuple(self.rand_2d_elastic.spatial_size, data[self.keys[0]].shape[1:]) - self.randomize(spatial_size=sp_size) + # all the keys share the same random elastic factor + self.rand_2d_elastic.randomize(sp_size) if self._do_transform: grid = self.rand_2d_elastic.deform_grid(spatial_size=sp_size) @@ -984,7 +990,8 @@ def __call__( ) grid = CenterSpatialCrop(roi_size=sp_size)(grid[0]) else: - grid = create_grid(spatial_size=sp_size) + _device = self.rand_2d_elastic.deform_grid.device + grid = create_grid(spatial_size=sp_size, device=_device, backend="torch") for key, mode, padding_mode in self.key_iterator(d, self.mode, self.padding_mode): d[key] = self.rand_2d_elastic.resampler(d[key], grid, mode=mode, padding_mode=padding_mode) @@ -996,6 +1003,9 @@ class Rand3DElasticd(RandomizableTransform, MapTransform): Dictionary-based wrapper of :py:class:`monai.transforms.Rand3DElastic`. """ + backend = Rand3DElastic.backend + + @deprecated_arg(name="as_tensor_output", since="0.6") def __init__( self, keys: KeysCollection, @@ -1058,14 +1068,16 @@ def __init__( Padding mode for outside grid values. Defaults to ``"reflection"``. See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample It also can be a sequence of string, each element corresponds to a key in ``keys``. - as_tensor_output: the computation is implemented using pytorch tensors, this option specifies - whether to convert it back to numpy arrays. device: device on which the tensor will be allocated. allow_missing_keys: don't raise exception if key is missing. See also: - :py:class:`RandAffineGrid` for the random affine parameters configurations. - :py:class:`Affine` for the affine transformation parameters configurations. + + .. deprecated:: 0.6.0 + ``as_tensor_output`` is deprecated. 
+ """ MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) @@ -1078,7 +1090,6 @@ def __init__( translate_range=translate_range, scale_range=scale_range, spatial_size=spatial_size, - as_tensor_output=as_tensor_output, device=device, ) self.mode = ensure_tuple_rep(mode, len(self.keys)) @@ -1091,23 +1102,20 @@ def set_random_state( super().set_random_state(seed, state) return self - def randomize(self, grid_size: Sequence[int]) -> None: - super().randomize(None) - self.rand_3d_elastic.randomize(grid_size) - - def __call__( - self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor]] - ) -> Dict[Hashable, Union[np.ndarray, torch.Tensor]]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) + self.randomize(None) + sp_size = fall_back_tuple(self.rand_3d_elastic.spatial_size, data[self.keys[0]].shape[1:]) + # all the keys share the same random elastic factor + self.rand_3d_elastic.randomize(sp_size) - self.randomize(grid_size=sp_size) - grid = create_grid(spatial_size=sp_size) + _device = self.rand_3d_elastic.device + grid = create_grid(spatial_size=sp_size, device=_device, backend="torch") if self._do_transform: device = self.rand_3d_elastic.device - grid = torch.tensor(grid).to(device) gaussian = GaussianFilter(spatial_dims=3, sigma=self.rand_3d_elastic.sigma, truncated=3.0).to(device) - offset = torch.tensor(self.rand_3d_elastic.rand_offset, device=device).unsqueeze(0) + offset = torch.as_tensor(self.rand_3d_elastic.rand_offset, device=device).unsqueeze(0) grid[:3] += gaussian(offset)[0] * self.rand_3d_elastic.magnitude grid = self.rand_3d_elastic.rand_affine_grid(grid=grid) @@ -1173,7 +1181,7 @@ class RandFlipd(RandomizableTransform, MapTransform, InvertibleTransform): allow_missing_keys: don't raise exception if key is missing. 
""" - backend = Flip.backend + backend = RandFlip.backend def __init__( self, @@ -1184,16 +1192,22 @@ def __init__( ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) - self.spatial_axis = spatial_axis + self.flipper = RandFlip(prob=1.0, spatial_axis=spatial_axis) - self.flipper = Flip(spatial_axis=spatial_axis) + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandFlipd": + super().set_random_state(seed, state) + self.flipper.set_random_state(seed, state) + return self def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: - self.randomize(None) d = dict(data) + self.randomize(None) + for key in self.key_iterator(d): if self._do_transform: - d[key] = self.flipper(d[key]) + d[key] = self.flipper(d[key], randomize=False) self.push_transform(d, key) return d @@ -1204,7 +1218,7 @@ def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, Nd # Check if random transform was actually performed (based on `prob`) if transform[InverseKeys.DO_TRANSFORM]: # Inverse is same as forward - d[key] = self.flipper(d[key]) + d[key] = self.flipper(d[key], randomize=False) # Remove the applied transform self.pop_transform(d, key) return d @@ -1224,26 +1238,30 @@ class RandAxisFlipd(RandomizableTransform, MapTransform, InvertibleTransform): """ - backend = Flip.backend + backend = RandAxisFlip.backend def __init__(self, keys: KeysCollection, prob: float = 0.1, allow_missing_keys: bool = False) -> None: MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) - self._axis: Optional[int] = None + self.flipper = RandAxisFlip(prob=1.0) - def randomize(self, data: NdarrayOrTensor) -> None: - super().randomize(None) - self._axis = self.R.randint(data.ndim - 1) + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandAxisFlipd": + super().set_random_state(seed, state) + self.flipper.set_random_state(seed, state) + return self def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: - self.randomize(data=data[self.keys[0]]) - flipper = Flip(spatial_axis=self._axis) - d = dict(data) + self.randomize(None) + + # all the keys share the same random selected axis + self.flipper.randomize(d[self.keys[0]]) for key in self.key_iterator(d): if self._do_transform: - d[key] = flipper(d[key]) - self.push_transform(d, key, extra_info={"axis": self._axis}) + d[key] = self.flipper(d[key], randomize=False) + self.push_transform(d, key, extra_info={"axis": self.flipper._axis}) return d def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: @@ -1288,6 +1306,8 @@ class Rotated(MapTransform, InvertibleTransform): allow_missing_keys: don't raise exception if key is missing. 
""" + backend = Rotate.backend + def __init__( self, keys: KeysCollection, @@ -1296,7 +1316,7 @@ def __init__( mode: GridSampleModeSequence = GridSampleMode.BILINEAR, padding_mode: GridSamplePadModeSequence = GridSamplePadMode.BORDER, align_corners: Union[Sequence[bool], bool] = False, - dtype: Union[Sequence[DtypeLike], DtypeLike] = np.float64, + dtype: Union[Sequence[Union[DtypeLike, torch.dtype]], Union[DtypeLike, torch.dtype]] = np.float64, allow_missing_keys: bool = False, ) -> None: super().__init__(keys, allow_missing_keys) @@ -1307,18 +1327,14 @@ def __init__( self.align_corners = ensure_tuple_rep(align_corners, len(self.keys)) self.dtype = ensure_tuple_rep(dtype, len(self.keys)) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key, mode, padding_mode, align_corners, dtype in self.key_iterator( d, self.mode, self.padding_mode, self.align_corners, self.dtype ): orig_size = d[key].shape[1:] d[key] = self.rotator( - d[key], - mode=mode, - padding_mode=padding_mode, - align_corners=align_corners, - dtype=dtype, + d[key], mode=mode, padding_mode=padding_mode, align_corners=align_corners, dtype=dtype ) rot_mat = self.rotator.get_rotation_matrix() self.push_transform( @@ -1334,7 +1350,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda ) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key, dtype in self.key_iterator(d, self.dtype): transform = self.get_most_recent_transform(d, key) @@ -1352,12 +1368,14 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar align_corners=False if align_corners == "none" else align_corners, reverse_indexing=True, ) - output = xform( - torch.as_tensor(np.ascontiguousarray(d[key]).astype(dtype)).unsqueeze(0), - torch.as_tensor(np.ascontiguousarray(inv_rot_mat).astype(dtype)), - spatial_size=transform[InverseKeys.ORIG_SIZE], - ) - d[key] = np.asarray(output.squeeze(0).detach().cpu().numpy(), dtype=np.float32) + img_t: torch.Tensor + img_t, *_ = convert_data_type(d[key], torch.Tensor, dtype=dtype) # type: ignore + transform_t: torch.Tensor + transform_t, *_ = convert_to_dst_type(inv_rot_mat, img_t) # type: ignore + + out = xform(img_t.unsqueeze(0), transform_t, spatial_size=transform[InverseKeys.ORIG_SIZE]).squeeze(0) + out, *_ = convert_to_dst_type(out, dst=d[key], dtype=out.dtype) + d[key] = out # Remove the applied transform self.pop_transform(d, key) @@ -1399,6 +1417,8 @@ class RandRotated(RandomizableTransform, MapTransform, InvertibleTransform): allow_missing_keys: don't raise exception if key is missing. 
""" + backend = RandRotate.backend + def __init__( self, keys: KeysCollection, @@ -1410,64 +1430,49 @@ def __init__( mode: GridSampleModeSequence = GridSampleMode.BILINEAR, padding_mode: GridSamplePadModeSequence = GridSamplePadMode.BORDER, align_corners: Union[Sequence[bool], bool] = False, - dtype: Union[Sequence[DtypeLike], DtypeLike] = np.float64, + dtype: Union[Sequence[Union[DtypeLike, torch.dtype]], Union[DtypeLike, torch.dtype]] = np.float64, allow_missing_keys: bool = False, ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) - self.range_x = ensure_tuple(range_x) - if len(self.range_x) == 1: - self.range_x = tuple(sorted([-self.range_x[0], self.range_x[0]])) - self.range_y = ensure_tuple(range_y) - if len(self.range_y) == 1: - self.range_y = tuple(sorted([-self.range_y[0], self.range_y[0]])) - self.range_z = ensure_tuple(range_z) - if len(self.range_z) == 1: - self.range_z = tuple(sorted([-self.range_z[0], self.range_z[0]])) - - self.keep_size = keep_size + self.rand_rotate = RandRotate(range_x=range_x, range_y=range_y, range_z=range_z, prob=1.0, keep_size=keep_size) self.mode = ensure_tuple_rep(mode, len(self.keys)) self.padding_mode = ensure_tuple_rep(padding_mode, len(self.keys)) self.align_corners = ensure_tuple_rep(align_corners, len(self.keys)) self.dtype = ensure_tuple_rep(dtype, len(self.keys)) - self.x = 0.0 - self.y = 0.0 - self.z = 0.0 - - def randomize(self, data: Optional[Any] = None) -> None: - super().randomize(None) - self.x = self.R.uniform(low=self.range_x[0], high=self.range_x[1]) - self.y = self.R.uniform(low=self.range_y[0], high=self.range_y[1]) - self.z = self.R.uniform(low=self.range_z[0], high=self.range_z[1]) + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandRotated": + super().set_random_state(seed, state) + self.rand_rotate.set_random_state(seed, state) + return self - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: - self.randomize() + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) - angle: Union[Sequence[float], float] = self.x if d[self.keys[0]].ndim == 3 else (self.x, self.y, self.z) - rotator = Rotate( - angle=angle, - keep_size=self.keep_size, - ) + self.randomize(None) + + # all the keys share the same random rotate angle + self.rand_rotate.randomize() for key, mode, padding_mode, align_corners, dtype in self.key_iterator( d, self.mode, self.padding_mode, self.align_corners, self.dtype ): - orig_size = d[key].shape[1:] if self._do_transform: - d[key] = rotator( + d[key], rot_mat = self.rand_rotate( d[key], mode=mode, padding_mode=padding_mode, align_corners=align_corners, dtype=dtype, + randomize=False, + get_matrix=True, ) - rot_mat = rotator.get_rotation_matrix() else: rot_mat = np.eye(d[key].ndim) self.push_transform( d, key, - orig_size=orig_size, + orig_size=d[key].shape[1:], extra_info={ "rot_mat": rot_mat, "mode": mode.value if isinstance(mode, Enum) else mode, @@ -1477,7 +1482,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda ) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key, dtype in self.key_iterator(d, self.dtype): transform = self.get_most_recent_transform(d, key) @@ -1497,12 +1502,14 @@ def 
inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar align_corners=False if align_corners == "none" else align_corners, reverse_indexing=True, ) - output = xform( - torch.as_tensor(np.ascontiguousarray(d[key]).astype(dtype)).unsqueeze(0), - torch.as_tensor(np.ascontiguousarray(inv_rot_mat).astype(dtype)), - spatial_size=transform[InverseKeys.ORIG_SIZE], - ) - d[key] = np.asarray(output.squeeze(0).detach().cpu().numpy(), dtype=np.float32) + img_t: torch.Tensor + img_t, *_ = convert_data_type(d[key], torch.Tensor, dtype=dtype) # type: ignore + transform_t: torch.Tensor + transform_t, *_ = convert_to_dst_type(inv_rot_mat, img_t) # type: ignore + out: torch.Tensor + out = xform(img_t.unsqueeze(0), transform_t, spatial_size=transform[InverseKeys.ORIG_SIZE]).squeeze(0) + out, *_ = convert_to_dst_type(out, dst=d[key], dtype=out.dtype) + d[key] = out # Remove the applied transform self.pop_transform(d, key) @@ -1522,39 +1529,44 @@ class Zoomd(MapTransform, InvertibleTransform): The interpolation mode. Defaults to ``"area"``. See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate It also can be a sequence of string, each element corresponds to a key in ``keys``. - padding_mode: {``"constant"``, ``"edge``", ``"linear_ramp``", ``"maximum``", ``"mean``", `"median``", - ``"minimum``", `"reflect``", ``"symmetric``", ``"wrap``", ``"empty``", ``"``"} + padding_mode: available modes for numpy array: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. + One of the listed string values or a user supplied function. Defaults to ``"constant"``. The mode to pad data after zooming. - See also: https://numpy.org/doc/stable/reference/generated/numpy.pad.html + See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html align_corners: This only has an effect when mode is 'linear', 'bilinear', 'bicubic' or 'trilinear'. Default: None. See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate It also can be a sequence of bool or None, each element corresponds to a key in ``keys``. keep_size: Should keep original size (pad if needed), default is True. allow_missing_keys: don't raise exception if key is missing. - np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension. - more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + kwargs: other arguments for the `np.pad` or `torch.nn.functional.pad` function. + note that `np.pad` treats channel dimension as the first dimension.
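A sketch of the `keep_size` padding behaviour documented above (arbitrary data; "edge" is a numpy pad mode):

    import numpy as np
    from monai.transforms import Zoomd

    zoom = Zoomd(keys="image", zoom=0.8, mode="area", padding_mode="edge", keep_size=True)
    out = zoom({"image": np.random.rand(1, 16, 16).astype(np.float32)})
    print(out["image"].shape)  # (1, 16, 16): zoomed out, then padded back to the input size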
""" + backend = Zoom.backend + def __init__( self, keys: KeysCollection, zoom: Union[Sequence[float], float], mode: InterpolateModeSequence = InterpolateMode.AREA, - padding_mode: NumpyPadModeSequence = NumpyPadMode.EDGE, + padding_mode: PadModeSequence = NumpyPadMode.EDGE, align_corners: Union[Sequence[Optional[bool]], Optional[bool]] = None, keep_size: bool = True, allow_missing_keys: bool = False, - **np_kwargs, + **kwargs, ) -> None: super().__init__(keys, allow_missing_keys) self.mode = ensure_tuple_rep(mode, len(self.keys)) self.padding_mode = ensure_tuple_rep(padding_mode, len(self.keys)) self.align_corners = ensure_tuple_rep(align_corners, len(self.keys)) - self.zoomer = Zoom(zoom=zoom, keep_size=keep_size, **np_kwargs) + self.zoomer = Zoom(zoom=zoom, keep_size=keep_size, **kwargs) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key, mode, padding_mode, align_corners in self.key_iterator( d, self.mode, self.padding_mode, self.align_corners @@ -1568,15 +1580,10 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda "align_corners": align_corners if align_corners is not None else "none", }, ) - d[key] = self.zoomer( - d[key], - mode=mode, - padding_mode=padding_mode, - align_corners=align_corners, - ) + d[key] = self.zoomer(d[key], mode=mode, padding_mode=padding_mode, align_corners=align_corners) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) @@ -1594,7 +1601,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar align_corners=None if align_corners == "none" else align_corners, ) # Size might be out by 1 voxel so pad - d[key] = SpatialPad(transform[InverseKeys.ORIG_SIZE], mode="edge")(d[key]) + d[key] = SpatialPad(transform[InverseKeys.ORIG_SIZE], mode="edge")(d[key]) # type: ignore # Remove the applied transform self.pop_transform(d, key) @@ -1622,21 +1629,26 @@ class RandZoomd(RandomizableTransform, MapTransform, InvertibleTransform): The interpolation mode. Defaults to ``"area"``. See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate It also can be a sequence of string, each element corresponds to a key in ``keys``. - padding_mode: {``"constant"``, ``"edge``", ``"linear_ramp``", ``"maximum``", ``"mean``", `"median``", - ``"minimum``", `"reflect``", ``"symmetric``", ``"wrap``", ``"empty``", ``"``"} + padding_mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, + ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``} + available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}. + One of the listed string values or a user supplied function. Defaults to ``"constant"``. The mode to pad data after zooming. - See also: https://numpy.org/doc/stable/reference/generated/numpy.pad.html + See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html + https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html align_corners: This only has an effect when mode is 'linear', 'bilinear', 'bicubic' or 'trilinear'. Default: None. 
See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate It also can be a sequence of bool or None, each element corresponds to a key in ``keys``. keep_size: Should keep original size (pad if needed), default is True. allow_missing_keys: don't raise exception if key is missing. - np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension. + kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension. more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html """ + backend = RandZoom.backend + def __init__( self, keys: KeysCollection, @@ -1644,67 +1656,52 @@ def __init__( min_zoom: Union[Sequence[float], float] = 0.9, max_zoom: Union[Sequence[float], float] = 1.1, mode: InterpolateModeSequence = InterpolateMode.AREA, - padding_mode: NumpyPadModeSequence = NumpyPadMode.EDGE, + padding_mode: PadModeSequence = NumpyPadMode.EDGE, align_corners: Union[Sequence[Optional[bool]], Optional[bool]] = None, keep_size: bool = True, allow_missing_keys: bool = False, - **np_kwargs, + **kwargs, ) -> None: MapTransform.__init__(self, keys, allow_missing_keys) RandomizableTransform.__init__(self, prob) - self.min_zoom = ensure_tuple(min_zoom) - self.max_zoom = ensure_tuple(max_zoom) - if len(self.min_zoom) != len(self.max_zoom): - raise AssertionError("min_zoom and max_zoom must have same length.") - + self.rand_zoom = RandZoom(prob=1.0, min_zoom=min_zoom, max_zoom=max_zoom, keep_size=keep_size, **kwargs) self.mode = ensure_tuple_rep(mode, len(self.keys)) self.padding_mode = ensure_tuple_rep(padding_mode, len(self.keys)) self.align_corners = ensure_tuple_rep(align_corners, len(self.keys)) - self.keep_size = keep_size - self.np_kwargs = np_kwargs - - self._zoom: Sequence[float] = [1.0] - def randomize(self, data: Optional[Any] = None) -> None: - super().randomize(None) - self._zoom = [self.R.uniform(l, h) for l, h in zip(self.min_zoom, self.max_zoom)] + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandZoomd": + super().set_random_state(seed, state) + self.rand_zoom.set_random_state(seed, state) + return self - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: - # match the spatial dim of first item - self.randomize() + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) + self.randomize(None) - img_dims = data[self.keys[0]].ndim - if len(self._zoom) == 1: - # to keep the spatial shape ratio, use same random zoom factor for all dims - self._zoom = ensure_tuple_rep(self._zoom[0], img_dims - 1) - elif len(self._zoom) == 2 and img_dims > 3: - # if 2 zoom factors provided for 3D data, use the first factor for H and W dims, second factor for D dim - self._zoom = ensure_tuple_rep(self._zoom[0], img_dims - 2) + ensure_tuple(self._zoom[-1]) - zoomer = Zoom(self._zoom, keep_size=self.keep_size, **self.np_kwargs) + # all the keys share the same random zoom factor + self.rand_zoom.randomize(d[self.keys[0]]) for key, mode, padding_mode, align_corners in self.key_iterator( d, self.mode, self.padding_mode, self.align_corners ): + if self._do_transform: + d[key] = self.rand_zoom( + d[key], mode=mode, padding_mode=padding_mode, align_corners=align_corners, randomize=False + ) self.push_transform( d, key, extra_info={ - "zoom": self._zoom, + "zoom": self.rand_zoom._zoom, "mode": mode.value if isinstance(mode, Enum) else mode, "padding_mode": 
padding_mode.value if isinstance(padding_mode, Enum) else padding_mode, "align_corners": align_corners if align_corners is not None else "none", }, ) - if self._do_transform: - d[key] = zoomer( - d[key], - mode=mode, - padding_mode=padding_mode, - align_corners=align_corners, - ) return d - def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = deepcopy(dict(data)) for key in self.key_iterator(d): transform = self.get_most_recent_transform(d, key) @@ -1715,7 +1712,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar mode = transform[InverseKeys.EXTRA_INFO]["mode"] padding_mode = transform[InverseKeys.EXTRA_INFO]["padding_mode"] align_corners = transform[InverseKeys.EXTRA_INFO]["align_corners"] - inverse_transform = Zoom(zoom=(1 / zoom).tolist(), keep_size=self.keep_size) + inverse_transform = Zoom(zoom=(1 / zoom).tolist(), keep_size=self.rand_zoom.keep_size) # Apply inverse d[key] = inverse_transform( d[key], @@ -1724,7 +1721,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar align_corners=None if align_corners == "none" else align_corners, ) # Size might be out by 1 voxel so pad - d[key] = SpatialPad(transform[InverseKeys.ORIG_SIZE], mode="edge")(d[key]) + d[key] = SpatialPad(transform[InverseKeys.ORIG_SIZE], mode="edge")(d[key]) # type: ignore # Remove the applied transform self.pop_transform(d, key) @@ -1736,6 +1733,8 @@ class AddCoordinateChannelsd(MapTransform): Dictionary-based wrapper of :py:class:`monai.transforms.AddCoordinateChannels`. """ + backend = AddCoordinateChannels.backend + def __init__(self, keys: KeysCollection, spatial_channels: Sequence[int], allow_missing_keys: bool = False) -> None: """ Args: @@ -1750,15 +1749,126 @@ def __init__(self, keys: KeysCollection, spatial_channels: Sequence[int], allow_ super().__init__(keys, allow_missing_keys) self.add_coordinate_channels = AddCoordinateChannels(spatial_channels) - def __call__( - self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor]] - ) -> Dict[Hashable, Union[np.ndarray, torch.Tensor]]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) for key in self.key_iterator(d): d[key] = self.add_coordinate_channels(d[key]) return d +class GridDistortiond(MapTransform): + """ + Dictionary-based wrapper of :py:class:`monai.transforms.GridDistortion`. + """ + + backend = GridDistortion.backend + + def __init__( + self, + keys: KeysCollection, + num_cells: Union[Tuple[int], int], + distort_steps: List[Tuple], + mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR, + padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.BORDER, + device: Optional[torch.device] = None, + allow_missing_keys: bool = False, + ) -> None: + """ + Args: + keys: keys of the corresponding items to be transformed. + num_cells: number of grid cells on each dimension. + distort_steps: This argument is a list of tuples, where each tuple contains the distort steps of the + corresponding dimensions (in the order of H, W[, D]). The length of each tuple equals to `num_cells + 1`. + Each value in the tuple represents the distort step of the related cell. + mode: {``"bilinear"``, ``"nearest"``} + Interpolation mode to calculate output values. Defaults to ``"bilinear"``. 
+ See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + It also can be a sequence of string, each element corresponds to a key in ``keys``. + padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} + Padding mode for outside grid values. Defaults to ``"border"``. + See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + It also can be a sequence of string, each element corresponds to a key in ``keys``. + device: device on which the tensor will be allocated. + allow_missing_keys: don't raise exception if key is missing. + + """ + super().__init__(keys, allow_missing_keys) + self.grid_distortion = GridDistortion(num_cells=num_cells, distort_steps=distort_steps, device=device) + self.mode = ensure_tuple_rep(mode, len(self.keys)) + self.padding_mode = ensure_tuple_rep(padding_mode, len(self.keys)) + + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: + d = dict(data) + for key, mode, padding_mode in self.key_iterator(d, self.mode, self.padding_mode): + d[key] = self.grid_distortion(d[key], mode=mode, padding_mode=padding_mode) + return d + + +class RandGridDistortiond(RandomizableTransform, MapTransform): + """ + Dictionary-based wrapper of :py:class:`monai.transforms.RandGridDistortion`. + """ + + backend = RandGridDistortion.backend + + def __init__( + self, + keys: KeysCollection, + num_cells: Union[Tuple[int], int] = 5, + prob: float = 0.1, + distort_limit: Union[Tuple[float, float], float] = (-0.03, 0.03), + mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR, + padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.BORDER, + device: Optional[torch.device] = None, + allow_missing_keys: bool = False, + ) -> None: + """ + Args: + keys: keys of the corresponding items to be transformed. + num_cells: number of grid cells on each dimension. + prob: probability of returning a randomized grid distortion transform. Defaults to 0.1. + distort_limit: range to randomly distort. + If single number, distort_limit is picked from (-distort_limit, distort_limit). + Defaults to (-0.03, 0.03). + mode: {``"bilinear"``, ``"nearest"``} + Interpolation mode to calculate output values. Defaults to ``"bilinear"``. + See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + It also can be a sequence of string, each element corresponds to a key in ``keys``. + padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``} + Padding mode for outside grid values. Defaults to ``"border"``. + See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample + It also can be a sequence of string, each element corresponds to a key in ``keys``. + device: device on which the tensor will be allocated. + allow_missing_keys: don't raise exception if key is missing.
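A minimal sketch of the dictionary transform as defined above (shapes and limits arbitrary; "nearest" keeps the label discrete):

    import numpy as np
    from monai.transforms import RandGridDistortiond

    distort = RandGridDistortiond(
        keys=("image", "label"),
        prob=1.0,
        num_cells=4,
        distort_limit=(-0.05, 0.05),
        mode=("bilinear", "nearest"),  # one mode per key
    )
    data = {
        "image": np.random.rand(1, 32, 32).astype(np.float32),
        "label": (np.random.rand(1, 32, 32) > 0.5).astype(np.float32),
    }
    out = distort(data)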
+ + """ + MapTransform.__init__(self, keys, allow_missing_keys) + RandomizableTransform.__init__(self, prob) + self.rand_grid_distortion = RandGridDistortion( + num_cells=num_cells, prob=1.0, distort_limit=distort_limit, device=device + ) + self.mode = ensure_tuple_rep(mode, len(self.keys)) + self.padding_mode = ensure_tuple_rep(padding_mode, len(self.keys)) + + def set_random_state( + self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None + ) -> "RandGridDistortiond": + super().set_random_state(seed, state) + self.rand_grid_distortion.set_random_state(seed, state) + return self + + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: + d = dict(data) + self.randomize(None) + if not self._do_transform: + return d + + self.rand_grid_distortion.randomize(d[self.keys[0]].shape[1:]) + for key, mode, padding_mode in self.key_iterator(d, self.mode, self.padding_mode): + d[key] = self.rand_grid_distortion(d[key], mode=mode, padding_mode=padding_mode, randomize=False) + return d + + SpacingD = SpacingDict = Spacingd OrientationD = OrientationDict = Orientationd Rotate90D = Rotate90Dict = Rotate90d @@ -1770,6 +1880,8 @@ def __call__( Rand3DElasticD = Rand3DElasticDict = Rand3DElasticd FlipD = FlipDict = Flipd RandFlipD = RandFlipDict = RandFlipd +GridDistortionD = GridDistortionDict = GridDistortiond +RandGridDistortionD = RandGridDistortionDict = RandGridDistortiond RandAxisFlipD = RandAxisFlipDict = RandAxisFlipd RotateD = RotateDict = Rotated RandRotateD = RandRotateDict = RandRotated diff --git a/monai/transforms/transform.py b/monai/transforms/transform.py index ef49bc706c..61794308f4 100644 --- a/monai/transforms/transform.py +++ b/monai/transforms/transform.py @@ -24,14 +24,7 @@ from monai.utils import MAX_SEED, ensure_tuple from monai.utils.enums import TransformBackends -__all__ = [ - "ThreadUnsafe", - "apply_transform", - "Randomizable", - "RandomizableTransform", - "Transform", - "MapTransform", -] +__all__ = ["ThreadUnsafe", "apply_transform", "Randomizable", "RandomizableTransform", "Transform", "MapTransform"] ReturnType = TypeVar("ReturnType") @@ -47,9 +40,9 @@ def _apply_transform( Otherwise `parameters` is considered as single argument to `transform`. Args: - transform (Callable[..., ReturnType]): a callable to be used to transform `data`. - parameters (Any): parameters for the `transform`. - unpack_parameters (bool, optional): whether to unpack parameters for `transform`. Defaults to False. + transform: a callable to be used to transform `data`. + parameters: parameters for the `transform`. + unpack_parameters: whether to unpack parameters for `transform`. Defaults to False. Returns: ReturnType: The return type of `transform`. @@ -61,10 +54,7 @@ def _apply_transform( def apply_transform( - transform: Callable[..., ReturnType], - data: Any, - map_items: bool = True, - unpack_items: bool = False, + transform: Callable[..., ReturnType], data: Any, map_items: bool = True, unpack_items: bool = False ) -> Union[List[ReturnType], ReturnType]: """ Transform `data` with `transform`. @@ -74,11 +64,11 @@ def apply_transform( otherwise transform will be applied once with `data` as the argument. Args: - transform (Callable[..., ReturnType]): a callable to be used to transform `data`. - data (Any): an object to be transformed. - map_items (bool, optional): whether to apply transform to each item in `data`, + transform: a callable to be used to transform `data`. + data: an object to be transformed. 
+ map_items: whether to apply transform to each item in `data`, if `data` is a list or tuple. Defaults to True. - unpack_items (bool, optional): [description]. Defaults to False. + unpack_items: whether to unpack parameters using `*`. Defaults to False. Raises: Exception: When ``transform`` raises an exception. @@ -226,17 +216,15 @@ def __call__(self, data: Any): return an updated version of ``data``. To simplify the input validations, most of the transforms assume that - - ``data`` is a Numpy ndarray, PyTorch Tensor or string + - ``data`` is a Numpy ndarray, PyTorch Tensor or string, - the data shape can be: - #. string data without shape, `LoadImage` transform expects file paths - #. most of the pre-processing transforms expect: ``(num_channels, spatial_dim_1[, spatial_dim_2, ...])``, - except that `AddChannel` expects (spatial_dim_1[, spatial_dim_2, ...]) and - `AsChannelFirst` expects (spatial_dim_1[, spatial_dim_2, ...], num_channels) - #. most of the post-processing transforms expect - ``(batch_size, num_channels, spatial_dim_1[, spatial_dim_2, ...])`` + #. string data without shape, `LoadImage` transform expects file paths, + #. most of the pre-/post-processing transforms expect: ``(num_channels, spatial_dim_1[, spatial_dim_2, ...])``, + except for example: `AddChannel` expects (spatial_dim_1[, spatial_dim_2, ...]) and + `AsChannelFirst` expects (spatial_dim_1[, spatial_dim_2, ...], num_channels), - - the channel dimension is not omitted even if number of channels is one + - the channel dimension is often not omitted even if number of channels is one. This method can optionally take additional arguments to help execute transformation operation. @@ -333,18 +321,16 @@ def __call__(self, data): To simplify the input validations, this method assumes: - - ``data`` is a Python dictionary + - ``data`` is a Python dictionary, - ``data[key]`` is a Numpy ndarray, PyTorch Tensor or string, where ``key`` is an element of ``self.keys``, the data shape can be: - #. string data without shape, `LoadImaged` transform expects file paths - #. most of the pre-processing transforms expect: ``(num_channels, spatial_dim_1[, spatial_dim_2, ...])``, - except that `AddChanneld` expects (spatial_dim_1[, spatial_dim_2, ...]) and + #. string data without shape, `LoadImaged` transform expects file paths, + #. most of the pre-/post-processing transforms expect: ``(num_channels, spatial_dim_1[, spatial_dim_2, ...])``, + except for example: `AddChanneld` expects (spatial_dim_1[, spatial_dim_2, ...]) and `AsChannelFirstd` expects (spatial_dim_1[, spatial_dim_2, ...], num_channels) - #. most of the post-processing transforms expect - ``(batch_size, num_channels, spatial_dim_1[, spatial_dim_2, ...])`` - - the channel dimension is not omitted even if number of channels is one + - the channel dimension is often not omitted even if number of channels is one. Raises: NotImplementedError: When the subclass does not override this method. @@ -355,11 +341,7 @@ def __call__(self, data): """ raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.") - def key_iterator( - self, - data: Dict[Hashable, Any], - *extra_iterables: Optional[Iterable], - ) -> Generator: + def key_iterator(self, data: Dict[Hashable, Any], *extra_iterables: Optional[Iterable]) -> Generator: """ Iterate across keys and optionally extra iterables. If key is missing, exception is raised if `allow_missing_keys==False` (default). If `allow_missing_keys==True`, key is skipped. 
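For context, a minimal usage sketch of the new GridDistortiond / RandGridDistortiond wrappers added earlier in this patch; the keys, array shapes, and parameter values here are illustrative assumptions, not part of the patch:

    # sketch only: assumes this patch is applied on top of a MONAI dev tree
    import numpy as np

    from monai.transforms import Compose, RandGridDistortiond

    xform = Compose(
        [
            RandGridDistortiond(
                keys=("image", "label"),
                num_cells=4,
                prob=0.5,
                distort_limit=(-0.05, 0.05),
                mode=("bilinear", "nearest"),  # per-key interpolation, as documented above
                padding_mode="zeros",
            )
        ]
    )
    data = {
        "image": np.random.rand(1, 64, 64).astype(np.float32),
        "label": (np.random.rand(1, 64, 64) > 0.5).astype(np.float32),
    }
    out = xform(data)  # spatial shape is preserved; both keys share one sampled grid

Passing `mode=("bilinear", "nearest")` keeps the label on nearest-neighbour interpolation while the image stays bilinear, matching the per-key sequence behaviour described in the docstrings.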
diff --git a/monai/transforms/utility/array.py b/monai/transforms/utility/array.py index 2eb6c447c6..3366f17653 100644 --- a/monai/transforms/utility/array.py +++ b/monai/transforms/utility/array.py @@ -31,16 +31,26 @@ map_binary_to_indices, map_classes_to_indices, ) -from monai.transforms.utils_pytorch_numpy_unification import in1d, moveaxis -from monai.utils import convert_to_numpy, convert_to_tensor, ensure_tuple, look_up_option, min_version, optional_import +from monai.transforms.utils_pytorch_numpy_unification import concatenate, in1d, moveaxis, unravel_indices +from monai.utils import ( + convert_data_type, + convert_to_cupy, + convert_to_numpy, + convert_to_tensor, + ensure_tuple, + get_equivalent_dtype, + look_up_option, + min_version, + optional_import, +) from monai.utils.enums import TransformBackends from monai.utils.misc import is_module_ver_at_least -from monai.utils.type_conversion import convert_data_type +from monai.utils.type_conversion import convert_to_dst_type PILImageImage, has_pil = optional_import("PIL.Image", name="Image") pil_image_fromarray, _ = optional_import("PIL.Image", name="fromarray") cp, has_cp = optional_import("cupy") -cp_ndarray, _ = optional_import("cupy", name="ndarray") + __all__ = [ "Identity", @@ -321,8 +331,6 @@ def __call__(self, img: NdarrayOrTensor, dtype: Optional[Union[DtypeLike, torch. TypeError: When ``img`` type is not in ``Union[numpy.ndarray, torch.Tensor]``. """ - if not isinstance(img, (torch.Tensor, np.ndarray)): - raise TypeError(f"img must be one of (numpy.ndarray, torch.Tensor) but is {type(img).__name__}.") img_out, *_ = convert_data_type(img, output_type=type(img), dtype=dtype or self.dtype) return img_out @@ -334,11 +342,16 @@ class ToTensor(Transform): backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + def __init__(self, dtype: Optional[torch.dtype] = None, device: Optional[torch.device] = None) -> None: + super().__init__() + self.dtype = dtype + self.device = device + def __call__(self, img: NdarrayOrTensor) -> torch.Tensor: """ Apply the transform to `img` and make it contiguous. """ - return convert_to_tensor(img, wrap_sequence=True) # type: ignore + return convert_to_tensor(img, dtype=self.dtype, device=self.device, wrap_sequence=True) # type: ignore class EnsureType(Transform): @@ -350,19 +363,24 @@ class EnsureType(Transform): Args: data_type: target data type to convert, should be "tensor" or "numpy". + dtype: target data content type to convert, for example: np.float32, torch.float, etc. + device: for Tensor data type, specify the target device. """ backend = [TransformBackends.TORCH, TransformBackends.NUMPY] - def __init__(self, data_type: str = "tensor") -> None: - data_type = data_type.lower() - if data_type not in ("tensor", "numpy"): - raise ValueError("`data type` must be 'tensor' or 'numpy'.") - - self.data_type = data_type + def __init__( + self, + data_type: str = "tensor", + dtype: Optional[Union[DtypeLike, torch.dtype]] = None, + device: Optional[torch.device] = None, + ) -> None: + self.data_type = look_up_option(data_type.lower(), {"tensor", "numpy"}) + self.dtype = dtype + self.device = device - def __call__(self, data: NdarrayOrTensor) -> NdarrayOrTensor: + def __call__(self, data: NdarrayOrTensor): """ Args: data: input data can be PyTorch Tensor, numpy array, list, dictionary, int, float, bool, str, etc. @@ -371,7 +389,11 @@ def __call__(self, data: NdarrayOrTensor) -> NdarrayOrTensor: if applicable. 
""" - return convert_to_tensor(data) if self.data_type == "tensor" else convert_to_numpy(data) # type: ignore + if self.data_type == "tensor": + dtype_ = get_equivalent_dtype(self.dtype, torch.Tensor) + return convert_to_tensor(data, dtype=dtype_, device=self.device) + dtype_ = get_equivalent_dtype(self.dtype, np.ndarray) + return convert_to_numpy(data, dtype=dtype_) class ToNumpy(Transform): @@ -381,27 +403,36 @@ class ToNumpy(Transform): backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + def __init__(self, dtype: Optional[DtypeLike] = None) -> None: + super().__init__() + self.dtype = dtype + def __call__(self, img: NdarrayOrTensor) -> np.ndarray: """ Apply the transform to `img` and make it contiguous. """ - return convert_to_numpy(img) # type: ignore + return convert_to_numpy(img, dtype=self.dtype) # type: ignore class ToCupy(Transform): """ Converts the input data to CuPy array, can support list or tuple of numbers, NumPy and PyTorch Tensor. + + Args: + dtype: data type specifier. It is inferred from the input by default. """ backend = [TransformBackends.TORCH, TransformBackends.NUMPY] - def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor: + def __init__(self, dtype=None) -> None: + super().__init__() + self.dtype = dtype + + def __call__(self, data: NdarrayOrTensor): """ - Apply the transform to `img` and make it contiguous. + Create a CuPy array from `data` and make it contiguous """ - if isinstance(img, torch.Tensor): - img = img.detach().cpu().numpy() - return cp.ascontiguousarray(cp.asarray(img)) # type: ignore + return convert_to_cupy(data, self.dtype) class ToPIL(Transform): @@ -547,7 +578,7 @@ def __call__( lines = [f"{prefix or self.prefix} statistics:"] if self.data_type if data_type is None else data_type: - lines.append(f"Type: {type(img)}") + lines.append(f"Type: {type(img)} {img.dtype if hasattr(img, 'dtype') else None}") if self.data_shape if data_shape is None else data_shape: lines.append(f"Shape: {img.shape}") if self.value_range if value_range is None else value_range: @@ -697,9 +728,7 @@ class LabelToMask(Transform): backend = [TransformBackends.TORCH, TransformBackends.NUMPY] def __init__( # pytype: disable=annotation-type-mismatch - self, - select_labels: Union[Sequence[int], int], - merge_channels: bool = False, + self, select_labels: Union[Sequence[int], int], merge_channels: bool = False ) -> None: # pytype: disable=annotation-type-mismatch self.select_labels = ensure_tuple(select_labels) self.merge_channels = merge_channels @@ -759,16 +788,18 @@ class FgBgToIndices(Transform): """ + backend = [TransformBackends.NUMPY, TransformBackends.TORCH] + def __init__(self, image_threshold: float = 0.0, output_shape: Optional[Sequence[int]] = None) -> None: self.image_threshold = image_threshold self.output_shape = output_shape def __call__( self, - label: np.ndarray, - image: Optional[np.ndarray] = None, + label: NdarrayOrTensor, + image: Optional[NdarrayOrTensor] = None, output_shape: Optional[Sequence[int]] = None, - ) -> Tuple[np.ndarray, np.ndarray]: + ) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]: """ Args: label: input data to compute foreground and background indices. 
@@ -781,13 +812,15 @@ def __call__( output_shape = self.output_shape fg_indices, bg_indices = map_binary_to_indices(label, image, self.image_threshold) if output_shape is not None: - fg_indices = np.stack([np.unravel_index(i, output_shape) for i in fg_indices]) - bg_indices = np.stack([np.unravel_index(i, output_shape) for i in bg_indices]) - + fg_indices = unravel_indices(fg_indices, output_shape) + bg_indices = unravel_indices(bg_indices, output_shape) return fg_indices, bg_indices class ClassesToIndices(Transform): + + backend = [TransformBackends.NUMPY, TransformBackends.TORCH] + def __init__( self, num_classes: Optional[int] = None, @@ -814,10 +847,10 @@ def __init__( def __call__( self, - label: np.ndarray, - image: Optional[np.ndarray] = None, + label: NdarrayOrTensor, + image: Optional[NdarrayOrTensor] = None, output_shape: Optional[Sequence[int]] = None, - ) -> List[np.ndarray]: + ) -> List[NdarrayOrTensor]: """ Args: label: input data to compute the indices of every class. @@ -826,11 +859,13 @@ def __call__( output_shape: expected shape of output indices. if None, use `self.output_shape` instead. """ + if output_shape is None: output_shape = self.output_shape + indices: List[NdarrayOrTensor] indices = map_classes_to_indices(label, self.num_classes, image, self.image_threshold) if output_shape is not None: - indices = [np.stack([np.unravel_index(i, output_shape) for i in array]) for array in indices] + indices = [unravel_indices(cls_indices, output_shape) for cls_indices in indices] return indices @@ -846,13 +881,12 @@ class ConvertToMultiChannelBasedOnBratsClasses(Transform): """ def __call__(self, img: np.ndarray) -> np.ndarray: + img, *_ = convert_data_type(img, np.ndarray) # type: ignore # if img has channel dim, squeeze it if img.ndim == 4 and img.shape[0] == 1: img = np.squeeze(img, axis=0) - result = [] - # merge labels 1 (tumor non-enh) and 4 (tumor enh) to TC - result.append(np.logical_or(img == 1, img == 4)) + result = [np.logical_or(img == 1, img == 4)] # merge labels 1 (tumor non-enh) and 4 (tumor enh) and 2 (large edema) to WT result.append(np.logical_or(np.logical_or(img == 1, img == 4), img == 2)) # label 4 is ET @@ -880,22 +914,24 @@ class AddExtremePointsChannel(Randomizable, Transform): ValueError: When label image is not single channel. """ + backend = [TransformBackends.TORCH, TransformBackends.NUMPY] + def __init__(self, background: int = 0, pert: float = 0.0) -> None: self._background = background self._pert = pert self._points: List[Tuple[int, ...]] = [] - def randomize(self, label: np.ndarray) -> None: + def randomize(self, label: NdarrayOrTensor) -> None: self._points = get_extreme_points(label, rand_state=self.R, background=self._background, pert=self._pert) def __call__( self, - img: np.ndarray, - label: Optional[np.ndarray] = None, + img: NdarrayOrTensor, + label: Optional[NdarrayOrTensor] = None, sigma: Union[Sequence[float], float, Sequence[torch.Tensor], torch.Tensor] = 3.0, rescale_min: float = -1.0, rescale_max: float = 1.0, - ): + ) -> NdarrayOrTensor: """ Args: img: the image that we want to add new channel to. 
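A small sketch of the indexing utilities, which after this change accept torch tensors as well as numpy arrays (the label content and output_shape below are made up):

    import torch

    from monai.transforms import FgBgToIndices

    label = torch.zeros(1, 4, 4)
    label[0, 1:3, 1:3] = 1.0
    fg, bg = FgBgToIndices(image_threshold=0.0, output_shape=(4, 4))(label)
    # with output_shape given, the flat indices are unravelled to 2-D coordinates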
@@ -918,8 +954,8 @@ def __call__(
         points_image = extreme_points_to_image(
             points=self._points, label=label, sigma=sigma, rescale_min=rescale_min, rescale_max=rescale_max
         )
-
-        return np.concatenate([img, points_image], axis=0)
+        points_image, *_ = convert_to_dst_type(points_image, img)  # type: ignore
+        return concatenate((img, points_image), axis=0)


 class TorchVision:
@@ -948,6 +984,7 @@ def __call__(self, img: torch.Tensor):
             img: PyTorch Tensor data for the TorchVision transform.

         """
+        img, *_ = convert_data_type(img, torch.Tensor)  # type: ignore
         return self.trans(img)


@@ -978,7 +1015,7 @@ def __init__(self, orig_labels: Sequence, target_labels: Sequence, dtype: DtypeL
         self.dtype = dtype

     def __call__(self, img: np.ndarray):
-        img = np.asarray(img)
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         img_flat = img.flatten()
         try:
             out_flat = np.copy(img_flat).astype(self.dtype)
@@ -1019,10 +1056,7 @@ def __init__(self, ops: Sequence[Union[str, Callable]], key_prefix: str, channel
         self.channel_wise = channel_wise

     def __call__(
-        self,
-        img: np.ndarray,
-        meta_data: Optional[Dict] = None,
-        mask: Optional[np.ndarray] = None,
+        self, img: np.ndarray, meta_data: Optional[Dict] = None, mask: Optional[np.ndarray] = None
     ) -> Tuple[np.ndarray, Dict]:
         """
         Compute statistics for the intensity of input image.
@@ -1034,6 +1068,7 @@ def __call__(
             mask must have the same shape as input `img`.

         """
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         if meta_data is None:
             meta_data = {}
@@ -1044,11 +1079,11 @@ def __call__(
             img_ = img[mask]

         supported_ops = {
-            "mean": lambda x: np.nanmean(x),
-            "median": lambda x: np.nanmedian(x),
-            "max": lambda x: np.nanmax(x),
-            "min": lambda x: np.nanmin(x),
-            "std": lambda x: np.nanstd(x),
+            "mean": np.nanmean,
+            "median": np.nanmedian,
+            "max": np.nanmax,
+            "min": np.nanmin,
+            "std": np.nanstd,
         }

         def _compute(op: Callable, data: np.ndarray):
@@ -1060,7 +1095,7 @@ def _compute(op: Callable, data: np.ndarray):
         for o in self.ops:
             if isinstance(o, str):
                 o = look_up_option(o, supported_ops.keys())
-                meta_data[self.key_prefix + "_" + o] = _compute(supported_ops[o], img_)
+                meta_data[self.key_prefix + "_" + o] = _compute(supported_ops[o], img_)  # type: ignore
             elif callable(o):
                 meta_data[self.key_prefix + "_custom_" + str(custom_index)] = _compute(o, img_)
                 custom_index += 1
@@ -1083,6 +1118,8 @@ class ToDevice(Transform):

     """

+    backend = [TransformBackends.TORCH]
+
     def __init__(self, device: Union[torch.device, str], **kwargs) -> None:
         """
         Args:
@@ -1099,3 +1136,78 @@ def __call__(self, img: torch.Tensor):
             raise ValueError("img must be PyTorch Tensor, consider converting img by `EnsureType` transform first.")

         return img.to(self.device, **self.kwargs)
+
+
+class CuCIM(Transform):
+    """
+    Wrap a non-randomized cuCIM transform, defined based on the transform name and args.
+    For randomized transforms (or randomly applying a transform) use :py:class:`monai.transforms.RandCuCIM`.
+
+    Args:
+        name: the transform name in CuCIM package
+        args: parameters for the CuCIM transform
+        kwargs: parameters for the CuCIM transform
+
+    Note:
+        cuCIM transforms only work with CuPy arrays, so this transform expects input data to be `cupy.ndarray`.
+        Users can call the `ToCupy` transform to convert a numpy array or torch tensor to a CuPy array.
+ """ + + def __init__(self, name: str, *args, **kwargs) -> None: + super().__init__() + self.transform, _ = optional_import("cucim.core.operations.expose.transform", name=name) + self.args = args + self.kwargs = kwargs + + def __call__(self, data): + """ + Args: + data: a CuPy array (`cupy.ndarray`) for the cuCIM transform + + Returns: + `cupy.ndarray` + + """ + return self.transform(data, *self.args, **self.kwargs) + + +class RandCuCIM(CuCIM, RandomizableTransform): + """ + Wrap a randomized cuCIM transform, defined based on the transform name and args, + or randomly apply a non-randomized transform. + For deterministic non-randomized transforms use :py:class:`monai.transforms.CuCIM`. + + Args: + name: the transform name in CuCIM package. + apply_prob: the probability to apply the transform (default=1.0) + args: parameters for the CuCIM transform. + kwargs: parameters for the CuCIM transform. + + Note: + - CuCIM transform only work with CuPy arrays, so this transform expects input data to be `cupy.ndarray`. + Users can call `ToCuPy` transform to convert a numpy array or torch tensor to cupy array. + - If the cuCIM transform is already randomized the `apply_prob` argument has nothing to do with + the randomness of the underlying cuCIM transform. `apply_prob` defines if the transform (either randomized + or non-randomized) being applied randomly, so it can apply non-randomized tranforms randomly but be careful + with setting `apply_prob` to anything than 1.0 when using along with cuCIM's randomized transforms. + - If the random factor of the underlying cuCIM transform is not derived from `self.R`, + the results may not be deterministic. See Also: :py:class:`monai.transforms.Randomizable`. + """ + + def __init__(self, name: str, apply_prob: float = 1.0, *args, **kwargs) -> None: + CuCIM.__init__(self, name, *args, **kwargs) + RandomizableTransform.__init__(self, prob=apply_prob) + + def __call__(self, data): + """ + Args: + data: a CuPy array (`cupy.ndarray`) for the cuCIM transform + + Returns: + `cupy.ndarray` + + """ + self.randomize(data) + if not self._do_transform: + return data + return super().__call__(data) diff --git a/monai/transforms/utility/dictionary.py b/monai/transforms/utility/dictionary.py index e9bcce93b0..84ff98abf6 100644 --- a/monai/transforms/utility/dictionary.py +++ b/monai/transforms/utility/dictionary.py @@ -15,8 +15,8 @@ Class names are ended with 'd' to denote dictionary-based transforms. 
""" -import copy import logging +import re from copy import deepcopy from typing import Any, Callable, Dict, Hashable, List, Mapping, Optional, Sequence, Tuple, Union @@ -30,11 +30,13 @@ from monai.transforms.transform import MapTransform, Randomizable, RandomizableTransform from monai.transforms.utility.array import ( AddChannel, + AddExtremePointsChannel, AsChannelFirst, AsChannelLast, CastToType, ClassesToIndices, ConvertToMultiChannelBasedOnBratsClasses, + CuCIM, DataStats, EnsureChannelFirst, EnsureType, @@ -58,8 +60,10 @@ Transpose, ) from monai.transforms.utils import extreme_points_to_image, get_extreme_points +from monai.transforms.utils_pytorch_numpy_unification import concatenate from monai.utils import convert_to_numpy, ensure_tuple, ensure_tuple_rep from monai.utils.enums import InverseKeys, TransformBackends +from monai.utils.type_conversion import convert_to_dst_type __all__ = [ "AddChannelD", @@ -86,6 +90,9 @@ "CopyItemsD", "CopyItemsDict", "CopyItemsd", + "CuCIMd", + "CuCIMD", + "CuCIMDict", "DataStatsD", "DataStatsDict", "DataStatsd", @@ -116,6 +123,9 @@ "MapLabelValueD", "MapLabelValueDict", "MapLabelValued", + "RandCuCIMd", + "RandCuCIMD", + "RandCuCIMDict", "RandLambdaD", "RandLambdaDict", "RandLambdad", @@ -442,15 +452,23 @@ class ToTensord(MapTransform, InvertibleTransform): backend = ToTensor.backend - def __init__(self, keys: KeysCollection, allow_missing_keys: bool = False) -> None: + def __init__( + self, + keys: KeysCollection, + dtype: Optional[torch.dtype] = None, + device: Optional[torch.device] = None, + allow_missing_keys: bool = False, + ) -> None: """ Args: keys: keys of the corresponding items to be transformed. See also: :py:class:`monai.transforms.compose.MapTransform` + dtype: target data content type to convert, for example: torch.float, etc. + device: specify the target device to put the Tensor data. allow_missing_keys: don't raise exception if key is missing. """ super().__init__(keys, allow_missing_keys) - self.converter = ToTensor() + self.converter = ToTensor(dtype=dtype, device=device) def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) @@ -486,16 +504,25 @@ class EnsureTyped(MapTransform, InvertibleTransform): backend = EnsureType.backend - def __init__(self, keys: KeysCollection, data_type: str = "tensor", allow_missing_keys: bool = False) -> None: + def __init__( + self, + keys: KeysCollection, + data_type: str = "tensor", + dtype: Optional[Union[DtypeLike, torch.dtype]] = None, + device: Optional[torch.device] = None, + allow_missing_keys: bool = False, + ) -> None: """ Args: keys: keys of the corresponding items to be transformed. See also: :py:class:`monai.transforms.compose.MapTransform` data_type: target data type to convert, should be "tensor" or "numpy". + dtype: target data content type to convert, for example: np.float32, torch.float, etc. + device: for Tensor data type, specify the target device. allow_missing_keys: don't raise exception if key is missing. 
""" super().__init__(keys, allow_missing_keys) - self.converter = EnsureType(data_type=data_type) + self.converter = EnsureType(data_type=data_type, dtype=dtype, device=device) def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) @@ -522,15 +549,18 @@ class ToNumpyd(MapTransform): backend = ToNumpy.backend - def __init__(self, keys: KeysCollection, allow_missing_keys: bool = False) -> None: + def __init__( + self, keys: KeysCollection, dtype: Optional[DtypeLike] = None, allow_missing_keys: bool = False + ) -> None: """ Args: keys: keys of the corresponding items to be transformed. See also: :py:class:`monai.transforms.compose.MapTransform` + dtype: target data type when converting to numpy array. allow_missing_keys: don't raise exception if key is missing. """ super().__init__(keys, allow_missing_keys) - self.converter = ToNumpy() + self.converter = ToNumpy(dtype=dtype) def __call__(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, Any]: d = dict(data) @@ -542,19 +572,19 @@ def __call__(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, Any]: class ToCupyd(MapTransform): """ Dictionary-based wrapper of :py:class:`monai.transforms.ToCupy`. + + Args: + keys: keys of the corresponding items to be transformed. + See also: :py:class:`monai.transforms.compose.MapTransform` + dtype: data type specifier. It is inferred from the input by default. + allow_missing_keys: don't raise exception if key is missing. """ backend = ToCupy.backend - def __init__(self, keys: KeysCollection, allow_missing_keys: bool = False) -> None: - """ - Args: - keys: keys of the corresponding items to be transformed. - See also: :py:class:`monai.transforms.compose.MapTransform` - allow_missing_keys: don't raise exception if key is missing. - """ + def __init__(self, keys: KeysCollection, dtype=None, allow_missing_keys: bool = False) -> None: super().__init__(keys, allow_missing_keys) - self.converter = ToCupy() + self.converter = ToCupy(dtype=dtype) def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) @@ -630,8 +660,33 @@ class DeleteItemsd(MapTransform): It will remove the key-values and copy the others to construct a new dictionary. """ + def __init__(self, keys: KeysCollection, sep: str = ".", use_re: Union[Sequence[bool], bool] = False) -> None: + """ + Args: + keys: keys of the corresponding items to delete, can be "A{sep}B{sep}C" + to delete key `C` in nested dictionary, `C` can be regular expression. + See also: :py:class:`monai.transforms.compose.MapTransform` + sep: the separator tag to define nested dictionary keys, default to ".". + use_re: whether the specified key is a regular expression, it also can be + a list of bool values, map the to keys. 
+ """ + super().__init__(keys) + self.sep = sep + self.use_re = ensure_tuple_rep(use_re, len(self.keys)) + def __call__(self, data): - return {key: val for key, val in data.items() if key not in self.key_iterator(data)} + def _delete_item(keys, d, use_re: bool = False): + key = keys[0] + if len(keys) > 1: + d[key] = _delete_item(keys[1:], d[key], use_re) + return d + return {k: v for k, v in d.items() if (use_re and not re.search(key, k)) or (not use_re and k != key)} + + d = dict(data) + for key, use_re in zip(self.keys, self.use_re): + d = _delete_item(key.split(self.sep), d, use_re) + + return d class SelectItemsd(MapTransform): @@ -641,8 +696,7 @@ class SelectItemsd(MapTransform): """ def __call__(self, data): - result = {key: data[key] for key in self.key_iterator(data)} - return result + return {key: data[key] for key in self.key_iterator(data)} class SqueezeDimd(MapTransform): @@ -728,15 +782,7 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N for key, prefix, data_type, data_shape, value_range, data_value, additional_info in self.key_iterator( d, self.prefix, self.data_type, self.data_shape, self.value_range, self.data_value, self.additional_info ): - d[key] = self.printer( - d[key], - prefix, - data_type, - data_shape, - value_range, - data_value, - additional_info, - ) + d[key] = self.printer(d[key], prefix, data_type, data_shape, value_range, data_value, additional_info) return d @@ -825,7 +871,7 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N if isinstance(val, torch.Tensor): d[new_key] = val.detach().clone() else: - d[new_key] = copy.deepcopy(val) + d[new_key] = deepcopy(val) return d @@ -1059,6 +1105,8 @@ class FgBgToIndicesd(MapTransform): """ + backend = FgBgToIndices.backend + def __init__( self, keys: KeysCollection, @@ -1075,7 +1123,7 @@ def __init__( self.image_key = image_key self.converter = FgBgToIndices(image_threshold, output_shape) - def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) image = d[self.image_key] if self.image_key else None for key in self.key_iterator(d): @@ -1103,6 +1151,8 @@ class ClassesToIndicesd(MapTransform): """ + backend = ClassesToIndices.backend + def __init__( self, keys: KeysCollection, @@ -1118,7 +1168,7 @@ def __init__( self.image_key = image_key self.converter = ClassesToIndices(num_classes, image_threshold, output_shape) - def __call__(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, np.ndarray]: + def __call__(self, data: Mapping[Hashable, Any]): d = dict(data) image = d[self.image_key] if self.image_key else None for key in self.key_iterator(d): @@ -1168,6 +1218,8 @@ class AddExtremePointsChanneld(Randomizable, MapTransform): """ + backend = AddExtremePointsChannel.backend + def __init__( self, keys: KeysCollection, @@ -1188,10 +1240,10 @@ def __init__( self.rescale_min = rescale_min self.rescale_max = rescale_max - def randomize(self, label: np.ndarray) -> None: + def randomize(self, label: NdarrayOrTensor) -> None: self.points = get_extreme_points(label, rand_state=self.R, background=self.background, pert=self.pert) - def __call__(self, data): + def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]: d = dict(data) label = d[self.label_key] if label.shape[0] != 1: @@ -1209,7 +1261,8 @@ def __call__(self, data): rescale_min=self.rescale_min, rescale_max=self.rescale_max, 
            )
-            d[key] = np.concatenate([img, points_image], axis=0)
+            points_image, *_ = convert_to_dst_type(points_image, img)  # type: ignore
+            d[key] = concatenate([img, points_image], axis=0)

         return d

@@ -1223,14 +1276,7 @@ class TorchVisiond(MapTransform):
         data to be dict of PyTorch Tensors, users can easily call `ToTensord` transform to convert Numpy to Tensor.
     """

-    def __init__(
-        self,
-        keys: KeysCollection,
-        name: str,
-        allow_missing_keys: bool = False,
-        *args,
-        **kwargs,
-    ) -> None:
+    def __init__(self, keys: KeysCollection, name: str, allow_missing_keys: bool = False, *args, **kwargs) -> None:
         """
         Args:
             keys: keys of the corresponding items to be transformed.
@@ -1267,14 +1313,7 @@ class RandTorchVisiond(Randomizable, MapTransform):

     """

-    def __init__(
-        self,
-        keys: KeysCollection,
-        name: str,
-        allow_missing_keys: bool = False,
-        *args,
-        **kwargs,
-    ) -> None:
+    def __init__(self, keys: KeysCollection, name: str, allow_missing_keys: bool = False, *args, **kwargs) -> None:
         """
         Args:
             keys: keys of the corresponding items to be transformed.
@@ -1389,9 +1428,7 @@ def __call__(self, data) -> Dict[Hashable, np.ndarray]:
         ):
             meta_key = meta_key or f"{key}_{meta_key_postfix}"
             d[key], d[meta_key] = self.stats(
-                img=d[key],
-                meta_data=d.get(meta_key),
-                mask=d.get(mask_key) if mask_key is not None else None,
+                img=d[key], meta_data=d.get(meta_key), mask=d.get(mask_key) if mask_key is not None else None
             )
         return d

@@ -1401,12 +1438,10 @@ class ToDeviced(MapTransform):
     Dictionary-based wrapper of :py:class:`monai.transforms.ToDevice`.
     """

+    backend = [TransformBackends.TORCH]
+
     def __init__(
-        self,
-        keys: KeysCollection,
-        device: Union[torch.device, str],
-        allow_missing_keys: bool = False,
-        **kwargs,
+        self, keys: KeysCollection, device: Union[torch.device, str], allow_missing_keys: bool = False, **kwargs
     ) -> None:
         """
         Args:
@@ -1427,6 +1462,87 @@ def __call__(self, data: Mapping[Hashable, torch.Tensor]) -> Dict[Hashable, torc
         return d


+class CuCIMd(MapTransform):
+    """
+    Dictionary-based wrapper of :py:class:`monai.transforms.CuCIM` for non-randomized transforms.
+    For randomized transforms of CuCIM use :py:class:`monai.transforms.RandCuCIMd`.
+
+    Args:
+        keys: keys of the corresponding items to be transformed.
+            See also: :py:class:`monai.transforms.compose.MapTransform`
+        name: The transform name in CuCIM package.
+        allow_missing_keys: don't raise exception if key is missing.
+        args: parameters for the CuCIM transform.
+        kwargs: parameters for the CuCIM transform.
+
+    Note:
+        cuCIM transforms only work with CuPy arrays, so this transform expects input data to be `cupy.ndarray`.
+        Users can call the `ToCupy` transform to convert a numpy array or torch tensor to a CuPy array.
+    """
+
+    def __init__(self, keys: KeysCollection, name: str, allow_missing_keys: bool = False, *args, **kwargs) -> None:
+        super().__init__(keys=keys, allow_missing_keys=allow_missing_keys)
+        self.trans = CuCIM(name, *args, **kwargs)
+
+    def __call__(self, data):
+        """
+        Args:
+            data: Dict[Hashable, `cupy.ndarray`]
+
+        Returns:
+            Dict[Hashable, `cupy.ndarray`]
+
+        """
+        d = dict(data)
+        for key in self.key_iterator(d):
+            d[key] = self.trans(d[key])
+        return d
+
+
+class RandCuCIMd(CuCIMd, RandomizableTransform):
+    """
+    Dictionary-based wrapper of :py:class:`monai.transforms.CuCIM` for randomized transforms.
+    For deterministic non-randomized transforms of CuCIM use :py:class:`monai.transforms.CuCIMd`.
+
+    Args:
+        keys: keys of the corresponding items to be transformed.
+            See also: :py:class:`monai.transforms.compose.MapTransform`
+        name: The transform name in CuCIM package.
+        apply_prob: the probability to apply the transform (default=1.0)
+        allow_missing_keys: don't raise exception if key is missing.
+        args: parameters for the CuCIM transform.
+        kwargs: parameters for the CuCIM transform.
+
+    Note:
+        - cuCIM transforms only work with CuPy arrays, so this transform expects input data to be `cupy.ndarray`.
+          Users can call the `ToCupy` transform to convert a numpy array or torch tensor to a CuPy array.
+        - If the cuCIM transform is already randomized, the `apply_prob` argument has nothing to do with
+          the randomness of the underlying cuCIM transform. `apply_prob` determines whether the transform
+          (either randomized or non-randomized) is applied at all, so it can apply non-randomized transforms
+          randomly; be careful when setting `apply_prob` to anything other than 1.0 alongside cuCIM's
+          randomized transforms.
+        - If the random factor of the underlying cuCIM transform is not derived from `self.R`,
+          the results may not be deterministic. See Also: :py:class:`monai.transforms.Randomizable`.
+    """
+
+    def __init__(self, apply_prob: float = 1.0, *args, **kwargs) -> None:
+        CuCIMd.__init__(self, *args, **kwargs)
+        RandomizableTransform.__init__(self, prob=apply_prob)
+
+    def __call__(self, data):
+        """
+        Args:
+            data: Dict[Hashable, `cupy.ndarray`]
+
+        Returns:
+            Dict[Hashable, `cupy.ndarray`]
+
+        """
+        self.randomize(data)
+        if not self._do_transform:
+            return dict(data)
+        return super().__call__(data)
+
+
 IdentityD = IdentityDict = Identityd
 AsChannelFirstD = AsChannelFirstDict = AsChannelFirstd
 AsChannelLastD = AsChannelLastDict = AsChannelLastd
@@ -1463,3 +1579,5 @@ def __call__(self, data: Mapping[Hashable, torch.Tensor]) -> Dict[Hashable, torc
 MapLabelValueD = MapLabelValueDict = MapLabelValued
 IntensityStatsD = IntensityStatsDict = IntensityStatsd
 ToDeviceD = ToDeviceDict = ToDeviced
+CuCIMD = CuCIMDict = CuCIMd
+RandCuCIMD = RandCuCIMDict = RandCuCIMd
diff --git a/monai/transforms/utils.py b/monai/transforms/utils.py
index 30aa5e7b99..68f96b13f5 100644
--- a/monai/transforms/utils.py
+++ b/monai/transforms/utils.py
@@ -20,26 +20,39 @@
 import torch

 import monai
-import monai.transforms.transform
 from monai.config import DtypeLike, IndexSelection
 from monai.config.type_definitions import NdarrayOrTensor
 from monai.networks.layers import GaussianFilter
 from monai.transforms.compose import Compose, OneOf
-from monai.transforms.transform import MapTransform, Transform
+from monai.transforms.transform import MapTransform, Transform, apply_transform
+from monai.transforms.utils_pytorch_numpy_unification import (
+    any_np_pt,
+    cumsum,
+    isfinite,
+    nonzero,
+    ravel,
+    searchsorted,
+    unravel_index,
+    where,
+)
 from monai.utils import (
     GridSampleMode,
     InterpolateMode,
     InverseKeys,
+    NumpyPadMode,
+    PytorchPadMode,
+    deprecated_arg,
     ensure_tuple,
     ensure_tuple_rep,
     ensure_tuple_size,
     fall_back_tuple,
     issequenceiterable,
+    look_up_option,
     min_version,
     optional_import,
 )
 from monai.utils.enums import TransformBackends
-from monai.utils.type_conversion import convert_data_type
+from monai.utils.type_conversion import convert_data_type, convert_to_dst_type

 measure, _ = optional_import("skimage.measure", "0.14.2", min_version)
 ndimage, _ = optional_import("scipy.ndimage")
@@ -84,6 +97,7 @@
     "get_number_image_type_conversions",
     "get_transform_backends",
     "print_transform_backends",
+    "convert_pad_mode",
 ]


@@ -175,11 +189,7 @@ def rescale_array_int_max(arr: np.ndarray,
dtype: DtypeLike = np.uint16) -> np.n def copypaste_arrays( - src_shape, - dest_shape, - srccenter: Sequence[int], - destcenter: Sequence[int], - dims: Sequence[Optional[int]], + src_shape, dest_shape, srccenter: Sequence[int], destcenter: Sequence[int], dims: Sequence[Optional[int]] ) -> Tuple[Tuple[slice, ...], Tuple[slice, ...]]: """ Calculate the slices to copy a sliced area of array in `src_shape` into array in `dest_shape`. @@ -256,10 +266,8 @@ def resize_center(img: np.ndarray, *resize_dims: Optional[int], fill_value: floa def map_binary_to_indices( - label: np.ndarray, - image: Optional[np.ndarray] = None, - image_threshold: float = 0.0, -) -> Tuple[np.ndarray, np.ndarray]: + label: NdarrayOrTensor, image: Optional[NdarrayOrTensor] = None, image_threshold: float = 0.0 +) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]: """ Compute the foreground and background of input label data, return the indices after fattening. For example: @@ -272,28 +280,32 @@ def map_binary_to_indices( to define background. so the output items will not map to all the voxels in the label. image_threshold: if enabled `image`, use ``image > image_threshold`` to determine the valid image content area and select background only in this area. - """ + # Prepare fg/bg indices if label.shape[0] > 1: label = label[1:] # for One-Hot format data, remove the background channel - label_flat = np.any(label, axis=0).ravel() # in case label has multiple dimensions - fg_indices = np.nonzero(label_flat)[0] + label_flat = ravel(any_np_pt(label, 0)) # in case label has multiple dimensions + fg_indices = nonzero(label_flat) if image is not None: - img_flat = np.any(image > image_threshold, axis=0).ravel() - bg_indices = np.nonzero(np.logical_and(img_flat, ~label_flat))[0] + img_flat = ravel(any_np_pt(image > image_threshold, 0)) + img_flat, *_ = convert_to_dst_type(img_flat, label, dtype=img_flat.dtype) + bg_indices = nonzero(img_flat & ~label_flat) else: - bg_indices = np.nonzero(~label_flat)[0] + bg_indices = nonzero(~label_flat) + # no need to save the indices in GPU, otherwise, still need to move to CPU at runtime when crop by indices + fg_indices, *_ = convert_data_type(fg_indices, device=torch.device("cpu")) + bg_indices, *_ = convert_data_type(bg_indices, device=torch.device("cpu")) return fg_indices, bg_indices def map_classes_to_indices( - label: np.ndarray, + label: NdarrayOrTensor, num_classes: Optional[int] = None, - image: Optional[np.ndarray] = None, + image: Optional[NdarrayOrTensor] = None, image_threshold: float = 0.0, -) -> List[np.ndarray]: +) -> List[NdarrayOrTensor]: """ Filter out indices of every class of the input label data, return the indices after fattening. It can handle both One-Hot format label and Argmax format label, must provide `num_classes` for @@ -313,11 +325,11 @@ def map_classes_to_indices( determine the valid image content area and select class indices only in this area. 
""" - img_flat: Optional[np.ndarray] = None + img_flat: Optional[NdarrayOrTensor] = None if image is not None: - img_flat = np.any(image > image_threshold, axis=0).ravel() + img_flat = ravel((image > image_threshold).any(0)) - indices: List[np.ndarray] = [] + indices: List[NdarrayOrTensor] = [] # assuming the first dimension is channel channels = len(label) @@ -328,16 +340,18 @@ def map_classes_to_indices( num_classes_ = num_classes for c in range(num_classes_): - label_flat = np.any(label[c : c + 1] if channels > 1 else label == c, axis=0).ravel() - label_flat = np.logical_and(img_flat, label_flat) if img_flat is not None else label_flat - indices.append(np.nonzero(label_flat)[0]) + label_flat = ravel(any_np_pt(label[c : c + 1] if channels > 1 else label == c, 0)) + label_flat = img_flat & label_flat if img_flat is not None else label_flat + # no need to save the indices in GPU, otherwise, still need to move to CPU at runtime when crop by indices + cls_indices, *_ = convert_data_type(nonzero(label_flat), device=torch.device("cpu")) + indices.append(cls_indices) return indices def weighted_patch_samples( spatial_size: Union[int, Sequence[int]], - w: np.ndarray, + w: NdarrayOrTensor, n_samples: int = 1, r_state: Optional[np.random.RandomState] = None, ) -> List: @@ -366,34 +380,45 @@ def weighted_patch_samples( s = tuple(slice(w // 2, m - w + w // 2) if m > w else slice(m // 2, m // 2 + 1) for w, m in zip(win_size, img_size)) v = w[s] # weight map in the 'valid' mode v_size = v.shape - v = v.ravel() - if np.any(v < 0): - v -= np.min(v) # shifting to non-negative - v = v.cumsum() - if not v[-1] or not np.isfinite(v[-1]) or v[-1] < 0: # uniform sampling + v = ravel(v) + if (v < 0).any(): + v -= v.min() # shifting to non-negative + v = cumsum(v) + if not v[-1] or not isfinite(v[-1]) or v[-1] < 0: # uniform sampling idx = r_state.randint(0, len(v), size=n_samples) else: - idx = v.searchsorted(r_state.random(n_samples) * v[-1], side="right") + r, *_ = convert_to_dst_type(r_state.random(n_samples), v) # type: ignore + idx = searchsorted(v, r * v[-1], right=True) + idx, *_ = convert_to_dst_type(idx, v, dtype=torch.int) # type: ignore # compensate 'valid' mode diff = np.minimum(win_size, img_size) // 2 - return [np.unravel_index(i, v_size) + diff for i in np.asarray(idx, dtype=int)] + diff, *_ = convert_to_dst_type(diff, v) # type: ignore + return [unravel_index(i, v_size) + diff for i in idx] def correct_crop_centers( - centers: List[np.ndarray], spatial_size: Union[Sequence[int], int], label_spatial_shape: Sequence[int] -) -> List[np.ndarray]: + centers: List[Union[int, torch.Tensor]], + spatial_size: Union[Sequence[int], int], + label_spatial_shape: Sequence[int], + allow_smaller: bool = False, +): """ Utility to correct the crop center if the crop size is bigger than the image size. Args: - ceters: pre-computed crop centers, will correct based on the valid region. + centers: pre-computed crop centers of every dim, will correct based on the valid region. spatial_size: spatial size of the ROIs to be sampled. label_spatial_shape: spatial shape of the original label data to compare with ROI. + allow_smaller: if `False`, an exception will be raised if the image is smaller than + the requested ROI in any dimension. If `True`, any smaller dimensions will be set to + match the cropped size (i.e., no cropping in that dimension). 
""" spatial_size = fall_back_tuple(spatial_size, default=label_spatial_shape) - if not (np.subtract(label_spatial_shape, spatial_size) >= 0).all(): - raise ValueError("The size of the proposed random crop ROI is larger than the image size.") + if any(np.subtract(label_spatial_shape, spatial_size) < 0): + if not allow_smaller: + raise ValueError("The size of the proposed random crop ROI is larger than the image size.") + spatial_size = tuple(min(l, s) for l, s in zip(label_spatial_shape, spatial_size)) # Select subregion to assure valid roi valid_start = np.floor_divide(spatial_size, 2) @@ -422,10 +447,11 @@ def generate_pos_neg_label_crop_centers( num_samples: int, pos_ratio: float, label_spatial_shape: Sequence[int], - fg_indices: np.ndarray, - bg_indices: np.ndarray, + fg_indices: NdarrayOrTensor, + bg_indices: NdarrayOrTensor, rand_state: Optional[np.random.RandomState] = None, -) -> List[List[np.ndarray]]: + allow_smaller: bool = False, +) -> List[List[int]]: """ Generate valid sample locations based on the label with option for specifying foreground ratio Valid: samples sitting entirely within image, expected input shape: [C, H, W, D] or [C, H, W] @@ -438,6 +464,9 @@ def generate_pos_neg_label_crop_centers( fg_indices: pre-computed foreground indices in 1 dimension. bg_indices: pre-computed background indices in 1 dimension. rand_state: numpy randomState object to align with other modules. + allow_smaller: if `False`, an exception will be raised if the image is smaller than + the requested ROI in any dimension. If `True`, any smaller dimensions will be set to + match the cropped size (i.e., no cropping in that dimension). Raises: ValueError: When the proposed roi is larger than the image. @@ -448,11 +477,12 @@ def generate_pos_neg_label_crop_centers( rand_state = np.random.random.__self__ # type: ignore centers = [] - fg_indices, bg_indices = np.asarray(fg_indices), np.asarray(bg_indices) - if fg_indices.size == 0 and bg_indices.size == 0: + fg_indices = np.asarray(fg_indices) if isinstance(fg_indices, Sequence) else fg_indices + bg_indices = np.asarray(bg_indices) if isinstance(bg_indices, Sequence) else bg_indices + if len(fg_indices) == 0 and len(bg_indices) == 0: raise ValueError("No sampling location available.") - if fg_indices.size == 0 or bg_indices.size == 0: + if len(fg_indices) == 0 or len(bg_indices) == 0: warnings.warn( f"N foreground {len(fg_indices)}, N background {len(bg_indices)}," "unable to generate class balanced samples." 
@@ -462,10 +492,10 @@ def generate_pos_neg_label_crop_centers( for _ in range(num_samples): indices_to_use = fg_indices if rand_state.rand() < pos_ratio else bg_indices random_int = rand_state.randint(len(indices_to_use)) - center = np.unravel_index(indices_to_use[random_int], label_spatial_shape) + idx = indices_to_use[random_int] + center = unravel_index(idx, label_spatial_shape) # shift center to range of valid centers - center_ori = list(center) - centers.append(correct_crop_centers(center_ori, spatial_size, label_spatial_shape)) + centers.append(correct_crop_centers(center, spatial_size, label_spatial_shape, allow_smaller)) return centers @@ -474,10 +504,11 @@ def generate_label_classes_crop_centers( spatial_size: Union[Sequence[int], int], num_samples: int, label_spatial_shape: Sequence[int], - indices: List[np.ndarray], + indices: Sequence[NdarrayOrTensor], ratios: Optional[List[Union[float, int]]] = None, rand_state: Optional[np.random.RandomState] = None, -) -> List[List[np.ndarray]]: + allow_smaller: bool = False, +) -> List[List[int]]: """ Generate valid sample locations based on the specified ratios of label classes. Valid: samples sitting entirely within image, expected input shape: [C, H, W, D] or [C, H, W] @@ -490,6 +521,9 @@ def generate_label_classes_crop_centers( ratios: ratios of every class in the label to generate crop centers, including background class. if None, every class will have the same ratio to generate crop centers. rand_state: numpy randomState object to align with other modules. + allow_smaller: if `False`, an exception will be raised if the image is smaller than + the requested ROI in any dimension. If `True`, any smaller dimensions will be set to + match the cropped size (i.e., no cropping in that dimension). """ if rand_state is None: @@ -499,12 +533,10 @@ def generate_label_classes_crop_centers( raise ValueError("num_samples must be an int number and greater than 0.") ratios_: List[Union[float, int]] = ([1] * len(indices)) if ratios is None else ratios if len(ratios_) != len(indices): - raise ValueError("random crop radios must match the number of indices of classes.") + raise ValueError("random crop ratios must match the number of indices of classes.") if any(i < 0 for i in ratios_): raise ValueError("ratios should not contain negative number.") - # ensure indices are numpy array - indices = [np.asarray(i) for i in indices] for i, array in enumerate(indices): if len(array) == 0: warnings.warn(f"no available indices of class {i} to crop, set the crop ratio of this class to zero.") @@ -516,10 +548,10 @@ def generate_label_classes_crop_centers( # randomly select the indices of a class based on the ratios indices_to_use = indices[i] random_int = rand_state.randint(len(indices_to_use)) - center = np.unravel_index(indices_to_use[random_int], label_spatial_shape) + center = unravel_index(indices_to_use[random_int], label_spatial_shape) # shift center to range of valid centers center_ori = list(center) - centers.append(correct_crop_centers(center_ori, spatial_size, label_spatial_shape)) + centers.append(correct_crop_centers(center_ori, spatial_size, label_spatial_shape, allow_smaller)) return centers @@ -528,7 +560,9 @@ def create_grid( spatial_size: Sequence[int], spacing: Optional[Sequence[float]] = None, homogeneous: bool = True, - dtype: DtypeLike = float, + dtype=float, + device: Optional[torch.device] = None, + backend=TransformBackends.NUMPY, ): """ compute a `spatial_size` mesh. 
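A short sketch of the dispatch that the new `backend` argument enables (the torch branch is defined just below; the toy size and explicit dtype are assumptions):

    import torch

    from monai.transforms.utils import create_grid
    from monai.utils.enums import TransformBackends

    grid_np = create_grid((4, 4))  # numpy backend, homogeneous coords -> shape (3, 4, 4)
    grid_th = create_grid((4, 4), dtype=torch.float32, backend=TransformBackends.TORCH)
    assert grid_np.shape == tuple(grid_th.shape) == (3, 4, 4)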
@@ -538,6 +572,26 @@ def create_grid( spacing: same len as ``spatial_size``, defaults to 1.0 (dense grid). homogeneous: whether to make homogeneous coordinates. dtype: output grid data type. + device: device to compute and store the output (when the backend is "torch"). + backend: APIs to use, ``numpy`` or ``torch``. + + """ + _backend = look_up_option(backend, TransformBackends) + if _backend == TransformBackends.NUMPY: + return _create_grid_numpy(spatial_size, spacing, homogeneous, dtype) + if _backend == TransformBackends.TORCH: + return _create_grid_torch(spatial_size, spacing, homogeneous, dtype, device) + raise ValueError(f"backend {backend} is not supported") + + +def _create_grid_numpy( + spatial_size: Sequence[int], + spacing: Optional[Sequence[float]] = None, + homogeneous: bool = True, + dtype: DtypeLike = float, +): + """ + compute a `spatial_size` mesh with the numpy API. """ spacing = spacing or tuple(1.0 for _ in spatial_size) ranges = [np.linspace(-(d - 1.0) / 2.0 * s, (d - 1.0) / 2.0 * s, int(d)) for d, s in zip(spatial_size, spacing)] @@ -547,23 +601,58 @@ def create_grid( return np.concatenate([coords, np.ones_like(coords[:1])]) +def _create_grid_torch( + spatial_size: Sequence[int], + spacing: Optional[Sequence[float]] = None, + homogeneous: bool = True, + dtype=torch.float32, + device: Optional[torch.device] = None, +): + """ + compute a `spatial_size` mesh with the torch API. + """ + spacing = spacing or tuple(1.0 for _ in spatial_size) + ranges = [ + torch.linspace(-(d - 1.0) / 2.0 * s, (d - 1.0) / 2.0 * s, int(d), device=device, dtype=dtype) + for d, s in zip(spatial_size, spacing) + ] + coords = torch.meshgrid(*ranges) + if not homogeneous: + return torch.stack(coords) + return torch.stack([*coords, torch.ones_like(coords[0])]) + + def create_control_grid( - spatial_shape: Sequence[int], spacing: Sequence[float], homogeneous: bool = True, dtype: DtypeLike = float + spatial_shape: Sequence[int], + spacing: Sequence[float], + homogeneous: bool = True, + dtype: DtypeLike = float, + device: Optional[torch.device] = None, + backend=TransformBackends.NUMPY, ): """ control grid with two additional point in each direction """ + torch_backend = look_up_option(backend, TransformBackends) == TransformBackends.TORCH + ceil_func: Callable = torch.ceil if torch_backend else np.ceil # type: ignore grid_shape = [] for d, s in zip(spatial_shape, spacing): - d = int(d) + d = torch.as_tensor(d, device=device) if torch_backend else int(d) # type: ignore if d % 2 == 0: - grid_shape.append(np.ceil((d - 1.0) / (2.0 * s) + 0.5) * 2.0 + 2.0) + grid_shape.append(ceil_func((d - 1.0) / (2.0 * s) + 0.5) * 2.0 + 2.0) else: - grid_shape.append(np.ceil((d - 1.0) / (2.0 * s)) * 2.0 + 3.0) - return create_grid(grid_shape, spacing, homogeneous, dtype) + grid_shape.append(ceil_func((d - 1.0) / (2.0 * s)) * 2.0 + 3.0) + return create_grid( + spatial_size=grid_shape, spacing=spacing, homogeneous=homogeneous, dtype=dtype, device=device, backend=backend + ) -def create_rotate(spatial_dims: int, radians: Union[Sequence[float], float]) -> np.ndarray: +def create_rotate( + spatial_dims: int, + radians: Union[Sequence[float], float], + device: Optional[torch.device] = None, + backend=TransformBackends.NUMPY, +) -> NdarrayOrTensor: """ create a 2D or 3D rotation matrix @@ -572,48 +661,83 @@ def create_rotate(spatial_dims: int, radians: Union[Sequence[float], float]) -> radians: rotation radians when spatial_dims == 3, the `radians` sequence corresponds to rotation in the 1st, 2nd, and 3rd dim respectively. 
+ device: device to compute and store the output (when the backend is "torch"). + backend: APIs to use, ``numpy`` or ``torch``. Raises: ValueError: When ``radians`` is empty. ValueError: When ``spatial_dims`` is not one of [2, 3]. """ + _backend = look_up_option(backend, TransformBackends) + if _backend == TransformBackends.NUMPY: + return _create_rotate( + spatial_dims=spatial_dims, radians=radians, sin_func=np.sin, cos_func=np.cos, eye_func=np.eye + ) + if _backend == TransformBackends.TORCH: + return _create_rotate( + spatial_dims=spatial_dims, + radians=radians, + sin_func=lambda th: torch.sin(torch.as_tensor(th, dtype=torch.float32, device=device)), + cos_func=lambda th: torch.cos(torch.as_tensor(th, dtype=torch.float32, device=device)), + eye_func=lambda rank: torch.eye(rank, device=device), + ) + raise ValueError(f"backend {backend} is not supported") + + +def _create_rotate( + spatial_dims: int, + radians: Union[Sequence[float], float], + sin_func: Callable = np.sin, + cos_func: Callable = np.cos, + eye_func: Callable = np.eye, +) -> NdarrayOrTensor: radians = ensure_tuple(radians) if spatial_dims == 2: if len(radians) >= 1: - sin_, cos_ = np.sin(radians[0]), np.cos(radians[0]) - return np.array([[cos_, -sin_, 0.0], [sin_, cos_, 0.0], [0.0, 0.0, 1.0]]) + sin_, cos_ = sin_func(radians[0]), cos_func(radians[0]) + out = eye_func(3) + out[0, 0], out[0, 1] = cos_, -sin_ + out[1, 0], out[1, 1] = sin_, cos_ + return out # type: ignore raise ValueError("radians must be non empty.") if spatial_dims == 3: affine = None if len(radians) >= 1: - sin_, cos_ = np.sin(radians[0]), np.cos(radians[0]) - affine = np.array( - [[1.0, 0.0, 0.0, 0.0], [0.0, cos_, -sin_, 0.0], [0.0, sin_, cos_, 0.0], [0.0, 0.0, 0.0, 1.0]] - ) + sin_, cos_ = sin_func(radians[0]), cos_func(radians[0]) + affine = eye_func(4) + affine[1, 1], affine[1, 2] = cos_, -sin_ + affine[2, 1], affine[2, 2] = sin_, cos_ if len(radians) >= 2: - sin_, cos_ = np.sin(radians[1]), np.cos(radians[1]) + sin_, cos_ = sin_func(radians[1]), cos_func(radians[1]) if affine is None: raise ValueError("Affine should be a matrix.") - affine = affine @ np.array( - [[cos_, 0.0, sin_, 0.0], [0.0, 1.0, 0.0, 0.0], [-sin_, 0.0, cos_, 0.0], [0.0, 0.0, 0.0, 1.0]] - ) + _affine = eye_func(4) + _affine[0, 0], _affine[0, 2] = cos_, sin_ + _affine[2, 0], _affine[2, 2] = -sin_, cos_ + affine = affine @ _affine if len(radians) >= 3: - sin_, cos_ = np.sin(radians[2]), np.cos(radians[2]) + sin_, cos_ = sin_func(radians[2]), cos_func(radians[2]) if affine is None: raise ValueError("Affine should be a matrix.") - affine = affine @ np.array( - [[cos_, -sin_, 0.0, 0.0], [sin_, cos_, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]] - ) + _affine = eye_func(4) + _affine[0, 0], _affine[0, 1] = cos_, -sin_ + _affine[1, 0], _affine[1, 1] = sin_, cos_ + affine = affine @ _affine if affine is None: raise ValueError("radians must be non empty.") - return affine + return affine # type: ignore raise ValueError(f"Unsupported spatial_dims: {spatial_dims}, available options are [2, 3].") -def create_shear(spatial_dims: int, coefs: Union[Sequence[float], float]) -> np.ndarray: +def create_shear( + spatial_dims: int, + coefs: Union[Sequence[float], float], + device: Optional[torch.device] = None, + backend=TransformBackends.NUMPY, +) -> NdarrayOrTensor: """ create a shearing matrix @@ -629,55 +753,113 @@ def create_shear(spatial_dims: int, coefs: Union[Sequence[float], float]) -> np. 
[0.0, 0.0, 0.0, 1.0], ] + device: device to compute and store the output (when the backend is "torch"). + backend: APIs to use, ``numpy`` or ``torch``. + Raises: NotImplementedError: When ``spatial_dims`` is not one of [2, 3]. """ + _backend = look_up_option(backend, TransformBackends) + if _backend == TransformBackends.NUMPY: + return _create_shear(spatial_dims=spatial_dims, coefs=coefs, eye_func=np.eye) + if _backend == TransformBackends.TORCH: + return _create_shear( + spatial_dims=spatial_dims, coefs=coefs, eye_func=lambda rank: torch.eye(rank, device=device) + ) + raise ValueError(f"backend {backend} is not supported") + + +def _create_shear(spatial_dims: int, coefs: Union[Sequence[float], float], eye_func=np.eye) -> NdarrayOrTensor: if spatial_dims == 2: coefs = ensure_tuple_size(coefs, dim=2, pad_val=0.0) - return np.array([[1, coefs[0], 0.0], [coefs[1], 1.0, 0.0], [0.0, 0.0, 1.0]]) + out = eye_func(3) + out[0, 1], out[1, 0] = coefs[0], coefs[1] + return out # type: ignore if spatial_dims == 3: coefs = ensure_tuple_size(coefs, dim=6, pad_val=0.0) - return np.array( - [ - [1.0, coefs[0], coefs[1], 0.0], - [coefs[2], 1.0, coefs[3], 0.0], - [coefs[4], coefs[5], 1.0, 0.0], - [0.0, 0.0, 0.0, 1.0], - ] - ) + out = eye_func(4) + out[0, 1], out[0, 2] = coefs[0], coefs[1] + out[1, 0], out[1, 2] = coefs[2], coefs[3] + out[2, 0], out[2, 1] = coefs[4], coefs[5] + return out # type: ignore raise NotImplementedError("Currently only spatial_dims in [2, 3] are supported.") -def create_scale(spatial_dims: int, scaling_factor: Union[Sequence[float], float]): +def create_scale( + spatial_dims: int, + scaling_factor: Union[Sequence[float], float], + device: Optional[torch.device] = None, + backend=TransformBackends.NUMPY, +) -> NdarrayOrTensor: """ create a scaling matrix Args: spatial_dims: spatial rank scaling_factor: scaling factors for every spatial dim, defaults to 1. - """ + device: device to compute and store the output (when the backend is "torch"). + backend: APIs to use, ``numpy`` or ``torch``. + """ + _backend = look_up_option(backend, TransformBackends) + if _backend == TransformBackends.NUMPY: + return _create_scale(spatial_dims=spatial_dims, scaling_factor=scaling_factor, array_func=np.diag) + if _backend == TransformBackends.TORCH: + return _create_scale( + spatial_dims=spatial_dims, + scaling_factor=scaling_factor, + array_func=lambda x: torch.diag(torch.as_tensor(x, device=device)), + ) + raise ValueError(f"backend {backend} is not supported") + + +def _create_scale( + spatial_dims: int, scaling_factor: Union[Sequence[float], float], array_func=np.diag +) -> NdarrayOrTensor: scaling_factor = ensure_tuple_size(scaling_factor, dim=spatial_dims, pad_val=1.0) - return np.diag(scaling_factor[:spatial_dims] + (1.0,)) + return array_func(scaling_factor[:spatial_dims] + (1.0,)) # type: ignore -def create_translate(spatial_dims: int, shift: Union[Sequence[float], float]) -> np.ndarray: +def create_translate( + spatial_dims: int, + shift: Union[Sequence[float], float], + device: Optional[torch.device] = None, + backend=TransformBackends.NUMPY, +) -> NdarrayOrTensor: """ create a translation matrix Args: spatial_dims: spatial rank shift: translate pixel/voxel for every spatial dim, defaults to 0. - """ + device: device to compute and store the output (when the backend is "torch"). + backend: APIs to use, ``numpy`` or ``torch``. 
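+
+    Example (illustrative; output shown for the default numpy backend)::
+
+        create_translate(2, (2, 3))
+        # array([[1., 0., 2.],
+        #        [0., 1., 3.],
+        #        [0., 0., 1.]])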
+ """ + _backend = look_up_option(backend, TransformBackends) + if _backend == TransformBackends.NUMPY: + return _create_translate(spatial_dims=spatial_dims, shift=shift, eye_func=np.eye, array_func=np.asarray) + if _backend == TransformBackends.TORCH: + return _create_translate( + spatial_dims=spatial_dims, + shift=shift, + eye_func=lambda x: torch.eye(torch.as_tensor(x), device=device), # type: ignore + array_func=lambda x: torch.as_tensor(x, device=device), # type: ignore + ) + raise ValueError(f"backend {backend} is not supported") + + +def _create_translate( + spatial_dims: int, shift: Union[Sequence[float], float], eye_func=np.eye, array_func=np.asarray +) -> NdarrayOrTensor: shift = ensure_tuple(shift) - affine = np.eye(spatial_dims + 1) + affine = eye_func(spatial_dims + 1) for i, a in enumerate(shift[:spatial_dims]): affine[i, spatial_dims] = a - return np.asarray(affine) + return array_func(affine) # type: ignore def generate_spatial_bounding_box( - img: np.ndarray, + img: NdarrayOrTensor, select_fn: Callable = is_positive, channel_indices: Optional[IndexSelection] = None, margin: Union[Sequence[int], int] = 0, @@ -702,7 +884,7 @@ def generate_spatial_bounding_box( margin: add margin value to spatial dims of the bounding box, if only 1 value provided, use it for all dims. """ data = img[list(ensure_tuple(channel_indices))] if channel_indices is not None else img - data = np.any(select_fn(data), axis=0) + data = select_fn(data).any(0) ndim = len(data.shape) margin = ensure_tuple_rep(margin, ndim) for m in margin: @@ -713,13 +895,18 @@ def generate_spatial_bounding_box( box_end = [0] * ndim for di, ax in enumerate(itertools.combinations(reversed(range(ndim)), ndim - 1)): - dt = data.any(axis=ax) - if not np.any(dt): + dt = data + if len(ax) != 0: + dt = any_np_pt(dt, ax) + + if not dt.any(): # if no foreground, return all zero bounding box coords return [0] * ndim, [0] * ndim - min_d = max(np.argmax(dt) - margin[di], 0) - max_d = max(data.shape[di] - max(np.argmax(dt[::-1]) - margin[di], 0), min_d + 1) + arg_max = where(dt == dt.max())[0] + min_d = max(arg_max[0] - margin[di], 0) + max_d = arg_max[-1] + margin[di] + 1 + box_start[di], box_end[di] = min_d, max_d return box_start, box_end @@ -804,7 +991,7 @@ def fill_holes( def get_extreme_points( - img: np.ndarray, rand_state: Optional[np.random.RandomState] = None, background: int = 0, pert: float = 0.0 + img: NdarrayOrTensor, rand_state: Optional[np.random.RandomState] = None, background: int = 0, pert: float = 0.0 ) -> List[Tuple[int, ...]]: """ Generate extreme points from an image. 
These are used to generate initial segmentation @@ -828,7 +1015,7 @@ def get_extreme_points( """ if rand_state is None: rand_state = np.random.random.__self__ # type: ignore - indices = np.where(img != background) + indices = where(img != background) if np.size(indices[0]) == 0: raise ValueError("get_extreme_points: no foreground object in mask!") @@ -840,7 +1027,9 @@ def _get_point(val, dim): val : value for comparison dim : dimension in which to look for value """ - idx = rand_state.choice(np.where(indices[dim] == val)[0]) + idx = where(indices[dim] == val)[0] + idx = idx.cpu() if isinstance(idx, torch.Tensor) else idx + idx = rand_state.choice(idx) pt = [] for j in range(img.ndim): # add +- pert to each dimension @@ -852,19 +1041,19 @@ def _get_point(val, dim): points = [] for i in range(img.ndim): - points.append(tuple(_get_point(np.min(indices[i][...]), i))) - points.append(tuple(_get_point(np.max(indices[i][...]), i))) + points.append(tuple(_get_point(indices[i].min(), i))) + points.append(tuple(_get_point(indices[i].max(), i))) return points def extreme_points_to_image( points: List[Tuple[int, ...]], - label: np.ndarray, + label: NdarrayOrTensor, sigma: Union[Sequence[float], float, Sequence[torch.Tensor], torch.Tensor] = 0.0, rescale_min: float = -1.0, rescale_max: float = 1.0, -): +) -> torch.Tensor: """ Please refer to :py:class:`monai.transforms.AddExtremePointsChannel` for the usage. @@ -882,27 +1071,30 @@ def extreme_points_to_image( rescale_max: maximum value of output data. """ # points to image - points_image = torch.zeros(label.shape[1:], dtype=torch.float) + points_image = torch.zeros_like(torch.as_tensor(label[0]), dtype=torch.float) for p in points: points_image[p] = 1.0 + if isinstance(sigma, Sequence): + sigma = [torch.as_tensor(s, device=points_image.device) for s in sigma] + else: + sigma = torch.as_tensor(sigma, device=points_image.device) + # add channel and add batch points_image = points_image.unsqueeze(0).unsqueeze(0) gaussian_filter = GaussianFilter(label.ndim - 1, sigma=sigma) - points_image = gaussian_filter(points_image).squeeze(0).detach().numpy() + points_image = gaussian_filter(points_image).squeeze(0).detach() # rescale the points image to [rescale_min, rescale_max] - min_intensity = np.min(points_image) - max_intensity = np.max(points_image) + min_intensity = points_image.min() + max_intensity = points_image.max() points_image = (points_image - min_intensity) / (max_intensity - min_intensity) - points_image = points_image * (rescale_max - rescale_min) + rescale_min - return points_image + return points_image * (rescale_max - rescale_min) + rescale_min def map_spatial_axes( - img_ndim: int, - spatial_axes: Optional[Union[Sequence[int], int]] = None, - channel_first: bool = True, + img_ndim: int, spatial_axes: Optional[Union[Sequence[int], int]] = None, channel_first: bool = True ) -> List[int]: """ Utility to map the spatial axes to real axes in channel first/last shape. @@ -1041,8 +1233,8 @@ def compute_divisible_spatial_size(spatial_shape: Sequence[int], k: Union[Sequen def equalize_hist( - img: np.ndarray, - mask: Optional[np.ndarray] = None, + img: NdarrayOrTensor, + mask: Optional[NdarrayOrTensor] = None, num_bins: int = 256, min: int = 0, max: int = 255, @@ -1064,8 +1256,14 @@ def equalize_hist( dtype: data type of the output, default to `float32`.
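+
+    Example (illustrative; exact values depend on the image content and whether `skimage` is available)::
+
+        img = np.random.randint(0, 255, size=(1, 64, 64))
+        out = equalize_hist(img)  # same shape as `img`, intensities spread over [0, 255]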
""" - orig_shape = img.shape - hist_img = img[np.array(mask, dtype=bool)] if mask is not None else img + img_np: np.ndarray + img_np, *_ = convert_data_type(img, np.ndarray) # type: ignore + mask_np: Optional[np.ndarray] = None + if mask is not None: + mask_np, *_ = convert_data_type(mask, np.ndarray) # type: ignore + + orig_shape = img_np.shape + hist_img = img_np[np.array(mask_np, dtype=bool)] if mask_np is not None else img_np if has_skimage: hist, bins = exposure.histogram(hist_img.flatten(), num_bins) else: @@ -1077,9 +1275,9 @@ def equalize_hist( cum = rescale_array(arr=cum, minv=min, maxv=max) # apply linear interpolation - img = np.interp(img.flatten(), bins, cum) + img_np = np.interp(img_np.flatten(), bins, cum) - return img.reshape(orig_shape).astype(dtype) + return img_np.reshape(orig_shape).astype(dtype) class Fourier: @@ -1088,38 +1286,70 @@ class Fourier: """ @staticmethod - def shift_fourier(x: torch.Tensor, n_dims: int) -> torch.Tensor: + @deprecated_arg( + name="n_dims", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead." + ) + def shift_fourier(x: NdarrayOrTensor, spatial_dims: int, n_dims: Optional[int] = None) -> NdarrayOrTensor: """ Applies fourier transform and shifts the zero-frequency component to the center of the spectrum. Only the spatial dimensions get transformed. Args: x: Image to transform. - n_dims: Number of spatial dimensions. + spatial_dims: Number of spatial dimensions. + + .. deprecated:: 0.6.0 + ``n_dims`` is deprecated, use ``spatial_dims`` instead. + Returns k: K-space data. """ - k: torch.Tensor = torch.fft.fftshift( - torch.fft.fftn(x, dim=tuple(range(-n_dims, 0))), dim=tuple(range(-n_dims, 0)) - ) + if n_dims is not None: + spatial_dims = n_dims + dims = tuple(range(-spatial_dims, 0)) + k: NdarrayOrTensor + if isinstance(x, torch.Tensor): + if hasattr(torch.fft, "fftshift"): + k = torch.fft.fftshift(torch.fft.fftn(x, dim=dims), dim=dims) + else: + # if using old PyTorch, will convert to numpy array and return + k = np.fft.fftshift(np.fft.fftn(x.cpu().numpy(), axes=dims), axes=dims) + else: + k = np.fft.fftshift(np.fft.fftn(x, axes=dims), axes=dims) return k @staticmethod - def inv_shift_fourier(k: torch.Tensor, n_dims: int) -> torch.Tensor: + @deprecated_arg( + name="n_dims", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead." + ) + def inv_shift_fourier(k: NdarrayOrTensor, spatial_dims: int, n_dims: Optional[int] = None) -> NdarrayOrTensor: """ Applies inverse shift and fourier transform. Only the spatial dimensions are transformed. Args: k: K-space data. - n_dims: Number of spatial dimensions. + spatial_dims: Number of spatial dimensions. + + .. deprecated:: 0.6.0 + ``n_dims`` is deprecated, use ``spatial_dims`` instead. + Returns: x: Tensor in image space. 
""" - x: torch.Tensor = torch.fft.ifftn( - torch.fft.ifftshift(k, dim=tuple(range(-n_dims, 0))), dim=tuple(range(-n_dims, 0)) - ).real - return x + if n_dims is not None: + spatial_dims = n_dims + dims = tuple(range(-spatial_dims, 0)) + out: NdarrayOrTensor + if isinstance(k, torch.Tensor): + if hasattr(torch.fft, "ifftshift"): + out = torch.fft.ifftn(torch.fft.ifftshift(k, dim=dims), dim=dims, norm="backward").real + else: + # if using old PyTorch, will convert to numpy array and return + out = np.fft.ifftn(np.fft.ifftshift(k.cpu().numpy(), axes=dims), axes=dims).real + else: + out = np.fft.ifftn(np.fft.ifftshift(k, axes=dims), axes=dims).real + return out def get_number_image_type_conversions(transform: Compose, test_data: Any, key: Optional[Hashable] = None) -> int: @@ -1149,9 +1379,7 @@ def _get_data(obj, key): prev_data = _get_data(test_data, key) prev_type = type(prev_data) prev_device = prev_data.device if isinstance(prev_data, torch.Tensor) else None - test_data = monai.transforms.transform.apply_transform( - _transform, test_data, transform.map_items, transform.unpack_items - ) + test_data = apply_transform(_transform, test_data, transform.map_items, transform.unpack_items) # every time the type or device changes, increment the counter curr_data = _get_data(test_data, key) curr_device = curr_data.device if isinstance(curr_data, torch.Tensor) else None @@ -1178,24 +1406,30 @@ def get_transform_backends(): continue unique_transforms.append(obj) - if isclass(obj) and issubclass(obj, Transform): - if n in [ - "Transform", + if ( + isclass(obj) + and issubclass(obj, Transform) + and n + not in [ + "BatchInverseTransform", + "Compose", + "Decollated", + "InvertD", "InvertibleTransform", "Lambda", "LambdaD", - "Compose", - "RandomizableTransform", + "MapTransform", "OneOf", - "BatchInverseTransform", - "InverteD", - ]: - continue - - backends[n] = [ - TransformBackends.TORCH in obj.backend, - TransformBackends.NUMPY in obj.backend, + "PadListDataCollate", + "RandLambda", + "RandLambdaD", + "RandTorchVisionD", + "RandomizableTransform", + "TorchVisionD", + "Transform", ] + ): + backends[n] = [TransformBackends.TORCH in obj.backend, TransformBackends.NUMPY in obj.backend] return backends @@ -1212,7 +1446,7 @@ def print_color(t, color): print(f"\033[{color}m{t}\033[00m") def print_table_column(name, torch, numpy, color=Colors.none): - print_color("{:<50} {:<8} {:<8}".format(name, torch, numpy), color) + print_color(f"{name:<50} {torch:<8} {numpy:<8}", color) backends = get_transform_backends() n_total = len(backends) @@ -1240,5 +1474,30 @@ def print_table_column(name, torch, numpy, color=Colors.none): print_color(f"Number of uncategorised: {n_uncategorized}", Colors.red) +def convert_pad_mode(dst: NdarrayOrTensor, mode: Union[NumpyPadMode, PytorchPadMode, str]): + """ + Utility to convert padding mode between numpy array and PyTorch Tensor. + + Args: + dst: target data to convert padding mode for, should be numpy array or PyTorch Tensor. + mode: current padding mode. 
+ + """ + mode = mode.value if isinstance(mode, (NumpyPadMode, PytorchPadMode)) else mode + if isinstance(dst, torch.Tensor): + if mode == "wrap": + mode = "circular" + if mode == "edge": + mode = "replicate" + return look_up_option(mode, PytorchPadMode) + if isinstance(dst, np.ndarray): + if mode == "circular": + mode = "wrap" + if mode == "replicate": + mode = "edge" + return look_up_option(mode, NumpyPadMode) + raise ValueError(f"unsupported data type: {type(dst)}.") + + if __name__ == "__main__": print_transform_backends() diff --git a/monai/transforms/utils_create_transform_ims.py b/monai/transforms/utils_create_transform_ims.py new file mode 100644 index 0000000000..1c052e53fe --- /dev/null +++ b/monai/transforms/utils_create_transform_ims.py @@ -0,0 +1,678 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import pathlib +import tempfile +import textwrap +from copy import deepcopy +from glob import glob +from typing import TYPE_CHECKING, Callable + +import numpy as np +import torch + +from monai.apps import download_and_extract +from monai.transforms import ( + AddChanneld, + Affine, + Affined, + AsDiscrete, + Compose, + Flip, + Flipd, + LoadImaged, + MapTransform, + Orientation, + Orientationd, + Rand3DElastic, + Rand3DElasticd, + RandFlip, + RandFlipd, + Randomizable, + RandRotate, + RandRotated, + RandZoom, + RandZoomd, + Rotate, + Rotate90, + Rotate90d, + Rotated, + ScaleIntensity, + ScaleIntensityd, + SpatialPadd, + Zoom, + Zoomd, +) +from monai.transforms.croppad.array import ( + BorderPad, + CenterScaleCrop, + CenterSpatialCrop, + CropForeground, + DivisiblePad, + RandCropByLabelClasses, + RandCropByPosNegLabel, + RandScaleCrop, + RandSpatialCrop, + RandSpatialCropSamples, + RandWeightedCrop, + ResizeWithPadOrCrop, + SpatialCrop, + SpatialPad, +) +from monai.transforms.croppad.dictionary import ( + BorderPadd, + CenterScaleCropd, + CenterSpatialCropd, + CropForegroundd, + DivisiblePadd, + RandCropByLabelClassesd, + RandCropByPosNegLabeld, + RandScaleCropd, + RandSpatialCropd, + RandSpatialCropSamplesd, + RandWeightedCropd, + ResizeWithPadOrCropd, + SpatialCropd, +) +from monai.transforms.intensity.array import ( + AdjustContrast, + GaussianSharpen, + GaussianSmooth, + GibbsNoise, + HistogramNormalize, + KSpaceSpikeNoise, + MaskIntensity, + NormalizeIntensity, + RandAdjustContrast, + RandBiasField, + RandCoarseDropout, + RandCoarseShuffle, + RandGaussianNoise, + RandGaussianSharpen, + RandGaussianSmooth, + RandGibbsNoise, + RandHistogramShift, + RandKSpaceSpikeNoise, + RandScaleIntensity, + RandShiftIntensity, + RandStdShiftIntensity, + ScaleIntensityRange, + ScaleIntensityRangePercentiles, + ShiftIntensity, + StdShiftIntensity, + ThresholdIntensity, +) +from monai.transforms.intensity.dictionary import ( + AdjustContrastd, + GaussianSharpend, + GaussianSmoothd, + GibbsNoised, + HistogramNormalized, + KSpaceSpikeNoised, + MaskIntensityd, + NormalizeIntensityd, + RandAdjustContrastd, + RandBiasFieldd, + RandCoarseDropoutd, + 
RandCoarseShuffled, + RandGaussianNoised, + RandGaussianSharpend, + RandGaussianSmoothd, + RandGibbsNoised, + RandHistogramShiftd, + RandKSpaceSpikeNoised, + RandScaleIntensityd, + RandShiftIntensityd, + RandStdShiftIntensityd, + ScaleIntensityRanged, + ScaleIntensityRangePercentilesd, + ShiftIntensityd, + StdShiftIntensityd, + ThresholdIntensityd, +) +from monai.transforms.post.array import KeepLargestConnectedComponent, LabelFilter, LabelToContour +from monai.transforms.post.dictionary import AsDiscreted, KeepLargestConnectedComponentd, LabelFilterd, LabelToContourd +from monai.transforms.spatial.array import ( + Rand2DElastic, + RandAffine, + RandAxisFlip, + RandGridDistortion, + RandRotate90, + Resize, + Spacing, +) +from monai.transforms.spatial.dictionary import ( + Rand2DElasticd, + RandAffined, + RandAxisFlipd, + RandGridDistortiond, + RandRotate90d, + Resized, + Spacingd, +) +from monai.utils.enums import CommonKeys +from monai.utils.module import optional_import + +if TYPE_CHECKING: + import matplotlib.pyplot as plt + + has_matplotlib = True + +else: + plt, has_matplotlib = optional_import("matplotlib.pyplot") + + +def get_data(keys): + """Get the example data to be used. + + Use MarsAtlas as it only contains 1 image for quick download and + that image is parcellated. + """ + cache_dir = os.environ.get("MONAI_DATA_DIRECTORY") or tempfile.mkdtemp() + fname = "MarsAtlas-MNI-Colin27.zip" + url = "https://www.dropbox.com/s/ndz8qtqblkciole/" + fname + "?dl=1" + out_path = os.path.join(cache_dir, "MarsAtlas-MNI-Colin27") + zip_path = os.path.join(cache_dir, fname) + + download_and_extract(url, zip_path, out_path) + + image, label = sorted(glob(os.path.join(out_path, "*.nii"))) + + data = {CommonKeys.IMAGE: image, CommonKeys.LABEL: label} + + transforms = Compose( + [LoadImaged(keys), AddChanneld(keys), ScaleIntensityd(CommonKeys.IMAGE), Rotate90d(keys, spatial_axes=[0, 2])] + ) + data = transforms(data) + max_size = max(data[keys[0]].shape) + padder = SpatialPadd(keys, (max_size, max_size, max_size)) + return padder(data) + + +def update_docstring(code_path, transform_name): + """ + Find the documentation for a given transform and if it's missing, + add a pointer to the transform's example image. + """ + with open(code_path) as f: + contents = f.readlines() + doc_start = None + for i, line in enumerate(contents): + # find the line containing start of the transform documentation + if "`" + transform_name + "`" in line: + doc_start = i + break + if doc_start is None: + raise RuntimeError("Couldn't find transform documentation") + + # if image is already in docs, nothing to do + image_line = doc_start + 2 + if ".. image" in contents[image_line]: + return + + # add the line for the image and the alt text + contents_orig = deepcopy(contents) + contents.insert( + image_line, + ".. 
image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/" + transform_name + ".png\n", + ) + contents.insert(image_line + 1, " :alt: example of " + transform_name + "\n") + + # check that we've only added two lines + assert len(contents) == len(contents_orig) + 2 + + # write the updated doc to overwrite the original + with open(code_path, "w") as f: + f.writelines(contents) + + +def pre_process_data(data, ndim, is_map, is_post): + """If the transform requires 2D data, convert it to 2D.""" + if ndim == 2: + for k in keys: + data[k] = data[k][..., data[k].shape[-1] // 2] + if is_post: + for k in keys: + data[k] = torch.as_tensor(data[k]) + + if is_map: + return data + return data[CommonKeys.LABEL] if is_post else data[CommonKeys.IMAGE] + + +def get_2d_slice(image, view, is_label): + """If the image is 3D, get the central slice. If it is already 2D, return it as-is. + If the image is a label, set 0 to np.nan. + """ + if image.ndim == 2: + out = image + else: + shape = image.shape + slices = [slice(0, s) for s in shape] + _slice = shape[view] // 2 + slices[view] = slice(_slice, _slice + 1) + slices = tuple(slices) + out = np.squeeze(image[slices], view) + if is_label: + out[out == 0] = np.nan + return out + + +def get_stacked_2d_ims(im, is_label): + """Get the 3 orthogonal views and stack them into 1 image. + Requires that all images be the same size, but this is taken care + of by the `SpatialPadd` earlier. + """ + return [get_2d_slice(im, i, is_label) for i in range(3)] + + +def get_stacked_before_after(before, after, is_label=False): + """Stack before and after images into 1 image if 3D. + Requires that the before and after images be the same size. + """ + return [get_stacked_2d_ims(d, is_label) for d in (before, after)] + + +def save_image(images, labels, filename, transform_name, transform_args, shapes, colorbar=False): + """Save image to file, ensuring there's no whitespace around the edge.""" + plt.rcParams.update({"font.family": "monospace"}) + plt.style.use("dark_background") + nrow = len(images) # before and after (should always be 2) + ncol = len(images[0]) # num orthogonal views (either 1 or 3) + # roughly estimate the height_ratios of the first:second row + hs = [float(r[0].shape[0]) for r in images] + fig = plt.figure(tight_layout=True) + spec = fig.add_gridspec(nrow, ncol, hspace=0, wspace=0, height_ratios=hs) + for row in range(nrow): + vmin = min(i.min() for i in images[row]) + vmax = max(i.max() for i in images[row]) + for col in range(ncol): + ax = fig.add_subplot(spec[row, col]) + imshow = ax.imshow(images[row][col], cmap="gray", vmin=vmin, vmax=vmax) + ax.set_aspect("equal") + if colorbar and col == ncol - 1: + plt.colorbar(imshow, ax=ax) + if col == 0: + y_label = "After" if row else "Before" + y_label += ("\n" + shapes[row]) if shapes[0] != shapes[1] else "" + ax.set_ylabel(y_label) + # print yticks for the rightmost column only + if col != ncol - 1 or colorbar: + ax.set_yticks([]) + else: + ax.yaxis.tick_right() + for n, label in enumerate(ax.yaxis.get_ticklabels()): + if n > 2: + label.set_visible(False) + ax.set_xticks([]) + ax.set_frame_on(False) + if labels is not None: + ax.imshow(labels[row][col], cmap="hsv", alpha=0.9, interpolation="nearest") + # title is e.g., Flipd(keys=keys, spatial_axis=0) + title = transform_name + "(" + for k, v in transform_args.items(): + title += k + "=" + if isinstance(v, str): + title += "'" + v + "'" + elif isinstance(v, (np.ndarray, torch.Tensor)): + title += "[array]" + elif isinstance(v, Callable): + title += "[callable]" + else: + title += 
str(v) + title += ", " + if len(transform_args) > 0: + title = title[:-2] + title += ")" + # shorten the lines + title = textwrap.fill(title, 50, break_long_words=False, subsequent_indent=" " * (len(transform_name) + 1)) + fig.suptitle(title, x=0.1, horizontalalignment="left") + fig.savefig(filename) + plt.close(fig) + + +def get_images(data, is_label=False): + """Get the image. If it is a dictionary, extract the relevant key. If it is a list, stack the images. + If it is both a dictionary and a list, do both. Also return the image size as a string to be used in + the imshow. If it's a list, return `N x (H,W,D)`. + """ + # If not a list, convert + if not isinstance(data, list): + data = [data] + key = CommonKeys.LABEL if is_label else CommonKeys.IMAGE + is_map = isinstance(data[0], dict) + # length of the list will be equal to the number of samples produced. This will be 1 except for transforms that + # produce `num_samples`. + data = [d[key] if is_map else d for d in data] + data = [d[0] for d in data] # remove channel component + + # for each sample, create a list of the orthogonal views. If the image is 2D, the length will be 1. If 3D, there + # will be three orthogonal views + num_samples = len(data) + num_orthog_views = 3 if data[0].ndim == 3 else 1 + shape_str = (f"{num_samples} x " if num_samples > 1 else "") + str(data[0].shape) + for i in range(num_samples): + data[i] = [get_2d_slice(data[i], view, is_label) for view in range(num_orthog_views)] + + out = [] + if num_samples == 1: + out = data[0] + else: + # We might need to panel the images. This happens if a transform produces e.g. 4 output images. + # In this case, we create a 2-by-2 grid from them. Output will be a list containing n_orthog_views, + # each element being either the image (if num_samples is 1) or the panelled image. + nrows = int(np.floor(num_samples ** 0.5)) + for view in range(num_orthog_views): + result = np.asarray([d[view] for d in data]) + nindex, height, width = result.shape + ncols = nindex // nrows + # only implemented for a square number of images (e.g. 4 images go to a 2-by-2 panel) + if nindex != nrows * ncols: + raise NotImplementedError + # want result.shape = (height*nrows, width*ncols), have to be careful about striding + result = result.reshape(nrows, ncols, height, width).swapaxes(1, 2).reshape(height * nrows, width * ncols) + out.append(result) + return out, shape_str + + +def create_transform_im( + transform, transform_args, data, ndim=3, colorbar=False, update_doc=True, out_dir=None, seed=0, is_post=False +): + """Create an image with the before and after of the transform. + Also update the transform's documentation to point to this image.""" + + transform = transform(**transform_args) + + if not has_matplotlib: + raise RuntimeError("matplotlib is required to create the transform images.") + + if isinstance(transform, Randomizable): + # increment the seed for map transforms so they're different from the array versions.
+ seed = seed + 1 if isinstance(transform, MapTransform) else seed + transform.set_random_state(seed) + + out_dir = os.environ.get("MONAI_DOC_IMAGES") + if out_dir is None: + raise RuntimeError( + "Please git clone https://github.com/Project-MONAI/DocImages" + + " and then set the environment variable `MONAI_DOC_IMAGES`" + ) + out_dir = os.path.join(out_dir, "transforms") + + # Path is transform name + transform_name = transform.__class__.__name__ + out_fname = transform_name + ".png" + out_file = os.path.join(out_dir, out_fname) + + is_map = isinstance(transform, MapTransform) + data_in = pre_process_data(deepcopy(data), ndim, is_map, is_post) + + data_tr = transform(deepcopy(data_in)) + + images_before, before_shape = get_images(data_in) + images_after, after_shape = get_images(data_tr) + images = (images_before, images_after) + shapes = (before_shape, after_shape) + + labels = None + if is_map: + labels_before, *_ = get_images(data_in, is_label=True) + labels_after, *_ = get_images(data_tr, is_label=True) + labels = (labels_before, labels_after) + + save_image(images, labels, out_file, transform_name, transform_args, shapes, colorbar) + + if update_doc: + base_dir = pathlib.Path(__file__).parent.parent.parent + rst_path = os.path.join(base_dir, "docs", "source", "transforms.rst") + update_docstring(rst_path, transform_name) + + +if __name__ == "__main__": + + keys = [CommonKeys.IMAGE, CommonKeys.LABEL] + data = get_data(keys) + create_transform_im(RandFlip, dict(prob=1, spatial_axis=1), data) + create_transform_im(RandFlipd, dict(keys=keys, prob=1, spatial_axis=2), data) + create_transform_im(Flip, dict(spatial_axis=1), data) + create_transform_im(Flipd, dict(keys=keys, spatial_axis=2), data) + create_transform_im(Orientation, dict(axcodes="RPI", image_only=True), data) + create_transform_im(Orientationd, dict(keys=keys, axcodes="RPI"), data) + create_transform_im( + Rand3DElastic, dict(prob=1.0, sigma_range=(1, 2), magnitude_range=(0.5, 0.5), shear_range=(1, 1, 1)), data + ) + create_transform_im(Affine, dict(shear_params=(0, 0.5, 0), image_only=True, padding_mode="zeros"), data) + create_transform_im( + Affined, dict(keys=keys, shear_params=(0, 0.5, 0), mode=["bilinear", "nearest"], padding_mode="zeros"), data + ) + create_transform_im(RandAffine, dict(prob=1, shear_range=(0.5, 0.5), padding_mode="zeros"), data) + create_transform_im( + RandAffined, + dict(keys=keys, prob=1, shear_range=(0.5, 0.5), mode=["bilinear", "nearest"], padding_mode="zeros"), + data, + ) + create_transform_im( + Rand3DElastic, dict(sigma_range=(5, 7), magnitude_range=(50, 150), prob=1, padding_mode="zeros"), data + ) + create_transform_im( + Rand2DElastic, dict(prob=1, spacing=(20, 20), magnitude_range=(1, 2), padding_mode="zeros"), data, 2 + ) + create_transform_im( + Rand2DElasticd, + dict( + keys=keys, + prob=1, + spacing=(20, 20), + magnitude_range=(1, 2), + padding_mode="zeros", + mode=["bilinear", "nearest"], + ), + data, + 2, + ) + create_transform_im( + Rand3DElasticd, + dict( + keys=keys, + sigma_range=(5, 7), + magnitude_range=(50, 150), + prob=1, + padding_mode="zeros", + mode=["bilinear", "nearest"], + ), + data, + ) + create_transform_im(Rotate90, dict(spatial_axes=(1, 2)), data) + create_transform_im(Rotate90d, dict(keys=keys, spatial_axes=(1, 2)), data) + create_transform_im(RandRotate90, dict(prob=1), data) + create_transform_im(RandRotate90d, dict(keys=keys, prob=1), data) + create_transform_im(Rotate, dict(angle=0.1), data) + 
create_transform_im(Rotated, dict(keys=keys, angle=0.1, mode=["bilinear", "nearest"]), data) + create_transform_im(RandRotate, dict(prob=1, range_x=[0.4, 0.4]), data) + create_transform_im(RandRotated, dict(keys=keys, prob=1, range_x=[0.4, 0.4], mode=["bilinear", "nearest"]), data) + create_transform_im(Zoom, dict(zoom=0.6), data) + create_transform_im(Zoomd, dict(keys=keys, zoom=1.3, mode=["area", "nearest"]), data) + create_transform_im(RandZoom, dict(prob=1, min_zoom=0.6, max_zoom=0.8), data) + create_transform_im(RandZoomd, dict(keys=keys, prob=1, min_zoom=1.3, max_zoom=1.5, mode=["area", "nearest"]), data) + create_transform_im(ScaleIntensity, dict(minv=0, maxv=10), data, colorbar=True) + create_transform_im(ScaleIntensityd, dict(keys=CommonKeys.IMAGE, minv=0, maxv=10), data, colorbar=True) + create_transform_im(RandScaleIntensity, dict(prob=1.0, factors=(5, 10)), data, colorbar=True) + create_transform_im( + RandScaleIntensityd, dict(keys=CommonKeys.IMAGE, prob=1.0, factors=(5, 10)), data, colorbar=True + ) + create_transform_im(DivisiblePad, dict(k=64), data) + create_transform_im(DivisiblePadd, dict(keys=keys, k=64), data) + create_transform_im(CropForeground, dict(), data) + create_transform_im(CropForegroundd, dict(keys=keys, source_key=CommonKeys.IMAGE), data) + create_transform_im(RandGaussianNoise, dict(prob=1, mean=0, std=0.1), data) + create_transform_im(RandGaussianNoised, dict(keys=CommonKeys.IMAGE, prob=1, mean=0, std=0.1), data) + create_transform_im(KSpaceSpikeNoise, dict(loc=(100, 100, 100), k_intensity=13), data) + create_transform_im(KSpaceSpikeNoised, dict(keys=CommonKeys.IMAGE, loc=(100, 100, 100), k_intensity=13), data) + create_transform_im(RandKSpaceSpikeNoise, dict(prob=1, intensity_range=(10, 13)), data) + create_transform_im( + RandKSpaceSpikeNoised, + dict(keys=CommonKeys.IMAGE, global_prob=1, prob=1, common_sampling=True, intensity_range=(13, 15)), + data, + ) + create_transform_im(GibbsNoise, dict(alpha=0.8), data) + create_transform_im(GibbsNoised, dict(keys=CommonKeys.IMAGE, alpha=0.8), data) + create_transform_im(RandGibbsNoise, dict(prob=1.0, alpha=(0.6, 0.8)), data) + create_transform_im(RandGibbsNoised, dict(keys=CommonKeys.IMAGE, prob=1.0, alpha=(0.6, 0.8)), data) + create_transform_im(ShiftIntensity, dict(offset=1), data, colorbar=True) + create_transform_im(ShiftIntensityd, dict(keys=CommonKeys.IMAGE, offset=1), data, colorbar=True) + create_transform_im(RandShiftIntensity, dict(prob=1.0, offsets=(10, 20)), data, colorbar=True) + create_transform_im( + RandShiftIntensityd, dict(keys=CommonKeys.IMAGE, prob=1.0, offsets=(10, 20)), data, colorbar=True + ) + create_transform_im(StdShiftIntensity, dict(factor=10), data, colorbar=True) + create_transform_im(StdShiftIntensityd, dict(keys=CommonKeys.IMAGE, factor=10), data, colorbar=True) + create_transform_im(RandStdShiftIntensity, dict(prob=1.0, factors=(5, 10)), data, colorbar=True) + create_transform_im( + RandStdShiftIntensityd, dict(keys=CommonKeys.IMAGE, prob=1.0, factors=(5, 10)), data, colorbar=True + ) + create_transform_im(RandBiasField, dict(prob=1, coeff_range=(0.2, 0.3)), data) + create_transform_im(RandBiasFieldd, dict(keys=CommonKeys.IMAGE, prob=1, coeff_range=(0.2, 0.3)), data) + create_transform_im(NormalizeIntensity, dict(subtrahend=0, divisor=10), data, colorbar=True) + create_transform_im(NormalizeIntensityd, dict(keys=CommonKeys.IMAGE, subtrahend=0, divisor=10), data, colorbar=True) + create_transform_im(ThresholdIntensity, dict(threshold=0.4, above=False, cval=0.9), data, 
colorbar=True) + create_transform_im( + ThresholdIntensityd, dict(keys=CommonKeys.IMAGE, threshold=0.4, above=False, cval=0.9), data, colorbar=True + ) + create_transform_im(ScaleIntensityRange, dict(a_min=0, a_max=1, b_min=1, b_max=10), data, colorbar=True) + create_transform_im( + ScaleIntensityRanged, dict(keys=CommonKeys.IMAGE, a_min=0, a_max=1, b_min=1, b_max=10), data, colorbar=True + ) + create_transform_im(ScaleIntensityRangePercentiles, dict(lower=5, upper=95, b_min=1, b_max=10), data, colorbar=True) + create_transform_im( + ScaleIntensityRangePercentilesd, + dict(keys=CommonKeys.IMAGE, lower=5, upper=95, b_min=1, b_max=10), + data, + colorbar=True, + ) + create_transform_im(AdjustContrast, dict(gamma=2), data, colorbar=True) + create_transform_im(AdjustContrastd, dict(keys=CommonKeys.IMAGE, gamma=2), data, colorbar=True) + create_transform_im(RandAdjustContrast, dict(prob=1, gamma=(1.5, 2)), data, colorbar=True) + create_transform_im(RandAdjustContrastd, dict(keys=CommonKeys.IMAGE, prob=1, gamma=(1.5, 2)), data, colorbar=True) + create_transform_im(MaskIntensity, dict(mask_data=data[CommonKeys.IMAGE], select_fn=lambda x: x > 0.3), data) + create_transform_im( + MaskIntensityd, dict(keys=CommonKeys.IMAGE, mask_key=CommonKeys.IMAGE, select_fn=lambda x: x > 0.3), data + ) + create_transform_im(GaussianSmooth, dict(sigma=2), data) + create_transform_im(GaussianSmoothd, dict(keys=CommonKeys.IMAGE, sigma=2), data) + create_transform_im(RandGaussianSmooth, dict(prob=1.0, sigma_x=(1, 2)), data) + create_transform_im(RandGaussianSmoothd, dict(keys=CommonKeys.IMAGE, prob=1.0, sigma_x=(1, 2)), data) + create_transform_im(GaussianSharpen, dict(), GaussianSmoothd(CommonKeys.IMAGE, 2)(data)) + create_transform_im(GaussianSharpend, dict(keys=CommonKeys.IMAGE), GaussianSmoothd(CommonKeys.IMAGE, 2)(data)) + create_transform_im(RandGaussianSharpen, dict(prob=1), GaussianSmoothd(CommonKeys.IMAGE, 2)(data)) + create_transform_im( + RandGaussianSharpend, dict(keys=CommonKeys.IMAGE, prob=1), GaussianSmoothd(CommonKeys.IMAGE, 2)(data) + ) + create_transform_im(RandHistogramShift, dict(prob=1, num_control_points=3), data, colorbar=True) + create_transform_im( + RandHistogramShiftd, dict(keys=CommonKeys.IMAGE, prob=1, num_control_points=3), data, colorbar=True + ) + create_transform_im(RandCoarseDropout, dict(prob=1, holes=200, spatial_size=20, fill_value=0), data) + create_transform_im( + RandCoarseDropoutd, dict(keys=CommonKeys.IMAGE, prob=1, holes=200, spatial_size=20, fill_value=0), data + ) + create_transform_im(RandCoarseShuffle, dict(prob=1, holes=200, spatial_size=20), data) + create_transform_im(RandCoarseShuffled, dict(keys=CommonKeys.IMAGE, prob=1, holes=200, spatial_size=20), data) + create_transform_im(HistogramNormalize, dict(num_bins=10), data) + create_transform_im(HistogramNormalized, dict(keys=CommonKeys.IMAGE, num_bins=10), data) + create_transform_im(SpatialPad, dict(spatial_size=(300, 300, 300)), data) + create_transform_im(SpatialPadd, dict(keys=keys, spatial_size=(300, 300, 300)), data) + create_transform_im(BorderPad, dict(spatial_border=10), data) + create_transform_im(BorderPadd, dict(keys=keys, spatial_border=10), data) + create_transform_im(SpatialCrop, dict(roi_center=(75, 75, 75), roi_size=(100, 100, 100)), data) + create_transform_im(SpatialCropd, dict(keys=keys, roi_center=(75, 75, 75), roi_size=(100, 100, 100)), data) + create_transform_im(CenterSpatialCrop, dict(roi_size=(100, 100, 100)), data) + create_transform_im(CenterSpatialCropd, dict(keys=keys, roi_size=(100, 100, 
100)), data) + create_transform_im(RandSpatialCrop, dict(roi_size=(100, 100, 100), random_size=False), data) + create_transform_im(RandSpatialCropd, dict(keys=keys, roi_size=(100, 100, 100), random_size=False), data) + create_transform_im(RandSpatialCropSamples, dict(num_samples=4, roi_size=(100, 100, 100), random_size=False), data) + create_transform_im( + RandSpatialCropSamplesd, dict(keys=keys, num_samples=4, roi_size=(100, 100, 100), random_size=False), data + ) + create_transform_im( + RandWeightedCrop, dict(spatial_size=(100, 100, 100), num_samples=4, weight_map=data[CommonKeys.IMAGE] > 0), data + ) + create_transform_im( + RandWeightedCropd, dict(keys=keys, spatial_size=(100, 100, 100), num_samples=4, w_key=CommonKeys.IMAGE), data + ) + create_transform_im( + RandCropByPosNegLabel, + dict(spatial_size=(100, 100, 100), label=data[CommonKeys.LABEL], neg=0, num_samples=4), + data, + ) + create_transform_im( + RandCropByPosNegLabeld, + dict(keys=keys, spatial_size=(100, 100, 100), label_key=CommonKeys.LABEL, neg=0, num_samples=4), + data, + ) + create_transform_im( + RandCropByLabelClasses, + dict( + spatial_size=(100, 100, 100), label=data[CommonKeys.LABEL] > 0, num_classes=2, ratios=[0, 1], num_samples=4 + ), + data, + ) + create_transform_im( + RandCropByLabelClassesd, + dict( + keys=keys, + spatial_size=(100, 100, 100), + label_key=CommonKeys.LABEL, + num_classes=2, + ratios=[0, 1], + num_samples=4, + ), + data, + ) + create_transform_im(ResizeWithPadOrCrop, dict(spatial_size=(100, 100, 100)), data) + create_transform_im(ResizeWithPadOrCropd, dict(keys=keys, spatial_size=(100, 100, 100)), data) + create_transform_im(RandScaleCrop, dict(roi_scale=0.4), data) + create_transform_im(RandScaleCropd, dict(keys=keys, roi_scale=0.4), data) + create_transform_im(CenterScaleCrop, dict(roi_scale=0.4), data) + create_transform_im(CenterScaleCropd, dict(keys=keys, roi_scale=0.4), data) + create_transform_im( + AsDiscrete, dict(num_classes=2, threshold_values=True, logit_thresh=10), data, is_post=True, colorbar=True + ) + create_transform_im( + AsDiscreted, + dict(keys=CommonKeys.LABEL, num_classes=2, threshold_values=True, logit_thresh=10), + data, + is_post=True, + ) + create_transform_im(LabelFilter, dict(applied_labels=(1, 2, 3, 4, 5, 6)), data, is_post=True) + create_transform_im( + LabelFilterd, dict(keys=CommonKeys.LABEL, applied_labels=(1, 2, 3, 4, 5, 6)), data, is_post=True + ) + create_transform_im(LabelToContour, dict(), data, is_post=True) + create_transform_im(LabelToContourd, dict(keys=CommonKeys.LABEL), data, is_post=True) + create_transform_im(Spacing, dict(pixdim=(5, 5, 5), image_only=True), data) + create_transform_im(Spacingd, dict(keys=keys, pixdim=(5, 5, 5), mode=["bilinear", "nearest"]), data) + create_transform_im(RandAxisFlip, dict(prob=1), data) + create_transform_im(RandAxisFlipd, dict(keys=keys, prob=1), data) + create_transform_im(Resize, dict(spatial_size=(100, 100, 100)), data) + create_transform_im(Resized, dict(keys=keys, spatial_size=(100, 100, 100), mode=["area", "nearest"]), data) + data_binary = deepcopy(data) + data_binary[CommonKeys.LABEL] = (data_binary[CommonKeys.LABEL] > 0).astype(np.float32) + create_transform_im(KeepLargestConnectedComponent, dict(applied_labels=1), data_binary, is_post=True, ndim=2) + create_transform_im( + KeepLargestConnectedComponentd, dict(keys=CommonKeys.LABEL, applied_labels=1), data_binary, is_post=True, ndim=2 + ) + create_transform_im(RandGridDistortion, dict(num_cells=3, prob=1.0, distort_limit=(-0.1, 0.1)), data) + 
create_transform_im( + RandGridDistortiond, + dict(keys=keys, num_cells=4, prob=1.0, distort_limit=(-0.2, 0.2), mode=["bilinear", "nearest"]), + data, + ) diff --git a/monai/transforms/utils_pytorch_numpy_unification.py b/monai/transforms/utils_pytorch_numpy_unification.py index 2eebe3eda3..32da80152e 100644 --- a/monai/transforms/utils_pytorch_numpy_unification.py +++ b/monai/transforms/utils_pytorch_numpy_unification.py @@ -9,14 +9,31 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Sequence, Union + import numpy as np import torch from monai.config.type_definitions import NdarrayOrTensor +from monai.utils.misc import is_module_ver_at_least +from monai.utils.type_conversion import convert_to_dst_type __all__ = [ "moveaxis", "in1d", + "clip", + "percentile", + "where", + "nonzero", + "floor_divide", + "unravel_index", + "unravel_indices", + "ravel", + "any_np_pt", + "maximum", + "concatenate", + "cumsum", + "isfinite", ] @@ -50,3 +67,236 @@ def in1d(x, y): if isinstance(x, np.ndarray): return np.in1d(x, y) return (x[..., None] == torch.tensor(y, device=x.device)).any(-1).view(-1) + + +def clip(a: NdarrayOrTensor, a_min, a_max) -> NdarrayOrTensor: + """`np.clip` with equivalent implementation for torch.""" + result: NdarrayOrTensor + if isinstance(a, np.ndarray): + result = np.clip(a, a_min, a_max) + else: + result = torch.clip(a, a_min, a_max) + return result + + +def percentile(x: NdarrayOrTensor, q) -> Union[NdarrayOrTensor, float, int]: + """`np.percentile` with equivalent implementation for torch. + + PyTorch uses `quantile`, but this functionality is only available from v1.7. + For earlier versions, we calculate it ourselves. This doesn't do interpolation, + so it is the equivalent of ``numpy.percentile(..., interpolation="nearest")``. + + Args: + x: input data + q: percentile to compute (should be in the range 0 <= q <= 100) + + Returns: + Resulting value (scalar) + """ + if np.isscalar(q): + if not 0 <= q <= 100: + raise ValueError(f"q should be in the range [0, 100], got: {q}.") + elif any(q < 0) or any(q > 100): + raise ValueError(f"q should be in the range [0, 100], got: {q}.") + result: Union[NdarrayOrTensor, float, int] + if isinstance(x, np.ndarray): + result = np.percentile(x, q) + else: + q = torch.tensor(q, device=x.device) + if hasattr(torch, "quantile"): + result = torch.quantile(x, q / 100.0) + else: + # Note that ``kthvalue()`` works one-based, i.e., the first sorted value + # corresponds to k=1, not k=0. Thus, we need the `1 +`. + k = 1 + (0.01 * q * (x.numel() - 1)).round().int() + if k.numel() > 1: + r = [x.view(-1).kthvalue(int(_k)).values.item() for _k in k] + result = torch.tensor(r, device=x.device) + else: + result = x.view(-1).kthvalue(int(k)).values.item() + + return result + + +def where(condition: NdarrayOrTensor, x=None, y=None) -> NdarrayOrTensor: + """ + `np.where` with equivalent implementation for torch. + Note that `torch.where` may convert y.dtype to x.dtype. + """ + result: NdarrayOrTensor + if isinstance(condition, np.ndarray): + if x is not None: + result = np.where(condition, x, y) + else: + result = np.where(condition) + else: + if x is not None: + x = torch.as_tensor(x, device=condition.device) + y = torch.as_tensor(y, device=condition.device, dtype=x.dtype) + result = torch.where(condition, x, y) + else: + result = torch.where(condition) # type: ignore + return result + + +def nonzero(x: NdarrayOrTensor): + """`np.nonzero` with equivalent implementation for torch.
+ + Args: + x: array/tensor + + Returns: + Indices of the elements that are non-zero (flattened). + """ + if isinstance(x, np.ndarray): + return np.nonzero(x)[0] + return torch.nonzero(x).flatten() + + +def floor_divide(a: NdarrayOrTensor, b) -> NdarrayOrTensor: + """`np.floor_divide` with equivalent implementation for torch. + + As of pt1.8, use `torch.div(..., rounding_mode="floor")`, and + before that, use `torch.floor_divide`. + + Args: + a: first array/tensor + b: scalar to divide by + + Returns: + Element-wise floor division between two arrays/tensors. + """ + if isinstance(a, torch.Tensor): + if is_module_ver_at_least(torch, (1, 8, 0)): + return torch.div(a, b, rounding_mode="floor") + return torch.floor_divide(a, b) + return np.floor_divide(a, b) + + +def unravel_index(idx, shape): + """`np.unravel_index` with equivalent implementation for torch. + + Args: + idx: index to unravel + shape: shape of array/tensor + + Returns: + Index unravelled for given shape + """ + if isinstance(idx, torch.Tensor): + coord = [] + for dim in reversed(shape): + coord.append(idx % dim) + idx = floor_divide(idx, dim) + return torch.stack(coord[::-1]) + return np.asarray(np.unravel_index(idx, shape)) + + +def unravel_indices(idx, shape): + """Compute unravelled coordinates from a sequence of indices. + + Args: + idx: a sequence of indices to unravel + shape: shape of array/tensor + + Returns: + Stacked indices unravelled for given shape + """ + lib_stack = torch.stack if isinstance(idx[0], torch.Tensor) else np.stack + return lib_stack([unravel_index(i, shape) for i in idx]) + + +def ravel(x: NdarrayOrTensor): + """`np.ravel` with equivalent implementation for torch. + + Args: + x: array/tensor to ravel + + Returns: + A contiguous flattened array/tensor. + """ + if isinstance(x, torch.Tensor): + if hasattr(torch, "ravel"): + return x.ravel() + return x.flatten().contiguous() + return np.ravel(x) + + +def any_np_pt(x: NdarrayOrTensor, axis: Union[int, Sequence[int]]): + """`np.any` with equivalent implementation for torch. + + For pytorch, convert to boolean for compatibility with older versions. + + Args: + x: input array/tensor + axis: axis (or axes) to perform `any` over + + Returns: + Result of applying `any` over the given axes. + """ + if isinstance(x, np.ndarray): + return np.any(x, axis) + + # pytorch can't handle multiple dimensions to `any` so loop across them + axis = [axis] if not isinstance(axis, Sequence) else axis + for ax in axis: + try: + x = torch.any(x, ax) + except RuntimeError: + # older versions of pytorch require the input to be cast to boolean + x = torch.any(x.bool(), ax) + return x + + +def maximum(a: NdarrayOrTensor, b: NdarrayOrTensor) -> NdarrayOrTensor: + """`np.maximum` with equivalent implementation for torch. + + `torch.maximum` is only available from pt>1.6; for older versions, use `torch.stack` and `torch.max`. + + Args: + a: first array/tensor + b: second array/tensor + + Returns: + Element-wise maximum between two arrays/tensors.
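+
+    Example (illustrative)::
+
+        maximum(np.array([1, 5, 2]), np.array([3, 3, 3]))           # -> array([3, 5, 3])
+        maximum(torch.tensor([1, 5, 2]), torch.tensor([3, 3, 3]))   # -> tensor([3, 5, 3])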
+ """ + if isinstance(a, torch.Tensor) and isinstance(b, torch.Tensor): + # is torch and has torch.maximum (pt>1.6) + if hasattr(torch, "maximum"): + return torch.maximum(a, b) + return torch.stack((a, b)).max(dim=0)[0] + return np.maximum(a, b) + + +def concatenate(to_cat: Sequence[NdarrayOrTensor], axis: int = 0, out=None) -> NdarrayOrTensor: + """`np.concatenate` with equivalent implementation for torch (`torch.cat`).""" + if isinstance(to_cat[0], np.ndarray): + return np.concatenate(to_cat, axis, out) # type: ignore + return torch.cat(to_cat, dim=axis, out=out) # type: ignore + + +def cumsum(a: NdarrayOrTensor, axis=None): + """`np.cumsum` with equivalent implementation for torch.""" + if isinstance(a, np.ndarray): + return np.cumsum(a, axis) + if axis is None: + return torch.cumsum(a[:], 0) + return torch.cumsum(a, dim=axis) + + +def isfinite(x): + """`np.isfinite` with equivalent implementation for torch.""" + if not isinstance(x, torch.Tensor): + return np.isfinite(x) + return torch.isfinite(x) + + +def searchsorted(a: NdarrayOrTensor, v: NdarrayOrTensor, right=False, sorter=None): + side = "right" if right else "left" + if isinstance(a, np.ndarray): + return np.searchsorted(a, v, side, sorter) # type: ignore + if hasattr(torch, "searchsorted"): + return torch.searchsorted(a, v, right=right) # type: ignore + # if using old PyTorch, will convert to numpy array then compute + ret = np.searchsorted(a.cpu().numpy(), v.cpu().numpy(), side, sorter) # type: ignore + ret, *_ = convert_to_dst_type(ret, a) + return ret diff --git a/monai/utils/__init__.py b/monai/utils/__init__.py index aa8f02f815..57bbb0dd5b 100644 --- a/monai/utils/__init__.py +++ b/monai/utils/__init__.py @@ -12,7 +12,7 @@ # have to explicitly bring these in here to resolve circular import issues from .aliases import alias, resolve_name from .decorators import MethodReplacer, RestartGenerator -from .deprecated import DeprecatedError, deprecated, deprecated_arg +from .deprecate_utils import DeprecatedError, deprecated, deprecated_arg from .dist import evenly_divisible_all_gather, get_dist_device, string_list_all_gather from .enums import ( Average, @@ -77,6 +77,7 @@ from .state_cacher import StateCacher from .type_conversion import ( convert_data_type, + convert_to_cupy, convert_to_dst_type, convert_to_numpy, convert_to_tensor, diff --git a/monai/utils/aliases.py b/monai/utils/aliases.py index 2b7b29eeb5..a08dab4f95 100644 --- a/monai/utils/aliases.py +++ b/monai/utils/aliases.py @@ -70,8 +70,8 @@ def resolve_name(name): try: mod = importlib.import_module(modname) obj = getattr(mod, declname, None) - except ModuleNotFoundError: - raise ValueError(f"Module {modname!r} not found.") + except ModuleNotFoundError as not_found_err: + raise ValueError(f"Module {modname!r} not found.") from not_found_err if obj is None: raise ValueError(f"Module {modname!r} does not have member {declname!r}.") diff --git a/monai/utils/deprecated.py b/monai/utils/deprecate_utils.py similarity index 87% rename from monai/utils/deprecated.py rename to monai/utils/deprecate_utils.py index 3a4568b06c..4ae5991d9f 100644 --- a/monai/utils/deprecated.py +++ b/monai/utils/deprecate_utils.py @@ -60,6 +60,9 @@ def deprecated( Decorated definition which warns or raises exception when used """ + # if version_val.startswith("0+"): + # # version unknown, set version_val to a large value (assuming the latest version) + # version_val = "100" if since is not None and removed is not None and not version_leq(since, removed): raise ValueError(f"since must be less or 
equal to removed, got since={since}, removed={removed}.") is_not_yet_deprecated = since is not None and version_val != since and version_leq(version_val, since) @@ -116,6 +119,7 @@ def deprecated_arg( removed: Optional[str] = None, msg_suffix: str = "", version_val: str = __version__, + new_name: Optional[str] = None, ): """ Marks a particular named argument of a callable as deprecated. The same conditions for `since` and `removed` as @@ -130,6 +134,8 @@ def deprecated_arg( using the Sphinx directives such as `.. versionchanged:: version` and `.. deprecated:: version`. https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-versionadded + In the current implementation type annotations are not preserved. + Args: name: name of position or keyword argument to mark as deprecated. @@ -137,17 +143,21 @@ def deprecated_arg( removed: version at which the argument was removed and no longer usable. msg_suffix: message appended to warning/exception detailing reasons for deprecation and what to use instead. version_val: (used for testing) version to compare since and removed against, default is MONAI version. + new_name: name of position or keyword argument to replace the deprecated argument. Returns: - Decorated callable which warns or raises exception when deprecated argument used + Decorated callable which warns or raises exception when deprecated argument used. """ + + if version_val.startswith("0+") or not f"{version_val}".strip()[0].isdigit(): + # version unknown, set version_val to a large value (assuming the latest version) + version_val = "100" if since is not None and removed is not None and not version_leq(since, removed): raise ValueError(f"since must be less or equal to removed, got since={since}, removed={removed}.") is_not_yet_deprecated = since is not None and version_val != since and version_leq(version_val, since) if is_not_yet_deprecated: # smaller than `since`, do nothing return lambda obj: obj - if since is None and removed is None: # raise a DeprecatedError directly is_removed = True @@ -157,9 +167,6 @@ def deprecated_arg( is_deprecated = since is not None and version_leq(since, version_val) is_removed = removed is not None and version_leq(removed, version_val) - if is_not_yet_deprecated: - return lambda obj: obj - def _decorator(func): argname = f"{func.__name__}_{name}" @@ -180,6 +187,15 @@ def _decorator(func): @wraps(func) def _wrapper(*args, **kwargs): + if new_name is not None and name in kwargs and new_name not in kwargs: + # replace the deprecated arg "name" with "new_name" + # if name is specified and new_name is not specified + kwargs[new_name] = kwargs[name] + try: + sig.bind(*args, **kwargs).arguments + except TypeError: + # multiple values for new_name using both args and kwargs + kwargs.pop(new_name, None) binding = sig.bind(*args, **kwargs).arguments positional_found = name in binding diff --git a/monai/utils/jupyter_utils.py b/monai/utils/jupyter_utils.py index 26487083b1..f862452fb1 100644 --- a/monai/utils/jupyter_utils.py +++ b/monai/utils/jupyter_utils.py @@ -16,11 +16,14 @@ from enum import Enum from threading import RLock, Thread -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union import numpy as np import torch +from monai.config import IgniteInfo +from monai.utils.module import min_version, optional_import + try: import matplotlib.pyplot as plt @@ -28,14 +31,11 @@ except ImportError: has_matplotlib = False -try: +if 
TYPE_CHECKING: from ignite.engine import Engine, Events - - has_ignite = True -except ImportError: - Engine = object - Events = object - has_ignite = False +else: + Engine, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Engine") + Events, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Events") LOSS_NAME = "loss" @@ -128,7 +128,7 @@ def plot_metric_images( else: im.imshow(np.squeeze(imagemap[n]), cmap="gray") - im.set_title("%s\n%.3g -> %.3g" % (n, imagemap[n].min(), imagemap[n].max())) + im.set_title(f"{n}\n{imagemap[n].min():.3g} -> {imagemap[n].max():.3g}") im.axis("off") axes.append(im) @@ -161,6 +161,7 @@ def plot_engine_status( window_fraction: int = 20, image_fn: Optional[Callable] = tensor_to_images, fig=None, + selected_inst: int = 0, ) -> Tuple: """ Plot the status of the given Engine with its logger. The plot will consist of a graph of loss values and metrics @@ -189,22 +190,36 @@ def plot_engine_status( graphmap = {LOSS_NAME: logger.loss} graphmap.update(logger.metrics) - imagemap = {} + imagemap: Dict = {} if image_fn is not None and engine.state is not None and engine.state.batch is not None: for src in (engine.state.batch, engine.state.output): + label = "Batch" if src is engine.state.batch else "Output" + batch_selected_inst = selected_inst # selected batch index, set to 0 when src is decollated + + # if the src object is a list of elements, i.e. a decollated batch, select an element and keep it as + # a dictionary of tensors with a batch dimension added if isinstance(src, list): - for i, s in enumerate(src): - if isinstance(s, dict): - for k, v in s.items(): - if isinstance(v, torch.Tensor): - image = image_fn(k, v) - if image is not None: - imagemap[f"{k}_{i}"] = image - elif isinstance(s, torch.Tensor): - label = "Batch" if src is engine.state.batch else "Output" - image = image_fn(label, s) + selected_dict = src[selected_inst] # select this element + batch_selected_inst = 0 # set the selection to be the single index in the batch dimension + # store each tensor that is interpretable as an image with an added batch dimension + src = {k: v[None] for k, v in selected_dict.items() if isinstance(v, torch.Tensor) and v.ndim >= 3} + + # images will be generated from the batch item selected above only, or from the single item given as `src` + + if isinstance(src, dict): + for k, v in src.items(): + if isinstance(v, torch.Tensor) and v.ndim >= 4: + image = image_fn(k, v[batch_selected_inst]) + + # if we have images add each one separately to the map if image is not None: - imagemap[f"{label}_{i}"] = image + for i, im in enumerate(image): + imagemap[f"{k}_{i}"] = im + + elif isinstance(src, torch.Tensor): + image = image_fn(label, src) + # add each returned image to the map, one entry per image + if image is not None: + for i, im in enumerate(image): + imagemap[f"{label}_{i}"] = im axes = plot_metric_images(fig, title, graphmap, imagemap, yscale, avg_keys, window_fraction) diff --git a/monai/utils/misc.py b/monai/utils/misc.py index a31452f6ae..2940c102ff 100644 --- a/monai/utils/misc.py +++ b/monai/utils/misc.py @@ -22,7 +22,7 @@ import numpy as np import torch -from monai.utils.module import get_torch_version_tuple, version_leq +from monai.utils.module import version_leq __all__ = [ "zip_with", @@ -256,13 +256,11 @@ def set_determinism( else: # restore the original flags torch.backends.cudnn.deterministic = _flag_deterministic torch.backends.cudnn.benchmark = _flag_cudnn_benchmark - if use_deterministic_algorithms is not None: - torch_ver = get_torch_version_tuple() - if torch_ver >= (1, 9): + if 
hasattr(torch, "use_deterministic_algorithms"): torch.use_deterministic_algorithms(use_deterministic_algorithms) - elif torch_ver >= (1, 7): - torch.set_deterministic(use_deterministic_algorithms) # beta feature + elif hasattr(torch, "set_deterministic"): + torch.set_deterministic(use_deterministic_algorithms) # type: ignore else: warnings.warn("use_deterministic_algorithms=True, but PyTorch version is too old to set the mode.") @@ -279,9 +277,7 @@ def list_to_dict(items): def _parse_var(s): items = s.split("=", maxsplit=1) key = items[0].strip(" \n\r\t'") - value = None - if len(items) > 1: - value = items[1].strip(" \n\r\t'") + value = items[1].strip(" \n\r\t'") if len(items) > 1 else None return key, value d = {} @@ -302,10 +298,7 @@ def _parse_var(s): def copy_to_device( - obj: Any, - device: Optional[Union[str, torch.device]], - non_blocking: bool = True, - verbose: bool = False, + obj: Any, device: Optional[Union[str, torch.device]], non_blocking: bool = True, verbose: bool = False ) -> Any: """ Copy object or tuple/list/dictionary of objects to ``device``. diff --git a/monai/utils/module.py b/monai/utils/module.py index 33314fb0e3..130b89493e 100644 --- a/monai/utils/module.py +++ b/monai/utils/module.py @@ -136,9 +136,7 @@ def damerau_levenshtein_distance(s1: str, s2: str): for j, s2j in enumerate(s2): cost = 0 if s1i == s2j else 1 d[(i, j)] = min( - d[(i - 1, j)] + 1, # deletion - d[(i, j - 1)] + 1, # insertion - d[(i - 1, j - 1)] + cost, # substitution + d[(i - 1, j)] + 1, d[(i, j - 1)] + 1, d[(i - 1, j - 1)] + cost # deletion # insertion # substitution ) if i and j and s1i == s2[j - 1] and s1[i - 1] == s2j: d[(i, j)] = min(d[(i, j)], d[i - 2, j - 2] + cost) # transposition @@ -364,17 +362,25 @@ def get_torch_version_tuple(): Returns: tuple of ints represents the pytorch major/minor version. """ - return tuple((int(x) for x in torch.__version__.split(".")[:2])) + return tuple(int(x) for x in torch.__version__.split(".")[:2]) -def version_leq(lhs, rhs): - """Returns True if version `lhs` is earlier or equal to `rhs`.""" +def version_leq(lhs: str, rhs: str): + """ + Returns True if version `lhs` is earlier or equal to `rhs`. + + Args: + lhs: version name to compare with `rhs`, return True if earlier or equal to `rhs`. + rhs: version name to compare with `lhs`, return True if later or equal to `lhs`. 
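+
+    For example, a minimal sketch with illustrative version strings::
+
+        version_leq("0.6.0", "0.7.0")  # True
+        version_leq("0.7.0", "0.6.0")  # False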
+ + """ + lhs, rhs = str(lhs), str(rhs) ver, has_ver = optional_import("pkg_resources", name="parse_version") if has_ver: return ver(lhs) <= ver(rhs) - def _try_cast(val): + def _try_cast(val: str): val = val.strip() try: m = match("(\\d+)(.*)", val) @@ -390,10 +396,10 @@ def _try_cast(val): rhs = rhs.split("+", 1)[0] # parse the version strings in this basic way without `packaging` package - lhs = map(_try_cast, lhs.split(".")) - rhs = map(_try_cast, rhs.split(".")) + lhs_ = map(_try_cast, lhs.split(".")) + rhs_ = map(_try_cast, rhs.split(".")) - for l, r in zip(lhs, rhs): + for l, r in zip(lhs_, rhs_): if l != r: if isinstance(l, int) and isinstance(r, int): return l < r diff --git a/monai/utils/profiling.py b/monai/utils/profiling.py index 695653e897..d7459885fb 100644 --- a/monai/utils/profiling.py +++ b/monai/utils/profiling.py @@ -56,7 +56,7 @@ def wrapper(*args, **kwargs): cpu_time = torch.autograd.profiler.format_time(cpu_time) gpu_time = torch.autograd.profiler.format_time(gpu_time) - print("cpu time: {}, gpu time: {}".format(cpu_time, gpu_time), flush=True) + print(f"cpu time: {cpu_time}, gpu time: {gpu_time}", flush=True) return result @@ -83,7 +83,7 @@ def wrapper(*args, **kwargs): total_time = (end - start) * 1e6 total_time_str = torch.autograd.profiler.format_time(total_time) - print("end to end time: {}".format(total_time_str), flush=True) + print(f"end to end time: {total_time_str}", flush=True) return result diff --git a/monai/utils/state_cacher.py b/monai/utils/state_cacher.py index 94943a8c37..35ac72916e 100644 --- a/monai/utils/state_cacher.py +++ b/monai/utils/state_cacher.py @@ -34,12 +34,7 @@ class StateCacher: >>> model.load_state_dict(state_cacher.retrieve("model")) """ - def __init__( - self, - in_memory: bool, - cache_dir: Optional[str] = None, - allow_overwrite: bool = True, - ) -> None: + def __init__(self, in_memory: bool, cache_dir: Optional[str] = None, allow_overwrite: bool = True) -> None: """Constructor. Args: diff --git a/monai/utils/type_conversion.py b/monai/utils/type_conversion.py index b0ce187e38..b7f067076c 100644 --- a/monai/utils/type_conversion.py +++ b/monai/utils/type_conversion.py @@ -6,6 +6,7 @@ from monai.config.type_definitions import DtypeLike, NdarrayOrTensor from monai.utils import optional_import +from monai.utils.module import look_up_option cp, has_cp = optional_import("cupy") cp_ndarray, _ = optional_import("cupy", name="ndarray") @@ -16,6 +17,7 @@ "get_equivalent_dtype", "convert_data_type", "get_dtype", + "convert_to_cupy", "convert_to_numpy", "convert_to_tensor", "convert_to_dst_type", @@ -40,31 +42,34 @@ def dtype_torch_to_numpy(dtype): """Convert a torch dtype to its numpy equivalent.""" - if dtype not in _torch_to_np_dtype: - raise ValueError(f"Unsupported torch to numpy dtype '{dtype}'.") - return _torch_to_np_dtype[dtype] + return look_up_option(dtype, _torch_to_np_dtype) def dtype_numpy_to_torch(dtype): """Convert a numpy dtype to its torch equivalent.""" # np dtypes can be given as np.float32 and np.dtype(np.float32) so unify them - dtype = np.dtype(dtype) if type(dtype) is type else dtype - if dtype not in _np_to_torch_dtype: - raise ValueError(f"Unsupported numpy to torch dtype '{dtype}'.") - return _np_to_torch_dtype[dtype] + dtype = np.dtype(dtype) if isinstance(dtype, type) else dtype + return look_up_option(dtype, _np_to_torch_dtype) def get_equivalent_dtype(dtype, data_type): """Convert to the `dtype` that corresponds to `data_type`. 
-    Example:
+
+    Example::
+
         im = torch.tensor(1)
         dtype = get_equivalent_dtype(np.float32, type(im))
+
     """
+    if dtype is None:
+        return None
     if data_type is torch.Tensor:
-        if type(dtype) is torch.dtype:
+        if isinstance(dtype, torch.dtype):
+            # already a torch dtype and target `data_type` is torch.Tensor
             return dtype
         return dtype_numpy_to_torch(dtype)
-    if type(dtype) is not torch.dtype:
+    if not isinstance(dtype, torch.dtype):
+        # assuming the dtype is ok if it is not a torch dtype and target `data_type` is not torch.Tensor
         return dtype
     return dtype_torch_to_numpy(dtype)

@@ -83,7 +88,9 @@ def get_dtype(data: Any):
     return type(data)


-def convert_to_tensor(data, wrap_sequence: bool = False):
+def convert_to_tensor(
+    data, dtype: Optional[torch.dtype] = None, device: Optional[torch.device] = None, wrap_sequence: bool = False
+):
     """
     Utility to convert the input data to a PyTorch Tensor. If passing a dictionary, list or tuple,
     recursively check every item and convert it to PyTorch Tensor.
@@ -92,34 +99,38 @@ def convert_to_tensor(
         data: input data can be PyTorch Tensor, numpy array, list, dictionary, int, float, bool, str, etc.
             will convert Tensor, Numpy array, float, int, bool to Tensors, strings and objects keep the original.
             for dictionary, list or tuple, convert every item to a Tensor if applicable.
-        wrap_sequence: if `False`, then lists will recursively call this function. E.g., `[1, 2]` -> `[tensor(1), tensor(2)]`.
-            If `True`, then `[1, 2]` -> `tensor([1, 2])`.
+        dtype: target data type when converting to Tensor.
+        device: target device to put the converted Tensor data.
+        wrap_sequence: if `False`, then lists will recursively call this function.
+            E.g., `[1, 2]` -> `[tensor(1), tensor(2)]`. If `True`, then `[1, 2]` -> `tensor([1, 2])`.
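+
+    For example, a minimal sketch with illustrative values::
+
+        convert_to_tensor([1, 2], dtype=torch.float32, wrap_sequence=True)  # tensor([1., 2.])
+        convert_to_tensor([1, 2], dtype=torch.float32, wrap_sequence=False)  # [tensor(1.), tensor(2.)]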
""" if isinstance(data, torch.Tensor): - return data.contiguous() + return data.to(dtype=dtype, device=device, memory_format=torch.contiguous_format) # type: ignore if isinstance(data, np.ndarray): # skip array of string classes and object, refer to: # https://github.com/pytorch/pytorch/blob/v1.9.0/torch/utils/data/_utils/collate.py#L13 if re.search(r"[SaUO]", data.dtype.str) is None: # numpy array with 0 dims is also sequence iterable, # `ascontiguousarray` will add 1 dim if img has no dim, so we only apply on data with dims - return torch.as_tensor(data if data.ndim == 0 else np.ascontiguousarray(data)) - elif isinstance(data, (float, int, bool)): - return torch.as_tensor(data) - elif isinstance(data, Sequence) and wrap_sequence: - return torch.as_tensor(data) + if data.ndim > 0: + data = np.ascontiguousarray(data) + return torch.as_tensor(data, dtype=dtype, device=device) # type: ignore + elif (has_cp and isinstance(data, cp_ndarray)) or isinstance(data, (float, int, bool)): + return torch.as_tensor(data, dtype=dtype, device=device) # type: ignore elif isinstance(data, list): - return [convert_to_tensor(i) for i in data] + list_ret = [convert_to_tensor(i, dtype=dtype, device=device) for i in data] + return torch.as_tensor(list_ret, dtype=dtype, device=device) if wrap_sequence else list_ret # type: ignore elif isinstance(data, tuple): - return tuple(convert_to_tensor(i) for i in data) + tuple_ret = tuple(convert_to_tensor(i, dtype=dtype, device=device) for i in data) + return torch.as_tensor(tuple_ret, dtype=dtype, device=device) if wrap_sequence else tuple_ret # type: ignore elif isinstance(data, dict): - return {k: convert_to_tensor(v) for k, v in data.items()} + return {k: convert_to_tensor(v, dtype=dtype, device=device) for k, v in data.items()} return data -def convert_to_numpy(data, wrap_sequence: bool = False): +def convert_to_numpy(data, dtype: Optional[DtypeLike] = None, wrap_sequence: bool = False): """ Utility to convert the input data to a numpy array. If passing a dictionary, list or tuple, recursively check every item and convert it to numpy array. @@ -128,23 +139,24 @@ def convert_to_numpy(data, wrap_sequence: bool = False): data: input data can be PyTorch Tensor, numpy array, list, dictionary, int, float, bool, str, etc. will convert Tensor, Numpy array, float, int, bool to numpy arrays, strings and objects keep the original. for dictionary, list or tuple, convert every item to a numpy array if applicable. + dtype: target data type when converting to numpy array. wrap_sequence: if `False`, then lists will recursively call this function. E.g., `[1, 2]` -> `[array(1), array(2)]`. If `True`, then `[1, 2]` -> `array([1, 2])`. 
""" if isinstance(data, torch.Tensor): - data = data.detach().cpu().numpy() + data = data.detach().to(dtype=get_equivalent_dtype(dtype, torch.Tensor), device="cpu").numpy() elif has_cp and isinstance(data, cp_ndarray): - data = cp.asnumpy(data) - elif isinstance(data, (float, int, bool)): - data = np.asarray(data) - elif isinstance(data, Sequence) and wrap_sequence: - return np.asarray(data) + data = cp.asnumpy(data).astype(dtype) + elif isinstance(data, (np.ndarray, float, int, bool)): + data = np.asarray(data, dtype=dtype) elif isinstance(data, list): - return [convert_to_numpy(i) for i in data] + list_ret = [convert_to_numpy(i, dtype=dtype) for i in data] + return np.asarray(list_ret) if wrap_sequence else list_ret elif isinstance(data, tuple): - return tuple(convert_to_numpy(i) for i in data) + tuple_ret = tuple(convert_to_numpy(i, dtype=dtype) for i in data) + return np.asarray(tuple_ret) if wrap_sequence else tuple_ret elif isinstance(data, dict): - return {k: convert_to_numpy(v) for k, v in data.items()} + return {k: convert_to_numpy(v, dtype=dtype) for k, v in data.items()} if isinstance(data, np.ndarray) and data.ndim > 0: data = np.ascontiguousarray(data) @@ -152,11 +164,47 @@ def convert_to_numpy(data, wrap_sequence: bool = False): return data +def convert_to_cupy(data, dtype, wrap_sequence: bool = True): + """ + Utility to convert the input data to a cupy array. If passing a dictionary, list or tuple, + recursively check every item and convert it to cupy array. + + Args: + data: input data can be PyTorch Tensor, numpy array, cupy array, list, dictionary, int, float, bool, str, etc. + Tensor, numpy array, cupy array, float, int, bool are converted to cupy arrays + + for dictionary, list or tuple, convert every item to a numpy array if applicable. + dtype: target data type when converting to Cupy array. + wrap_sequence: if `False`, then lists will recursively call this function. E.g., `[1, 2]` -> `[array(1), array(2)]`. + If `True`, then `[1, 2]` -> `array([1, 2])`. + """ + + # direct calls + if isinstance(data, (cp_ndarray, np.ndarray, torch.Tensor, float, int, bool)): + data = cp.asarray(data, dtype) + elif isinstance(data, list): + list_ret = [convert_to_cupy(i, dtype) for i in data] + return cp.asarray(list_ret) if wrap_sequence else list_ret + elif isinstance(data, tuple): + tuple_ret = tuple(convert_to_cupy(i, dtype) for i in data) + return cp.asarray(tuple_ret) if wrap_sequence else tuple_ret + elif isinstance(data, dict): + return {k: convert_to_cupy(v, dtype) for k, v in data.items()} + # make it contiguous + if not isinstance(data, cp.ndarray): + raise ValueError(f"The input data type [{type(data)}] cannot be converted into cupy arrays!") + + if data.ndim > 0: + data = cp.ascontiguousarray(data) + return data + + def convert_data_type( data: Any, output_type: Optional[type] = None, device: Optional[torch.device] = None, dtype: Optional[Union[DtypeLike, torch.dtype]] = None, + wrap_sequence: bool = False, ) -> Tuple[NdarrayOrTensor, type, Optional[torch.device]]: """ Convert to `torch.Tensor`/`np.ndarray` from `torch.Tensor`/`np.ndarray`/`float`/`int` etc. @@ -168,14 +216,27 @@ def convert_data_type( dtype: dtype of output data. Converted to correct library type (e.g., `np.float32` is converted to `torch.float32` if output type is `torch.Tensor`). If left blank, it remains unchanged. + wrap_sequence: if `False`, then lists will recursively call this function. E.g., `[1, 2]` -> `[array(1), array(2)]`. + If `True`, then `[1, 2]` -> `array([1, 2])`. 
Returns: modified data, orig_type, orig_device + + Note: + When both `output_type` and `dtype` are specified with different backend + (e.g., `torch.Tensor` and `np.float32`), the `output_type` will be used as the primary type, + for example:: + + >>> convert_data_type(1, torch.Tensor, dtype=np.float32) + (1.0, , None) + """ orig_type: Any if isinstance(data, torch.Tensor): orig_type = torch.Tensor elif isinstance(data, np.ndarray): orig_type = np.ndarray + elif has_cp and isinstance(data, cp.ndarray): + orig_type = cp.ndarray else: orig_type = type(data) @@ -183,33 +244,47 @@ def convert_data_type( output_type = output_type or orig_type - dtype = get_equivalent_dtype(dtype or get_dtype(data), output_type) + dtype_ = get_equivalent_dtype(dtype or get_dtype(data), output_type) if output_type is torch.Tensor: - if orig_type is not torch.Tensor: - data = convert_to_tensor(data) - if dtype != data.dtype: - data = data.to(dtype) - if device is not None: - data = data.to(device) + data = convert_to_tensor(data, dtype=dtype_, device=device, wrap_sequence=wrap_sequence) elif output_type is np.ndarray: - if orig_type is not np.ndarray: - data = convert_to_numpy(data) - if data is not None and dtype != data.dtype: - data = data.astype(dtype) + data = convert_to_numpy(data, dtype=dtype_, wrap_sequence=wrap_sequence) + elif has_cp and output_type is cp.ndarray: + data = convert_to_cupy(data, dtype=dtype_, wrap_sequence=wrap_sequence) else: raise ValueError(f"Unsupported output type: {output_type}") return data, orig_type, orig_device -def convert_to_dst_type(src: Any, dst: NdarrayOrTensor) -> Tuple[NdarrayOrTensor, type, Optional[torch.device]]: +def convert_to_dst_type( + src: Any, dst: NdarrayOrTensor, dtype: Optional[Union[DtypeLike, torch.dtype]] = None, wrap_sequence: bool = False +) -> Tuple[NdarrayOrTensor, type, Optional[torch.device]]: """ - Convert `src` to the same `torch.Tensor`/`np.ndarray` and data type as `dst`. + Convert source data to the same data type and device as the destination data. + If `dst` is an instance of `torch.Tensor` or its subclass, convert `src` to `torch.Tensor` with the same data type as `dst`, + if `dst` is an instance of `numpy.ndarray` or its subclass, convert to `numpy.ndarray` with the same data type as `dst`, + otherwise, convert to the type of `dst` directly. + + Args: + src: sourse data to convert type. + dst: destination data that convert to the same data type as it. + dtype: an optional argument if the target `dtype` is different from the original `dst`'s data type. + wrap_sequence: if `False`, then lists will recursively call this function. E.g., `[1, 2]` -> `[array(1), array(2)]`. + If `True`, then `[1, 2]` -> `array([1, 2])`. 
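+
+    For example, a minimal sketch with illustrative values::
+
+        convert_to_dst_type(np.array([1, 2]), dst=torch.zeros(2))
+        # (tensor([1., 2.]), <class 'numpy.ndarray'>, None)
+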
See Also: :func:`convert_data_type` """ - device = None + device = dst.device if isinstance(dst, torch.Tensor) else None + if dtype is None: + dtype = dst.dtype + + output_type: Any if isinstance(dst, torch.Tensor): - device = dst.device - return convert_data_type(data=src, output_type=type(dst), device=device, dtype=dst.dtype) + output_type = torch.Tensor + elif isinstance(dst, np.ndarray): + output_type = np.ndarray + else: + output_type = type(dst) + return convert_data_type(data=src, output_type=output_type, device=device, dtype=dtype, wrap_sequence=wrap_sequence) diff --git a/monai/visualize/class_activation_maps.py b/monai/visualize/class_activation_maps.py index 992eaecdac..6109d76a8a 100644 --- a/monai/visualize/class_activation_maps.py +++ b/monai/visualize/class_activation_maps.py @@ -137,6 +137,11 @@ def __call__(self, x, class_idx=None, retain_graph=False): self.score = self.class_score(logits, self.class_idx) self.model.zero_grad() self.score.sum().backward(retain_graph=retain_graph) + for layer in self.target_layers: + if layer not in self.gradients: + raise RuntimeError( + f"Backward hook for {layer} is not triggered; `requires_grad` of {layer} should be `True`." + ) grad = tuple(self.gradients[layer] for layer in self.target_layers) if train: self.model.train() @@ -221,6 +226,8 @@ class CAM(CAMBase): .. code-block:: python + import torch + # densenet 2d from monai.networks.nets import DenseNet121 from monai.visualize import CAM @@ -319,6 +326,8 @@ class GradCAM(CAMBase): .. code-block:: python + import torch + # densenet 2d from monai.networks.nets import DenseNet121 from monai.visualize import GradCAM diff --git a/monai/visualize/img2tensorboard.py b/monai/visualize/img2tensorboard.py index ccdbdc2396..fd6dc9483b 100644 --- a/monai/visualize/img2tensorboard.py +++ b/monai/visualize/img2tensorboard.py @@ -44,7 +44,7 @@ def _image3_animated_gif(tag: str, image: Union[np.ndarray, torch.Tensor], scale if len(image.shape) != 3: raise AssertionError("3D image tensors expected to be in `HWD` format, len(image.shape) != 3") - ims = [(np.asarray((image[:, :, i])) * scale_factor).astype(np.uint8) for i in range(image.shape[2])] + ims = [(np.asarray(image[:, :, i]) * scale_factor).astype(np.uint8) for i in range(image.shape[2])] ims = [GifImage.fromarray(im) for im in ims] img_str = b"" for b_data in PIL.GifImagePlugin.getheader(ims[0])[0]: diff --git a/monai/visualize/occlusion_sensitivity.py b/monai/visualize/occlusion_sensitivity.py index 61b84bb406..51bcb1f517 100644 --- a/monai/visualize/occlusion_sensitivity.py +++ b/monai/visualize/occlusion_sensitivity.py @@ -265,9 +265,7 @@ def _compute_occlusion_sensitivity(self, x, b_box): return sensitivity_ims, output_im_shape def __call__( # type: ignore - self, - x: torch.Tensor, - b_box: Optional[Sequence] = None, + self, x: torch.Tensor, b_box: Optional[Sequence] = None ) -> Tuple[torch.Tensor, torch.Tensor]: """ Args: diff --git a/requirements-dev.txt b/requirements-dev.txt index 785454ad5d..9338306d90 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,6 @@ # Full requirements for developments -r requirements-min.txt -pytorch-ignite==0.4.5 +pytorch-ignite==0.4.6 gdown>=3.6.4 scipy itk>=5.2 @@ -31,8 +31,12 @@ Sphinx==3.5.3 recommonmark==0.6.0 sphinx-autodoc-typehints==1.11.1 sphinx-rtd-theme==0.5.2 -cucim~=0.19.0; platform_system == "Linux" +cucim>=21.8.2; platform_system == "Linux" openslide-python==1.1.2 +imagecodecs; platform_system == "Linux" +tifffile; platform_system == "Linux" pandas requests einops 
+transformers
+mlflow
diff --git a/runtests.sh b/runtests.sh
index f10e888543..a77f9decd5 100755
--- a/runtests.sh
+++ b/runtests.sh
@@ -397,9 +397,9 @@ then

         if [ $doBlackFix = true ]
         then
-            ${cmdPrefix}${PY_EXE} -m black "$(pwd)"
+            ${cmdPrefix}${PY_EXE} -m black --skip-magic-trailing-comma "$(pwd)"
         else
-            ${cmdPrefix}${PY_EXE} -m black --check "$(pwd)"
+            ${cmdPrefix}${PY_EXE} -m black --skip-magic-trailing-comma --check "$(pwd)"
         fi
         black_status=$?
diff --git a/setup.cfg b/setup.cfg
index 6efe768a6f..aa015d8ec7 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,16 +34,18 @@ all =
     pillow
     tensorboard
     gdown>=3.6.4
-    pytorch-ignite==0.4.5
+    pytorch-ignite==0.4.6
     torchvision
     itk>=5.2
     tqdm>=4.47.0
     lmdb
     psutil
-    cucim~=0.19.0
+    cucim>=21.8.2
     openslide-python==1.1.2
     pandas
     einops
+    transformers
+    mlflow
 nibabel =
     nibabel
 skimage =
@@ -55,7 +57,7 @@ tensorboard =
 gdown =
     gdown>=3.6.4
 ignite =
-    pytorch-ignite==0.4.5
+    pytorch-ignite==0.4.6
 torchvision =
     torchvision
 itk =
@@ -67,22 +69,39 @@ lmdb =
 psutil =
     psutil
 cucim =
-    cucim~=0.19.0
+    cucim>=21.8.2
 openslide =
     openslide-python==1.1.2
 pandas =
     pandas
 einops =
     einops
+transformers =
+    transformers
+mlflow =
+    mlflow

 [flake8]
 select = B,C,E,F,N,P,T4,W,B9
 max_line_length = 120
 # C408 ignored because we like the dict keyword argument syntax
 # E501 is not flexible enough, we're using B950 instead
 ignore =
-    E203,E305,E402,E501,E721,E741,F821,F841,F999,W503,W504,C408,E302,W291,E303,
-    # N812 lowercase 'torch.nn.functional' imported as non lowercase 'F'
-    N812
+    E203
+    E305
+    E402
+    E501
+    E721
+    E741
+    F821
+    F841
+    F999
+    W503
+    W504
+    C408
+    E302
+    W291
+    E303
+    N812  # lowercase 'torch.nn.functional' imported as non lowercase 'F'
 per_file_ignores = __init__.py: F401
 exclude = *.pyi,.git,.eggs,monai/_version.py,versioneer.py,venv,.venv,_version.py
diff --git a/setup.py b/setup.py
index eeaffb7823..95087ce06f 100644
--- a/setup.py
+++ b/setup.py
@@ -53,11 +53,7 @@ def torch_parallel_backend():
     try:
-        match = re.search(
-            "^ATen parallel backend: (?P<backend>.*)$",
-            torch._C._parallel_info(),
-            re.MULTILINE,
-        )
+        match = re.search("^ATen parallel backend: (?P<backend>.*)$", torch._C._parallel_info(), re.MULTILINE)
         if match is None:
             return None
         backend = match.group("backend")
diff --git a/tests/clang_format_utils.py b/tests/clang_format_utils.py
index 41902eb272..1391fdcd47 100644
--- a/tests/clang_format_utils.py
+++ b/tests/clang_format_utils.py
@@ -50,10 +50,10 @@ def get_and_check_clang_format():
     """
     # If the host platform is not in PLATFORM_TO_HASH, it is unsupported.
if HOST_PLATFORM not in PLATFORM_TO_HASH: - print("Unsupported platform: {}".format(HOST_PLATFORM)) + print(f"Unsupported platform: {HOST_PLATFORM}") return False if HOST_PLATFORM not in PLATFORM_TO_CF_URL: - print("Unsupported platform: {}".format(HOST_PLATFORM)) + print(f"Unsupported platform: {HOST_PLATFORM}") return False try: @@ -69,7 +69,7 @@ def get_and_check_clang_format(): mode = os.stat(CLANG_FORMAT_PATH).st_mode mode |= stat.S_IXUSR os.chmod(CLANG_FORMAT_PATH, mode) - print("Using clang-format located at {}".format(CLANG_FORMAT_PATH)) + print(f"Using clang-format located at {CLANG_FORMAT_PATH}") return True diff --git a/tests/min_tests.py b/tests/min_tests.py index 5b376d7b57..7ff90cafa9 100644 --- a/tests/min_tests.py +++ b/tests/min_tests.py @@ -75,7 +75,6 @@ def run_testsuit(): "test_handler_surface_distance", "test_handler_tb_image", "test_handler_tb_stats", - "test_handler_transform_inverter", "test_handler_validation", "test_hausdorff_distance", "test_header_correct", @@ -87,6 +86,7 @@ def run_testsuit(): "test_integration_unet_2d", "test_integration_workflows", "test_integration_workflows_gan", + "test_integration_fast_train", "test_invertd", "test_iterable_dataset", "test_keep_largest_connected_component", @@ -103,6 +103,7 @@ def run_testsuit(): "test_nifti_rw", "test_nifti_saver", "test_occlusion_sensitivity", + "test_openslide_reader", "test_orientation", "test_orientationd", "test_parallel_execution", @@ -140,6 +141,8 @@ def run_testsuit(): "test_zoom", "test_zoom_affine", "test_zoomd", + "test_transchex", + "test_handler_mlflow", ] assert sorted(exclude_cases) == sorted(set(exclude_cases)), f"Duplicated items in {exclude_cases}" diff --git a/tests/ngc_mmar_loading.py b/tests/ngc_mmar_loading.py new file mode 100644 index 0000000000..c1ed22de5d --- /dev/null +++ b/tests/ngc_mmar_loading.py @@ -0,0 +1,37 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import unittest + +import torch +from parameterized import parameterized + +from monai.apps.mmars import MODEL_DESC, load_from_mmar +from monai.config import print_debug_info + + +class TestAllDownloadingMMAR(unittest.TestCase): + def setUp(self): + print_debug_info() + self.test_dir = "./" + + @parameterized.expand((item,) for item in MODEL_DESC) + def test_loading_mmar(self, item): + pretrained_model = load_from_mmar(item=item, mmar_dir="./", map_location="cpu") + self.assertTrue(isinstance(pretrained_model, torch.nn.Module)) + + def tearDown(self): + print(os.listdir(self.test_dir)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_activationsd.py b/tests/test_activationsd.py index 355c50f389..0a981f27e8 100644 --- a/tests/test_activationsd.py +++ b/tests/test_activationsd.py @@ -29,10 +29,7 @@ TEST_CASE_2 = [ {"keys": ["pred", "label"], "sigmoid": False, "softmax": False, "other": [torch.tanh, None]}, {"pred": torch.tensor([[[0.0, 1.0], [2.0, 3.0]]]), "label": torch.tensor([[[0.0, 1.0], [2.0, 3.0]]])}, - { - "pred": torch.tensor([[[0.0000, 0.7616], [0.9640, 0.9951]]]), - "label": torch.tensor([[[0.0, 1.0], [2.0, 3.0]]]), - }, + {"pred": torch.tensor([[[0.0000, 0.7616], [0.9640, 0.9951]]]), "label": torch.tensor([[[0.0, 1.0], [2.0, 3.0]]])}, (1, 2, 2), ] diff --git a/tests/test_add_coordinate_channels.py b/tests/test_add_coordinate_channels.py index 3399008e02..4d779cffff 100644 --- a/tests/test_add_coordinate_channels.py +++ b/tests/test_add_coordinate_channels.py @@ -12,32 +12,36 @@ import unittest import numpy as np +import torch from parameterized import parameterized from monai.transforms import AddCoordinateChannels +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [{"spatial_channels": (1, 2, 3)}, np.random.randint(0, 2, size=(1, 3, 3, 3)), (4, 3, 3, 3)] - -TEST_CASE_2 = [{"spatial_channels": (1,)}, np.random.randint(0, 2, size=(1, 3, 3, 3)), (2, 3, 3, 3)] - -TEST_CASE_ERROR_3 = [{"spatial_channels": (3,)}, np.random.randint(0, 2, size=(1, 3, 3))] - -TEST_CASE_ERROR_4 = [{"spatial_channels": (0, 1, 2)}, np.random.randint(0, 2, size=(1, 3, 3))] +TESTS, TEST_CASES_ERROR_1, TEST_CASES_ERROR_2 = [], [], [] +for p in TEST_NDARRAYS: + TESTS.append([{"spatial_channels": (1, 2, 3)}, p(np.random.randint(0, 2, size=(1, 3, 3, 3))), (4, 3, 3, 3)]) + TESTS.append([{"spatial_channels": (1,)}, p(np.random.randint(0, 2, size=(1, 3, 3, 3))), (2, 3, 3, 3)]) + TEST_CASES_ERROR_1.append([{"spatial_channels": (3,)}, p(np.random.randint(0, 2, size=(1, 3, 3)))]) + TEST_CASES_ERROR_2.append([{"spatial_channels": (0, 1, 2)}, p(np.random.randint(0, 2, size=(1, 3, 3)))]) class TestAddCoordinateChannels(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + @parameterized.expand(TESTS) def test_shape(self, input_param, input, expected_shape): result = AddCoordinateChannels(**input_param)(input) + self.assertEqual(type(result), type(input)) + if isinstance(result, torch.Tensor): + self.assertEqual(result.device, input.device) self.assertEqual(list(result.shape), list(expected_shape)) - np.testing.assert_array_equal(input[0, ...], result[0, ...]) + assert_allclose(input[0, ...], result[0, ...]) - @parameterized.expand([TEST_CASE_ERROR_3]) + @parameterized.expand(TEST_CASES_ERROR_1) def test_max_channel(self, input_param, input): with self.assertRaises(ValueError): AddCoordinateChannels(**input_param)(input) - @parameterized.expand([TEST_CASE_ERROR_4]) + @parameterized.expand(TEST_CASES_ERROR_2) def test_channel_dim(self, 
input_param, input): with self.assertRaises(ValueError): AddCoordinateChannels(**input_param)(input) diff --git a/tests/test_add_coordinate_channelsd.py b/tests/test_add_coordinate_channelsd.py index 0fa6aae1c9..08d9e62468 100644 --- a/tests/test_add_coordinate_channelsd.py +++ b/tests/test_add_coordinate_channelsd.py @@ -12,40 +12,54 @@ import unittest import numpy as np +import torch from parameterized import parameterized from monai.transforms import AddCoordinateChannelsd +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - {"spatial_channels": (1, 2, 3), "keys": ["img"]}, - {"img": np.random.randint(0, 2, size=(1, 3, 3, 3))}, - (4, 3, 3, 3), -] +TESTS, TEST_CASES_ERROR_1, TEST_CASES_ERROR_2 = [], [], [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + {"spatial_channels": (1, 2, 3), "keys": ["img"]}, + {"img": p(np.random.randint(0, 2, size=(1, 3, 3, 3)))}, + (4, 3, 3, 3), + ] + ) + TESTS.append( + [ + {"spatial_channels": (1,), "keys": ["img"]}, + {"img": p(np.random.randint(0, 2, size=(1, 3, 3, 3)))}, + (2, 3, 3, 3), + ] + ) -TEST_CASE_2 = [ - {"spatial_channels": (1,), "keys": ["img"]}, - {"img": np.random.randint(0, 2, size=(1, 3, 3, 3))}, - (2, 3, 3, 3), -] - -TEST_CASE_ERROR_3 = [{"spatial_channels": (3,), "keys": ["img"]}, {"img": np.random.randint(0, 2, size=(1, 3, 3))}] - -TEST_CASE_ERROR_4 = [{"spatial_channels": (0, 1, 2), "keys": ["img"]}, {"img": np.random.randint(0, 2, size=(1, 3, 3))}] + TEST_CASES_ERROR_1.append( + [{"spatial_channels": (3,), "keys": ["img"]}, {"img": p(np.random.randint(0, 2, size=(1, 3, 3)))}] + ) + TEST_CASES_ERROR_2.append( + [{"spatial_channels": (0, 1, 2), "keys": ["img"]}, {"img": p(np.random.randint(0, 2, size=(1, 3, 3)))}] + ) class TestAddCoordinateChannels(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + @parameterized.expand(TESTS) def test_shape(self, input_param, input, expected_shape): - result = AddCoordinateChannelsd(**input_param)(input) - self.assertEqual(list(result["img"].shape), list(expected_shape)) - np.testing.assert_array_equal(input["img"][0, ...], result["img"][0, ...]) + result = AddCoordinateChannelsd(**input_param)(input)["img"] + input = input["img"] + self.assertEqual(type(result), type(input)) + if isinstance(result, torch.Tensor): + self.assertEqual(result.device, input.device) + self.assertEqual(result.shape, expected_shape) + assert_allclose(input[0, ...], result[0, ...]) - @parameterized.expand([TEST_CASE_ERROR_3]) + @parameterized.expand(TEST_CASES_ERROR_1) def test_max_channel(self, input_param, input): with self.assertRaises(ValueError): AddCoordinateChannelsd(**input_param)(input) - @parameterized.expand([TEST_CASE_ERROR_4]) + @parameterized.expand(TEST_CASES_ERROR_2) def test_channel_dim(self, input_param, input): with self.assertRaises(ValueError): AddCoordinateChannelsd(**input_param)(input) diff --git a/tests/test_add_extreme_points_channel.py b/tests/test_add_extreme_points_channel.py index ecf2c83d3c..06d26dfdfc 100644 --- a/tests/test_add_extreme_points_channel.py +++ b/tests/test_add_extreme_points_channel.py @@ -15,52 +15,63 @@ from parameterized import parameterized from monai.transforms import AddExtremePointsChannel +from tests.utils import TEST_NDARRAYS, assert_allclose IMG_CHANNEL = 3 +TESTS = [] +for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + TESTS.append( + [ + { + "img": p(np.zeros((IMG_CHANNEL, 4, 3))), + "label": q(np.array([[[0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]]])), + "sigma": 1.0, + "rescale_min": 0.0, + "rescale_max": 1.0, + }, + p( + 
np.array( + [ + [0.38318458, 0.98615628, 0.85551184], + [0.35422316, 0.94430935, 1.0], + [0.46000731, 0.57319659, 0.46000722], + [0.64577687, 0.38318464, 0.0], + ] + ) + ), + ] + ) -TEST_CASE_1 = [ - { - "img": np.zeros((IMG_CHANNEL, 4, 3)), - "label": np.array([[[0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]]]), - "sigma": 1.0, - "rescale_min": 0.0, - "rescale_max": 1.0, - }, - np.array( - [ - [0.38318458, 0.98615628, 0.85551184], - [0.35422316, 0.94430935, 1.0], - [0.46000731, 0.57319659, 0.46000722], - [0.64577687, 0.38318464, 0.0], - ] - ), -] - -TEST_CASE_2 = [ - { - "img": np.zeros((IMG_CHANNEL, 4, 3)), - "label": np.array([[[0, 1, 0], [1, 1, 1], [0, 1, 0], [0, 1, 0]]]), - "sigma": 1.0, - "rescale_min": 0.0, - "rescale_max": 1.0, - }, - np.array( - [ - [0.44628328, 0.80495411, 0.44628328], - [0.6779086, 1.0, 0.67790854], - [0.33002687, 0.62079221, 0.33002687], - [0.0, 0.31848389, 0.0], - ] - ), -] + TESTS.append( + [ + { + "img": p(np.zeros((IMG_CHANNEL, 4, 3))), + "label": q(np.array([[[0, 1, 0], [1, 1, 1], [0, 1, 0], [0, 1, 0]]])), + "sigma": 1.0, + "rescale_min": 0.0, + "rescale_max": 1.0, + }, + p( + np.array( + [ + [0.44628328, 0.80495411, 0.44628328], + [0.6779086, 1.0, 0.67790854], + [0.33002687, 0.62079221, 0.33002687], + [0.0, 0.31848389, 0.0], + ] + ) + ), + ] + ) class TestAddExtremePointsChannel(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + @parameterized.expand(TESTS) def test_correct_results(self, input_data, expected): add_extreme_points_channel = AddExtremePointsChannel() result = add_extreme_points_channel(**input_data) - np.testing.assert_allclose(result[IMG_CHANNEL], expected, rtol=1e-4) + assert_allclose(result[IMG_CHANNEL], expected, rtol=1e-4) if __name__ == "__main__": diff --git a/tests/test_add_extreme_points_channeld.py b/tests/test_add_extreme_points_channeld.py index e33bb0838c..acd0ce69ce 100644 --- a/tests/test_add_extreme_points_channeld.py +++ b/tests/test_add_extreme_points_channeld.py @@ -15,42 +15,60 @@ from parameterized import parameterized from monai.transforms import AddExtremePointsChanneld +from tests.utils import TEST_NDARRAYS, assert_allclose IMG_CHANNEL = 3 -TEST_CASE_1 = [ - {"img": np.zeros((IMG_CHANNEL, 4, 3)), "label": np.array([[[0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]]])}, - np.array( - [ - [0.38318458, 0.98615628, 0.85551184], - [0.35422316, 0.94430935, 1.0], - [0.46000731, 0.57319659, 0.46000722], - [0.64577687, 0.38318464, 0.0], - ] - ), -] - -TEST_CASE_2 = [ - {"img": np.zeros((IMG_CHANNEL, 4, 3)), "label": np.array([[[0, 1, 0], [1, 1, 1], [0, 1, 0], [0, 1, 0]]])}, - np.array( - [ - [0.44628328, 0.80495411, 0.44628328], - [0.6779086, 1.0, 0.67790854], - [0.33002687, 0.62079221, 0.33002687], - [0.0, 0.31848389, 0.0], - ] - ), -] +TESTS = [] +for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + TESTS.append( + [ + { + "img": p(np.zeros((IMG_CHANNEL, 4, 3))), + "label": q(np.array([[[0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]]])), + }, + p( + np.array( + [ + [0.38318458, 0.98615628, 0.85551184], + [0.35422316, 0.94430935, 1.0], + [0.46000731, 0.57319659, 0.46000722], + [0.64577687, 0.38318464, 0.0], + ] + ) + ), + ] + ) + + TESTS.append( + [ + { + "img": p(np.zeros((IMG_CHANNEL, 4, 3))), + "label": q(np.array([[[0, 1, 0], [1, 1, 1], [0, 1, 0], [0, 1, 0]]])), + }, + p( + np.array( + [ + [0.44628328, 0.80495411, 0.44628328], + [0.6779086, 1.0, 0.67790854], + [0.33002687, 0.62079221, 0.33002687], + [0.0, 0.31848389, 0.0], + ] + ) + ), + ] + ) class TestAddExtremePointsChanneld(unittest.TestCase): - 
@parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + @parameterized.expand(TESTS) def test_correct_results(self, input_data, expected): add_extreme_points_channel = AddExtremePointsChanneld( keys="img", label_key="label", sigma=1.0, rescale_min=0.0, rescale_max=1.0 ) result = add_extreme_points_channel(input_data) - np.testing.assert_allclose(result["img"][IMG_CHANNEL], expected, rtol=1e-4) + assert_allclose(result["img"][IMG_CHANNEL], expected, rtol=1e-4) if __name__ == "__main__": diff --git a/tests/test_adjust_contrast.py b/tests/test_adjust_contrast.py index 8e78698360..80ac61cfea 100644 --- a/tests/test_adjust_contrast.py +++ b/tests/test_adjust_contrast.py @@ -15,7 +15,7 @@ from parameterized import parameterized from monai.transforms import AdjustContrast -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose TEST_CASE_1 = [1.0] @@ -28,15 +28,16 @@ class TestAdjustContrast(NumpyImageTestCase2D): @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) def test_correct_results(self, gamma): adjuster = AdjustContrast(gamma=gamma) - result = adjuster(self.imt) - if gamma == 1.0: - expected = self.imt - else: - epsilon = 1e-7 - img_min = self.imt.min() - img_range = self.imt.max() - img_min - expected = np.power(((self.imt - img_min) / float(img_range + epsilon)), gamma) * img_range + img_min - np.testing.assert_allclose(expected, result, rtol=1e-05) + for p in TEST_NDARRAYS: + result = adjuster(p(self.imt)) + if gamma == 1.0: + expected = self.imt + else: + epsilon = 1e-7 + img_min = self.imt.min() + img_range = self.imt.max() - img_min + expected = np.power(((self.imt - img_min) / float(img_range + epsilon)), gamma) * img_range + img_min + assert_allclose(expected, result, rtol=1e-05, type_test=False) if __name__ == "__main__": diff --git a/tests/test_adjust_contrastd.py b/tests/test_adjust_contrastd.py index 65647607e4..1e1c2cf8bc 100644 --- a/tests/test_adjust_contrastd.py +++ b/tests/test_adjust_contrastd.py @@ -15,7 +15,7 @@ from parameterized import parameterized from monai.transforms import AdjustContrastd -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose TEST_CASE_1 = [1.0] @@ -28,15 +28,16 @@ class TestAdjustContrastd(NumpyImageTestCase2D): @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) def test_correct_results(self, gamma): adjuster = AdjustContrastd("img", gamma=gamma) - result = adjuster({"img": self.imt}) - if gamma == 1.0: - expected = self.imt - else: - epsilon = 1e-7 - img_min = self.imt.min() - img_range = self.imt.max() - img_min - expected = np.power(((self.imt - img_min) / float(img_range + epsilon)), gamma) * img_range + img_min - np.testing.assert_allclose(expected, result["img"], rtol=1e-05) + for p in TEST_NDARRAYS: + result = adjuster({"img": p(self.imt)}) + if gamma == 1.0: + expected = self.imt + else: + epsilon = 1e-7 + img_min = self.imt.min() + img_range = self.imt.max() - img_min + expected = np.power(((self.imt - img_min) / float(img_range + epsilon)), gamma) * img_range + img_min + assert_allclose(expected, result["img"], rtol=1e-05, type_test=False) if __name__ == "__main__": diff --git a/tests/test_affine.py b/tests/test_affine.py index dd82d72e23..bd89f1a436 100644 --- a/tests/test_affine.py +++ b/tests/test_affine.py @@ -16,78 +16,139 @@ from parameterized import parameterized from monai.transforms import Affine +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASES = [ - [ - 
dict(padding_mode="zeros", as_tensor_output=False, device=None), - {"img": np.arange(9).reshape((1, 3, 3)), "spatial_size": (-1, 0)}, - np.arange(9).reshape(1, 3, 3), - ], - [ - dict(padding_mode="zeros", as_tensor_output=False, device=None, image_only=True), - {"img": np.arange(9).reshape((1, 3, 3)), "spatial_size": (-1, 0)}, - np.arange(9).reshape(1, 3, 3), - ], - [ - dict(padding_mode="zeros", as_tensor_output=False, device=None), - {"img": np.arange(4).reshape((1, 2, 2))}, - np.arange(4).reshape(1, 2, 2), - ], - [ - dict(padding_mode="zeros", as_tensor_output=False, device=None), - {"img": np.arange(4).reshape((1, 2, 2)), "spatial_size": (4, 4)}, - np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]), - ], - [ - dict(rotate_params=[np.pi / 2], padding_mode="zeros", as_tensor_output=False, device=None), - {"img": np.arange(4).reshape((1, 2, 2)), "spatial_size": (4, 4)}, - np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 0.0, 0.0], [0.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]), - ], - [ - dict(padding_mode="zeros", as_tensor_output=False, device=None), - {"img": np.arange(27).reshape((1, 3, 3, 3)), "spatial_size": (-1, 0, 0)}, - np.arange(27).reshape(1, 3, 3, 3), - ], - [ - dict(padding_mode="zeros", as_tensor_output=False, device=None), - {"img": np.arange(8).reshape((1, 2, 2, 2)), "spatial_size": (4, 4, 4)}, - np.array( +TESTS = [] +for p in TEST_NDARRAYS: + for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]: + TESTS.append( [ - [ - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 4.0, 5.0, 0.0], [0.0, 6.0, 7.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - ] + dict(padding_mode="zeros", device=device), + {"img": p(np.arange(9).reshape((1, 3, 3))), "spatial_size": (-1, 0)}, + p(np.arange(9).reshape(1, 3, 3)), ] - ), - ], - [ - dict(rotate_params=[np.pi / 2], padding_mode="zeros", as_tensor_output=False, device=None), - {"img": np.arange(8).reshape((1, 2, 2, 2)), "spatial_size": (4, 4, 4)}, - np.array( + ) + TESTS.append( [ - [ - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 0.0, 0.0], [0.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 6.0, 4.0, 0.0], [0.0, 7.0, 5.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - ] + dict(padding_mode="zeros", device=device, image_only=True), + {"img": p(np.arange(9).reshape((1, 3, 3))), "spatial_size": (-1, 0)}, + p(np.arange(9).reshape(1, 3, 3)), ] - ), - ], -] + ) + TESTS.append( + [ + dict(padding_mode="zeros", device=device), + {"img": p(np.arange(4).reshape((1, 2, 2)))}, + p(np.arange(4).reshape(1, 2, 2)), + ] + ) + TESTS.append( + [ + dict(padding_mode="zeros", device=device), + {"img": p(np.arange(4).reshape((1, 2, 2))), "spatial_size": (4, 4)}, + p(np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]]])), + ] + ) + TESTS.append( + [ + dict(rotate_params=[np.pi / 2], padding_mode="zeros", device=device), + {"img": p(np.arange(4).reshape((1, 2, 2))), "spatial_size": (4, 4)}, + p(np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 0.0, 0.0], [0.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]]])), + ] + ) + TESTS.append( + [ + 
dict(padding_mode="zeros", device=device), + {"img": p(np.arange(27).reshape((1, 3, 3, 3))), "spatial_size": (-1, 0, 0)}, + p(np.arange(27).reshape(1, 3, 3, 3)), + ] + ) + TESTS.append( + [ + dict(padding_mode="zeros", device=device), + {"img": p(np.arange(8).reshape((1, 2, 2, 2))), "spatial_size": (4, 4, 4)}, + p( + np.array( + [ + [ + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0], + [0.0, 2.0, 3.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 4.0, 5.0, 0.0], + [0.0, 6.0, 7.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + ] + ] + ) + ), + ] + ) + TESTS.append( + [ + dict(rotate_params=[np.pi / 2], padding_mode="zeros", device=device), + {"img": p(np.arange(8).reshape((1, 2, 2, 2))), "spatial_size": (4, 4, 4)}, + p( + np.array( + [ + [ + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 2.0, 0.0, 0.0], + [0.0, 3.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 6.0, 4.0, 0.0], + [0.0, 7.0, 5.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + ] + ] + ) + ), + ] + ) class TestAffine(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_affine(self, input_param, input_data, expected_val): g = Affine(**input_param) result = g(**input_data) if isinstance(result, tuple): result = result[0] - self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor)) - np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) if __name__ == "__main__": diff --git a/tests/test_affine_grid.py b/tests/test_affine_grid.py index 24772b9a21..c12a395b47 100644 --- a/tests/test_affine_grid.py +++ b/tests/test_affine_grid.py @@ -16,88 +16,100 @@ from parameterized import parameterized from monai.transforms import AffineGrid +from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env -TEST_CASES = [ - [ - {"as_tensor_output": False, "device": torch.device("cpu:0")}, - {"spatial_size": (2, 2)}, - np.array([[[-0.5, -0.5], [0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]], [[1.0, 1.0], [1.0, 1.0]]]), - ], - [ - {"as_tensor_output": True, "device": None}, - {"spatial_size": (2, 2)}, - torch.tensor([[[-0.5, -0.5], [0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]], [[1.0, 1.0], [1.0, 1.0]]]), - ], - [{"as_tensor_output": False, "device": None}, {"grid": np.ones((3, 3, 3))}, np.ones((3, 3, 3))], - [{"as_tensor_output": True, "device": torch.device("cpu:0")}, {"grid": np.ones((3, 3, 3))}, torch.ones((3, 3, 3))], - [{"as_tensor_output": False, "device": None}, {"grid": torch.ones((3, 3, 3))}, np.ones((3, 3, 3))], - [ - {"as_tensor_output": True, "device": torch.device("cpu:0")}, - {"grid": torch.ones((3, 3, 3))}, - torch.ones((3, 3, 3)), - ], - [ - { - "rotate_params": (1.0, 1.0), - "scale_params": (-20, 10), - "as_tensor_output": True, - "device": torch.device("cpu:0"), - }, - {"grid": torch.ones((3, 3, 3))}, - torch.tensor( +TESTS = [] +for p in TEST_NDARRAYS: + for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]: + TESTS.append( [ - [[-19.2208, -19.2208, -19.2208], [-19.2208, -19.2208, -19.2208], [-19.2208, -19.2208, -19.2208]], 
- [[-11.4264, -11.4264, -11.4264], [-11.4264, -11.4264, -11.4264], [-11.4264, -11.4264, -11.4264]], - [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], + {"device": device}, + {"spatial_size": (2, 2)}, + np.array([[[-0.5, -0.5], [0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]], [[1.0, 1.0], [1.0, 1.0]]]), ] - ), - ], - [ - { - "rotate_params": (1.0, 1.0, 1.0), - "scale_params": (-20, 10), - "as_tensor_output": True, - "device": torch.device("cpu:0"), - }, - {"grid": torch.ones((4, 3, 3, 3))}, - torch.tensor( + ) + + TESTS.append([{"device": device}, {"grid": p(np.ones((3, 3, 3)))}, p(np.ones((3, 3, 3)))]) + TESTS.append([{"device": device}, {"grid": p(torch.ones((3, 3, 3)))}, p(np.ones((3, 3, 3)))]) + TESTS.append( + [ + {"rotate_params": (1.0, 1.0), "scale_params": (-20, 10), "device": device}, + {"grid": p(torch.ones((3, 3, 3)))}, + p( + torch.tensor( + [ + [ + [-19.2208, -19.2208, -19.2208], + [-19.2208, -19.2208, -19.2208], + [-19.2208, -19.2208, -19.2208], + ], + [ + [-11.4264, -11.4264, -11.4264], + [-11.4264, -11.4264, -11.4264], + [-11.4264, -11.4264, -11.4264], + ], + [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], + ] + ) + ), + ] + ) + TESTS.append( [ - [ - [[-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435]], - [[-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435]], - [[-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435]], - ], - [ - [[-20.2381, -20.2381, -20.2381], [-20.2381, -20.2381, -20.2381], [-20.2381, -20.2381, -20.2381]], - [[-20.2381, -20.2381, -20.2381], [-20.2381, -20.2381, -20.2381], [-20.2381, -20.2381, -20.2381]], - [[-20.2381, -20.2381, -20.2381], [-20.2381, -20.2381, -20.2381], [-20.2381, -20.2381, -20.2381]], - ], - [ - [[-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844]], - [[-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844]], - [[-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844]], - ], - [ - [[1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000]], - [[1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000]], - [[1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000]], - ], + {"rotate_params": (1.0, 1.0, 1.0), "scale_params": (-20, 10), "device": device}, + {"grid": p(torch.ones((4, 3, 3, 3)))}, + p( + torch.tensor( + [ + [ + [[-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435]], + [[-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435]], + [[-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435]], + ], + [ + [ + [-20.2381, -20.2381, -20.2381], + [-20.2381, -20.2381, -20.2381], + [-20.2381, -20.2381, -20.2381], + ], + [ + [-20.2381, -20.2381, -20.2381], + [-20.2381, -20.2381, -20.2381], + [-20.2381, -20.2381, -20.2381], + ], + [ + [-20.2381, -20.2381, -20.2381], + [-20.2381, -20.2381, -20.2381], + [-20.2381, -20.2381, -20.2381], + ], + ], + [ + [[-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844]], + [[-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844]], + [[-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844]], + ], + [ + [[1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000]], + [[1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000]], + [[1.0000, 1.0000, 
1.0000], [1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000]],
+                        ],
+                    ]
+                )
+            ),
+        ]
+    )
+
+_rtol = 5e-2 if is_tf32_env() else 1e-4


 class TestAffineGrid(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_affine_grid(self, input_param, input_data, expected_val):
         g = AffineGrid(**input_param)
         result, _ = g(**input_data)
-        self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor))
-        if isinstance(result, torch.Tensor):
-            np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4)
-        else:
-            np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
+        if input_param.get("device") is not None:
+            self.assertEqual(result.device.type, torch.device(input_param["device"]).type)
+        assert_allclose(result, expected_val, type_test=False, rtol=_rtol)


 if __name__ == "__main__":
diff --git a/tests/test_affine_transform.py b/tests/test_affine_transform.py
index 42af58be73..ef39c297ce 100644
--- a/tests/test_affine_transform.py
+++ b/tests/test_affine_transform.py
@@ -17,6 +17,9 @@

 from monai.networks import normalize_transform, to_norm_affine
 from monai.networks.layers import AffineTransform
+from tests.utils import is_tf32_env
+
+_rtol = 1e-4 if not is_tf32_env() else 5e-3

 TEST_NORM_CASES = [
     [(4, 5), True, [[[0.666667, 0, -1], [0, 0.5, -1], [0, 0, 1]]]],
@@ -95,7 +98,7 @@ def test_to_norm_affine(self, affine, src_size, dst_size, align_corners, expecte
             affine = torch.as_tensor(affine, device=torch.device("cuda:0"), dtype=torch.float32)
             new_affine = to_norm_affine(affine, src_size, dst_size, align_corners)
             new_affine = new_affine.detach().cpu().numpy()
-            np.testing.assert_allclose(new_affine, expected, atol=1e-4)
+            np.testing.assert_allclose(new_affine, expected, atol=1e-5, rtol=_rtol)

     @parameterized.expand(TEST_ILL_TO_NORM_AFFINE_CASES)
     def test_to_norm_affine_ill(self, affine, src_size, dst_size, align_corners):
@@ -113,7 +116,7 @@ def test_affine_shift(self):
         out = AffineTransform()(image, affine)
         out = out.detach().cpu().numpy()
         expected = [[[[0, 4, 1, 3], [0, 7, 6, 8], [0, 3, 5, 3]]]]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol)

     def test_affine_shift_1(self):
         affine = torch.as_tensor([[1.0, 0.0, -1.0], [0.0, 1.0, -1.0]])
@@ -121,7 +124,7 @@ def test_affine_shift_1(self):
         out = AffineTransform()(image, affine)
         out = out.detach().cpu().numpy()
         expected = [[[[0, 0, 0, 0], [0, 4, 1, 3], [0, 7, 6, 8]]]]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol)

     def test_affine_shift_2(self):
         affine = torch.as_tensor([[1.0, 0.0, -1.0], [0.0, 1.0, 0.0]])
@@ -129,28 +132,28 @@ def test_affine_shift_2(self):
         out = AffineTransform()(image, affine)
         out = out.detach().cpu().numpy()
         expected = [[[[0, 0, 0, 0], [4, 1, 3, 2], [7, 6, 8, 5]]]]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol)

     def test_zoom(self):
         affine = torch.as_tensor([[1.0, 0.0, 0.0], [0.0, 2.0, 0.0]])
         image = torch.arange(1.0, 13.0).view(1, 1, 3, 4).to(device=torch.device("cpu:0"))
         out = AffineTransform((3, 2))(image, affine)
         expected = [[[[1, 3], [5, 7], [9, 11]]]]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol)

     def test_zoom_1(self):
         affine = torch.as_tensor([[2.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
         image = torch.arange(1.0, 13.0).view(1, 1, 3, 4).to(device=torch.device("cpu:0"))
         out = AffineTransform()(image, affine, (1, 4))
         expected = [[[[1, 2, 3, 4]]]]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol)

     def test_zoom_2(self):
         affine = torch.as_tensor([[2.0, 0.0, 0.0], [0.0, 2.0, 0.0]], dtype=torch.float32)
         image = torch.arange(1.0, 13.0).view(1, 1, 3, 4).to(device=torch.device("cpu:0"))
         out = AffineTransform((1, 2))(image, affine)
         expected = [[[[1, 3]]]]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol)

     def test_affine_transform_minimum(self):
         t = np.pi / 3
@@ -169,7 +172,7 @@ def test_affine_transform_minimum(self):
             ]
         ]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-3, rtol=_rtol)

     def test_affine_transform_2d(self):
         t = np.pi / 3
@@ -188,7 +191,7 @@ def test_affine_transform_2d(self):
             ]
         ]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-3, rtol=_rtol)

         if torch.cuda.is_available():
             affine = torch.as_tensor(affine, device=torch.device("cuda:0"), dtype=torch.float32)
@@ -205,7 +208,7 @@ def test_affine_transform_2d(self):
             ]
         ]
-            np.testing.assert_allclose(out, expected, atol=1e-4)
+            np.testing.assert_allclose(out, expected, atol=5e-3)

     def test_affine_transform_3d(self):
         t = np.pi / 3
@@ -231,7 +234,7 @@ def test_affine_transform_3d(self):
             ]
         ],
         ]
-        np.testing.assert_allclose(out, expected, atol=1e-4)
+        np.testing.assert_allclose(out, expected, atol=1e-4, rtol=_rtol)

         if torch.cuda.is_available():
             affine = torch.as_tensor(affine, device=torch.device("cuda:0"), dtype=torch.float32)
@@ -255,7 +258,7 @@ def test_affine_transform_3d(self):
             ]
         ],
         ]
-        np.testing.assert_allclose(out, expected, atol=1e-4)
+        np.testing.assert_allclose(out, expected, atol=5e-3)

     def test_ill_affine_transform(self):
         with self.assertRaises(ValueError):  # image too small
diff --git a/tests/test_affined.py b/tests/test_affined.py
index 850f12905d..e9c468e755 100644
--- a/tests/test_affined.py
+++ b/tests/test_affined.py
@@ -16,85 +16,132 @@
 from parameterized import parameterized

 from monai.transforms import Affined
+from tests.utils import TEST_NDARRAYS, assert_allclose

-TEST_CASES = [
-    [
-        dict(keys="img", padding_mode="zeros", as_tensor_output=False, spatial_size=(-1, 0), device=None),
-        {"img": np.arange(9).reshape((1, 3, 3))},
-        np.arange(9).reshape(1, 3, 3),
-    ],
-    [
-        dict(keys="img", padding_mode="zeros", as_tensor_output=False, device=None),
-        {"img": np.arange(4).reshape((1, 2, 2))},
-        np.arange(4).reshape(1, 2, 2),
-    ],
-    [
-        dict(keys="img", padding_mode="zeros", spatial_size=(4, 4), as_tensor_output=False, device=None),
-        {"img": np.arange(4).reshape((1, 2, 2))},
-        np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]),
-    ],
-    [
-        dict(
-            keys="img",
-            rotate_params=[np.pi / 2],
-            padding_mode="zeros",
-            spatial_size=(4, 4),
-            as_tensor_output=False,
-            device=None,
-        ),
-        {"img": np.arange(4).reshape((1, 2, 2))},
-        np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 0.0, 0.0], [0.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]),
-    ],
-    [
-        dict(keys="img", padding_mode="zeros", spatial_size=(-1, 0, 0), as_tensor_output=False, device=None),
-        {"img": np.arange(27).reshape((1, 3, 3, 3))},
-        np.arange(27).reshape(1, 3, 3, 3),
-    ],
-    [
-        dict(keys="img", padding_mode="zeros", spatial_size=(4, 4, 4), as_tensor_output=False, device=None),
-        {"img": np.arange(8).reshape((1, 2,
2, 2))}, - np.array( +TESTS = [] +for p in TEST_NDARRAYS: + for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]: + TESTS.append( [ - [ - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 4.0, 5.0, 0.0], [0.0, 6.0, 7.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - ] + dict(keys="img", padding_mode="zeros", spatial_size=(-1, 0), device=device), + {"img": p(np.arange(9).reshape((1, 3, 3)))}, + p(np.arange(9).reshape(1, 3, 3)), ] - ), - ], - [ - dict( - keys="img", - rotate_params=[np.pi / 2], - padding_mode="zeros", - spatial_size=(4, 4, 4), - as_tensor_output=False, - device=None, - ), - {"img": np.arange(8).reshape((1, 2, 2, 2))}, - np.array( + ) + TESTS.append( [ - [ - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 0.0, 0.0], [0.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 6.0, 4.0, 0.0], [0.0, 7.0, 5.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - ] + dict(keys="img", padding_mode="zeros", device=device), + {"img": p(np.arange(4).reshape((1, 2, 2)))}, + p(np.arange(4).reshape(1, 2, 2)), ] - ), - ], -] + ) + TESTS.append( + [ + dict(keys="img", padding_mode="zeros", spatial_size=(4, 4), device=device), + {"img": p(np.arange(4).reshape((1, 2, 2)))}, + p(np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]]])), + ] + ) + TESTS.append( + [ + dict(keys="img", rotate_params=[np.pi / 2], padding_mode="zeros", spatial_size=(4, 4), device=device), + {"img": p(np.arange(4).reshape((1, 2, 2)))}, + p(np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 0.0, 0.0], [0.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]]])), + ] + ) + TESTS.append( + [ + dict(keys="img", padding_mode="zeros", spatial_size=(-1, 0, 0), device=device), + {"img": p(np.arange(27).reshape((1, 3, 3, 3)))}, + p(np.arange(27).reshape(1, 3, 3, 3)), + ] + ) + TESTS.append( + [ + dict(keys="img", padding_mode="zeros", spatial_size=(4, 4, 4), device=device), + {"img": p(np.arange(8).reshape((1, 2, 2, 2)))}, + p( + np.array( + [ + [ + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0], + [0.0, 2.0, 3.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 4.0, 5.0, 0.0], + [0.0, 6.0, 7.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + ] + ] + ) + ), + ] + ) + TESTS.append( + [ + dict( + keys="img", rotate_params=[np.pi / 2], padding_mode="zeros", spatial_size=(4, 4, 4), device=device + ), + {"img": p(np.arange(8).reshape((1, 2, 2, 2)))}, + p( + np.array( + [ + [ + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 2.0, 0.0, 0.0], + [0.0, 3.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 6.0, 4.0, 0.0], + [0.0, 7.0, 5.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + ] + ] + ) + ), + ] + ) class TestAffined(unittest.TestCase): - 
@parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_affine(self, input_param, input_data, expected_val): g = Affined(**input_param) result = g(input_data)["img"] - self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor)) - np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) if __name__ == "__main__": diff --git a/tests/test_ahnet.py b/tests/test_ahnet.py index 777e2637a7..f4bfa555fc 100644 --- a/tests/test_ahnet.py +++ b/tests/test_ahnet.py @@ -162,26 +162,14 @@ def test_mcfcn_shape(self, input_param, input_shape, expected_shape): class TestAHNET(unittest.TestCase): - @parameterized.expand( - [ - TEST_CASE_AHNET_2D_1, - TEST_CASE_AHNET_2D_2, - TEST_CASE_AHNET_2D_3, - ] - ) + @parameterized.expand([TEST_CASE_AHNET_2D_1, TEST_CASE_AHNET_2D_2, TEST_CASE_AHNET_2D_3]) def test_ahnet_shape_2d(self, input_param, input_shape, expected_shape): net = AHNet(**input_param).to(device) with eval_mode(net): result = net.forward(torch.randn(input_shape).to(device)) self.assertEqual(result.shape, expected_shape) - @parameterized.expand( - [ - TEST_CASE_AHNET_3D_1, - TEST_CASE_AHNET_3D_2, - TEST_CASE_AHNET_3D_3, - ] - ) + @parameterized.expand([TEST_CASE_AHNET_3D_1, TEST_CASE_AHNET_3D_2, TEST_CASE_AHNET_3D_3]) @skip_if_quick def test_ahnet_shape_3d(self, input_param, input_shape, expected_shape): net = AHNet(**input_param).to(device) @@ -203,11 +191,7 @@ def test_script(self): class TestAHNETWithPretrain(unittest.TestCase): @parameterized.expand( - [ - TEST_CASE_AHNET_3D_WITH_PRETRAIN_1, - TEST_CASE_AHNET_3D_WITH_PRETRAIN_2, - TEST_CASE_AHNET_3D_WITH_PRETRAIN_3, - ] + [TEST_CASE_AHNET_3D_WITH_PRETRAIN_1, TEST_CASE_AHNET_3D_WITH_PRETRAIN_2, TEST_CASE_AHNET_3D_WITH_PRETRAIN_3] ) def test_ahnet_shape(self, input_param, input_shape, expected_shape, fcn_input_param): net = AHNet(**input_param).to(device) diff --git a/tests/test_alias.py b/tests/test_alias.py new file mode 100644 index 0000000000..0895da5743 --- /dev/null +++ b/tests/test_alias.py @@ -0,0 +1,39 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
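The new tests/test_alias.py (continued below) globs every public .py file under monai/ and asserts that the corresponding dotted module path is importable via optional_import. A worked example of its path-to-module conversion, using a hypothetical checkout path rather than anything from the repo:

src_dir = "/repo"                                  # hypothetical checkout root
x = "/repo/monai/transforms/spatial/array.py"      # one file found by glob
mod_name = x[len(src_dir):-3]                      # "/monai/transforms/spatial/array"
mod_name = mod_name[1:].replace(mod_name[0], ".")  # "monai.transforms.spatial.array"
mod, cls = mod_name.rsplit(".", 1)                 # ("monai.transforms.spatial", "array")

Note that mod_name[0] on the right-hand side is evaluated before reassignment, so it is still the path separator; that is what makes the replace call turn the remaining separators into dots.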
+ +import glob +import inspect +import os +import unittest + +from monai.utils import optional_import + + +class TestModuleAlias(unittest.TestCase): + """check that 'import monai.xx.file_name' returns a module""" + + def test_files(self): + src_dir = os.path.dirname(os.path.dirname(__file__)) + monai_dir = os.path.join(src_dir, "monai") + py_files = glob.glob(os.path.join(monai_dir, "**", "*.py"), recursive=True) + for x in py_files: + if os.path.basename(x).startswith("_"): + continue + mod_name = x[len(src_dir) : -3] # create relative path + mod_name = mod_name[1:].replace(mod_name[0], ".") + mod, cls = mod_name.rsplit(".", 1) + obj, exist = optional_import(mod, name=cls) + if exist: + self.assertTrue(inspect.ismodule(obj), msg=mod_name) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_as_channel_first.py b/tests/test_as_channel_first.py index 0d1b1c7d3a..918e576011 100644 --- a/tests/test_as_channel_first.py +++ b/tests/test_as_channel_first.py @@ -34,7 +34,7 @@ def test_value(self, in_type, input_param, expected_shape): if isinstance(test_data, torch.Tensor): test_data = test_data.cpu().numpy() expected = np.moveaxis(test_data, input_param["channel_dim"], 0) - assert_allclose(expected, result) + assert_allclose(result, expected, type_test=False) if __name__ == "__main__": diff --git a/tests/test_autoencoder.py b/tests/test_autoencoder.py index 54d6832c8d..451a93dc01 100644 --- a/tests/test_autoencoder.py +++ b/tests/test_autoencoder.py @@ -23,7 +23,7 @@ TEST_CASE_0 = [ # single channel 2D, batch 4, no residual { - "dimensions": 2, + "spatial_dims": 2, "in_channels": 1, "out_channels": 1, "channels": (4, 8, 16), @@ -35,20 +35,14 @@ ] TEST_CASE_1 = [ # single channel 2D, batch 4 - { - "dimensions": 2, - "in_channels": 1, - "out_channels": 1, - "channels": (4, 8, 16), - "strides": (2, 2, 2), - }, + {"spatial_dims": 2, "in_channels": 1, "out_channels": 1, "channels": (4, 8, 16), "strides": (2, 2, 2)}, (1, 1, 128, 128), (1, 1, 128, 128), ] TEST_CASE_2 = [ # 3-channel 2D, batch 4, LeakyReLU activation { - "dimensions": 2, + "spatial_dims": 2, "in_channels": 3, "out_channels": 3, "channels": (4, 8, 16), @@ -60,13 +54,7 @@ ] TEST_CASE_3 = [ # 4-channel 3D, batch 4 - { - "dimensions": 3, - "in_channels": 4, - "out_channels": 3, - "channels": (4, 8, 16), - "strides": (2, 2, 2), - }, + {"spatial_dims": 3, "in_channels": 4, "out_channels": 3, "channels": (4, 8, 16), "strides": (2, 2, 2)}, (1, 4, 128, 128, 128), (1, 3, 128, 128, 128), ] @@ -75,7 +63,7 @@ TEST_CASE_FAIL = { # 2-channel 2D, should fail because of stride/channel mismatch. 
- "dimensions": 2, + "spatial_dims": 2, "in_channels": 2, "out_channels": 2, "channels": (4, 8, 16), @@ -92,7 +80,7 @@ def test_shape(self, input_param, input_shape, expected_shape): self.assertEqual(result.shape, expected_shape) def test_script(self): - net = AutoEncoder(dimensions=2, in_channels=1, out_channels=1, channels=(4, 8), strides=(2, 2)) + net = AutoEncoder(spatial_dims=2, in_channels=1, out_channels=1, channels=(4, 8), strides=(2, 2)) test_data = torch.randn(2, 1, 32, 32) test_script_save(net, test_data) diff --git a/tests/test_basic_unet.py b/tests/test_basic_unet.py index 09d7f72d0e..1de37b316a 100644 --- a/tests/test_basic_unet.py +++ b/tests/test_basic_unet.py @@ -20,20 +20,10 @@ CASES_1D = [] for mode in ["pixelshuffle", "nontrainable", "deconv", None]: - kwargs = { - "dimensions": 1, - "in_channels": 5, - "out_channels": 8, - } + kwargs = {"spatial_dims": 1, "in_channels": 5, "out_channels": 8} if mode is not None: kwargs["upsample"] = mode # type: ignore - CASES_1D.append( - [ - kwargs, - (10, 5, 33), - (10, 8, 33), - ] - ) + CASES_1D.append([kwargs, (10, 5, 33), (10, 8, 33)]) CASES_2D = [] for mode in ["pixelshuffle", "nontrainable", "deconv"]: @@ -43,7 +33,7 @@ CASES_2D.append( [ { - "dimensions": 2, + "spatial_dims": 2, "in_channels": in_channels, "out_channels": out_channels, "features": (12, 12, 13, 14, 15, 16), @@ -56,7 +46,7 @@ CASES_3D = [ [ # single channel 3D, batch 2 { - "dimensions": 3, + "spatial_dims": 3, "in_channels": 1, "out_channels": 2, "features": (16, 20, 21, 22, 23, 11), @@ -67,7 +57,7 @@ ], [ # 2-channel 3D, batch 3 { - "dimensions": 3, + "spatial_dims": 3, "in_channels": 2, "out_channels": 7, "features": (14, 15, 16, 17, 18, 11), @@ -78,7 +68,7 @@ ], [ # 4-channel 3D, batch 5 { - "dimensions": 3, + "spatial_dims": 3, "in_channels": 4, "out_channels": 2, "features": (14, 15, 16, 17, 18, 10), @@ -101,7 +91,7 @@ def test_shape(self, input_param, input_shape, expected_shape): self.assertEqual(result.shape, expected_shape) def test_script(self): - net = BasicUNet(dimensions=2, in_channels=1, out_channels=3) + net = BasicUNet(spatial_dims=2, in_channels=1, out_channels=3) test_data = torch.randn(16, 1, 32, 32) test_script_save(net, test_data) diff --git a/tests/test_bending_energy.py b/tests/test_bending_energy.py index 8f1fb43535..f254b9624c 100644 --- a/tests/test_bending_energy.py +++ b/tests/test_bending_energy.py @@ -20,31 +20,11 @@ device = "cuda" if torch.cuda.is_available() else "cpu" TEST_CASES = [ - [ - {}, - {"pred": torch.ones((1, 3, 5, 5, 5), device=device)}, - 0.0, - ], - [ - {}, - {"pred": torch.arange(0, 5, device=device)[None, None, None, None, :].expand(1, 3, 5, 5, 5)}, - 0.0, - ], - [ - {}, - {"pred": torch.arange(0, 5, device=device)[None, None, None, None, :].expand(1, 3, 5, 5, 5) ** 2}, - 4.0, - ], - [ - {}, - {"pred": torch.arange(0, 5, device=device)[None, None, None, :].expand(1, 3, 5, 5) ** 2}, - 4.0, - ], - [ - {}, - {"pred": torch.arange(0, 5, device=device)[None, None, :].expand(1, 3, 5) ** 2}, - 4.0, - ], + [{}, {"pred": torch.ones((1, 3, 5, 5, 5), device=device)}, 0.0], + [{}, {"pred": torch.arange(0, 5, device=device)[None, None, None, None, :].expand(1, 3, 5, 5, 5)}, 0.0], + [{}, {"pred": torch.arange(0, 5, device=device)[None, None, None, None, :].expand(1, 3, 5, 5, 5) ** 2}, 4.0], + [{}, {"pred": torch.arange(0, 5, device=device)[None, None, None, :].expand(1, 3, 5, 5) ** 2}, 4.0], + [{}, {"pred": torch.arange(0, 5, device=device)[None, None, :].expand(1, 3, 5) ** 2}, 4.0], ] diff --git 
a/tests/test_bilateral_approx_cpu.py b/tests/test_bilateral_approx_cpu.py index 7960f76591..d55a6ff5b3 100644 --- a/tests/test_bilateral_approx_cpu.py +++ b/tests/test_bilateral_approx_cpu.py @@ -306,7 +306,7 @@ # Frame 4 [[1, 0, 0, 0, 1], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [1, 0, 0, 0, 1]], ] - ], + ] ], # Expected [ diff --git a/tests/test_bilateral_approx_cuda.py b/tests/test_bilateral_approx_cuda.py index 345a920f3c..bc7defdc4e 100644 --- a/tests/test_bilateral_approx_cuda.py +++ b/tests/test_bilateral_approx_cuda.py @@ -306,7 +306,7 @@ # Frame 4 [[1, 0, 0, 0, 1], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [1, 0, 0, 0, 1]], ] - ], + ] ], # Expected [ diff --git a/tests/test_bilateral_precise.py b/tests/test_bilateral_precise.py index dfa3ca107d..3b8f6194cf 100644 --- a/tests/test_bilateral_precise.py +++ b/tests/test_bilateral_precise.py @@ -306,7 +306,7 @@ # Frame 4 [[1, 0, 0, 0, 1], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [1, 0, 0, 0, 1]], ] - ], + ] ], # Expected [ diff --git a/tests/test_border_pad.py b/tests/test_border_pad.py index 9e6a8a6a08..7bd3f36c20 100644 --- a/tests/test_border_pad.py +++ b/tests/test_border_pad.py @@ -18,17 +18,9 @@ from monai.utils import NumpyPadMode from tests.utils import TEST_NDARRAYS -TEST_CASE_1 = [ - {"spatial_border": 2, "mode": "constant"}, - np.zeros((3, 8, 8, 4)), - np.zeros((3, 12, 12, 8)), -] +TEST_CASE_1 = [{"spatial_border": 2, "mode": "constant"}, np.zeros((3, 8, 8, 4)), np.zeros((3, 12, 12, 8))] -TEST_CASE_2 = [ - {"spatial_border": [1, 2, 3], "mode": "constant"}, - np.zeros((3, 8, 8, 4)), - np.zeros((3, 10, 12, 10)), -] +TEST_CASE_2 = [{"spatial_border": [1, 2, 3], "mode": "constant"}, np.zeros((3, 8, 8, 4)), np.zeros((3, 10, 12, 10))] TEST_CASE_3 = [ {"spatial_border": [1, 2, 3, 4, 5, 6], "mode": "constant"}, diff --git a/tests/test_cachedataset.py b/tests/test_cachedataset.py index bbb8143631..b7d2e97590 100644 --- a/tests/test_cachedataset.py +++ b/tests/test_cachedataset.py @@ -19,7 +19,7 @@ from parameterized import parameterized from monai.data import CacheDataset, DataLoader, PersistentDataset, SmartCacheDataset -from monai.transforms import Compose, Lambda, LoadImaged, ThreadUnsafe, Transform +from monai.transforms import Compose, Lambda, LoadImaged, RandLambda, ThreadUnsafe, Transform from monai.utils import get_torch_version_tuple TEST_CASE_1 = [Compose([LoadImaged(keys=["image", "label", "extra"])]), (128, 128, 128)] @@ -84,7 +84,7 @@ def test_shape(self, transform, expected_shape): def test_set_data(self): data_list1 = list(range(10)) - transform = Lambda(func=lambda x: np.array([x * 10])) + transform = Compose([Lambda(func=lambda x: np.array([x * 10])), RandLambda(func=lambda x: x + 1)]) dataset = CacheDataset( data=data_list1, @@ -92,19 +92,23 @@ def test_set_data(self): cache_rate=1.0, num_workers=4, progress=True, + copy_cache=False if sys.platform == "linux" else True, ) num_workers = 2 if sys.platform == "linux" else 0 dataloader = DataLoader(dataset=dataset, num_workers=num_workers, batch_size=1) for i, d in enumerate(dataloader): - np.testing.assert_allclose([[data_list1[i] * 10]], d) + np.testing.assert_allclose([[data_list1[i] * 10 + 1]], d) + # simulate another epoch, the cache content should not be modified + for i, d in enumerate(dataloader): + np.testing.assert_allclose([[data_list1[i] * 10 + 1]], d) # update the datalist and fill the cache content data_list2 = list(range(-10, 0)) dataset.set_data(data=data_list2) # rerun with updated cache content for i, d in 
enumerate(dataloader): - np.testing.assert_allclose([[data_list2[i] * 10]], d) + np.testing.assert_allclose([[data_list2[i] * 10 + 1]], d) class _StatefulTransform(Transform, ThreadUnsafe): @@ -133,11 +137,7 @@ def test_thread_safe(self, persistent_workers, cache_workers, loader_workers): _kwg = {"persistent_workers": persistent_workers} if get_torch_version_tuple() > (1, 7) else {} data_list = list(range(1, 11)) dataset = CacheDataset( - data=data_list, - transform=_StatefulTransform(), - cache_rate=1.0, - num_workers=cache_workers, - progress=False, + data=data_list, transform=_StatefulTransform(), cache_rate=1.0, num_workers=cache_workers, progress=False ) self.assertListEqual(expected, list(dataset)) loader = DataLoader( diff --git a/tests/test_cachedataset_parallel.py b/tests/test_cachedataset_parallel.py index 0be3ba085b..96aadd9614 100644 --- a/tests/test_cachedataset_parallel.py +++ b/tests/test_cachedataset_parallel.py @@ -42,12 +42,7 @@ def test_shape(self, num_workers, dataset_size, transform): "extra": os.path.join(tempdir, "test_extra1.nii.gz"), } ] * dataset_size - dataset = CacheDataset( - data=test_data, - transform=transform, - cache_rate=1, - num_workers=num_workers, - ) + dataset = CacheDataset(data=test_data, transform=transform, cache_rate=1, num_workers=num_workers) self.assertEqual(len(dataset._cache), dataset.cache_num) for i in range(dataset.cache_num): diff --git a/tests/test_cachedataset_persistent_workers.py b/tests/test_cachedataset_persistent_workers.py index 584a053614..d727074bf9 100644 --- a/tests/test_cachedataset_persistent_workers.py +++ b/tests/test_cachedataset_persistent_workers.py @@ -23,12 +23,7 @@ def test_duplicate_transforms(self): data = [{"img": im} for _ in range(2)] # at least 1 deterministic followed by at least 1 random - transform = Compose( - [ - Spacingd("img", pixdim=(1, 1)), - RandAffined("img", prob=1.0), - ] - ) + transform = Compose([Spacingd("img", pixdim=(1, 1)), RandAffined("img", prob=1.0)]) # cachedataset and data loader w persistent_workers train_ds = CacheDataset(data, transform, cache_num=1) diff --git a/tests/test_cachentransdataset.py b/tests/test_cachentransdataset.py index 492db8b16f..13c1e1c68e 100644 --- a/tests/test_cachentransdataset.py +++ b/tests/test_cachentransdataset.py @@ -42,19 +42,13 @@ def test_n_trans(self, transform, expected_shape): cache_dir = os.path.join(os.path.join(tempdir, "cache"), "data") dataset_precached = CacheNTransDataset( - data=test_data, - transform=transform, - cache_dir=cache_dir, - cache_n_trans=2, + data=test_data, transform=transform, cache_dir=cache_dir, cache_n_trans=2 ) data_precached = dataset_precached[0] self.assertTupleEqual(data_precached["image"].shape, expected_shape) dataset_postcached = CacheNTransDataset( - data=test_data, - transform=transform, - cache_dir=cache_dir, - cache_n_trans=2, + data=test_data, transform=transform, cache_dir=cache_dir, cache_n_trans=2 ) data_postcached = dataset_postcached[0] self.assertTupleEqual(data_postcached["image"].shape, expected_shape) diff --git a/tests/test_cast_to_type.py b/tests/test_cast_to_type.py index 0ef25cbafa..d06efb17b5 100644 --- a/tests/test_cast_to_type.py +++ b/tests/test_cast_to_type.py @@ -16,14 +16,23 @@ from parameterized import parameterized from monai.transforms import CastToType +from monai.utils import optional_import from monai.utils.type_conversion import get_equivalent_dtype from tests.utils import TEST_NDARRAYS +cp, has_cp = optional_import("cupy") + TESTS = [] for p in TEST_NDARRAYS: for out_dtype in 
(np.float64, torch.float64): TESTS.append([out_dtype, p(np.array([[0, 1], [1, 2]], dtype=np.float32)), out_dtype]) +TESTS_CUPY = [ + [np.float32, np.array([[0, 1], [1, 2]], dtype=np.float32), np.float32], + [np.float32, np.array([[0, 1], [1, 2]], dtype=np.uint8), np.float32], + [np.uint8, np.array([[0, 1], [1, 2]], dtype=np.float32), np.uint8], +] + class TestCastToType(unittest.TestCase): @parameterized.expand(TESTS) @@ -35,6 +44,19 @@ def test_type(self, out_dtype, input_data, expected_type): result = CastToType()(input_data, out_dtype) self.assertEqual(result.dtype, get_equivalent_dtype(expected_type, type(result))) + @parameterized.expand(TESTS_CUPY) + @unittest.skipUnless(has_cp, "Requires CuPy") + def test_type_cupy(self, out_dtype, input_data, expected_type): + input_data = cp.asarray(input_data) + + result = CastToType(dtype=out_dtype)(input_data) + self.assertTrue(isinstance(result, cp.ndarray)) + self.assertEqual(result.dtype, get_equivalent_dtype(expected_type, type(result))) + + result = CastToType()(input_data, out_dtype) + self.assertTrue(isinstance(result, cp.ndarray)) + self.assertEqual(result.dtype, get_equivalent_dtype(expected_type, type(result))) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_cast_to_typed.py b/tests/test_cast_to_typed.py index be495564fb..007598c200 100644 --- a/tests/test_cast_to_typed.py +++ b/tests/test_cast_to_typed.py @@ -16,6 +16,9 @@ from parameterized import parameterized from monai.transforms import CastToTyped +from monai.utils import optional_import + +cp, has_cp = optional_import("cupy") TEST_CASE_1 = [ {"keys": ["img"], "dtype": np.float64}, @@ -33,6 +36,20 @@ ] +TESTS_CUPY = [ + [ + {"keys": "image", "dtype": np.uint8}, + {"image": np.array([[0, 1], [1, 2]], dtype=np.float32), "label": np.array([[0, 1], [1, 1]], dtype=np.float32)}, + {"image": np.uint8, "label": np.float32}, + ], + [ + {"keys": ["image", "label"], "dtype": np.float32}, + {"image": np.array([[0, 1], [1, 2]], dtype=np.uint8), "label": np.array([[0, 1], [1, 1]], dtype=np.uint8)}, + {"image": np.float32, "label": np.float32}, + ], +] + + class TestCastToTyped(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_type(self, input_param, input_data, expected_type): @@ -40,6 +57,16 @@ def test_type(self, input_param, input_data, expected_type): for k, v in result.items(): self.assertEqual(v.dtype, expected_type[k]) + @parameterized.expand(TESTS_CUPY) + @unittest.skipUnless(has_cp, "Requires CuPy") + def test_type_cupy(self, input_param, input_data, expected_type): + input_data = {k: cp.asarray(v) for k, v in input_data.items()} + + result = CastToTyped(**input_param)(input_data) + for k, v in result.items(): + self.assertTrue(isinstance(v, cp.ndarray)) + self.assertEqual(v.dtype, expected_type[k]) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_center_scale_crop.py b/tests/test_center_scale_crop.py index e28849ce90..4c5bfc4fac 100644 --- a/tests/test_center_scale_crop.py +++ b/tests/test_center_scale_crop.py @@ -38,11 +38,13 @@ class TestCenterScaleCrop(unittest.TestCase): @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_3]) def test_shape(self, input_param, input_data, expected_shape): result = CenterScaleCrop(**input_param)(input_data) + self.assertEqual(isinstance(result, torch.Tensor), isinstance(input_data, torch.Tensor)) np.testing.assert_allclose(result.shape, expected_shape) @parameterized.expand([TEST_CASE_2]) def test_value(self, input_param, input_data, expected_value): result = 
CenterScaleCrop(**input_param)(input_data) + self.assertEqual(isinstance(result, torch.Tensor), isinstance(input_data, torch.Tensor)) np.testing.assert_allclose(result, expected_value) diff --git a/tests/test_center_spatial_crop.py b/tests/test_center_spatial_crop.py index 3e828176a5..d6a7edb305 100644 --- a/tests/test_center_spatial_crop.py +++ b/tests/test_center_spatial_crop.py @@ -38,11 +38,13 @@ class TestCenterSpatialCrop(unittest.TestCase): @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_3]) def test_shape(self, input_param, input_data, expected_shape): result = CenterSpatialCrop(**input_param)(input_data) + self.assertEqual(isinstance(result, torch.Tensor), isinstance(input_data, torch.Tensor)) np.testing.assert_allclose(result.shape, expected_shape) @parameterized.expand([TEST_CASE_2]) def test_value(self, input_param, input_data, expected_value): result = CenterSpatialCrop(**input_param)(input_data) + self.assertEqual(isinstance(result, torch.Tensor), isinstance(input_data, torch.Tensor)) np.testing.assert_allclose(result, expected_value) diff --git a/tests/test_center_spatial_cropd.py b/tests/test_center_spatial_cropd.py index 349253ab56..be44468987 100644 --- a/tests/test_center_spatial_cropd.py +++ b/tests/test_center_spatial_cropd.py @@ -15,36 +15,43 @@ from parameterized import parameterized from monai.transforms import CenterSpatialCropd - -TEST_CASE_0 = [ - {"keys": "img", "roi_size": [2, -1, -1]}, - {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, - (3, 2, 3, 3), -] - -TEST_CASE_1 = [ - {"keys": "img", "roi_size": [2, 2, 2]}, - {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, - (3, 2, 2, 2), -] - -TEST_CASE_2 = [ - {"keys": "img", "roi_size": [2, 2]}, - {"img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]])}, - np.array([[[1, 2], [2, 3]]]), -] +from tests.utils import TEST_NDARRAYS, assert_allclose + +TEST_SHAPES = [] +for p in TEST_NDARRAYS: + TEST_SHAPES.append( + [{"keys": "img", "roi_size": [2, -1, -1]}, {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))}, (3, 2, 3, 3)] + ) + + TEST_SHAPES.append( + [{"keys": "img", "roi_size": [2, 2, 2]}, {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))}, (3, 2, 2, 2)] + ) + +TEST_CASES = [] +for p in TEST_NDARRAYS: + TEST_CASES.append( + [ + {"keys": "img", "roi_size": [2, 2]}, + { + "img": p( + np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]) + ) + }, + p(np.array([[[1, 2], [2, 3]]])), + ] + ) class TestCenterSpatialCropd(unittest.TestCase): - @parameterized.expand([TEST_CASE_0, TEST_CASE_1]) + @parameterized.expand(TEST_SHAPES) def test_shape(self, input_param, input_data, expected_shape): result = CenterSpatialCropd(**input_param)(input_data) self.assertTupleEqual(result["img"].shape, expected_shape) - @parameterized.expand([TEST_CASE_2]) + @parameterized.expand(TEST_CASES) def test_value(self, input_param, input_data, expected_value): result = CenterSpatialCropd(**input_param)(input_data) - np.testing.assert_allclose(result["img"], expected_value) + assert_allclose(result["img"], expected_value, type_test=False) if __name__ == "__main__": diff --git a/tests/test_classes_to_indices.py b/tests/test_classes_to_indices.py index 0ba3dd094a..7c89e3179d 100644 --- a/tests/test_classes_to_indices.py +++ b/tests/test_classes_to_indices.py @@ -11,68 +11,80 @@ import unittest -import numpy as np from parameterized import parameterized from monai.transforms import ClassesToIndices +from tests.utils import 
TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - # test Argmax data - {"num_classes": 3, "image_threshold": 0.0}, - np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), - None, - [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])], -] +TESTS_CASES = [] +for p in TEST_NDARRAYS: + TESTS_CASES.append( + [ + # test Argmax data + {"num_classes": 3, "image_threshold": 0.0}, + p([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), + None, + [p([0, 4, 8]), p([1, 5, 6]), p([2, 3, 7])], + ] + ) -TEST_CASE_2 = [ - {"num_classes": 3, "image_threshold": 60}, - np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), - np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]), - [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])], -] + TESTS_CASES.append( + [ + {"num_classes": 3, "image_threshold": 60}, + p([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), + p([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]), + [p([0, 8]), p([1, 5, 6]), p([3])], + ] + ) -TEST_CASE_3 = [ - # test One-Hot data - {"image_threshold": 0.0}, - np.array( + TESTS_CASES.append( [ - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [[0, 1, 0], [0, 0, 1], [1, 0, 0]], - [[0, 0, 1], [1, 0, 0], [0, 1, 0]], + # test One-Hot data + {"image_threshold": 0.0}, + p( + [ + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], + [[0, 1, 0], [0, 0, 1], [1, 0, 0]], + [[0, 0, 1], [1, 0, 0], [0, 1, 0]], + ] + ), + None, + [p([0, 4, 8]), p([1, 5, 6]), p([2, 3, 7])], ] - ), - None, - [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])], -] + ) -TEST_CASE_4 = [ - {"num_classes": None, "image_threshold": 60}, - np.array( + TESTS_CASES.append( [ - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [[0, 1, 0], [0, 0, 1], [1, 0, 0]], - [[0, 0, 1], [1, 0, 0], [0, 1, 0]], + {"num_classes": None, "image_threshold": 60}, + p( + [ + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], + [[0, 1, 0], [0, 0, 1], [1, 0, 0]], + [[0, 0, 1], [1, 0, 0], [0, 1, 0]], + ] + ), + p([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]), + [p([0, 8]), p([1, 5, 6]), p([3])], ] - ), - np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]), - [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])], -] + ) -TEST_CASE_5 = [ - # test output_shape - {"num_classes": 3, "image_threshold": 0.0, "output_shape": [3, 3]}, - np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), - None, - [np.array([[0, 0], [1, 1], [2, 2]]), np.array([[0, 1], [1, 2], [2, 0]]), np.array([[0, 2], [1, 0], [2, 1]])], -] + TESTS_CASES.append( + [ + # test output_shape + {"num_classes": 3, "image_threshold": 0.0, "output_shape": [3, 3]}, + p([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), + None, + [p([[0, 0], [1, 1], [2, 2]]), p([[0, 1], [1, 2], [2, 0]]), p([[0, 2], [1, 0], [2, 1]])], + ] + ) class TestClassesToIndices(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5]) + @parameterized.expand(TESTS_CASES) def test_value(self, input_args, label, image, expected_indices): indices = ClassesToIndices(**input_args)(label, image) for i, e in zip(indices, expected_indices): - np.testing.assert_allclose(i, e) + assert_allclose(i, e) if __name__ == "__main__": diff --git a/tests/test_classes_to_indicesd.py b/tests/test_classes_to_indicesd.py index 67fac95c8c..0df7490ec5 100644 --- a/tests/test_classes_to_indicesd.py +++ b/tests/test_classes_to_indicesd.py @@ -11,73 +11,91 @@ import unittest -import numpy as np from parameterized import parameterized from monai.transforms import ClassesToIndicesd +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - # test Argmax data - {"keys": "label", "num_classes": 3, "image_threshold": 
0.0}, - {"label": np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])}, - [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])], -] +TESTS_CASES = [] +for p in TEST_NDARRAYS: + TESTS_CASES.append( + [ + # test Argmax data + {"keys": "label", "num_classes": 3, "image_threshold": 0.0}, + {"label": p([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])}, + [p([0, 4, 8]), p([1, 5, 6]), p([2, 3, 7])], + ] + ) -TEST_CASE_2 = [ - {"keys": "label", "image_key": "image", "num_classes": 3, "image_threshold": 60}, - { - "label": np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), - "image": np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]), - }, - [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])], -] + TESTS_CASES.append( + [ + {"keys": "label", "image_key": "image", "num_classes": 3, "image_threshold": 60}, + { + "label": p([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), + "image": p([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]), + }, + [p([0, 8]), p([1, 5, 6]), p([3])], + ] + ) -TEST_CASE_3 = [ - # test One-Hot data - {"keys": "label", "image_threshold": 0.0}, - { - "label": np.array( - [ - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [[0, 1, 0], [0, 0, 1], [1, 0, 0]], - [[0, 0, 1], [1, 0, 0], [0, 1, 0]], - ] - ) - }, - [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])], -] + TESTS_CASES.append( + [ + # test One-Hot data + {"keys": "label", "image_threshold": 0.0}, + { + "label": p( + [ + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], + [[0, 1, 0], [0, 0, 1], [1, 0, 0]], + [[0, 0, 1], [1, 0, 0], [0, 1, 0]], + ] + ) + }, + [p([0, 4, 8]), p([1, 5, 6]), p([2, 3, 7])], + ] + ) -TEST_CASE_4 = [ - {"keys": "label", "image_key": "image", "num_classes": None, "image_threshold": 60}, - { - "label": np.array( - [ - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [[0, 1, 0], [0, 0, 1], [1, 0, 0]], - [[0, 0, 1], [1, 0, 0], [0, 1, 0]], - ] - ), - "image": np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]), - }, - [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])], -] + TESTS_CASES.append( + [ + {"keys": "label", "image_key": "image", "num_classes": None, "image_threshold": 60}, + { + "label": p( + [ + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], + [[0, 1, 0], [0, 0, 1], [1, 0, 0]], + [[0, 0, 1], [1, 0, 0], [0, 1, 0]], + ] + ), + "image": p([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]), + }, + [p([0, 8]), p([1, 5, 6]), p([3])], + ] + ) -TEST_CASE_5 = [ - # test output_shape - {"keys": "label", "indices_postfix": "cls", "num_classes": 3, "image_threshold": 0.0, "output_shape": [3, 3]}, - {"label": np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])}, - [np.array([[0, 0], [1, 1], [2, 2]]), np.array([[0, 1], [1, 2], [2, 0]]), np.array([[0, 2], [1, 0], [2, 1]])], -] + TESTS_CASES.append( + [ + # test output_shape + { + "keys": "label", + "indices_postfix": "cls", + "num_classes": 3, + "image_threshold": 0.0, + "output_shape": [3, 3], + }, + {"label": p([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])}, + [p([[0, 0], [1, 1], [2, 2]]), p([[0, 1], [1, 2], [2, 0]]), p([[0, 2], [1, 0], [2, 1]])], + ] + ) class TestClassesToIndicesd(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5]) + @parameterized.expand(TESTS_CASES) def test_value(self, input_args, input_data, expected_indices): result = ClassesToIndicesd(**input_args)(input_data) key_postfix = input_args.get("indices_postfix") key_postfix = "_cls_indices" if key_postfix is None else key_postfix for i, e in zip(result["label" + key_postfix], expected_indices): - np.testing.assert_allclose(i, e) + assert_allclose(i, e) if __name__ == "__main__": 
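The pattern repeated across these test files: hand-written TEST_CASE_N constants are regenerated inside a loop over tests.utils.TEST_NDARRAYS so that every transform is exercised with both numpy and torch inputs, and value comparisons go through tests.utils.assert_allclose. A minimal sketch of that pattern, with simplified stand-ins for the real helpers in tests/utils.py (the actual TEST_NDARRAYS also covers GPU tensors):

import numpy as np
import torch

# simplified stand-in: one constructor per supported container type
TEST_NDARRAYS = (np.array, torch.as_tensor)

def assert_allclose(actual, desired, type_test=True, **kwargs):
    # simplified stand-in for tests.utils.assert_allclose
    if type_test:
        assert isinstance(actual, type(desired)), (type(actual), type(desired))
    if isinstance(actual, torch.Tensor):
        actual = actual.detach().cpu().numpy()
    if isinstance(desired, torch.Tensor):
        desired = desired.detach().cpu().numpy()
    np.testing.assert_allclose(actual, desired, **kwargs)

TESTS = []
for p in TEST_NDARRAYS:  # one copy of every case per container type
    TESTS.append([{"roi_size": [2, 2]}, p(np.zeros((1, 4, 4))), (1, 2, 2)])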
diff --git a/tests/test_compute_confusion_matrix.py b/tests/test_compute_confusion_matrix.py index 69a95e0c8b..ef65c474c8 100644 --- a/tests/test_compute_confusion_matrix.py +++ b/tests/test_compute_confusion_matrix.py @@ -174,22 +174,10 @@ # 3. test metric with compute_sample, denominator may have zeros TEST_CASES_COMPUTE_SAMPLE_NAN = [] metric_names = ["tpr", "tnr"] -result_sum = [ - torch.tensor([0.5000]), - torch.tensor([4.8333]), -] -not_nans_sum = [ - torch.tensor([6]), - torch.tensor([8]), -] -result_sum_batch = [ - torch.tensor([0.0000, 0.5000, 0.0000]), - torch.tensor([1.6667, 2.5000, 0.6667]), -] -not_nans_sum_batch = [ - torch.tensor([3.0, 2.0, 1.0]), - torch.tensor([2.0, 3.0, 3.0]), -] +result_sum = [torch.tensor([0.5000]), torch.tensor([4.8333])] +not_nans_sum = [torch.tensor([6]), torch.tensor([8])] +result_sum_batch = [torch.tensor([0.0000, 0.5000, 0.0000]), torch.tensor([1.6667, 2.5000, 0.6667])] +not_nans_sum_batch = [torch.tensor([3.0, 2.0, 1.0]), torch.tensor([2.0, 3.0, 3.0])] for idx in range(2): for reduction in ["sum", "sum_batch"]: TEST_CASE = [data_nan.copy()] diff --git a/tests/test_compute_meandice.py b/tests/test_compute_meandice.py index f9e494efc7..f96563e22e 100644 --- a/tests/test_compute_meandice.py +++ b/tests/test_compute_meandice.py @@ -168,10 +168,7 @@ ] TEST_CASE_10 = [ - { - "y": [torch.ones((2, 3, 3)), torch.ones((2, 3, 3))], - "y_pred": [torch.ones((2, 3, 3)), torch.ones((2, 3, 3))], - }, + {"y": [torch.ones((2, 3, 3)), torch.ones((2, 3, 3))], "y_pred": [torch.ones((2, 3, 3)), torch.ones((2, 3, 3))]}, [[1.0000, 1.0000], [1.0000, 1.0000]], ] diff --git a/tests/test_compute_roc_auc.py b/tests/test_compute_roc_auc.py index 1cec357b93..bfecb4ce5b 100644 --- a/tests/test_compute_roc_auc.py +++ b/tests/test_compute_roc_auc.py @@ -37,23 +37,9 @@ 0.875, ] -TEST_CASE_3 = [ - torch.tensor([[0.5], [0.5], [0.2], [8.3]]), - torch.tensor([0, 1, 0, 1]), - False, - False, - "macro", - 0.875, -] +TEST_CASE_3 = [torch.tensor([[0.5], [0.5], [0.2], [8.3]]), torch.tensor([0, 1, 0, 1]), False, False, "macro", 0.875] -TEST_CASE_4 = [ - torch.tensor([0.5, 0.5, 0.2, 8.3]), - torch.tensor([0, 1, 0, 1]), - False, - False, - "macro", - 0.875, -] +TEST_CASE_4 = [torch.tensor([0.5, 0.5, 0.2, 8.3]), torch.tensor([0, 1, 0, 1]), False, False, "macro", 0.875] TEST_CASE_5 = [ torch.tensor([[0.1, 0.9], [0.3, 1.4], [0.2, 0.1], [0.1, 0.5]]), diff --git a/tests/test_convert_data_type.py b/tests/test_convert_data_type.py index a7fc64f950..aba10fd717 100644 --- a/tests/test_convert_data_type.py +++ b/tests/test_convert_data_type.py @@ -24,6 +24,24 @@ for out_type in TEST_NDARRAYS: TESTS.append((in_type(np.array(1.0)), out_type(np.array(1.0)))) # type: ignore +TESTS_LIST: List[Tuple] = [] +for in_type in TEST_NDARRAYS + (int, float): + for out_type in TEST_NDARRAYS: + TESTS_LIST.append( + ([in_type(np.array(1.0)), in_type(np.array(1.0))], out_type(np.array([1.0, 1.0])), True) # type: ignore + ) + TESTS_LIST.append( + ( + [in_type(np.array(1.0)), in_type(np.array(1.0))], # type: ignore + [out_type(np.array(1.0)), out_type(np.array(1.0))], + False, + ) + ) + + +class TestTensor(torch.Tensor): + pass + class TestConvertDataType(unittest.TestCase): @parameterized.expand(TESTS) @@ -47,9 +65,23 @@ def test_ill_arg(self): convert_data_type(None, torch.Tensor) convert_data_type(None, np.ndarray) + @parameterized.expand(TESTS_LIST) + def test_convert_list(self, in_image, im_out, wrap): + output_type = type(im_out) if wrap else type(im_out[0]) + converted_im, *_ = convert_data_type(in_image, 
output_type, wrap_sequence=wrap) + # check output is desired type + if not wrap: + converted_im = converted_im[0] + im_out = im_out[0] + self.assertEqual(type(converted_im), type(im_out)) + # check dtype is unchanged (skip int/float inputs, which have no dtype) + if isinstance(in_image[0], (np.ndarray, torch.Tensor)): + self.assertEqual(converted_im.dtype, im_out.dtype) + class TestConvertDataSame(unittest.TestCase): - @parameterized.expand(TESTS) + # add test for subclass of Tensor + @parameterized.expand(TESTS + [(np.array(1.0), TestTensor(np.array(1.0)))]) def test_convert_data_type(self, in_image, im_out): converted_im, orig_type, orig_device = convert_to_dst_type(in_image, im_out) # check input is unchanged @@ -57,7 +89,11 @@ if isinstance(in_image, torch.Tensor): self.assertEqual(in_image.device, orig_device) # check output is desired type - self.assertEqual(type(converted_im), type(im_out)) + if isinstance(im_out, torch.Tensor): + output_type = torch.Tensor + else: + output_type = np.ndarray + self.assertEqual(type(converted_im), output_type) # check dtype is unchanged if isinstance(in_type, (np.ndarray, torch.Tensor)): self.assertEqual(converted_im.dtype, im_out.dtype) diff --git a/tests/test_copy_model_state.py b/tests/test_copy_model_state.py index 6330a1918a..438c521479 100644 --- a/tests/test_copy_model_state.py +++ b/tests/test_copy_model_state.py @@ -21,7 +21,7 @@ class _TestModelOne(torch.nn.Module): def __init__(self, n_n, n_m, n_class): - super(_TestModelOne, self).__init__() + super().__init__() self.layer = torch.nn.Linear(n_n, n_m) self.class_layer = torch.nn.Linear(n_m, n_class) @@ -33,7 +33,7 @@ def forward(self, x): class _TestModelTwo(torch.nn.Module): def __init__(self, n_n, n_m, n_d, n_class): - super(_TestModelTwo, self).__init__() + super().__init__() self.layer = torch.nn.Linear(n_n, n_m) self.layer_1 = torch.nn.Linear(n_m, n_d) self.class_layer = torch.nn.Linear(n_d, n_class) diff --git a/tests/test_correct_crop_centers.py b/tests/test_correct_crop_centers.py new file mode 100644 index 0000000000..8804223719 --- /dev/null +++ b/tests/test_correct_crop_centers.py @@ -0,0 +1,33 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
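The new test below only asserts int/tensor parity for correct_crop_centers; the clamping it relies on is roughly the following sketch. The helper name and exact arithmetic are assumptions for illustration, not the monai.transforms.utils implementation:

def clamp_center(center, roi, img):
    # keep an roi-sized window centred at `center` inside an axis of length `img`
    half_lo = roi // 2        # voxels needed below the centre
    half_hi = roi - half_lo   # voxels needed above the centre
    return min(max(center, half_lo), img - half_hi)

# e.g. clamp_center(0, 2, 10) -> 1 and clamp_center(10, 2, 10) -> 9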
+ +import unittest + +import torch +from parameterized import parameterized + +from monai.transforms.utils import correct_crop_centers +from tests.utils import assert_allclose + +TESTS = [[[1, 5, 0], [2, 2, 2], [10, 10, 10]]] + + +class TestCorrectCropCenters(unittest.TestCase): + @parameterized.expand(TESTS) + def test_torch(self, spatial_size, centers, label_spatial_shape): + result1 = correct_crop_centers(centers, spatial_size, label_spatial_shape) + centers = [torch.tensor(i) for i in centers] + result2 = correct_crop_centers(centers, spatial_size, label_spatial_shape) + assert_allclose(result1, result2) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_create_grid_and_affine.py b/tests/test_create_grid_and_affine.py index 0c0e52e04a..cd8d75f63e 100644 --- a/tests/test_create_grid_and_affine.py +++ b/tests/test_create_grid_and_affine.py @@ -12,6 +12,7 @@ import unittest import numpy as np +import torch from monai.transforms import ( create_control_grid, @@ -21,6 +22,7 @@ create_shear, create_translate, ) +from tests.utils import assert_allclose, is_tf32_env class TestCreateGrid(unittest.TestCase): @@ -32,50 +34,47 @@ def test_create_grid(self): with self.assertRaisesRegex(TypeError, ""): create_grid((1, 1), spacing=2.0) - g = create_grid((1, 1)) - expected = np.array([[[0.0]], [[0.0]], [[1.0]]]) - np.testing.assert_allclose(g, expected) + test_assert(create_grid, ((1, 1),), np.array([[[0.0]], [[0.0]], [[1.0]]])) - g = create_grid((1, 1), homogeneous=False) - expected = np.array([[[0.0]], [[0.0]]]) - np.testing.assert_allclose(g, expected) + test_assert(create_grid, ((1, 1), None, False), np.array([[[0.0]], [[0.0]]])) - g = create_grid((1, 1), spacing=(1.2, 1.3)) - expected = np.array([[[0.0]], [[0.0]], [[1.0]]]) - np.testing.assert_allclose(g, expected) + test_assert(create_grid, ((1, 1), (1.2, 1.3)), np.array([[[0.0]], [[0.0]], [[1.0]]])) - g = create_grid((1, 1, 1), spacing=(1.2, 1.3, 1.0)) - expected = np.array([[[[0.0]]], [[[0.0]]], [[[0.0]]], [[[1.0]]]]) - np.testing.assert_allclose(g, expected) + test_assert(create_grid, ((1, 1, 1), (1.2, 1.3, 1.0)), np.array([[[[0.0]]], [[[0.0]]], [[[0.0]]], [[[1.0]]]])) - g = create_grid((1, 1, 1), spacing=(1.2, 1.3, 1.0), homogeneous=False) - expected = np.array([[[[0.0]]], [[[0.0]]], [[[0.0]]]]) - np.testing.assert_allclose(g, expected) + test_assert(create_grid, ((1, 1, 1), (1.2, 1.3, 1.0), False), np.array([[[[0.0]]], [[[0.0]]], [[[0.0]]]])) g = create_grid((1, 1, 1), spacing=(1.2, 1.3, 1.0), dtype=np.int32) np.testing.assert_equal(g.dtype, np.int32) - g = create_grid((2, 2, 2)) - expected = np.array( - [ - [[[-0.5, -0.5], [-0.5, -0.5]], [[0.5, 0.5], [0.5, 0.5]]], - [[[-0.5, -0.5], [0.5, 0.5]], [[-0.5, -0.5], [0.5, 0.5]]], - [[[-0.5, 0.5], [-0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]]], - [[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]], - ] + g = create_grid((1, 1, 1), spacing=(1.2, 1.3, 1.0), dtype=torch.float64, backend="torch") + np.testing.assert_equal(g.dtype, torch.float64) + + test_assert( + create_grid, + ((2, 2, 2),), + np.array( + [ + [[[-0.5, -0.5], [-0.5, -0.5]], [[0.5, 0.5], [0.5, 0.5]]], + [[[-0.5, -0.5], [0.5, 0.5]], [[-0.5, -0.5], [0.5, 0.5]]], + [[[-0.5, 0.5], [-0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]]], + [[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]], + ] + ), ) - np.testing.assert_allclose(g, expected) - g = create_grid((2, 2, 2), spacing=(1.2, 1.3, 1.0)) - expected = np.array( - [ - [[[-0.6, -0.6], [-0.6, -0.6]], [[0.6, 0.6], [0.6, 0.6]]], - [[[-0.65, -0.65], [0.65, 0.65]], [[-0.65, -0.65], 
[0.65, 0.65]]], - [[[-0.5, 0.5], [-0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]]], - [[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]], - ] + test_assert( + create_grid, + ((2, 2, 2), (1.2, 1.3, 1.0)), + np.array( + [ + [[[-0.6, -0.6], [-0.6, -0.6]], [[0.6, 0.6], [0.6, 0.6]]], + [[[-0.65, -0.65], [0.65, 0.65]], [[-0.65, -0.65], [0.65, 0.65]]], + [[[-0.5, 0.5], [-0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]]], + [[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]], + ] + ), ) - np.testing.assert_allclose(g, expected) def test_create_control_grid(self): with self.assertRaisesRegex(TypeError, ""): @@ -83,72 +82,87 @@ def test_create_control_grid(self): with self.assertRaisesRegex(TypeError, ""): create_control_grid((1, 1), 2.0) - g = create_control_grid((1.0, 1.0), (1.0, 1.0)) - expected = np.array( - [ - [[-1.0, -1.0, -1.0], [0.0, 0.0, 0.0], [1.0, 1.0, 1.0]], - [[-1.0, 0.0, 1.0], [-1.0, 0.0, 1.0], [-1.0, 0.0, 1.0]], - [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], - ] + test_assert( + create_control_grid, + ((1.0, 1.0), (1.0, 1.0)), + np.array( + [ + [[-1.0, -1.0, -1.0], [0.0, 0.0, 0.0], [1.0, 1.0, 1.0]], + [[-1.0, 0.0, 1.0], [-1.0, 0.0, 1.0], [-1.0, 0.0, 1.0]], + [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], + ] + ), ) - np.testing.assert_allclose(g, expected) - g = create_control_grid((1.0, 1.0), (2.0, 2.0)) - expected = np.array( - [ - [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]], - [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]], - [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], - ] + test_assert( + create_control_grid, + ((1.0, 1.0), (2.0, 2.0)), + np.array( + [ + [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]], + [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]], + [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], + ] + ), ) - np.testing.assert_allclose(g, expected) - g = create_control_grid((2.0, 2.0), (1.0, 1.0)) - expected = np.array( - [ - [[-1.5, -1.5, -1.5, -1.5], [-0.5, -0.5, -0.5, -0.5], [0.5, 0.5, 0.5, 0.5], [1.5, 1.5, 1.5, 1.5]], - [[-1.5, -0.5, 0.5, 1.5], [-1.5, -0.5, 0.5, 1.5], [-1.5, -0.5, 0.5, 1.5], [-1.5, -0.5, 0.5, 1.5]], - [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]], - ] + test_assert( + create_control_grid, + ((2.0, 2.0), (1.0, 1.0)), + np.array( + [ + [[-1.5, -1.5, -1.5, -1.5], [-0.5, -0.5, -0.5, -0.5], [0.5, 0.5, 0.5, 0.5], [1.5, 1.5, 1.5, 1.5]], + [[-1.5, -0.5, 0.5, 1.5], [-1.5, -0.5, 0.5, 1.5], [-1.5, -0.5, 0.5, 1.5], [-1.5, -0.5, 0.5, 1.5]], + [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]], + ] + ), ) - np.testing.assert_allclose(g, expected) - g = create_control_grid((2.0, 2.0), (2.0, 2.0)) - expected = np.array( - [ - [[-3.0, -3.0, -3.0, -3.0], [-1.0, -1.0, -1.0, -1.0], [1.0, 1.0, 1.0, 1.0], [3.0, 3.0, 3.0, 3.0]], - [[-3.0, -1.0, 1.0, 3.0], [-3.0, -1.0, 1.0, 3.0], [-3.0, -1.0, 1.0, 3.0], [-3.0, -1.0, 1.0, 3.0]], - [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]], - ] + test_assert( + create_control_grid, + ((2.0, 2.0), (2.0, 2.0)), + np.array( + [ + [[-3.0, -3.0, -3.0, -3.0], [-1.0, -1.0, -1.0, -1.0], [1.0, 1.0, 1.0, 1.0], [3.0, 3.0, 3.0, 3.0]], + [[-3.0, -1.0, 1.0, 3.0], [-3.0, -1.0, 1.0, 3.0], [-3.0, -1.0, 1.0, 3.0], [-3.0, -1.0, 1.0, 3.0]], + [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]], + ] + ), ) - np.testing.assert_allclose(g, expected) - g = create_control_grid((1.0, 1.0, 1.0), (2.0, 2.0, 2.0), homogeneous=False) - expected = np.array( - [ - [ - 
[[-2.0, -2.0, -2.0], [-2.0, -2.0, -2.0], [-2.0, -2.0, -2.0]], - [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], - [[2.0, 2.0, 2.0], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0]], - ], - [ - [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]], - [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]], - [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]], - ], + test_assert( + create_control_grid, + ((1.0, 1.0, 1.0), (2.0, 2.0, 2.0), False), + np.array( [ - [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]], - [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]], - [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]], - ], - ] + [ + [[-2.0, -2.0, -2.0], [-2.0, -2.0, -2.0], [-2.0, -2.0, -2.0]], + [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], + [[2.0, 2.0, 2.0], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0]], + ], + [ + [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]], + [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]], + [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]], + ], + [ + [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]], + [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]], + [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]], + ], + ] + ), ) - np.testing.assert_allclose(g, expected) def test_assert(func, params, expected): - m = func(*params) - np.testing.assert_allclose(m, expected, atol=1e-7) + gpu_test = ("torch_gpu",) if torch.cuda.is_available() else () + for b in ("torch", "numpy") + gpu_test: + if b == "torch_gpu": + m = func(*params, device="cuda:0", backend="torch") + else: + m = func(*params, backend=b) + assert_allclose(m, expected, type_test=False, rtol=1e-2 if is_tf32_env() else 1e-5, atol=1e-5) class TestCreateAffine(unittest.TestCase): diff --git a/tests/test_crf_cpu.py b/tests/test_crf_cpu.py index ed1860943f..6f9864e934 100644 --- a/tests/test_crf_cpu.py +++ b/tests/test_crf_cpu.py @@ -55,12 +55,12 @@ # Batch 0 [ # Channel 0 - [1, 1, 1, 0.5, 0], + [1, 1, 1, 0.5, 0] ], # Batch 1 [ # Channel 0 - [1, 1, 0.5, 0, 0], + [1, 1, 0.5, 0, 0] ], ], # Expected @@ -117,12 +117,12 @@ # Batch 0 [ # Channel 0 - [1, 1, 1, 0.5, 0], + [1, 1, 1, 0.5, 0] ], # Batch 1 [ # Channel 0 - [1, 1, 0.5, 0, 0], + [1, 1, 0.5, 0, 0] ], ], # Expected @@ -185,7 +185,7 @@ [1.0, 1.0, 1.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0, 0.0], ], - ], + ] ], # Features [ @@ -207,7 +207,7 @@ [0.0, 0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 1.0, 1.0], ], - ], + ] ], # Expected [ @@ -237,7 +237,7 @@ [0.688815, 0.687855, 0.687076, 0.228579, 0.227552], [0.687434, 0.686453, 0.445019, 0.229047, 0.227588], ], - ], + ] ], ], [ @@ -344,7 +344,7 @@ [0.0, 0.0, 0.0, 1.0, 1.0], ], ], - ], + ] ], # Features [ @@ -392,8 +392,8 @@ [0.0, 0.0, 1.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0, 1.0], ], - ], - ], + ] + ] ], # Expected [ @@ -485,7 +485,7 @@ [0.500533, 0.500745, 0.553344, 0.771576, 0.772222], ], ], - ], + ] ], ], ] diff --git a/tests/test_crf_cuda.py b/tests/test_crf_cuda.py index adf8c440c0..8881b9aec5 100644 --- a/tests/test_crf_cuda.py +++ b/tests/test_crf_cuda.py @@ -55,12 +55,12 @@ # Batch 0 [ # Channel 0 - [1, 1, 1, 0.5, 0], + [1, 1, 1, 0.5, 0] ], # Batch 1 [ # Channel 0 - [1, 1, 0.5, 0, 0], + [1, 1, 0.5, 0, 0] ], ], # Expected @@ -117,12 +117,12 @@ # Batch 0 [ # Channel 0 - [1, 1, 1, 0.5, 0], + [1, 1, 1, 0.5, 0] ], # Batch 1 [ # Channel 0 - [1, 1, 0.5, 0, 0], + [1, 1, 0.5, 0, 0] ], ], # Expected @@ -185,7 +185,7 @@ [0.5, 1.0, 0.5, 0.0, 0.0], [1.0, 0.5, 0.0, 0.0, 0.0], ], - ], + ] ], # Features [ @@ -207,7 +207,7 @@ [0.0, 0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 1.0, 1.0], ], - ], + ] ], # Expected [ @@ -237,7 
+237,7 @@ [0.492602, 0.609557, 0.480947, 0.161909, 0.161476], [0.610678, 0.480516, 0.352479, 0.159380, 0.158274], ], - ], + ] ], ], [ @@ -344,7 +344,7 @@ [0.0, 0.0, 0.0, 1.0, 1.0], ], ], - ], + ] ], # Features [ @@ -392,8 +392,8 @@ [0.0, 0.0, 1.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0, 1.0], ], - ], - ], + ] + ] ], # Expected [ @@ -485,7 +485,7 @@ [0.500663, 0.500887, 0.556332, 0.773597, 0.775210], ], ], - ], + ] ], ], ] diff --git a/tests/test_crop_foreground.py b/tests/test_crop_foreground.py index 71e488cac8..0bae1f90f3 100644 --- a/tests/test_crop_foreground.py +++ b/tests/test_crop_foreground.py @@ -12,60 +12,79 @@ import unittest import numpy as np +import torch from parameterized import parameterized from monai.transforms import CropForeground +from tests.utils import TEST_NDARRAYS -TEST_CASE_1 = [ - {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 0}, - np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]), - np.array([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]]), -] - -TEST_CASE_2 = [ - {"select_fn": lambda x: x > 1, "channel_indices": None, "margin": 0}, - np.array([[[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 3, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]]), - np.array([[[3]]]), -] - -TEST_CASE_3 = [ - {"select_fn": lambda x: x > 0, "channel_indices": 0, "margin": 0}, - np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]), - np.array([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]]), -] - -TEST_CASE_4 = [ - {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 1}, - np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]), - np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0]]]), -] - -TEST_CASE_5 = [ - {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": [2, 1]}, - np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]), - np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]), -] - -TEST_CASE_6 = [ - {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 0, "k_divisible": 4}, - np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]), - np.array([[[1, 2, 1, 0], [2, 3, 2, 0], [1, 2, 1, 0], [0, 0, 0, 0]]]), -] - -TEST_CASE_7 = [ - {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 0, "k_divisible": 10, "constant_values": 2}, - np.array([[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]), - np.zeros((1, 0, 0)), -] +TEST_COORDS, TESTS = [], [] + +for p in TEST_NDARRAYS: + TEST_COORDS.append( + [ + {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 0}, + p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]), + p([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]]), + ] + ) + + TESTS.append( + [ + {"select_fn": lambda x: x > 1, "channel_indices": None, "margin": 0}, + p([[[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 3, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]]), + p([[[3]]]), + ] + ) + + TESTS.append( + [ + {"select_fn": lambda x: x > 0, "channel_indices": 0, "margin": 0}, + p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]), + p([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]]), + ] + ) + + TESTS.append( + [ + {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 1}, + p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]), + 
p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0]]]), + ] + ) + + TESTS.append( + [ + {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": [2, 1]}, + p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]), + p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]), + ] + ) + + TESTS.append( + [ + {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 0, "k_divisible": 4}, + p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]), + p([[[1, 2, 1, 0], [2, 3, 2, 0], [1, 2, 1, 0], [0, 0, 0, 0]]]), + ] + ) + + TESTS.append( + [ + {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 0, "k_divisible": 10}, + p([[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]), + p(np.zeros((1, 0, 0), dtype=np.int64)), + ] + ) class TestCropForeground(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7]) + @parameterized.expand(TEST_COORDS + TESTS) def test_value(self, argments, image, expected_data): result = CropForeground(**argments)(image) - np.testing.assert_allclose(result, expected_data) + torch.testing.assert_allclose(result, expected_data, rtol=1e-7, atol=0) - @parameterized.expand([TEST_CASE_1]) + @parameterized.expand(TEST_COORDS) def test_return_coords(self, argments, image, _): argments["return_coords"] = True _, start_coord, end_coord = CropForeground(**argments)(image) diff --git a/tests/test_crop_foregroundd.py b/tests/test_crop_foregroundd.py index efe6b65b4b..5fa474d6ac 100644 --- a/tests/test_crop_foregroundd.py +++ b/tests/test_crop_foregroundd.py @@ -12,85 +12,128 @@ import unittest import numpy as np +import torch from parameterized import parameterized from monai.transforms import CropForegroundd -from monai.utils import NumpyPadMode +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - { - "keys": ["img", "label"], - "source_key": "label", - "select_fn": lambda x: x > 0, - "channel_indices": None, - "margin": 0, - "mode": "constant", - "constant_values": 2, - }, - { - "img": np.array([[[1, 0, 2, 0, 1], [0, 1, 2, 1, 0], [2, 2, 3, 2, 2], [0, 1, 2, 1, 0], [1, 0, 2, 0, 1]]]), - "label": np.array([[[0, 0, 0, 0, 0], [0, 1, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 1, 0], [0, 0, 0, 0, 0]]]), - }, - np.array([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]]), -] +TEST_POSITION, TESTS = [], [] +for p in TEST_NDARRAYS: -TEST_CASE_2 = [ - {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 1, "channel_indices": None, "margin": 0}, - {"img": np.array([[[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 3, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]])}, - np.array([[[3]]]), -] - -TEST_CASE_3 = [ - {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 0, "channel_indices": 0, "margin": 0}, - {"img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]])}, - np.array([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]]), -] - -TEST_CASE_4 = [ - {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 0, "channel_indices": None, "margin": 1}, - {"img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]])}, - np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0]]]), -] - -TEST_CASE_5 = [ - {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 0, "channel_indices": None, "margin": [2, 1]}, - {"img": 
np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]])}, - np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]), -] - -TEST_CASE_6 = [ - { - "keys": ["img", "seg"], - "source_key": "img", - "select_fn": lambda x: x > 0, - "channel_indices": 0, - "margin": 0, - "k_divisible": [4, 6], - "mode": ["edge", NumpyPadMode.CONSTANT], - }, - { - "img": np.array([[[0, 2, 1, 2, 0], [1, 1, 2, 1, 1], [2, 2, 3, 2, 2], [1, 1, 2, 1, 1], [0, 0, 0, 0, 0]]]), - "seg": np.array([[[0, 2, 1, 2, 0], [1, 1, 2, 1, 1], [2, 2, 3, 2, 2], [1, 1, 2, 1, 1], [0, 0, 0, 0, 0]]]), - }, - np.array([[[0, 2, 1, 2, 0, 0], [1, 1, 2, 1, 1, 1], [2, 2, 3, 2, 2, 2], [1, 1, 2, 1, 1, 1]]]), -] + TEST_POSITION.append( + [ + { + "keys": ["img", "label"], + "source_key": "label", + "select_fn": lambda x: x > 0, + "channel_indices": None, + "margin": 0, + }, + { + "img": p( + np.array([[[1, 0, 2, 0, 1], [0, 1, 2, 1, 0], [2, 2, 3, 2, 2], [0, 1, 2, 1, 0], [1, 0, 2, 0, 1]]]) + ), + "label": p( + np.array([[[0, 0, 0, 0, 0], [0, 1, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 1, 0], [0, 0, 0, 0, 0]]]) + ), + }, + p(np.array([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]])), + ] + ) + TESTS.append( + [ + {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 1, "channel_indices": None, "margin": 0}, + { + "img": p( + np.array([[[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 3, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]]) + ) + }, + p(np.array([[[3]]])), + ] + ) + TESTS.append( + [ + {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 0, "channel_indices": 0, "margin": 0}, + { + "img": p( + np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]) + ) + }, + p(np.array([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]])), + ] + ) + TESTS.append( + [ + {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 0, "channel_indices": None, "margin": 1}, + { + "img": p( + np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]) + ) + }, + p(np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0]]])), + ] + ) + TESTS.append( + [ + { + "keys": ["img"], + "source_key": "img", + "select_fn": lambda x: x > 0, + "channel_indices": None, + "margin": [2, 1], + }, + { + "img": p( + np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]) + ) + }, + p(np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]])), + ] + ) + TESTS.append( + [ + { + "keys": ["img"], + "source_key": "img", + "select_fn": lambda x: x > 0, + "channel_indices": 0, + "margin": 0, + "k_divisible": [4, 6], + "mode": "edge", + }, + { + "img": p( + np.array( + [[[0, 2, 1, 2, 0], [1, 1, 2, 1, 1], [2, 2, 3, 2, 2], [1, 1, 2, 1, 1], [0, 0, 0, 0, 0]]], + dtype=np.float32, + ) + ) + }, + p(np.array([[[0, 2, 1, 2, 0, 0], [1, 1, 2, 1, 1, 1], [2, 2, 3, 2, 2, 2], [1, 1, 2, 1, 1, 1]]])), + ] + ) class TestCropForegroundd(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6]) - def test_value(self, argments, image, expected_data): - result = CropForegroundd(**argments)(image) - np.testing.assert_allclose(result["img"], expected_data) + @parameterized.expand(TEST_POSITION + TESTS) + def test_value(self, argments, input_data, expected_data): + result = CropForegroundd(**argments)(input_data) + r, i = result["img"], input_data["img"] + self.assertEqual(type(r), type(i)) + if 
isinstance(r, torch.Tensor): + self.assertEqual(r.device, i.device) + assert_allclose(r, expected_data) - @parameterized.expand([TEST_CASE_1]) - def test_foreground_position(self, argments, image, _): - result = CropForegroundd(**argments)(image) + @parameterized.expand(TEST_POSITION) + def test_foreground_position(self, argments, input_data, _): + result = CropForegroundd(**argments)(input_data) np.testing.assert_allclose(result["foreground_start_coord"], np.array([1, 1])) np.testing.assert_allclose(result["foreground_end_coord"], np.array([4, 4])) argments["start_coord_key"] = "test_start_coord" argments["end_coord_key"] = "test_end_coord" - result = CropForegroundd(**argments)(image) + result = CropForegroundd(**argments)(input_data) np.testing.assert_allclose(result["test_start_coord"], np.array([1, 1])) np.testing.assert_allclose(result["test_end_coord"], np.array([4, 4])) diff --git a/tests/test_csv_saver.py b/tests/test_csv_saver.py index 6dd0159322..a279599463 100644 --- a/tests/test_csv_saver.py +++ b/tests/test_csv_saver.py @@ -29,7 +29,7 @@ def test_saved_content(self): saver.finalize() filepath = os.path.join(tempdir, "predictions.csv") self.assertTrue(os.path.exists(filepath)) - with open(filepath, "r") as f: + with open(filepath) as f: reader = csv.reader(f) i = 0 for row in reader: diff --git a/tests/test_cucim_dict_transform.py b/tests/test_cucim_dict_transform.py new file mode 100644 index 0000000000..4936375142 --- /dev/null +++ b/tests/test_cucim_dict_transform.py @@ -0,0 +1,141 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
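The dict-based tests added below exercise `CuCIMd`, which routes a named cuCIM operation over the requested keys of a data dictionary while preserving the array type of each value (NumPy in, NumPy out; CuPy in, CuPy out). As a rough sketch of the adapter pattern under test (a hypothetical stand-in for illustration, not the actual MONAI implementation):

    # Minimal dictionary-transform adapter (illustrative only).
    from typing import Callable, Dict, Hashable, Sequence

    class DictTransformAdapter:
        """Apply an array transform to selected keys of a data dictionary."""

        def __init__(self, keys: Sequence[Hashable], transform: Callable):
            self.keys = keys
            self.transform = transform

        def __call__(self, data: Dict) -> Dict:
            data = dict(data)  # shallow copy so the caller's dict is not mutated
            for key in self.keys:
                data[key] = self.transform(data[key])
            return data

For example, `DictTransformAdapter(keys=["image"], transform=np.flipud)` would flip only the "image" entry and pass every other key through untouched, which mirrors the type- and device-preservation contract the assertions below check.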
+ +import unittest + +import numpy as np +from parameterized import parameterized + +from monai.transforms import CuCIMd +from monai.utils import optional_import, set_determinism +from tests.utils import skip_if_no_cuda + +_, has_cut = optional_import("cucim.core.operations.expose.transform") +cp, has_cp = optional_import("cupy") + +set_determinism(seed=0) + +TEST_CASE_COLOR_JITTER_1 = [ + {"name": "color_jitter", "brightness": 0.0, "contrast": 0.0, "saturation": 0.0, "hue": 0.0}, + np.array([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]], dtype=np.float32), + np.array([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]], dtype=np.float32), +] + +TEST_CASE_COLOR_JITTER_2 = [ + {"name": "color_jitter", "brightness": 0.0, "contrast": 0.0, "saturation": 0.0, "hue": 0.0}, + np.array([[[0, 1], [2, 3]], [[0, 1], [2, 3]], [[0, 1], [2, 3]]], dtype=np.uint8), + np.array([[[0, 1], [2, 3]], [[0, 1], [2, 3]], [[0, 1], [2, 3]]], dtype=np.uint8), +] + +TEST_CASE_FLIP_1 = [ + {"name": "image_flip", "spatial_axis": -1}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]]], dtype=np.float32), +] + + +TEST_CASE_ROTATE_1 = [ + {"name": "image_rotate_90", "k": 1, "spatial_axis": (-2, -1)}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]]], dtype=np.float32), +] + +TEST_CASE_SCALE_INTENSITY_1 = [ + {"name": "scale_intensity_range", "a_min": 0.0, "a_max": 4.0, "b_min": 0.0, "b_max": 1.0, "clip": False}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]]], dtype=np.float32), +] + +TEST_CASE_ZOOM_1 = [ + {"name": "zoom", "zoom_factor": (0.5, 0.5)}, + np.mgrid[:3, 1:4].astype(dtype=np.float32), + np.concatenate([np.ones((1, 3, 3), dtype=np.float32) * 1.0, np.ones((1, 3, 3), dtype=np.float32) * 2.0]), +] + + +@skip_if_no_cuda +@unittest.skipUnless(has_cp, "CuPy is required.") +@unittest.skipUnless(has_cut, "cuCIM transforms are required.") +class TestCuCIMDict(unittest.TestCase): + @parameterized.expand( + [ + TEST_CASE_COLOR_JITTER_1, + TEST_CASE_COLOR_JITTER_2, + TEST_CASE_FLIP_1, + TEST_CASE_ROTATE_1, + TEST_CASE_SCALE_INTENSITY_1, + TEST_CASE_ZOOM_1, + ] + ) + def test_tramsforms_numpy_single(self, params, input, expected): + input = {"image": input} + output = CuCIMd(keys="image", **params)(input)["image"] + self.assertTrue(output.dtype == expected.dtype) + self.assertTrue(isinstance(output, np.ndarray)) + cp.testing.assert_allclose(output, expected) + + @parameterized.expand( + [ + TEST_CASE_COLOR_JITTER_1, + TEST_CASE_COLOR_JITTER_2, + TEST_CASE_FLIP_1, + TEST_CASE_ROTATE_1, + TEST_CASE_SCALE_INTENSITY_1, + TEST_CASE_ZOOM_1, + ] + ) + def test_tramsforms_numpy_batch(self, params, input, expected): + input = {"image": input[cp.newaxis, ...]} + expected = expected[cp.newaxis, ...] 
+ output = CuCIMd(keys="image", **params)(input)["image"] + self.assertTrue(output.dtype == expected.dtype) + self.assertTrue(isinstance(output, np.ndarray)) + cp.testing.assert_allclose(output, expected) + + @parameterized.expand( + [ + TEST_CASE_COLOR_JITTER_1, + TEST_CASE_COLOR_JITTER_2, + TEST_CASE_FLIP_1, + TEST_CASE_ROTATE_1, + TEST_CASE_SCALE_INTENSITY_1, + TEST_CASE_ZOOM_1, + ] + ) + def test_tramsforms_cupy_single(self, params, input, expected): + input = {"image": cp.asarray(input)} + expected = cp.asarray(expected) + output = CuCIMd(keys="image", **params)(input)["image"] + self.assertTrue(output.dtype == expected.dtype) + self.assertTrue(isinstance(output, cp.ndarray)) + cp.testing.assert_allclose(output, expected) + + @parameterized.expand( + [ + TEST_CASE_COLOR_JITTER_1, + TEST_CASE_COLOR_JITTER_2, + TEST_CASE_FLIP_1, + TEST_CASE_ROTATE_1, + TEST_CASE_SCALE_INTENSITY_1, + TEST_CASE_ZOOM_1, + ] + ) + def test_tramsforms_cupy_batch(self, params, input, expected): + input = {"image": cp.asarray(input)[cp.newaxis, ...]} + expected = cp.asarray(expected)[cp.newaxis, ...] + output = CuCIMd(keys="image", **params)(input)["image"] + self.assertTrue(output.dtype == expected.dtype) + self.assertTrue(isinstance(output, cp.ndarray)) + cp.testing.assert_allclose(output, expected) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_cucim_transform.py b/tests/test_cucim_transform.py new file mode 100644 index 0000000000..a6c0084c99 --- /dev/null +++ b/tests/test_cucim_transform.py @@ -0,0 +1,140 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
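Like the dict-based module above, the array-based `CuCIM` tests that follow are gated on a CUDA device, CuPy, and the cuCIM transform module all being present; `optional_import` returns a `(module, available)` pair rather than raising at import time, so test collection never fails on a CPU-only runner. A small sketch of that guard pattern (the test body is illustrative):

    # Guarding a GPU-only test with optional_import.
    import unittest

    from monai.utils import optional_import

    cp, has_cp = optional_import("cupy")  # never raises, even when CuPy is absent

    @unittest.skipUnless(has_cp, "CuPy is required.")
    class TestNeedsCuPy(unittest.TestCase):
        def test_sum(self):
            # runs only when CuPy imported successfully
            self.assertEqual(int(cp.arange(4).sum()), 6)

A skipped class is reported as such instead of erroring during test discovery, which is why the same triple of decorators appears on both cuCIM test classes.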
+ +import unittest + +import numpy as np +from parameterized import parameterized + +from monai.transforms import CuCIM +from monai.utils import optional_import, set_determinism +from tests.utils import skip_if_no_cuda + +_, has_cut = optional_import("cucim.core.operations.expose.transform") +cp, has_cp = optional_import("cupy") + +set_determinism(seed=0) + +TEST_CASE_COLOR_JITTER_1 = [ + {"name": "color_jitter", "brightness": 0.0, "contrast": 0.0, "saturation": 0.0, "hue": 0.0}, + np.array([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]], dtype=np.float32), + np.array([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]], dtype=np.float32), +] + +TEST_CASE_COLOR_JITTER_2 = [ + {"name": "color_jitter", "brightness": 0.0, "contrast": 0.0, "saturation": 0.0, "hue": 0.0}, + np.array([[[0, 1], [2, 3]], [[0, 1], [2, 3]], [[0, 1], [2, 3]]], dtype=np.uint8), + np.array([[[0, 1], [2, 3]], [[0, 1], [2, 3]], [[0, 1], [2, 3]]], dtype=np.uint8), +] + +TEST_CASE_FLIP_1 = [ + {"name": "image_flip", "spatial_axis": -1}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]]], dtype=np.float32), +] + + +TEST_CASE_ROTATE_1 = [ + {"name": "image_rotate_90", "k": 1, "spatial_axis": (-2, -1)}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]]], dtype=np.float32), +] + +TEST_CASE_SCALE_INTENSITY_1 = [ + {"name": "scale_intensity_range", "a_min": 0.0, "a_max": 4.0, "b_min": 0.0, "b_max": 1.0, "clip": False}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]]], dtype=np.float32), +] + +TEST_CASE_ZOOM_1 = [ + {"name": "zoom", "zoom_factor": (0.5, 0.5)}, + np.mgrid[:3, 1:4].astype(dtype=np.float32), + np.concatenate([np.ones((1, 3, 3), dtype=np.float32) * 1.0, np.ones((1, 3, 3), dtype=np.float32) * 2.0]), +] + + +@skip_if_no_cuda +@unittest.skipUnless(has_cp, "CuPy is required.") +@unittest.skipUnless(has_cut, "cuCIM transforms are required.") +class TestCuCIM(unittest.TestCase): + @parameterized.expand( + [ + TEST_CASE_COLOR_JITTER_1, + TEST_CASE_COLOR_JITTER_2, + TEST_CASE_FLIP_1, + TEST_CASE_ROTATE_1, + TEST_CASE_SCALE_INTENSITY_1, + TEST_CASE_ZOOM_1, + ] + ) + def test_tramsforms_numpy_single(self, params, input, expected): + output = CuCIM(**params)(input) + self.assertTrue(output.dtype == expected.dtype) + self.assertTrue(isinstance(output, np.ndarray)) + cp.testing.assert_allclose(output, expected) + + @parameterized.expand( + [ + TEST_CASE_COLOR_JITTER_1, + TEST_CASE_COLOR_JITTER_2, + TEST_CASE_FLIP_1, + TEST_CASE_ROTATE_1, + TEST_CASE_SCALE_INTENSITY_1, + TEST_CASE_ZOOM_1, + ] + ) + def test_tramsforms_numpy_batch(self, params, input, expected): + input = input[cp.newaxis, ...] + expected = expected[cp.newaxis, ...] 
+ output = CuCIM(**params)(input) + self.assertTrue(output.dtype == expected.dtype) + self.assertTrue(isinstance(output, np.ndarray)) + cp.testing.assert_allclose(output, expected) + + @parameterized.expand( + [ + TEST_CASE_COLOR_JITTER_1, + TEST_CASE_COLOR_JITTER_2, + TEST_CASE_FLIP_1, + TEST_CASE_ROTATE_1, + TEST_CASE_SCALE_INTENSITY_1, + TEST_CASE_ZOOM_1, + ] + ) + def test_tramsforms_cupy_single(self, params, input, expected): + input = cp.asarray(input) + expected = cp.asarray(expected) + output = CuCIM(**params)(input) + self.assertTrue(output.dtype == expected.dtype) + self.assertTrue(isinstance(output, cp.ndarray)) + cp.testing.assert_allclose(output, expected) + + @parameterized.expand( + [ + TEST_CASE_COLOR_JITTER_1, + TEST_CASE_COLOR_JITTER_2, + TEST_CASE_FLIP_1, + TEST_CASE_ROTATE_1, + TEST_CASE_SCALE_INTENSITY_1, + TEST_CASE_ZOOM_1, + ] + ) + def test_tramsforms_cupy_batch(self, params, input, expected): + input = cp.asarray(input)[cp.newaxis, ...] + expected = cp.asarray(expected)[cp.newaxis, ...] + output = CuCIM(**params)(input) + self.assertTrue(output.dtype == expected.dtype) + self.assertTrue(isinstance(output, cp.ndarray)) + cp.testing.assert_allclose(output, expected) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py index 2cbfaec113..c8391939f1 100644 --- a/tests/test_cuimage_reader.py +++ b/tests/test_cuimage_reader.py @@ -21,10 +21,11 @@ from monai.data.image_reader import WSIReader from monai.utils import optional_import -_, has_cim = optional_import("cucim") +cucim, has_cucim = optional_import("cucim") +has_cucim = has_cucim and hasattr(cucim, "CuImage") PILImage, has_pil = optional_import("PIL.Image") -FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff" +FILE_URL = "https://drive.google.com/uc?id=1sGTKZlJBIz53pfqTxoTqiIQzIoEzHLAe" FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + os.path.basename(FILE_URL)) HEIGHT = 32914 @@ -46,13 +47,7 @@ TEST_CASE_3 = [ FILE_PATH, - { - "location": (0, 0), - "size": (8, 8), - "level": 2, - "grid_shape": (2, 1), - "patch_size": 2, - }, + {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 2}, np.array( [ [[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[239, 239], [239, 239]]], @@ -63,27 +58,17 @@ TEST_CASE_4 = [ FILE_PATH, - { - "location": (0, 0), - "size": (8, 8), - "level": 2, - "grid_shape": (2, 1), - "patch_size": 1, - }, + {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, np.array([[[[239]], [[239]], [[239]]], [[[243]], [[243]], [[243]]]]), ] -TEST_CASE_RGB_0 = [ - np.ones((3, 2, 2), dtype=np.uint8), # CHW -] +TEST_CASE_RGB_0 = [np.ones((3, 2, 2), dtype=np.uint8)] # CHW -TEST_CASE_RGB_1 = [ - np.ones((3, 100, 100), dtype=np.uint8), # CHW -] +TEST_CASE_RGB_1 = [np.ones((3, 100, 100), dtype=np.uint8)] # CHW class TestCuCIMReader(unittest.TestCase): - @skipUnless(has_cim, "Requires CuCIM") + @skipUnless(has_cucim, "Requires CuCIM") def setUp(self): download_url(FILE_URL, FILE_PATH, "5a3cfd4fd725c50578ddb80b517b759f") @@ -112,6 +97,7 @@ def test_read_patches(self, file_path, patch_info, expected_img): @parameterized.expand([TEST_CASE_RGB_0, TEST_CASE_RGB_1]) @skipUnless(has_pil, "Requires PIL") + @skipUnless(has_cucim and cucim.__version__ == "0.19.0", "Skipped for cucim>0.19.0") def test_read_rgba(self, img_expected): image = {} reader = WSIReader("cuCIM") diff --git a/tests/test_data_stats.py
b/tests/test_data_stats.py index 50536f2a5c..535b28bcf1 100644 --- a/tests/test_data_stats.py +++ b/tests/test_data_stats.py @@ -11,6 +11,7 @@ import logging import os +import sys import tempfile import unittest @@ -126,7 +127,7 @@ TEST_CASE_8 = [ np.array([[0, 1], [1, 2]]), - "test data statistics:\nType: \nShape: (2, 2)\nValue range: (0, 2)\n" + "test data statistics:\nType: int64\nShape: (2, 2)\nValue range: (0, 2)\n" "Value: [[0 1]\n [1 2]]\nAdditional info: 1.0\n", ] @@ -159,9 +160,10 @@ def test_file(self, input_data, expected_print): for h in _logger.handlers[:]: h.close() _logger.removeHandler(h) - with open(filename, "r") as f: + with open(filename) as f: content = f.read() - self.assertEqual(content, expected_print) + if sys.platform != "win32": + self.assertEqual(content, expected_print) if __name__ == "__main__": diff --git a/tests/test_data_statsd.py b/tests/test_data_statsd.py index aea0f1e721..4c33a82b67 100644 --- a/tests/test_data_statsd.py +++ b/tests/test_data_statsd.py @@ -11,6 +11,7 @@ import logging import os +import sys import tempfile import unittest @@ -147,23 +148,14 @@ TEST_CASE_9 = [ {"img": np.array([[0, 1], [1, 2]])}, - "test data statistics:\nType: \nShape: (2, 2)\nValue range: (0, 2)\n" + "test data statistics:\nType: int64\nShape: (2, 2)\nValue range: (0, 2)\n" "Value: [[0 1]\n [1 2]]\nAdditional info: 1.0\n", ] class TestDataStatsd(unittest.TestCase): @parameterized.expand( - [ - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - TEST_CASE_4, - TEST_CASE_5, - TEST_CASE_6, - TEST_CASE_7, - TEST_CASE_8, - ] + [TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7, TEST_CASE_8] ) def test_value(self, input_param, input_data, expected_print): transform = DataStatsd(**input_param) @@ -192,9 +184,10 @@ def test_file(self, input_data, expected_print): h.close() _logger.removeHandler(h) del handler - with open(filename, "r") as f: + with open(filename) as f: content = f.read() - self.assertEqual(content, expected_print) + if sys.platform != "win32": + self.assertEqual(content, expected_print) if __name__ == "__main__": diff --git a/tests/test_dataloader.py b/tests/test_dataloader.py index 3b159fb5b8..035f145c9a 100644 --- a/tests/test_dataloader.py +++ b/tests/test_dataloader.py @@ -20,19 +20,9 @@ from monai.transforms import Compose, DataStatsd, Randomizable, SimulateDelayd from monai.utils import set_determinism -TEST_CASE_1 = [ - [ - {"image": np.asarray([1, 2, 3])}, - {"image": np.asarray([4, 5])}, - ] -] - -TEST_CASE_2 = [ - [ - {"label": torch.as_tensor([[3], [2]])}, - {"label": np.asarray([[1], [2]])}, - ] -] +TEST_CASE_1 = [[{"image": np.asarray([1, 2, 3])}, {"image": np.asarray([4, 5])}]] + +TEST_CASE_2 = [[{"label": torch.as_tensor([[3], [2]])}, {"label": np.asarray([[1], [2]])}]] class TestDataLoader(unittest.TestCase): diff --git a/tests/test_dataset_summary.py b/tests/test_dataset_summary.py index 5307bc7e66..172d4980dd 100644 --- a/tests/test_dataset_summary.py +++ b/tests/test_dataset_summary.py @@ -56,13 +56,7 @@ def test_spacing_intensity(self): def test_anisotropic_spacing(self): with tempfile.TemporaryDirectory() as tempdir: - pixdims = [ - [1.0, 1.0, 5.0], - [1.0, 1.0, 4.0], - [1.0, 1.0, 4.5], - [1.0, 1.0, 2.0], - [1.0, 1.0, 1.0], - ] + pixdims = [[1.0, 1.0, 5.0], [1.0, 1.0, 4.0], [1.0, 1.0, 4.5], [1.0, 1.0, 2.0], [1.0, 1.0, 1.0]] for i in range(5): im, seg = create_test_image_3d(32, 32, 32, num_seg_classes=1, num_objs=3, rad_max=6, channel_dim=0) n = nib.Nifti1Image(im, np.eye(4)) diff --git 
a/tests/test_decathlondataset.py b/tests/test_decathlondataset.py index 15dbceb8ad..db07d361db 100644 --- a/tests/test_decathlondataset.py +++ b/tests/test_decathlondataset.py @@ -68,12 +68,7 @@ def _test_dataset(dataset): self.assertEqual(len(data), 208) # test dataset properties - data = DecathlonDataset( - root_dir=testing_dir, - task="Task04_Hippocampus", - section="validation", - download=False, - ) + data = DecathlonDataset(root_dir=testing_dir, task="Task04_Hippocampus", section="validation", download=False) properties = data.get_properties(keys="labels") self.assertDictEqual(properties["labels"], {"0": "background", "1": "Anterior", "2": "Posterior"}) diff --git a/tests/test_decollate.py b/tests/test_decollate.py index 521d263663..f35988e215 100644 --- a/tests/test_decollate.py +++ b/tests/test_decollate.py @@ -120,7 +120,7 @@ def check_match(self, in1, in2): def check_decollate(self, dataset): batch_size = 2 - num_workers = 2 + num_workers = 2 if sys.platform == "linux" else 0 loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) @@ -170,10 +170,7 @@ def test_decollation_examples(self, input_val, expected_out): self.assertListEqual(expected_out, out) def test_dict_examples(self): - test_case = { - "meta": {"out": ["test", "test"]}, - "image_meta_dict": {"scl_slope": torch.Tensor((0.0, 0.0))}, - } + test_case = {"meta": {"out": ["test", "test"]}, "image_meta_dict": {"scl_slope": torch.Tensor((0.0, 0.0))}} out = decollate_batch(test_case) self.assertEqual(out[0]["meta"]["out"], "test") self.assertEqual(out[0]["image_meta_dict"]["scl_slope"], 0.0) diff --git a/tests/test_deepedit_transforms.py b/tests/test_deepedit_transforms.py index c2b11e8ee7..391b724da9 100644 --- a/tests/test_deepedit_transforms.py +++ b/tests/test_deepedit_transforms.py @@ -28,11 +28,7 @@ "background": [0, 0, 0], } -DISCARD_ADD_GUIDANCE_TEST_CASE = [ - {"image": IMAGE, "label": LABEL}, - DATA_1, - (3, 1, 5, 5), -] +DISCARD_ADD_GUIDANCE_TEST_CASE = [{"image": IMAGE, "label": LABEL}, DATA_1, (3, 1, 5, 5)] DATA_2 = { "image": IMAGE, diff --git a/tests/test_deepgrow_transforms.py b/tests/test_deepgrow_transforms.py index f50e92d146..3085309bdc 100644 --- a/tests/test_deepgrow_transforms.py +++ b/tests/test_deepgrow_transforms.py @@ -31,12 +31,7 @@ IMAGE = np.array([[[[1, 0, 2, 0, 1], [0, 1, 2, 1, 0], [2, 2, 3, 2, 2], [0, 1, 2, 1, 0], [1, 0, 2, 0, 1]]]]) LABEL = np.array([[[[0, 0, 0, 0, 0], [0, 1, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 1, 0], [0, 0, 0, 0, 0]]]]) -DATA_1 = { - "image": IMAGE, - "label": LABEL, - "image_meta_dict": {}, - "label_meta_dict": {}, -} +DATA_1 = {"image": IMAGE, "label": LABEL, "image_meta_dict": {}, "label_meta_dict": {}} DATA_2 = { "image": np.array( @@ -141,23 +136,11 @@ "pred": np.array([[[[1, 2], [3, 4]], [[5, 6], [7, 8]]]]), } -DATA_12 = { - "image": np.arange(27).reshape(3, 3, 3), - "image_meta_dict": {}, - "guidance": [[0, 0, 0], [0, 1, 1], 1], -} +DATA_12 = {"image": np.arange(27).reshape(3, 3, 3), "image_meta_dict": {}, "guidance": [[0, 0, 0], [0, 1, 1], 1]} -FIND_SLICE_TEST_CASE_1 = [ - {"label": "label", "sids": "sids"}, - DATA_1, - [0], -] +FIND_SLICE_TEST_CASE_1 = [{"label": "label", "sids": "sids"}, DATA_1, [0]] -FIND_SLICE_TEST_CASE_2 = [ - {"label": "label", "sids": "sids"}, - DATA_2, - [0, 1], -] +FIND_SLICE_TEST_CASE_2 = [{"label": "label", "sids": "sids"}, DATA_2, [0, 1]] CROP_TEST_CASE_1 = [ { @@ -338,14 +321,10 @@ [[1.0, 1.0, 2.0, 2.0], [1.0, 1.0, 2.0, 2.0], [3.0, 3.0, 4.0, 4.0], [3.0, 3.0, 4.0, 4.0]], [[5.0, 5.0, 6.0, 6.0], [5.0, 
5.0, 6.0, 6.0], [7.0, 7.0, 8.0, 8.0], [7.0, 7.0, 8.0, 8.0]], [[5.0, 5.0, 6.0, 6.0], [5.0, 5.0, 6.0, 6.0], [7.0, 7.0, 8.0, 8.0], [7.0, 7.0, 8.0, 8.0]], - ], + ] ) -RESTORE_LABEL_TEST_CASE_2 = [ - {"keys": ["pred"], "ref_image": "image", "mode": "nearest"}, - DATA_11, - RESULT, -] +RESTORE_LABEL_TEST_CASE_2 = [{"keys": ["pred"], "ref_image": "image", "mode": "nearest"}, DATA_11, RESULT] FETCH_2D_SLICE_TEST_CASE_1 = [ {"keys": ["image"], "guidance": "guidance"}, diff --git a/tests/test_delete_itemsd.py b/tests/test_delete_itemsd.py index 7426e39ff0..b7cd104c46 100644 --- a/tests/test_delete_itemsd.py +++ b/tests/test_delete_itemsd.py @@ -19,19 +19,36 @@ TEST_CASE_1 = [{"keys": [str(i) for i in range(30)]}, 20] +TEST_CASE_2 = [{"keys": ["image/" + str(i) for i in range(30)], "sep": "/"}, 20] + +TEST_CASE_3 = [{"keys": "meta_dict%0008\\|[0-9]", "sep": "%", "use_re": True}] + class TestDeleteItemsd(unittest.TestCase): - @parameterized.expand([TEST_CASE_1]) + @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_memory(self, input_param, expected_key_size): - input_data = {} + input_data = {"image": {}} if "sep" in input_param else {} for i in range(50): - input_data[str(i)] = [time.time()] * 100000 + if "sep" in input_param: + input_data["image"][str(i)] = [time.time()] * 100000 + else: + input_data[str(i)] = [time.time()] * 100000 result = DeleteItemsd(**input_param)(input_data) - self.assertEqual(len(result.keys()), expected_key_size) + if "sep" in input_param: + self.assertEqual(len(result["image"].keys()), expected_key_size) + else: + self.assertEqual(len(result.keys()), expected_key_size) self.assertGreaterEqual( sys.getsizeof(input_data) * float(expected_key_size) / len(input_data), sys.getsizeof(result) ) + @parameterized.expand([TEST_CASE_3]) + def test_re(self, input_param): + input_data = {"image": [1, 2, 3], "meta_dict": {"0008|0005": 1, "0008|1050": 2, "0008test": 3}} + result = DeleteItemsd(**input_param)(input_data) + self.assertEqual(result["meta_dict"]["0008test"], 3) + self.assertEqual(len(result["meta_dict"]), 1) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_deprecated.py b/tests/test_deprecated.py index 429d5ee767..9c7fe4f632 100644 --- a/tests/test_deprecated.py +++ b/tests/test_deprecated.py @@ -222,3 +222,33 @@ def future1(): warnings.warn("fake warning", DeprecationWarning) self.assertEqual(aw.warning.args[0], "fake warning") + + def test_arg_except2_unknown(self): + """ + Test that the deprecated_arg decorator raises an exception when `removed` + is set in the past and the version string is unknown. + """ + + @deprecated_arg("b", removed=self.prev_version, version_val="0+untagged.1.g3131155") + def afoo4(a, b=None): + pass + + self.assertRaises(DeprecatedError, lambda: afoo4(1, b=2)) + + def test_replacement_arg(self): + """ + Test a deprecated argument being replaced by its new name.
+ """ + + @deprecated_arg("b", new_name="a", since=self.prev_version, version_val=self.test_version) + def afoo4(a, b=None): + return a + + self.assertEqual(afoo4(b=2), 2) + # self.assertRaises(DeprecatedError, lambda: afoo4(1, b=2)) + self.assertEqual(afoo4(1, b=2), 1) # new name is in use + self.assertEqual(afoo4(a=1, b=2), 1) # prefers the new arg + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_detect_envelope.py b/tests/test_detect_envelope.py index ded0290de2..30d6d889eb 100644 --- a/tests/test_detect_envelope.py +++ b/tests/test_detect_envelope.py @@ -12,6 +12,7 @@ import unittest import numpy as np +import torch from parameterized import parameterized from monai.transforms import DetectEnvelope @@ -70,9 +71,9 @@ TEST_CASE_2_CHAN_3D_SINE = [ {}, # args (empty, so use default (i.e. process along first spatial dimension, axis=1) # Create 100 identical windowed sine waves as a (n_samples x 10 x 10) 3D numpy array, twice (2 channels) - np.stack([np.stack([np.stack([hann_windowed_sine] * 10, axis=1)] * 10, axis=2)] * 2, axis=0), + torch.as_tensor(np.stack([np.stack([np.stack([hann_windowed_sine] * 10, axis=1)] * 10, axis=2)] * 2, axis=0)), # Expected output: Set of 100 identical Hann windows in (n_samples x 10 x 10) 3D numpy array, twice (2 channels) - np.stack([np.stack([np.stack([np.hanning(n_samples)] * 10, axis=1)] * 10, axis=2)] * 2, axis=0), + torch.as_tensor(np.stack([np.stack([np.stack([np.hanning(n_samples)] * 10, axis=1)] * 10, axis=2)] * 2, axis=0)), 1e-4, # absolute tolerance ] diff --git a/tests/test_dice_focal_loss.py b/tests/test_dice_focal_loss.py index 920994f8de..a69df21693 100644 --- a/tests/test_dice_focal_loss.py +++ b/tests/test_dice_focal_loss.py @@ -24,11 +24,7 @@ def test_result_onehot_target_include_bg(self): label = torch.randint(low=0, high=2, size=size) pred = torch.randn(size) for reduction in ["sum", "mean", "none"]: - common_params = { - "include_background": True, - "to_onehot_y": False, - "reduction": reduction, - } + common_params = {"include_background": True, "to_onehot_y": False, "reduction": reduction} for focal_weight in [None, torch.tensor([1.0, 1.0, 2.0]), (3, 2.0, 1)]: for lambda_focal in [0.5, 1.0, 1.5]: dice_focal = DiceFocalLoss( @@ -46,11 +42,7 @@ def test_result_no_onehot_no_bg(self): label = torch.argmax(label, dim=1, keepdim=True) pred = torch.randn(size) for reduction in ["sum", "mean", "none"]: - common_params = { - "include_background": False, - "to_onehot_y": True, - "reduction": reduction, - } + common_params = {"include_background": False, "to_onehot_y": True, "reduction": reduction} for focal_weight in [2.0, torch.tensor([1.0, 2.0]), (2.0, 1)]: for lambda_focal in [0.5, 1.0, 1.5]: dice_focal = DiceFocalLoss(focal_weight=focal_weight, lambda_focal=lambda_focal, **common_params) diff --git a/tests/test_dice_loss.py b/tests/test_dice_loss.py index ef0a51eb15..32d8ae694d 100644 --- a/tests/test_dice_loss.py +++ b/tests/test_dice_loss.py @@ -21,10 +21,7 @@ TEST_CASES = [ [ # shape: (1, 1, 2, 2), (1, 1, 2, 2) {"include_background": True, "sigmoid": True, "smooth_nr": 1e-6, "smooth_dr": 1e-6}, - { - "input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), - "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]]), - }, + {"input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]])}, 0.307576, ], [ # shape: (2, 1, 2, 2), (2, 1, 2, 2) @@ -91,26 +88,17 @@ ], [ # shape: (1, 1, 2, 2), (1, 1, 2, 2) {"include_background": True, "sigmoid": True, "smooth_nr": 1e-6, "smooth_dr": 
1e-6}, - { - "input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), - "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]]), - }, + {"input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]])}, 0.307576, ], [ # shape: (1, 1, 2, 2), (1, 1, 2, 2) {"include_background": True, "sigmoid": True, "squared_pred": True}, - { - "input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), - "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]]), - }, + {"input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]])}, 0.178337, ], [ # shape: (1, 1, 2, 2), (1, 1, 2, 2) {"include_background": True, "sigmoid": True, "jaccard": True}, - { - "input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), - "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]]), - }, + {"input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]])}, 0.470451, ], [ # shape: (2, 1, 2, 2), (2, 1, 2, 2) diff --git a/tests/test_distributed_weighted_random_sampler.py b/tests/test_distributed_weighted_random_sampler.py index b8e088fdcf..23574e5121 100644 --- a/tests/test_distributed_weighted_random_sampler.py +++ b/tests/test_distributed_weighted_random_sampler.py @@ -25,10 +25,7 @@ def test_sampling(self): data = [1, 2, 3, 4, 5] weights = [1, 2, 3, 4, 5] sampler = DistributedWeightedRandomSampler( - weights=weights, - dataset=data, - shuffle=False, - generator=torch.Generator().manual_seed(0), + weights=weights, dataset=data, shuffle=False, generator=torch.Generator().manual_seed(0) ) samples = np.array([data[i] for i in list(sampler)]) diff --git a/tests/test_divisible_pad.py b/tests/test_divisible_pad.py index ca15b4b347..bb58668908 100644 --- a/tests/test_divisible_pad.py +++ b/tests/test_divisible_pad.py @@ -22,21 +22,11 @@ for p in TEST_NDARRAYS: # pad first dim to be divisible by 7, the second unchanged. 
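The divisible-pad cases in this hunk follow the usual rounding rule: each flagged dimension is padded up to the next multiple of `k`, and a non-positive `k` leaves that dimension unchanged. The expected shapes can be sanity-checked with a few lines (a standalone sketch of the arithmetic, not MONAI code):

    import math

    def padded_size(size: int, k: int) -> int:
        # next multiple of k at or above size; k <= 0 means "leave unchanged"
        return size if k <= 0 else math.ceil(size / k) * k

    assert padded_size(8, 7) == 14    # k=(7, -1): first dim 8 -> 14
    assert padded_size(7, -1) == 7    # second dim unchanged
    assert padded_size(17, 5) == 20   # k=5: last dim 17 -> 20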
- TESTS.append( - [ - {"k": (7, -1), "mode": "constant"}, - p(np.zeros((3, 8, 7))), - p(np.zeros((3, 14, 7))), - ] - ) + TESTS.append([{"k": (7, -1), "mode": "constant"}, p(np.zeros((3, 8, 7))), p(np.zeros((3, 14, 7)))]) # pad all dimensions to be divisible by 5 TESTS.append( - [ - {"k": 5, "mode": "constant", "method": "end"}, - p(np.zeros((3, 10, 5, 17))), - p(np.zeros((3, 10, 5, 20))), - ] + [{"k": 5, "mode": "constant", "method": "end"}, p(np.zeros((3, 10, 5, 17))), p(np.zeros((3, 10, 5, 20)))] ) @@ -50,11 +40,13 @@ def test_pad_shape(self, input_param, input_data, expected_val): self.assertAlmostEqual(result.shape, expected_val.shape) def test_pad_kwargs(self): - padder = DivisiblePad(k=5, mode="constant", constant_values=((0, 0), (1, 1), (2, 2))) for p in TEST_NDARRAYS: - result = padder(p(np.zeros((3, 8, 4)))) - result = result.cpu() if isinstance(result, torch.Tensor) else result - torch.testing.assert_allclose(result[:, :1, :4], np.ones((3, 1, 4)), rtol=1e-7, atol=0) + input_data = p(np.zeros((3, 8, 4))) + if isinstance(input_data, np.ndarray): + result = DivisiblePad(k=5, mode="constant", constant_values=((0, 0), (1, 1), (2, 2)))(input_data) + np.testing.assert_allclose(result[:, :1, :4], np.ones((3, 1, 4)), rtol=1e-7, atol=0) + else: + result = DivisiblePad(k=5, mode="constant", value=2)(input_data).cpu() torch.testing.assert_allclose(result[:, :, 4:5], np.ones((3, 10, 1)) + 1, rtol=1e-7, atol=0) diff --git a/tests/test_divisible_padd.py b/tests/test_divisible_padd.py index c834adac6d..44faeced7b 100644 --- a/tests/test_divisible_padd.py +++ b/tests/test_divisible_padd.py @@ -28,11 +28,7 @@ np.zeros((3, 14, 7)), ] -TEST_CASE_3 = [ - {"keys": ["img"], "k": 0, "mode": {"constant"}}, - {"img": np.zeros((3, 8))}, - np.zeros((3, 8)), -] +TEST_CASE_3 = [{"keys": ["img"], "k": 0, "mode": {"constant"}}, {"img": np.zeros((3, 8))}, np.zeros((3, 8))] class TestDivisiblePadd(unittest.TestCase): diff --git a/tests/test_downsample_block.py b/tests/test_downsample_block.py index f4ae30198f..d35218c6d7 100644 --- a/tests/test_downsample_block.py +++ b/tests/test_downsample_block.py @@ -20,11 +20,7 @@ TEST_CASES = [ [{"spatial_dims": 2, "kernel_size": 2}, (7, 4, 64, 48), (7, 8, 32, 24)], # 4-channel 2D, batch 7 [{"spatial_dims": 1, "kernel_size": 4}, (16, 4, 63), (16, 8, 15)], # 4-channel 1D, batch 16 - [ # 4-channel 1D, batch 16 - {"spatial_dims": 1, "kernel_size": 4, "padding": 1}, - (16, 4, 63), - (16, 8, 16), - ], + [{"spatial_dims": 1, "kernel_size": 4, "padding": 1}, (16, 4, 63), (16, 8, 16)], # 4-channel 1D, batch 16 [ # 4-channel 3D, batch 16 {"spatial_dims": 3, "kernel_size": 3, "ceil_mode": True}, (16, 4, 32, 24, 48), diff --git a/tests/test_dynunet.py b/tests/test_dynunet.py index 81ed239461..18fe146a40 100644 --- a/tests/test_dynunet.py +++ b/tests/test_dynunet.py @@ -26,14 +26,14 @@ expected_shape: Sequence[Any] TEST_CASE_DYNUNET_2D = [] +out_channels = 2 +in_size = 64 +spatial_dims = 2 for kernel_size in [(3, 3, 3, 1), ((3, 1), 1, (3, 3), (1, 1))]: for strides in [(1, 1, 1, 1), (2, 2, 2, 1)]: + expected_shape = (1, out_channels, *[in_size // strides[0]] * spatial_dims) for in_channels in [2, 3]: for res_block in [True, False]: - out_channels = 2 - in_size = 64 - spatial_dims = 2 - expected_shape = (1, out_channels, *[in_size // strides[0]] * spatial_dims) test_case = [ { "spatial_dims": spatial_dims, @@ -45,6 +45,7 @@ "norm_name": "batch", "deep_supervision": False, "res_block": res_block, + "dropout": None, }, (1, in_channels, in_size, in_size), expected_shape, @@ -52,11 +53,11 @@ 
TEST_CASE_DYNUNET_2D.append(test_case) TEST_CASE_DYNUNET_3D = [] # in 3d cases, also test anisotropic kernel/strides +in_channels = 1 +in_size = 64 for out_channels in [2, 3]: + expected_shape = (1, out_channels, 64, 32, 64) for res_block in [True, False]: - in_channels = 1 - in_size = 64 - expected_shape = (1, out_channels, 64, 32, 64) test_case = [ { "spatial_dims": 3, @@ -68,6 +69,7 @@ "norm_name": ("INSTANCE", {"affine": True}), "deep_supervision": False, "res_block": res_block, + "dropout": ("alphadropout", {"p": 0.25}), }, (1, in_channels, in_size, in_size, in_size), expected_shape, diff --git a/tests/test_efficientnet.py b/tests/test_efficientnet.py index 6befba108a..d36157e6fa 100644 --- a/tests/test_efficientnet.py +++ b/tests/test_efficientnet.py @@ -44,7 +44,7 @@ def get_model_names(): - return ["efficientnet-b{}".format(d) for d in range(8)] + return [f"efficientnet-b{d}" for d in range(8)] def get_expected_model_shape(model_name): @@ -107,11 +107,7 @@ def make_shape_cases( ret_tests.append( [ kwargs, - ( - batch, - in_channels, - ) - + (get_expected_model_shape(model),) * spatial_dim, + (batch, in_channels) + (get_expected_model_shape(model),) * spatial_dim, (batch, num_classes), ] ) @@ -245,7 +241,7 @@ def make_shape_cases( }, [1, 2, 224, 224], ([1, 32, 112, 112], [1, 56, 56, 56], [1, 88, 28, 28], [1, 248, 14, 14], [1, 704, 7, 7]), - ), + ) ] @@ -254,8 +250,12 @@ class TestEFFICIENTNET(unittest.TestCase): def test_shape(self, input_param, input_shape, expected_shape): device = "cuda" if torch.cuda.is_available() else "cpu" - # initialize model - net = EfficientNetBN(**input_param).to(device) + try: + # initialize model + net = EfficientNetBN(**input_param).to(device) + except (ContentTooShortError, HTTPError, RuntimeError) as e: + print(str(e)) + return # skipping the tests because of http errors # run inference with random tensor with eval_mode(net): @@ -268,8 +268,12 @@ def test_shape(self, input_param, input_shape, expected_shape): def test_non_default_shapes(self, input_param, input_shape, expected_shape): device = "cuda" if torch.cuda.is_available() else "cpu" - # initialize model - net = EfficientNetBN(**input_param).to(device) + try: + # initialize model + net = EfficientNetBN(**input_param).to(device) + except (ContentTooShortError, HTTPError, RuntimeError) as e: + print(str(e)) + return # skipping the tests because of http errors # override input shape with different variations num_dims = len(input_shape) - 2 @@ -382,8 +386,12 @@ class TestExtractFeatures(unittest.TestCase): def test_shape(self, input_param, input_shape, expected_shapes): device = "cuda" if torch.cuda.is_available() else "cpu" - # initialize model - net = EfficientNetBNFeatures(**input_param).to(device) + try: + # initialize model + net = EfficientNetBNFeatures(**input_param).to(device) + except (ContentTooShortError, HTTPError, RuntimeError) as e: + print(str(e)) + return # skipping the tests because of http errors # run inference with random tensor with eval_mode(net): diff --git a/tests/test_ensure_channel_first.py b/tests/test_ensure_channel_first.py index 23126d326f..0fb7759219 100644 --- a/tests/test_ensure_channel_first.py +++ b/tests/test_ensure_channel_first.py @@ -27,11 +27,7 @@ TEST_CASE_2 = [{"image_only": False}, ["test_image.nii.gz"], -1] -TEST_CASE_3 = [ - {"image_only": False}, - ["test_image.nii.gz", "test_image2.nii.gz", "test_image3.nii.gz"], - None, -] +TEST_CASE_3 = [{"image_only": False}, ["test_image.nii.gz", "test_image2.nii.gz", "test_image3.nii.gz"], None] TEST_CASE_4 = 
[{"reader": ITKReader(), "image_only": False}, ["test_image.nii.gz"], None] @@ -43,11 +39,7 @@ None, ] -TEST_CASE_7 = [ - {"image_only": False, "reader": ITKReader(pixel_type=itk.UC)}, - "tests/testing_data/CT_DICOM", - None, -] +TEST_CASE_7 = [{"image_only": False, "reader": ITKReader(pixel_type=itk.UC)}, "tests/testing_data/CT_DICOM", None] class TestEnsureChannelFirst(unittest.TestCase): diff --git a/tests/test_ensure_channel_firstd.py b/tests/test_ensure_channel_firstd.py index b4cde02a8f..b5e1abe4ca 100644 --- a/tests/test_ensure_channel_firstd.py +++ b/tests/test_ensure_channel_firstd.py @@ -25,11 +25,7 @@ TEST_CASE_2 = [{"keys": "img"}, ["test_image.nii.gz"], -1] -TEST_CASE_3 = [ - {"keys": "img"}, - ["test_image.nii.gz", "test_image2.nii.gz", "test_image3.nii.gz"], - None, -] +TEST_CASE_3 = [{"keys": "img"}, ["test_image.nii.gz", "test_image2.nii.gz", "test_image3.nii.gz"], None] class TestEnsureChannelFirstd(unittest.TestCase): diff --git a/tests/test_ensure_type.py b/tests/test_ensure_type.py index 8feb96ed37..64094b2360 100644 --- a/tests/test_ensure_type.py +++ b/tests/test_ensure_type.py @@ -25,9 +25,11 @@ def test_array_input(self): test_datas.append(test_datas[-1].cuda()) for test_data in test_datas: for dtype in ("tensor", "NUMPY"): - result = EnsureType(data_type=dtype)(test_data) + result = EnsureType(dtype, dtype=np.float32 if dtype == "NUMPY" else None, device="cpu")(test_data) + if dtype == "NUMPY": + self.assertTrue(result.dtype == np.float32) self.assertTrue(isinstance(result, torch.Tensor if dtype == "tensor" else np.ndarray)) - assert_allclose(result, test_data) + assert_allclose(result, test_data, type_test=False) self.assertTupleEqual(result.shape, (2, 2)) def test_single_input(self): @@ -36,12 +38,12 @@ def test_single_input(self): test_datas.append(test_datas[-1].cuda()) for test_data in test_datas: for dtype in ("tensor", "numpy"): - result = EnsureType(data_type=dtype)(test_data) + result = EnsureType(data_type=dtype, device="cpu")(test_data) self.assertTrue(isinstance(result, torch.Tensor if dtype == "tensor" else np.ndarray)) if isinstance(test_data, bool): self.assertFalse(result) else: - assert_allclose(result, test_data) + assert_allclose(result, test_data, type_test=False) self.assertEqual(result.ndim, 0) def test_string(self): diff --git a/tests/test_ensure_typed.py b/tests/test_ensure_typed.py index 96f482afc2..283f3fa730 100644 --- a/tests/test_ensure_typed.py +++ b/tests/test_ensure_typed.py @@ -25,9 +25,13 @@ def test_array_input(self): test_datas.append(test_datas[-1].cuda()) for test_data in test_datas: for dtype in ("tensor", "NUMPY"): - result = EnsureTyped(keys="data", data_type=dtype)({"data": test_data})["data"] + result = EnsureTyped( + keys="data", data_type=dtype, dtype=np.float32 if dtype == "NUMPY" else None, device="cpu" + )({"data": test_data})["data"] + if dtype == "NUMPY": + self.assertTrue(result.dtype == np.float32) self.assertTrue(isinstance(result, torch.Tensor if dtype == "tensor" else np.ndarray)) - assert_allclose(result, test_data) + assert_allclose(result, test_data, type_test=False) self.assertTupleEqual(result.shape, (2, 2)) def test_single_input(self): @@ -41,7 +45,7 @@ def test_single_input(self): if isinstance(test_data, bool): self.assertFalse(result) else: - assert_allclose(result, test_data) + assert_allclose(result, test_data, type_test=False) self.assertEqual(result.ndim, 0) def test_string(self): @@ -75,7 +79,7 @@ def test_dict(self): "extra": None, } for dtype in ("tensor", "numpy"): - result = 
EnsureTyped(keys="data", data_type=dtype)({"data": test_data})["data"] + result = EnsureTyped(keys="data", data_type=dtype, device="cpu")({"data": test_data})["data"] self.assertTrue(isinstance(result, dict)) self.assertTrue(isinstance(result["img"], torch.Tensor if dtype == "tensor" else np.ndarray)) torch.testing.assert_allclose(result["img"], torch.as_tensor([1.0, 2.0])) diff --git a/tests/test_fg_bg_to_indices.py b/tests/test_fg_bg_to_indices.py index 98626c7028..0d35dd23f8 100644 --- a/tests/test_fg_bg_to_indices.py +++ b/tests/test_fg_bg_to_indices.py @@ -11,58 +11,70 @@ import unittest -import numpy as np from parameterized import parameterized from monai.transforms import FgBgToIndices +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - {"image_threshold": 0.0, "output_shape": None}, - np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), - None, - np.array([1, 2, 3, 5, 6, 7]), - np.array([0, 4, 8]), -] +TESTS_CASES = [] +for p in TEST_NDARRAYS: + TESTS_CASES.append( + [ + {"image_threshold": 0.0, "output_shape": None}, + p([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), + None, + p([1, 2, 3, 5, 6, 7]), + p([0, 4, 8]), + ] + ) -TEST_CASE_2 = [ - {"image_threshold": 0.0, "output_shape": None}, - np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), - np.array([[[1, 1, 1], [1, 0, 1], [1, 1, 1]]]), - np.array([1, 2, 3, 5, 6, 7]), - np.array([0, 8]), -] + TESTS_CASES.append( + [ + {"image_threshold": 0.0, "output_shape": None}, + p([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), + p([[[1, 1, 1], [1, 0, 1], [1, 1, 1]]]), + p([1, 2, 3, 5, 6, 7]), + p([0, 8]), + ] + ) -TEST_CASE_3 = [ - {"image_threshold": 1.0, "output_shape": None}, - np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), - np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]), - np.array([1, 2, 3, 5, 6, 7]), - np.array([0, 8]), -] + TESTS_CASES.append( + [ + {"image_threshold": 1.0, "output_shape": None}, + p([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), + p([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]), + p([1, 2, 3, 5, 6, 7]), + p([0, 8]), + ] + ) -TEST_CASE_4 = [ - {"image_threshold": 1.0, "output_shape": None}, - np.array([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), - np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]), - np.array([1, 2, 3, 5, 6, 7]), - np.array([0, 8]), -] + TESTS_CASES.append( + [ + {"image_threshold": 1.0, "output_shape": None}, + p([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), + p([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]), + p([1, 2, 3, 5, 6, 7]), + p([0, 8]), + ] + ) -TEST_CASE_5 = [ - {"image_threshold": 1.0, "output_shape": [3, 3]}, - np.array([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), - np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]), - np.array([[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]]), - np.array([[0, 0], [2, 2]]), -] + TESTS_CASES.append( + [ + {"image_threshold": 1.0, "output_shape": [3, 3]}, + p([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), + p([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]), + p([[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]]), + p([[0, 0], [2, 2]]), + ] + ) class TestFgBgToIndices(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5]) + @parameterized.expand(TESTS_CASES) def test_type_shape(self, input_data, label, image, expected_fg, expected_bg): fg_indices, bg_indices = FgBgToIndices(**input_data)(label, image) - np.testing.assert_allclose(fg_indices, expected_fg) - np.testing.assert_allclose(bg_indices, expected_bg) + assert_allclose(fg_indices, expected_fg) + assert_allclose(bg_indices, expected_bg) if __name__ == "__main__": diff --git a/tests/test_fg_bg_to_indicesd.py 
b/tests/test_fg_bg_to_indicesd.py index ce6ca30f1b..4691526d94 100644 --- a/tests/test_fg_bg_to_indicesd.py +++ b/tests/test_fg_bg_to_indicesd.py @@ -11,53 +11,66 @@ import unittest -import numpy as np from parameterized import parameterized from monai.transforms import FgBgToIndicesd +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - {"keys": "label", "image_key": None, "image_threshold": 0.0, "output_shape": None}, - {"label": np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]])}, - np.array([1, 2, 3, 5, 6, 7]), - np.array([0, 4, 8]), -] +TEST_CASES = [] +for p in TEST_NDARRAYS: -TEST_CASE_2 = [ - {"keys": "label", "image_key": "image", "image_threshold": 0.0, "output_shape": None}, - {"label": np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), "image": np.array([[[1, 1, 1], [1, 0, 1], [1, 1, 1]]])}, - np.array([1, 2, 3, 5, 6, 7]), - np.array([0, 8]), -] + TEST_CASES.append( + [ + {"keys": "label", "image_key": None, "image_threshold": 0.0, "output_shape": None}, + {"label": p([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]])}, + p([1, 2, 3, 5, 6, 7]), + p([0, 4, 8]), + ] + ) -TEST_CASE_3 = [ - {"keys": "label", "image_key": "image", "image_threshold": 1.0, "output_shape": None}, - {"label": np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), "image": np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])}, - np.array([1, 2, 3, 5, 6, 7]), - np.array([0, 8]), -] + TEST_CASES.append( + [ + {"keys": "label", "image_key": "image", "image_threshold": 0.0, "output_shape": None}, + {"label": p([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), "image": p([[[1, 1, 1], [1, 0, 1], [1, 1, 1]]])}, + p([1, 2, 3, 5, 6, 7]), + p([0, 8]), + ] + ) -TEST_CASE_4 = [ - {"keys": "label", "image_key": "image", "image_threshold": 1.0, "output_shape": None}, - {"label": np.array([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), "image": np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])}, - np.array([1, 2, 3, 5, 6, 7]), - np.array([0, 8]), -] + TEST_CASES.append( + [ + {"keys": "label", "image_key": "image", "image_threshold": 1.0, "output_shape": None}, + {"label": p([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), "image": p([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])}, + p([1, 2, 3, 5, 6, 7]), + p([0, 8]), + ] + ) -TEST_CASE_5 = [ - {"keys": "label", "image_key": "image", "image_threshold": 1.0, "output_shape": [3, 3]}, - {"label": np.array([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), "image": np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])}, - np.array([[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]]), - np.array([[0, 0], [2, 2]]), -] + TEST_CASES.append( + [ + {"keys": "label", "image_key": "image", "image_threshold": 1.0, "output_shape": None}, + {"label": p([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), "image": p([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])}, + p([1, 2, 3, 5, 6, 7]), + p([0, 8]), + ] + ) + + TEST_CASES.append( + [ + {"keys": "label", "image_key": "image", "image_threshold": 1.0, "output_shape": [3, 3]}, + {"label": p([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), "image": p([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])}, + p([[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]]), + p([[0, 0], [2, 2]]), + ] + ) class TestFgBgToIndicesd(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5]) + @parameterized.expand(TEST_CASES) def test_type_shape(self, input_data, data, expected_fg, expected_bg): result = FgBgToIndicesd(**input_data)(data) - np.testing.assert_allclose(result["label_fg_indices"], expected_fg) - np.testing.assert_allclose(result["label_bg_indices"], expected_bg) + assert_allclose(result["label_fg_indices"], expected_fg) + 
assert_allclose(result["label_bg_indices"], expected_bg) if __name__ == "__main__": diff --git a/tests/test_fill_holes.py b/tests/test_fill_holes.py index 6ea83c239b..c6ad202587 100644 --- a/tests/test_fill_holes.py +++ b/tests/test_fill_holes.py @@ -18,29 +18,13 @@ from monai.transforms import FillHoles from tests.utils import assert_allclose, clone -grid_1_raw = [ - [1, 1, 1], - [1, 0, 1], - [1, 1, 1], -] +grid_1_raw = [[1, 1, 1], [1, 0, 1], [1, 1, 1]] -grid_2_raw = [ - [0, 1, 0], - [1, 0, 1], - [0, 1, 0], -] +grid_2_raw = [[0, 1, 0], [1, 0, 1], [0, 1, 0]] -grid_3_raw = [ - [1, 1, 1], - [1, 1, 1], - [1, 1, 1], -] +grid_3_raw = [[1, 1, 1], [1, 1, 1], [1, 1, 1]] -grid_4_raw = [ - [0, 1, 0], - [1, 1, 1], - [0, 1, 0], -] +grid_4_raw = [[0, 1, 0], [1, 1, 1], [0, 1, 0]] grid_1 = torch.tensor([grid_1_raw]) @@ -50,49 +34,15 @@ grid_4 = torch.tensor([grid_4_raw]) -grid_5 = torch.tensor( - [ - [ - [1, 1, 1], - [1, 0, 0], - [1, 1, 1], - ] - ] -) - -grid_6 = torch.tensor( - [ - [ - [1, 1, 2, 2, 2], - [1, 0, 2, 0, 2], - [1, 1, 2, 2, 2], - ] - ] -) - -grid_7 = torch.tensor( - [ - [ - [1, 1, 2, 2, 2], - [1, 0, 2, 2, 2], - [1, 1, 2, 2, 2], - ] - ] -) - -TEST_CASE_0 = [ - "enclosed_default_full_connectivity_default_applied_labels", - {}, - grid_1, - grid_3, -] +grid_5 = torch.tensor([[[1, 1, 1], [1, 0, 0], [1, 1, 1]]]) -TEST_CASE_1 = [ - "enclosed_full_connectivity_default_applied_labels", - {"connectivity": 2}, - grid_1, - grid_3, -] +grid_6 = torch.tensor([[[1, 1, 2, 2, 2], [1, 0, 2, 0, 2], [1, 1, 2, 2, 2]]]) + +grid_7 = torch.tensor([[[1, 1, 2, 2, 2], [1, 0, 2, 2, 2], [1, 1, 2, 2, 2]]]) + +TEST_CASE_0 = ["enclosed_default_full_connectivity_default_applied_labels", {}, grid_1, grid_3] + +TEST_CASE_1 = ["enclosed_full_connectivity_default_applied_labels", {"connectivity": 2}, grid_1, grid_3] TEST_CASE_2 = [ "enclosed_full_connectivity_applied_labels_same_single", @@ -129,40 +79,15 @@ grid_3, ] -TEST_CASE_7 = [ - "enclosed_connectivity_1_default_applied_labels", - {"connectivity": 1}, - grid_1, - grid_3, -] +TEST_CASE_7 = ["enclosed_connectivity_1_default_applied_labels", {"connectivity": 1}, grid_1, grid_3] -TEST_CASE_8 = [ - "enclosed_connectivity_1_default_applied_labels", - {"connectivity": 1}, - grid_2, - grid_4, -] +TEST_CASE_8 = ["enclosed_connectivity_1_default_applied_labels", {"connectivity": 1}, grid_2, grid_4] -TEST_CASE_9 = [ - "open_full_connectivity_default_applied_labels", - {"connectivity": 2}, - grid_2, - grid_2, -] +TEST_CASE_9 = ["open_full_connectivity_default_applied_labels", {"connectivity": 2}, grid_2, grid_2] -TEST_CASE_10 = [ - "open_to_edge_connectivity_1_default_applied_labels", - {"connectivity": 1}, - grid_5, - grid_5, -] +TEST_CASE_10 = ["open_to_edge_connectivity_1_default_applied_labels", {"connectivity": 1}, grid_5, grid_5] -TEST_CASE_11 = [ - "open_to_other_label_connectivity_1_default_applied_labels", - {"connectivity": 1}, - grid_6, - grid_7, -] +TEST_CASE_11 = ["open_to_other_label_connectivity_1_default_applied_labels", {"connectivity": 1}, grid_6, grid_7] TEST_CASE_12 = [ "open_to_other_label_connectivity_1_applied_labels_other", diff --git a/tests/test_flip.py b/tests/test_flip.py index 404a3def7d..8547f8aeb4 100644 --- a/tests/test_flip.py +++ b/tests/test_flip.py @@ -34,12 +34,10 @@ def test_correct_results(self, _, spatial_axis): for p in TEST_NDARRAYS: im = p(self.imt[0]) flip = Flip(spatial_axis=spatial_axis) - expected = [] - for channel in self.imt[0]: - expected.append(np.flip(channel, spatial_axis)) + expected = [np.flip(channel, spatial_axis) for channel 
in self.imt[0]] expected = np.stack(expected) result = flip(im) - assert_allclose(expected, result) + assert_allclose(result, p(expected)) if __name__ == "__main__": diff --git a/tests/test_flipd.py b/tests/test_flipd.py index 1676723800..2fa783f8ad 100644 --- a/tests/test_flipd.py +++ b/tests/test_flipd.py @@ -33,12 +33,10 @@ def test_invalid_cases(self, _, spatial_axis, raises): def test_correct_results(self, _, spatial_axis): for p in TEST_NDARRAYS: flip = Flipd(keys="img", spatial_axis=spatial_axis) - expected = [] - for channel in self.imt[0]: - expected.append(np.flip(channel, spatial_axis)) + expected = [np.flip(channel, spatial_axis) for channel in self.imt[0]] expected = np.stack(expected) result = flip({"img": p(self.imt[0])})["img"] - assert_allclose(expected, result) + assert_allclose(result, p(expected)) if __name__ == "__main__": diff --git a/tests/test_gaussian.py b/tests/test_gaussian.py index e2659abb0c..b17663652b 100644 --- a/tests/test_gaussian.py +++ b/tests/test_gaussian.py @@ -241,16 +241,8 @@ def test_gaussian(self): rtol=1e-4, ) - np.testing.assert_allclose( - gaussian_1d(1, 1), - torch.tensor([0.24173, 0.382925, 0.24173]), - rtol=1e-4, - ) - np.testing.assert_allclose( - gaussian_1d(1, 1, normalize=True), - torch.tensor([0.2790, 0.4420, 0.2790]), - rtol=1e-4, - ) + np.testing.assert_allclose(gaussian_1d(1, 1), torch.tensor([0.24173, 0.382925, 0.24173]), rtol=1e-4) + np.testing.assert_allclose(gaussian_1d(1, 1, normalize=True), torch.tensor([0.2790, 0.4420, 0.2790]), rtol=1e-4) def test_scalespace_gaussian(self): np.testing.assert_allclose( @@ -272,15 +264,11 @@ def test_scalespace_gaussian(self): ) np.testing.assert_allclose( - gaussian_1d(1, 1, "scalespace"), - torch.tensor([0.20791, 0.46576, 0.20791]), - rtol=1e-3, + gaussian_1d(1, 1, "scalespace"), torch.tensor([0.20791, 0.46576, 0.20791]), rtol=1e-3 ) np.testing.assert_allclose( - gaussian_1d(1, 1, "scalespace", normalize=True), - torch.tensor([0.2358, 0.5283, 0.2358]), - rtol=1e-3, + gaussian_1d(1, 1, "scalespace", normalize=True), torch.tensor([0.2358, 0.5283, 0.2358]), rtol=1e-3 ) np.testing.assert_allclose( diff --git a/tests/test_gaussian_filter.py b/tests/test_gaussian_filter.py index 7636aa5459..62aea524b8 100644 --- a/tests/test_gaussian_filter.py +++ b/tests/test_gaussian_filter.py @@ -19,10 +19,7 @@ from tests.utils import SkipIfBeforePyTorchVersion, skip_if_quick TEST_CASES = [[{"type": "erf", "gt": 2.0}], [{"type": "scalespace", "gt": 3.0}], [{"type": "sampled", "gt": 5.0}]] -TEST_CASES_GPU = [ - [{"type": "erf", "gt": 0.8, "device": "cuda"}], - [{"type": "sampled", "gt": 5.0, "device": "cuda"}], -] +TEST_CASES_GPU = [[{"type": "erf", "gt": 0.8, "device": "cuda"}], [{"type": "sampled", "gt": 5.0, "device": "cuda"}]] TEST_CASES_3d = [ [{"type": "scalespace", "gt": 0.5, "dims": (2, 3, 8, 9, 10), "lr": 0.01, "device": "cuda"}], [{"type": "erf", "gt": 3.8, "dims": (2, 3, 8, 9, 10), "lr": 0.1, "device": "cuda"}], diff --git a/tests/test_gaussian_sharpen.py b/tests/test_gaussian_sharpen.py index 9d078e65e5..9130e33656 100644 --- a/tests/test_gaussian_sharpen.py +++ b/tests/test_gaussian_sharpen.py @@ -11,50 +11,79 @@ import unittest -import numpy as np from parameterized import parameterized from monai.transforms import GaussianSharpen +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - {}, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( +TESTS = [] + +for p in TEST_NDARRAYS: + TESTS.append( [ - [[4.1081963, 3.4950666, 4.1081963], 
[3.7239995, 2.8491793, 3.7239995], [4.569839, 3.9529324, 4.569839]], - [[10.616725, 9.081067, 10.616725], [9.309998, 7.12295, 9.309998], [11.078365, 9.538931, 11.078365]], + {}, + p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), + p( + [ + [ + [4.1081963, 3.4950666, 4.1081963], + [3.7239995, 2.8491793, 3.7239995], + [4.569839, 3.9529324, 4.569839], + ], + [[10.616725, 9.081067, 10.616725], [9.309998, 7.12295, 9.309998], [11.078365, 9.538931, 11.078365]], + ] + ), ] - ), -] + ) -TEST_CASE_2 = [ - {"sigma1": 1.0, "sigma2": 0.75, "alpha": 20}, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( + TESTS.append( [ - [[4.513644, 4.869134, 4.513644], [8.467242, 9.4004135, 8.467242], [10.416813, 12.0653515, 10.416813]], - [[15.711488, 17.569994, 15.711488], [21.16811, 23.501041, 21.16811], [21.614658, 24.766209, 21.614658]], + {"sigma1": 1.0, "sigma2": 0.75, "alpha": 20}, + p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), + p( + [ + [ + [4.513644, 4.869134, 4.513644], + [8.467242, 9.4004135, 8.467242], + [10.416813, 12.0653515, 10.416813], + ], + [ + [15.711488, 17.569994, 15.711488], + [21.16811, 23.501041, 21.16811], + [21.614658, 24.766209, 21.614658], + ], + ] + ), ] - ), -] + ) -TEST_CASE_3 = [ - {"sigma1": (0.5, 1.0), "sigma2": (0.5, 0.75), "alpha": 20}, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( + TESTS.append( [ - [[3.3324685, 3.335536, 3.3324673], [7.7666636, 8.16056, 7.7666636], [12.662973, 14.317837, 12.6629715]], - [[15.329051, 16.57557, 15.329051], [19.41665, 20.40139, 19.416655], [24.659554, 27.557873, 24.659554]], + {"sigma1": (0.5, 1.0), "sigma2": (0.5, 0.75), "alpha": 20}, + p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), + p( + [ + [ + [3.3324685, 3.335536, 3.3324673], + [7.7666636, 8.16056, 7.7666636], + [12.662973, 14.317837, 12.6629715], + ], + [ + [15.329051, 16.57557, 15.329051], + [19.41665, 20.40139, 19.416655], + [24.659554, 27.557873, 24.659554], + ], + ] + ), ] - ), -] + ) class TestGaussianSharpen(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) + @parameterized.expand(TESTS) def test_value(self, argments, image, expected_data): result = GaussianSharpen(**argments)(image) - np.testing.assert_allclose(result, expected_data, rtol=1e-4) + assert_allclose(result, expected_data, atol=0, rtol=1e-4, type_test=False) if __name__ == "__main__": diff --git a/tests/test_gaussian_sharpend.py b/tests/test_gaussian_sharpend.py index c795b11762..4b84eb9c12 100644 --- a/tests/test_gaussian_sharpend.py +++ b/tests/test_gaussian_sharpend.py @@ -15,46 +15,75 @@ from parameterized import parameterized from monai.transforms import GaussianSharpend +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - {"keys": "img"}, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( [ - [[4.1081963, 3.4950666, 4.1081963], [3.7239995, 2.8491793, 3.7239995], [4.569839, 3.9529324, 4.569839]], - [[10.616725, 9.081067, 10.616725], [9.309998, 7.12295, 9.309998], [11.078365, 9.538931, 11.078365]], + {"keys": "img"}, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [4.1081963, 3.4950666, 4.1081963], + [3.7239995, 2.8491793, 3.7239995], + [4.569839, 3.9529324, 4.569839], + ], + [[10.616725, 9.081067, 10.616725], 
[9.309998, 7.12295, 9.309998], [11.078365, 9.538931, 11.078365]], + ] + ), ] - ), -] + ) -TEST_CASE_2 = [ - {"keys": "img", "sigma1": 1.0, "sigma2": 0.75, "alpha": 20}, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( + TESTS.append( [ - [[4.513644, 4.869134, 4.513644], [8.467242, 9.4004135, 8.467242], [10.416813, 12.0653515, 10.416813]], - [[15.711488, 17.569994, 15.711488], [21.16811, 23.501041, 21.16811], [21.614658, 24.766209, 21.614658]], + {"keys": "img", "sigma1": 1.0, "sigma2": 0.75, "alpha": 20}, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [4.513644, 4.869134, 4.513644], + [8.467242, 9.4004135, 8.467242], + [10.416813, 12.0653515, 10.416813], + ], + [ + [15.711488, 17.569994, 15.711488], + [21.16811, 23.501041, 21.16811], + [21.614658, 24.766209, 21.614658], + ], + ] + ), ] - ), -] + ) -TEST_CASE_3 = [ - {"keys": "img", "sigma1": (0.5, 1.0), "sigma2": (0.5, 0.75), "alpha": 20}, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( + TESTS.append( [ - [[3.3324685, 3.335536, 3.3324673], [7.7666636, 8.16056, 7.7666636], [12.662973, 14.317837, 12.6629715]], - [[15.329051, 16.57557, 15.329051], [19.41665, 20.40139, 19.416655], [24.659554, 27.557873, 24.659554]], + {"keys": "img", "sigma1": (0.5, 1.0), "sigma2": (0.5, 0.75), "alpha": 20}, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [3.3324685, 3.335536, 3.3324673], + [7.7666636, 8.16056, 7.7666636], + [12.662973, 14.317837, 12.6629715], + ], + [ + [15.329051, 16.57557, 15.329051], + [19.41665, 20.40139, 19.416655], + [24.659554, 27.557873, 24.659554], + ], + ] + ), ] - ), -] + ) class TestGaussianSharpend(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) + @parameterized.expand(TESTS) def test_value(self, argments, image, expected_data): result = GaussianSharpend(**argments)(image) - np.testing.assert_allclose(result["img"], expected_data, rtol=1e-4) + assert_allclose(result["img"], expected_data, rtol=1e-4, type_test=False) if __name__ == "__main__": diff --git a/tests/test_gaussian_smooth.py b/tests/test_gaussian_smooth.py index e51977fbee..24ecfb88e8 100644 --- a/tests/test_gaussian_smooth.py +++ b/tests/test_gaussian_smooth.py @@ -11,54 +11,83 @@ import unittest -import numpy as np from parameterized import parameterized from monai.transforms import GaussianSmooth +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - {"sigma": 1.5}, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( +TESTS = [] + +for p in TEST_NDARRAYS: + TESTS.append( [ - [ - [0.59167546, 0.69312394, 0.59167546], - [0.7956997, 0.93213004, 0.7956997], - [0.7668002, 0.8982755, 0.7668002], - ], - [[1.6105323, 1.8866735, 1.6105323], [1.9892492, 2.3303251, 1.9892492], [1.7856569, 2.091825, 1.7856569]], + {"sigma": 1.5}, + p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), + p( + [ + [ + [0.59167546, 0.69312394, 0.59167546], + [0.7956997, 0.93213004, 0.7956997], + [0.7668002, 0.8982755, 0.7668002], + ], + [ + [1.6105323, 1.8866735, 1.6105323], + [1.9892492, 2.3303251, 1.9892492], + [1.7856569, 2.091825, 1.7856569], + ], + ] + ), ] - ), -] + ) -TEST_CASE_2 = [ - {"sigma": 0.5}, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( + TESTS.append( [ - 
[[0.8424794, 0.99864554, 0.8424794], [1.678146, 1.9892154, 1.678146], [1.9889624, 2.3576462, 1.9889624]], - [[2.966061, 3.5158648, 2.966061], [4.1953645, 4.973038, 4.1953645], [4.112544, 4.8748655, 4.1125436]], + {"sigma": 0.5}, + p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), + p( + [ + [ + [0.8424794, 0.99864554, 0.8424794], + [1.678146, 1.9892154, 1.678146], + [1.9889624, 2.3576462, 1.9889624], + ], + [ + [2.966061, 3.5158648, 2.966061], + [4.1953645, 4.973038, 4.1953645], + [4.112544, 4.8748655, 4.1125436], + ], + ] + ), ] - ), -] + ) -TEST_CASE_3 = [ - {"sigma": [1.5, 0.5]}, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( + TESTS.append( [ - [[0.8542037, 1.0125432, 0.8542037], [1.1487541, 1.3616928, 1.1487541], [1.1070318, 1.3122368, 1.1070318]], - [[2.3251305, 2.756128, 2.3251305], [2.8718853, 3.4042323, 2.8718853], [2.5779586, 3.0558217, 2.5779586]], + {"sigma": [1.5, 0.5]}, + p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), + p( + [ + [ + [0.8542037, 1.0125432, 0.8542037], + [1.1487541, 1.3616928, 1.1487541], + [1.1070318, 1.3122368, 1.1070318], + ], + [ + [2.3251305, 2.756128, 2.3251305], + [2.8718853, 3.4042323, 2.8718853], + [2.5779586, 3.0558217, 2.5779586], + ], + ] + ), ] - ), -] + ) class TestGaussianSmooth(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) + @parameterized.expand(TESTS) def test_value(self, argments, image, expected_data): result = GaussianSmooth(**argments)(image) - np.testing.assert_allclose(result, expected_data, rtol=1e-4) + assert_allclose(result, expected_data, atol=0, rtol=1e-4, type_test=False) if __name__ == "__main__": diff --git a/tests/test_gaussian_smoothd.py b/tests/test_gaussian_smoothd.py index 3d7eb6195e..ae358dd59a 100644 --- a/tests/test_gaussian_smoothd.py +++ b/tests/test_gaussian_smoothd.py @@ -15,50 +15,79 @@ from parameterized import parameterized from monai.transforms import GaussianSmoothd +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - {"keys": "img", "sigma": 1.5}, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( [ - [ - [0.59167546, 0.69312394, 0.59167546], - [0.7956997, 0.93213004, 0.7956997], - [0.7668002, 0.8982755, 0.7668002], - ], - [[1.6105323, 1.8866735, 1.6105323], [1.9892492, 2.3303251, 1.9892492], [1.7856569, 2.091825, 1.7856569]], + {"keys": "img", "sigma": 1.5}, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [0.59167546, 0.69312394, 0.59167546], + [0.7956997, 0.93213004, 0.7956997], + [0.7668002, 0.8982755, 0.7668002], + ], + [ + [1.6105323, 1.8866735, 1.6105323], + [1.9892492, 2.3303251, 1.9892492], + [1.7856569, 2.091825, 1.7856569], + ], + ] + ), ] - ), -] + ) -TEST_CASE_2 = [ - {"keys": "img", "sigma": 0.5}, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( + TESTS.append( [ - [[0.8424794, 0.99864554, 0.8424794], [1.678146, 1.9892154, 1.678146], [1.9889624, 2.3576462, 1.9889624]], - [[2.966061, 3.5158648, 2.966061], [4.1953645, 4.973038, 4.1953645], [4.112544, 4.8748655, 4.1125436]], + {"keys": "img", "sigma": 0.5}, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [0.8424794, 0.99864554, 0.8424794], + [1.678146, 1.9892154, 1.678146], + [1.9889624, 
2.3576462, 1.9889624], + ], + [ + [2.966061, 3.5158648, 2.966061], + [4.1953645, 4.973038, 4.1953645], + [4.112544, 4.8748655, 4.1125436], + ], + ] + ), ] - ), -] + ) -TEST_CASE_3 = [ - {"keys": "img", "sigma": [1.5, 0.5]}, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( + TESTS.append( [ - [[0.8542037, 1.0125432, 0.8542037], [1.1487541, 1.3616928, 1.1487541], [1.1070318, 1.3122368, 1.1070318]], - [[2.3251305, 2.756128, 2.3251305], [2.8718853, 3.4042323, 2.8718853], [2.5779586, 3.0558217, 2.5779586]], + {"keys": "img", "sigma": [1.5, 0.5]}, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [0.8542037, 1.0125432, 0.8542037], + [1.1487541, 1.3616928, 1.1487541], + [1.1070318, 1.3122368, 1.1070318], + ], + [ + [2.3251305, 2.756128, 2.3251305], + [2.8718853, 3.4042323, 2.8718853], + [2.5779586, 3.0558217, 2.5779586], + ], + ] + ), ] - ), -] + ) class TestGaussianSmoothd(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) + @parameterized.expand(TESTS) def test_value(self, argments, image, expected_data): result = GaussianSmoothd(**argments)(image) - np.testing.assert_allclose(result["img"], expected_data, rtol=1e-4) + assert_allclose(result["img"], expected_data, rtol=1e-4, type_test=False) if __name__ == "__main__": diff --git a/tests/test_generalized_dice_loss.py b/tests/test_generalized_dice_loss.py index 06446204fb..7285f01a41 100644 --- a/tests/test_generalized_dice_loss.py +++ b/tests/test_generalized_dice_loss.py @@ -21,10 +21,7 @@ TEST_CASES = [ [ # shape: (1, 1, 2, 2), (1, 1, 2, 2) {"include_background": True, "sigmoid": True, "smooth_nr": 1e-6, "smooth_dr": 1e-6}, - { - "input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), - "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]]), - }, + {"input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]])}, 0.307576, ], [ # shape: (2, 1, 2, 2), (2, 1, 2, 2) @@ -99,10 +96,7 @@ ], [ # shape: (1, 1, 2, 2), (1, 1, 2, 2) {"include_background": True, "sigmoid": True, "smooth_nr": 1e-6, "smooth_dr": 1e-6}, - { - "input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), - "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]]), - }, + {"input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]])}, 0.307576, ], [ # shape: (1, 2, 4), (1, 1, 4) diff --git a/tests/test_generalized_wasserstein_dice_loss.py b/tests/test_generalized_wasserstein_dice_loss.py index 295a4a6d70..5ad946d20d 100644 --- a/tests/test_generalized_wasserstein_dice_loss.py +++ b/tests/test_generalized_wasserstein_dice_loss.py @@ -159,7 +159,7 @@ def test_convergence(self): # define a model with one layer class OnelayerNet(nn.Module): def __init__(self): - super(OnelayerNet, self).__init__() + super().__init__() self.layer = nn.Linear(num_voxels, num_voxels * num_classes) def forward(self, x): diff --git a/tests/test_generate_label_classes_crop_centers.py b/tests/test_generate_label_classes_crop_centers.py index 38f2a3e0d1..0e40750276 100644 --- a/tests/test_generate_label_classes_crop_centers.py +++ b/tests/test_generate_label_classes_crop_centers.py @@ -10,11 +10,13 @@ # limitations under the License. 
import unittest +from copy import deepcopy -import numpy as np from parameterized import parameterized from monai.transforms import generate_label_classes_crop_centers +from monai.utils.misc import set_determinism +from tests.utils import TEST_NDARRAYS, assert_allclose TEST_CASE_1 = [ { @@ -23,7 +25,6 @@ "ratios": [1, 2], "label_spatial_shape": [3, 3, 3], "indices": [[3, 12, 21], [1, 9, 18]], - "rand_state": np.random.RandomState(), }, list, 2, @@ -37,7 +38,6 @@ "ratios": None, "label_spatial_shape": [3, 3, 3], "indices": [[3, 12, 21], [1, 9, 18]], - "rand_state": np.random.RandomState(), }, list, 1, @@ -48,10 +48,21 @@ class TestGenerateLabelClassesCropCenters(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_type_shape(self, input_data, expected_type, expected_count, expected_shape): - result = generate_label_classes_crop_centers(**input_data) - self.assertIsInstance(result, expected_type) - self.assertEqual(len(result), expected_count) - self.assertEqual(len(result[0]), expected_shape) + results = [] + for p in TEST_NDARRAYS + (None,): + input_data = deepcopy(input_data) + if p is not None: + input_data["indices"] = p(input_data["indices"]) + set_determinism(0) + result = generate_label_classes_crop_centers(**input_data) + self.assertIsInstance(result, expected_type) + self.assertEqual(len(result), expected_count) + self.assertEqual(len(result[0]), expected_shape) + # check for consistency between numpy, torch and torch.cuda + results.append(result) + if len(results) > 1: + for x, y in zip(results[0], results[-1]): + assert_allclose(x, y, type_test=False) if __name__ == "__main__": diff --git a/tests/test_generate_param_groups.py b/tests/test_generate_param_groups.py index ea1fad44f9..c718f2c729 100644 --- a/tests/test_generate_param_groups.py +++ b/tests/test_generate_param_groups.py @@ -18,15 +18,7 @@ from monai.optimizers import generate_param_groups from monai.utils import ensure_tuple -TEST_CASE_1 = [ - { - "layer_matches": [lambda x: x.model[-1]], - "match_types": "select", - "lr_values": [1], - }, - (1, 100), - [5, 21], -] +TEST_CASE_1 = [{"layer_matches": [lambda x: x.model[-1]], "match_types": "select", "lr_values": [1]}, (1, 100), [5, 21]] TEST_CASE_2 = [ { @@ -39,11 +31,7 @@ ] TEST_CASE_3 = [ - { - "layer_matches": [lambda x: x.model[2][1].conv[0].conv], - "match_types": ["select"], - "lr_values": [1], - }, + {"layer_matches": [lambda x: x.model[2][1].conv[0].conv], "match_types": ["select"], "lr_values": [1]}, (1, 100), [2, 24], ] @@ -59,12 +47,7 @@ ] TEST_CASE_5 = [ - { - "layer_matches": [lambda x: x.model[-1]], - "match_types": ["select"], - "lr_values": [1], - "include_others": False, - }, + {"layer_matches": [lambda x: x.model[-1]], "match_types": ["select"], "lr_values": [1], "include_others": False}, (1), [5], ] @@ -86,12 +69,7 @@ class TestGenerateParamGroups(unittest.TestCase): def test_lr_values(self, input_param, expected_values, expected_groups): device = "cuda" if torch.cuda.is_available() else "cpu" net = Unet( - dimensions=3, - in_channels=1, - out_channels=3, - channels=(16, 32, 64), - strides=(2, 2), - num_res_units=1, + spatial_dims=3, in_channels=1, out_channels=3, channels=(16, 32, 64), strides=(2, 2), num_res_units=1 ).to(device) params = generate_param_groups(network=net, **input_param) @@ -107,12 +85,7 @@ def test_wrong(self): """overlapped""" device = "cuda" if torch.cuda.is_available() else "cpu" net = Unet( - dimensions=3, - in_channels=1, - out_channels=3, - channels=(16, 32, 64), - strides=(2, 2), - num_res_units=1, +
spatial_dims=3, in_channels=1, out_channels=3, channels=(16, 32, 64), strides=(2, 2), num_res_units=1 ).to(device) params = generate_param_groups( diff --git a/tests/test_generate_pos_neg_label_crop_centers.py b/tests/test_generate_pos_neg_label_crop_centers.py index 40181aa9ea..b8f2840757 100644 --- a/tests/test_generate_pos_neg_label_crop_centers.py +++ b/tests/test_generate_pos_neg_label_crop_centers.py @@ -10,35 +10,52 @@ # limitations under the License. import unittest +from copy import deepcopy -import numpy as np from parameterized import parameterized from monai.transforms import generate_pos_neg_label_crop_centers - -TEST_CASE_1 = [ - { - "spatial_size": [2, 2, 2], - "num_samples": 2, - "pos_ratio": 1.0, - "label_spatial_shape": [3, 3, 3], - "fg_indices": [1, 9, 18], - "bg_indices": [3, 12, 21], - "rand_state": np.random.RandomState(), - }, - list, - 2, - 3, -] +from monai.utils.misc import set_determinism +from tests.utils import TEST_NDARRAYS, assert_allclose + +TESTS = [] +TESTS.append( + [ + { + "spatial_size": [2, 2, 2], + "num_samples": 2, + "pos_ratio": 1.0, + "label_spatial_shape": [3, 3, 3], + "fg_indices": [1, 9, 18], + "bg_indices": [3, 12, 21], + }, + list, + 2, + 3, + ] +) class TestGeneratePosNegLabelCropCenters(unittest.TestCase): - @parameterized.expand([TEST_CASE_1]) + @parameterized.expand(TESTS) def test_type_shape(self, input_data, expected_type, expected_count, expected_shape): - result = generate_pos_neg_label_crop_centers(**input_data) - self.assertIsInstance(result, expected_type) - self.assertEqual(len(result), expected_count) - self.assertEqual(len(result[0]), expected_shape) + results = [] + for p in TEST_NDARRAYS + (None,): + input_data = deepcopy(input_data) + if p is not None: + for k in ["fg_indices", "bg_indices"]: + input_data[k] = p(input_data[k]) + set_determinism(0) + result = generate_pos_neg_label_crop_centers(**input_data) + self.assertIsInstance(result, expected_type) + self.assertEqual(len(result), expected_count) + self.assertEqual(len(result[0]), expected_shape) + # check for consistency between numpy, torch and torch.cuda + results.append(result) + if len(results) > 1: + # compare every crop center + for x, y in zip(results[0], results[-1]): + assert_allclose(x, y, type_test=False) if __name__ == "__main__": diff --git a/tests/test_generate_spatial_bounding_box.py b/tests/test_generate_spatial_bounding_box.py index 32a45d8d1c..d73b9fafcc 100644 --- a/tests/test_generate_spatial_bounding_box.py +++ b/tests/test_generate_spatial_bounding_box.py @@ -15,60 +15,79 @@ from parameterized import parameterized from monai.transforms import generate_spatial_bounding_box +from tests.utils import TEST_NDARRAYS -TEST_CASE_1 = [ - { - "img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]), - "select_fn": lambda x: x > 0, - "channel_indices": None, - "margin": 0, - }, - ([1, 1], [4, 4]), -] - -TEST_CASE_2 = [ - { - "img": np.array([[[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 3, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]]), - "select_fn": lambda x: x > 1, - "channel_indices": None, - "margin": 0, - }, - ([2, 2], [3, 3]), -] - -TEST_CASE_3 = [ - { - "img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]), - "select_fn": lambda x: x > 0, - "channel_indices": 0, - "margin": 0, - }, - ([1, 1], [4, 4]), -] - -TEST_CASE_4 = [ - { - "img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]), - "select_fn": lambda x: x > 0, 
- "channel_indices": None, - "margin": 1, - }, - ([0, 0], [4, 5]), -] - -TEST_CASE_5 = [ - { - "img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]), - "select_fn": lambda x: x > 0, - "channel_indices": None, - "margin": [2, 1], - }, - ([0, 0], [5, 5]), -] +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + { + "img": p( + np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]) + ), + "select_fn": lambda x: x > 0, + "channel_indices": None, + "margin": 0, + }, + ([1, 1], [4, 4]), + ] + ) + TESTS.append( + [ + { + "img": p( + np.array([[[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 3, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]]) + ), + "select_fn": lambda x: x > 1, + "channel_indices": None, + "margin": 0, + }, + ([2, 2], [3, 3]), + ] + ) + TESTS.append( + [ + { + "img": p( + np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]) + ), + "select_fn": lambda x: x > 0, + "channel_indices": 0, + "margin": 0, + }, + ([1, 1], [4, 4]), + ] + ) + TESTS.append( + [ + { + "img": p( + np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]) + ), + "select_fn": lambda x: x > 0, + "channel_indices": None, + "margin": 1, + }, + ([0, 0], [4, 5]), + ] + ) + TESTS.append( + [ + { + "img": p( + np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]) + ), + "select_fn": lambda x: x > 0, + "channel_indices": None, + "margin": [2, 1], + }, + ([0, 0], [5, 5]), + ] + ) class TestGenerateSpatialBoundingBox(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5]) + @parameterized.expand(TESTS) def test_value(self, input_data, expected_box): result = generate_spatial_bounding_box(**input_data) self.assertTupleEqual(result, expected_box) diff --git a/tests/test_get_equivalent_dtype.py b/tests/test_get_equivalent_dtype.py index 04ba5ae5fb..de2379b15b 100644 --- a/tests/test_get_equivalent_dtype.py +++ b/tests/test_get_equivalent_dtype.py @@ -32,6 +32,14 @@ def test_get_equivalent_dtype(self, im, input_dtype): out_dtype = get_equivalent_dtype(input_dtype, type(im)) self.assertEqual(out_dtype, im.dtype) + def test_native_type(self): + """the get_equivalent_dtype currently doesn't change the built-in type""" + n_type = [float, int, bool] + for n in n_type: + for im_dtype in DTYPES: + out_dtype = get_equivalent_dtype(n, type(im_dtype)) + self.assertEqual(out_dtype, n) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_get_extreme_points.py b/tests/test_get_extreme_points.py index a334c12415..269cf63cce 100644 --- a/tests/test_get_extreme_points.py +++ b/tests/test_get_extreme_points.py @@ -15,30 +15,37 @@ from parameterized import parameterized from monai.transforms import get_extreme_points - -TEST_CASE_1 = [ - { - "img": np.array([[0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]]), - "rand_state": np.random, - "background": 0, - "pert": 0.0, - }, - [(0, 1), (3, 0), (3, 0), (1, 2)], -] - -TEST_CASE_2 = [ - { - "img": np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0], [0, 1, 0]]), - "rand_state": np.random, - "background": 0, - "pert": 0.0, - }, - [(0, 1), (3, 1), (1, 0), (1, 2)], -] +from tests.utils import TEST_NDARRAYS + +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + { + "img": p(np.array([[0, 1, 0], [0, 0, 1], [0, 1, 0], [1, 0, 0]])), + "rand_state": np.random, + "background": 0, + "pert": 0.0, + }, + [(0, 1), (3, 0), (3, 0), (1, 2)],
+ ] + ) + + TESTS.append( + [ + { + "img": p(np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0], [0, 1, 0]])), + "rand_state": np.random, + "background": 0, + "pert": 0.0, + }, + [(0, 1), (3, 1), (1, 0), (1, 2)], + ] + ) class TestGetExtremePoints(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + @parameterized.expand(TESTS) def test_type_shape(self, input_data, expected): result = get_extreme_points(**input_data) self.assertEqual(result, expected) diff --git a/tests/test_gibbs_noise.py b/tests/test_gibbs_noise.py index 264e2e630a..2c5e117eaf 100644 --- a/tests/test_gibbs_noise.py +++ b/tests/test_gibbs_noise.py @@ -19,17 +19,17 @@ from monai.data.synthetic import create_test_image_2d, create_test_image_3d from monai.transforms import GibbsNoise from monai.utils.misc import set_determinism -from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule +from monai.utils.module import optional_import +from tests.utils import TEST_NDARRAYS + +_, has_torch_fft = optional_import("torch.fft", name="fftshift") TEST_CASES = [] for shape in ((128, 64), (64, 48, 80)): - for as_tensor_output in (True, False): - for as_tensor_input in (True, False): - TEST_CASES.append((shape, as_tensor_output, as_tensor_input)) + for input_type in TEST_NDARRAYS if has_torch_fft else [np.array]: + TEST_CASES.append((shape, input_type)) -@SkipIfBeforePyTorchVersion((1, 8)) -@SkipIfNoModule("torch.fft") class TestGibbsNoise(unittest.TestCase): def setUp(self): set_determinism(0) @@ -39,36 +39,39 @@ def tearDown(self): set_determinism(None) @staticmethod - def get_data(im_shape, as_tensor_input): + def get_data(im_shape, input_type): create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d im = create_test_image(*im_shape, num_objs=4, rad_max=20, noise_max=0.0, num_seg_classes=5)[0][None] - return torch.Tensor(im) if as_tensor_input else im + return input_type(im) @parameterized.expand(TEST_CASES) - def test_same_result(self, im_shape, as_tensor_output, as_tensor_input): - im = self.get_data(im_shape, as_tensor_input) + def test_same_result(self, im_shape, input_type): + im = self.get_data(im_shape, input_type) alpha = 0.8 - t = GibbsNoise(alpha, as_tensor_output) + t = GibbsNoise(alpha) out1 = t(deepcopy(im)) out2 = t(deepcopy(im)) - np.testing.assert_allclose(out1, out2) - self.assertIsInstance(out1, torch.Tensor if as_tensor_output else np.ndarray) + self.assertEqual(type(out1), type(im)) + if isinstance(out1, torch.Tensor): + self.assertEqual(out1.device, im.device) + torch.testing.assert_allclose(out1, out2, rtol=1e-7, atol=0) + self.assertIsInstance(out1, type(im)) @parameterized.expand(TEST_CASES) - def test_identity(self, im_shape, _, as_tensor_input): - im = self.get_data(im_shape, as_tensor_input) + def test_identity(self, im_shape, input_type): + im = self.get_data(im_shape, input_type) alpha = 0.0 t = GibbsNoise(alpha) out = t(deepcopy(im)) - np.testing.assert_allclose(im, out, atol=1e-2) + torch.testing.assert_allclose(im, out, atol=1e-2, rtol=1e-7) @parameterized.expand(TEST_CASES) - def test_alpha_1(self, im_shape, _, as_tensor_input): - im = self.get_data(im_shape, as_tensor_input) + def test_alpha_1(self, im_shape, input_type): + im = self.get_data(im_shape, input_type) alpha = 1.0 t = GibbsNoise(alpha) out = t(deepcopy(im)) - np.testing.assert_allclose(0 * im, out) + torch.testing.assert_allclose(0 * im, out, rtol=1e-7, atol=0) if __name__ == "__main__": diff --git a/tests/test_gibbs_noised.py b/tests/test_gibbs_noised.py index 558556489a..f02052818f 
100644 --- a/tests/test_gibbs_noised.py +++ b/tests/test_gibbs_noised.py @@ -19,19 +19,18 @@ from monai.data.synthetic import create_test_image_2d, create_test_image_3d from monai.transforms import GibbsNoised from monai.utils.misc import set_determinism -from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule +from monai.utils.module import optional_import +from tests.utils import TEST_NDARRAYS + +_, has_torch_fft = optional_import("torch.fft", name="fftshift") TEST_CASES = [] for shape in ((128, 64), (64, 48, 80)): - for as_tensor_output in (True, False): - for as_tensor_input in (True, False): - TEST_CASES.append((shape, as_tensor_output, as_tensor_input)) - + for input_type in TEST_NDARRAYS if has_torch_fft else [np.array]: + TEST_CASES.append((shape, input_type)) KEYS = ["im", "label"] -@SkipIfBeforePyTorchVersion((1, 8)) -@SkipIfNoModule("torch.fft") class TestGibbsNoised(unittest.TestCase): def setUp(self): set_determinism(0) @@ -41,49 +40,56 @@ def tearDown(self): set_determinism(None) @staticmethod - def get_data(im_shape, as_tensor_input): + def get_data(im_shape, input_type): create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d ims = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5) - ims = [torch.Tensor(im) for im in ims] if as_tensor_input else ims - return dict(zip(KEYS, ims)) + return {k: input_type(deepcopy(v)) for k, v in zip(KEYS, ims)} @parameterized.expand(TEST_CASES) - def test_same_result(self, im_shape, as_tensor_output, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) + def test_same_result(self, im_shape, input_type): + data = self.get_data(im_shape, input_type) alpha = 0.8 - t = GibbsNoised(KEYS, alpha, as_tensor_output) + t = GibbsNoised(KEYS, alpha) out1 = t(deepcopy(data)) out2 = t(deepcopy(data)) for k in KEYS: - np.testing.assert_allclose(out1[k], out2[k]) - self.assertIsInstance(out1[k], torch.Tensor if as_tensor_output else np.ndarray) + torch.testing.assert_allclose(out1[k], out2[k], rtol=1e-7, atol=0) + self.assertIsInstance(out1[k], type(data[k])) @parameterized.expand(TEST_CASES) - def test_identity(self, im_shape, _, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) + def test_identity(self, im_shape, input_type): + data = self.get_data(im_shape, input_type) alpha = 0.0 t = GibbsNoised(KEYS, alpha) out = t(deepcopy(data)) for k in KEYS: + self.assertEqual(type(out[k]), type(data[k])) + if isinstance(out[k], torch.Tensor): + self.assertEqual(out[k].device, data[k].device) + out[k], data[k] = out[k].cpu(), data[k].cpu() np.testing.assert_allclose(data[k], out[k], atol=1e-2) @parameterized.expand(TEST_CASES) - def test_alpha_1(self, im_shape, _, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) + def test_alpha_1(self, im_shape, input_type): + data = self.get_data(im_shape, input_type) alpha = 1.0 t = GibbsNoised(KEYS, alpha) out = t(deepcopy(data)) for k in KEYS: - np.testing.assert_allclose(0 * data[k], out[k]) + self.assertEqual(type(out[k]), type(data[k])) + if isinstance(out[k], torch.Tensor): + self.assertEqual(out[k].device, data[k].device) + out[k], data[k] = out[k].cpu(), data[k].cpu() + np.testing.assert_allclose(0.0 * data[k], out[k], atol=1e-2) @parameterized.expand(TEST_CASES) - def test_dict_matches(self, im_shape, _, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) + def test_dict_matches(self, im_shape, input_type): + data = self.get_data(im_shape, input_type) data = {KEYS[0]: 
deepcopy(data[KEYS[0]]), KEYS[1]: deepcopy(data[KEYS[0]])} alpha = 1.0 t = GibbsNoised(KEYS, alpha) out = t(deepcopy(data)) - np.testing.assert_allclose(out[KEYS[0]], out[KEYS[1]]) + torch.testing.assert_allclose(out[KEYS[0]], out[KEYS[1]], rtol=1e-7, atol=0) if __name__ == "__main__": diff --git a/tests/test_global_mutual_information_loss.py b/tests/test_global_mutual_information_loss.py index 3373b59621..6a658563bc 100644 --- a/tests/test_global_mutual_information_loss.py +++ b/tests/test_global_mutual_information_loss.py @@ -16,6 +16,7 @@ from parameterized import parameterized from monai.losses.image_dissimilarity import GlobalMutualInformationLoss +from tests.utils import SkipIfBeforePyTorchVersion device = "cuda" if torch.cuda.is_available() else "cpu" @@ -45,6 +46,31 @@ }, -1.083999, ], + [ + {"kernel_type": "b-spline"}, + { + "pred": torch.arange(0, 3, dtype=torch.float, device=device)[None, :, None, None, None] + .expand(1, 3, 3, 3, 3) + .div(3), + "target": torch.arange(0, 3, dtype=torch.float, device=device)[None, :, None, None, None] + .expand(1, 3, 3, 3, 3) + .div(3), + }, + -1.0986018, + ], + [ + {"kernel_type": "b-spline"}, + { + "pred": torch.arange(0, 3, dtype=torch.float, device=device)[None, :, None, None, None] + .expand(1, 3, 3, 3, 3) + .div(3), + "target": torch.arange(0, 3, dtype=torch.float, device=device)[None, :, None, None, None] + .expand(1, 3, 3, 3, 3) + .div(3) + ** 2, + }, + -1.09861, + ], [ {}, { @@ -85,9 +111,10 @@ class TestGlobalMutualInformationLoss(unittest.TestCase): @parameterized.expand(TEST_CASES) + @SkipIfBeforePyTorchVersion((1, 9)) def test_shape(self, input_param, input_data, expected_val): result = GlobalMutualInformationLoss(**input_param).forward(**input_data) - np.testing.assert_allclose(result.detach().cpu().numpy(), expected_val, rtol=1e-4) + np.testing.assert_allclose(result.detach().cpu().numpy(), expected_val, rtol=1e-3, atol=1e-3) def test_ill_shape(self): loss = GlobalMutualInformationLoss() diff --git a/tests/test_gmm.py b/tests/test_gmm.py index 0e2401b452..641f6b998b 100644 --- a/tests/test_gmm.py +++ b/tests/test_gmm.py @@ -47,12 +47,12 @@ # Batch 0 [ # Channel 0 - [1, -1, 0, -1, 1], + [1, -1, 0, -1, 1] ], # Batch 1 [ # Channel 0 - [1, 1, 0, 0, -1], + [1, 1, 0, 0, -1] ], ], # Expected @@ -94,15 +94,15 @@ [0.7, 0.9, 0.0, 0.0, 0.0], # Channel 4 [0.2, 0.1, 0.2, 0.2, 0.1], - ], + ] ], # Labels [ # Batch 0 [ # Channel 0 - [0, 0, -1, 1, 1], - ], + [0, 0, -1, 1, 1] + ] ], # Expected [ @@ -112,7 +112,7 @@ [1, 1, 0, 0, 0], # Channel 1 [0, 0, 1, 1, 1], - ], + ] ], ], [ @@ -142,21 +142,15 @@ [0.4, 0.5, 0.0, 0.0, 0.0], [0.7, 0.6, 0.0, 0.0, 0.0], ], - ], + ] ], # Labels [ # Batch 0 [ # Channel 0 - [ - [-1, 1, -1, 0, -1], - [1, -1, -1, -1, -1], - [-1, -1, 0, -1, -1], - [2, 2, -1, 3, -1], - [-1, -1, -1, -1, 3], - ], - ], + [[-1, 1, -1, 0, -1], [1, -1, -1, -1, -1], [-1, -1, 0, -1, -1], [2, 2, -1, 3, -1], [-1, -1, -1, -1, 3]] + ] ], # Expected [ @@ -194,7 +188,7 @@ [0.0, 0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 1.0, 1.0], ], - ], + ] ], ], [ @@ -211,25 +205,13 @@ # Channel 0 [ # Slice 0 - [ - [0.7, 0.6, 0.0], - [0.5, 0.4, 0.0], - [0.0, 0.0, 0.0], - ], + [[0.7, 0.6, 0.0], [0.5, 0.4, 0.0], [0.0, 0.0, 0.0]], # Slice 1 - [ - [0.5, 0.6, 0.0], - [0.4, 0.3, 0.0], - [0.0, 0.0, 0.0], - ], + [[0.5, 0.6, 0.0], [0.4, 0.3, 0.0], [0.0, 0.0, 0.0]], # Slice 2 - [ - [0.3, 0.3, 0.0], - [0.2, 0.1, 0.0], - [0.0, 0.0, 0.0], - ], - ], - ], + [[0.3, 0.3, 0.0], [0.2, 0.1, 0.0], [0.0, 0.0, 0.0]], + ] + ] ], # Labels [ @@ -238,25 +220,13 @@ # Channel 0 [ # Slice 0 - [ - [0, 
-1, -1], - [0, -1, -1], - [-1, -1, 1], - ], + [[0, -1, -1], [0, -1, -1], [-1, -1, 1]], # Slice 1 - [ - [0, 0, -1], - [-1, -1, 1], - [-1, 1, 1], - ], + [[0, 0, -1], [-1, -1, 1], [-1, 1, 1]], # Slice 2 - [ - [0, -1, -1], - [-1, -1, -1], - [-1, -1, -1], - ], - ], - ], + [[0, -1, -1], [-1, -1, -1], [-1, -1, -1]], + ] + ] ], # Expected [ @@ -265,46 +235,22 @@ # Channel 0 [ # Slice 0 - [ - [1.0, 1.0, 0.0], - [1.0, 1.0, 0.0], - [0.0, 0.0, 0.0], - ], + [[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 0.0]], # Slice 1 - [ - [1.0, 1.0, 0.0], - [1.0, 1.0, 0.0], - [0.0, 0.0, 0.0], - ], + [[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 0.0]], # Slice 2 - [ - [1.0, 1.0, 0.0], - [1.0, 1.0, 0.0], - [0.0, 0.0, 0.0], - ], + [[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 0.0]], ], # Channel 1 [ # Slice 0 - [ - [0.0, 0.0, 1.0], - [0.0, 0.0, 1.0], - [1.0, 1.0, 1.0], - ], + [[0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [1.0, 1.0, 1.0]], # Slice 1 - [ - [0.0, 0.0, 1.0], - [0.0, 0.0, 1.0], - [1.0, 1.0, 1.0], - ], + [[0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [1.0, 1.0, 1.0]], # Slice 2 - [ - [0.0, 0.0, 1.0], - [0.0, 0.0, 1.0], - [1.0, 1.0, 1.0], - ], + [[0.0, 0.0, 1.0], [0.0, 0.0, 1.0], [1.0, 1.0, 1.0]], ], - ], + ] ], ], ] diff --git a/tests/test_grid_dataset.py b/tests/test_grid_dataset.py index 6e0aa4023e..3c5c5bd4fa 100644 --- a/tests/test_grid_dataset.py +++ b/tests/test_grid_dataset.py @@ -56,26 +56,20 @@ def test_loading_array(self): np.testing.assert_equal(tuple(item[0].shape), (2, 1, 2, 2)) np.testing.assert_allclose( item[0], - np.array([[[[1.7413, 2.7413], [5.7413, 6.7413]]], [[[9.1419, 10.1419], [13.1419, 14.1419]]]]), - rtol=1e-5, - ) - np.testing.assert_allclose( - item[1], - np.array([[[0, 1], [0, 2], [2, 4]], [[0, 1], [2, 4], [2, 4]]]), + np.array([[[[2.0577, 3.0577], [6.0577, 7.0577]]], [[[10.5540, 11.5540], [14.5540, 15.5540]]]]), rtol=1e-5, ) + np.testing.assert_allclose(item[1], np.array([[[0, 1], [0, 2], [2, 4]], [[0, 1], [2, 4], [2, 4]]]), rtol=1e-5) if sys.platform != "win32": for item in DataLoader(ds, batch_size=2, shuffle=False, num_workers=2): np.testing.assert_equal(tuple(item[0].shape), (2, 1, 2, 2)) np.testing.assert_allclose( item[0], - np.array([[[[2.3944, 3.3944], [6.3944, 7.3944]]], [[[10.6551, 11.6551], [14.6551, 15.6551]]]]), + np.array([[[[1.6533, 2.6533], [5.6533, 6.6533]]], [[[9.8524, 10.8524], [13.8524, 14.8524]]]]), rtol=1e-3, ) np.testing.assert_allclose( - item[1], - np.array([[[0, 1], [0, 2], [2, 4]], [[0, 1], [2, 4], [2, 4]]]), - rtol=1e-5, + item[1], np.array([[[0, 1], [0, 2], [2, 4]], [[0, 1], [2, 4], [2, 4]]]), rtol=1e-5 ) diff --git a/tests/test_grid_distortion.py b/tests/test_grid_distortion.py new file mode 100644 index 0000000000..09fcb856f3 --- /dev/null +++ b/tests/test_grid_distortion.py @@ -0,0 +1,108 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
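+ +# These cases exercise GridDistortion on np.indices grids for each array type in TEST_NDARRAYS, +# checking the "nearest"/"zeros" and "bilinear"/"reflection" mode-padding combinations (2D and 3D) +# against precomputed expected grids.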
+ +import unittest + +import numpy as np +from parameterized import parameterized + +from monai.transforms import GridDistortion +from tests.utils import TEST_NDARRAYS, assert_allclose + +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + dict(num_cells=3, distort_steps=[(1.5,) * 4] * 2, mode="nearest", padding_mode="zeros"), + p(np.indices([6, 6]).astype(np.float32)), + p( + np.array( + [ + [ + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [3.0, 3.0, 3.0, 0.0, 0.0, 0.0], + [3.0, 3.0, 3.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 3.0, 3.0, 0.0, 0.0, 0.0], + [0.0, 3.0, 3.0, 0.0, 0.0, 0.0], + [0.0, 3.0, 3.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ], + ] + ).astype(np.float32) + ), + ] + ) + num_cells = (2, 2) + distort_steps = [(1.5,) * (1 + num_cells[0]), (1.0,) * (1 + num_cells[1])] + TESTS.append( + [ + dict(num_cells=num_cells, distort_steps=distort_steps, mode="bilinear", padding_mode="reflection"), + p(np.indices([6, 6]).astype(np.float32)), + p( + np.array( + [ + [ + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [2.25, 2.25, 2.25, 2.25, 2.25, 2.25], + [4.5, 4.5, 4.5, 4.5, 4.5, 4.5], + [4.5, 4.5, 4.5, 4.5, 4.5, 4.5], + [3.25, 3.25, 3.25, 3.25, 3.25, 3.25], + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + ], + [ + [0.0, 1.5, 3.0, 3.0, 4.5, 4.0], + [0.0, 1.5, 3.0, 3.0, 4.5, 4.0], + [0.0, 1.5, 3.0, 3.0, 4.5, 4.0], + [0.0, 1.5, 3.0, 3.0, 4.5, 4.0], + [0.0, 1.5, 3.0, 3.0, 4.5, 4.0], + [0.0, 1.5, 3.0, 3.0, 4.5, 4.0], + ], + ] + ).astype(np.float32) + ), + ] + ) + TESTS.append( + [ + dict(num_cells=2, distort_steps=[(1.25,) * 3] * 3, mode="nearest", padding_mode="zeros"), + p(np.indices([3, 3, 3])[:1].astype(np.float32)), + p( + np.array( + [ + [ + [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], + [[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 0.0, 0.0]], + [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], + ] + ] + ).astype(np.float32) + ), + ] + ) + + +class TestGridDistortion(unittest.TestCase): + @parameterized.expand(TESTS) + def test_grid_distortion(self, input_param, input_data, expected_val): + g = GridDistortion(**input_param) + result = g(input_data) + assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_grid_distortiond.py b/tests/test_grid_distortiond.py new file mode 100644 index 0000000000..55e2e6ad1d --- /dev/null +++ b/tests/test_grid_distortiond.py @@ -0,0 +1,85 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
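+ +# The dictionary-based transform takes per-key settings: "img" below uses "bilinear" mode with +# "reflection" padding while "mask" uses "nearest" with "zeros", and each output is compared +# against its own expected grid.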
+ +import unittest + +import numpy as np +from parameterized import parameterized + +from monai.transforms import GridDistortiond +from tests.utils import TEST_NDARRAYS, assert_allclose + +TESTS = [] +num_cells = (2, 2) +distort_steps = [(1.5,) * (1 + n_c) for n_c in num_cells] +for p in TEST_NDARRAYS: + img = np.indices([6, 6]).astype(np.float32) + TESTS.append( + [ + dict( + keys=["img", "mask"], + num_cells=num_cells, + distort_steps=distort_steps, + mode=["bilinear", "nearest"], + padding_mode=["reflection", "zeros"], + ), + {"img": p(img), "mask": p(np.ones_like(img[:1]))}, + p( + np.array( + [ + [ + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [2.25, 2.25, 2.25, 2.25, 2.25, 2.25], + [4.5, 4.5, 4.5, 4.5, 4.5, 4.5], + [4.5, 4.5, 4.5, 4.5, 4.5, 4.5], + [3.25, 3.25, 3.25, 3.25, 3.25, 3.25], + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + ], + [ + [0.0, 2.25, 4.5, 4.5, 3.25, 1.0], + [0.0, 2.25, 4.5, 4.5, 3.25, 1.0], + [0.0, 2.25, 4.5, 4.5, 3.25, 1.0], + [0.0, 2.25, 4.5, 4.5, 3.25, 1.0], + [0.0, 2.25, 4.5, 4.5, 3.25, 1.0], + [0.0, 2.25, 4.5, 4.5, 3.25, 1.0], + ], + ] + ).astype(np.float32) + ), + p( + np.array( + [ + [ + [1.0, 1.0, 1.0, 1.0, 0.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ] + ] + ).astype(np.float32) + ), + ] + ) + + +class TestGridDistortiond(unittest.TestCase): + @parameterized.expand(TESTS) + def test_grid_distortiond(self, input_param, input_data, expected_val_img, expected_val_mask): + g = GridDistortiond(**input_param) + result = g(input_data) + assert_allclose(result["img"], expected_val_img, rtol=1e-4, atol=1e-4) + assert_allclose(result["mask"], expected_val_mask, rtol=1e-4, atol=1e-4) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_grid_pull.py b/tests/test_grid_pull.py index 9e4d2e8237..25e1d7c1d8 100644 --- a/tests/test_grid_pull.py +++ b/tests/test_grid_pull.py @@ -53,11 +53,7 @@ def make_grid(shape, dtype=None, device=None, requires_grad=True): "interpolation": interp, "bound": bound, }, - { - "val": torch.tensor([[expected_val]]), - "device": device, - "grad": torch.tensor(expected_grad), - }, + {"val": torch.tensor([[expected_val]]), "device": device, "grad": torch.tensor(expected_grad)}, ] TEST_1D_GP.append(test_case) @@ -85,7 +81,7 @@ def test_grid_pull(self, input_param, expected): grads = grads[0] else: grads = torch.cat(grads, dim=0) - self.assertTrue("{}".format(result.device).startswith(expected["device"])) + self.assertTrue(f"{result.device}".startswith(expected["device"])) np.testing.assert_allclose(result.detach().cpu().numpy(), expected["val"].cpu().numpy(), rtol=1e-4, atol=1e-4) np.testing.assert_allclose(grads.detach().cpu().numpy(), expected["grad"].cpu().numpy(), rtol=1e-4, atol=1e-4) diff --git a/tests/test_handler_checkpoint_saver.py b/tests/test_handler_checkpoint_saver.py index bcab49f12b..86544e5321 100644 --- a/tests/test_handler_checkpoint_saver.py +++ b/tests/test_handler_checkpoint_saver.py @@ -112,16 +112,7 @@ class TestHandlerCheckpointSaver(unittest.TestCase): @parameterized.expand( - [ - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - TEST_CASE_4, - TEST_CASE_5, - TEST_CASE_6, - TEST_CASE_7, - TEST_CASE_8, - ] + [TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7, TEST_CASE_8] ) def test_file( self, diff --git a/tests/test_handler_classification_saver.py b/tests/test_handler_classification_saver.py index 87ce5ca3f8..e06c6e95f0 100644 --- 
a/tests/test_handler_classification_saver.py +++ b/tests/test_handler_classification_saver.py @@ -45,7 +45,7 @@ def _train_func(engine, batch): def _test_file(filename): filepath = os.path.join(tempdir, filename) self.assertTrue(os.path.exists(filepath)) - with open(filepath, "r") as f: + with open(filepath) as f: reader = csv.reader(f) i = 0 for row in reader: diff --git a/tests/test_handler_classification_saver_dist.py b/tests/test_handler_classification_saver_dist.py index 70cc0ca42f..d9bbe67ecd 100644 --- a/tests/test_handler_classification_saver_dist.py +++ b/tests/test_handler_classification_saver_dist.py @@ -61,7 +61,7 @@ def _train_func(engine, batch): filepath = os.path.join(tempdir, "predictions.csv") if rank == 1: self.assertTrue(os.path.exists(filepath)) - with open(filepath, "r") as f: + with open(filepath) as f: reader = csv.reader(f) i = 0 for row in reader: diff --git a/tests/test_handler_confusion_matrix.py b/tests/test_handler_confusion_matrix.py index 0c6e36066b..2765bf6799 100644 --- a/tests/test_handler_confusion_matrix.py +++ b/tests/test_handler_confusion_matrix.py @@ -39,16 +39,8 @@ } data_2: Dict[Any, Any] = { - "y_pred": torch.tensor( - [ - [[[0.0, 1.0], [1.0, 0.0]], [[1.0, 0.0], [1.0, 1.0]], [[0.0, 1.0], [0.0, 0.0]]], - ] - ), - "y": torch.tensor( - [ - [[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]], - ] - ), + "y_pred": torch.tensor([[[[0.0, 1.0], [1.0, 0.0]], [[1.0, 0.0], [1.0, 1.0]], [[0.0, 1.0], [0.0, 0.0]]]]), + "y": torch.tensor([[[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]]]), } diff --git a/tests/test_handler_confusion_matrix_dist.py b/tests/test_handler_confusion_matrix_dist.py index 40245bce2e..31e853cd00 100644 --- a/tests/test_handler_confusion_matrix_dist.py +++ b/tests/test_handler_confusion_matrix_dist.py @@ -54,12 +54,10 @@ def _val_func(engine, batch): if dist.get_rank() == 1: y_pred = torch.tensor( - [[[[0.0, 1.0], [1.0, 0.0]], [[1.0, 0.0], [1.0, 1.0]], [[0.0, 1.0], [0.0, 0.0]]]], - device=device, + [[[[0.0, 1.0], [1.0, 0.0]], [[1.0, 0.0], [1.0, 1.0]], [[0.0, 1.0], [0.0, 0.0]]]], device=device ) y = torch.tensor( - [[[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]]], - device=device, + [[[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]]], device=device ) metric.update([y_pred, y]) diff --git a/tests/test_handler_early_stop.py b/tests/test_handler_early_stop.py index efe8e89825..4707a8b3cc 100644 --- a/tests/test_handler_early_stop.py +++ b/tests/test_handler_early_stop.py @@ -23,10 +23,7 @@ def _train_func(engine, batch): trainer = Engine(_train_func) EarlyStopHandler( - patience=5, - score_function=lambda x: x.state.output["loss"], - trainer=trainer, - epoch_level=False, + patience=5, score_function=lambda x: x.state.output["loss"], trainer=trainer, epoch_level=False ).attach(trainer) trainer.run(range(4), max_epochs=2) diff --git a/tests/test_handler_garbage_collector.py b/tests/test_handler_garbage_collector.py index 75ab9ceb99..23ecfbbf37 100644 --- a/tests/test_handler_garbage_collector.py +++ b/tests/test_handler_garbage_collector.py @@ -34,13 +34,7 @@ class TestHandlerGarbageCollector(unittest.TestCase): @skipUnless(has_ignite, "Requires ignite") - @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_1, - TEST_CASE_2, - ] - ) + @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2]) def test_content(self, data, trigger_event): # set up engine gb_count_dict = {} diff --git 
a/tests/test_handler_hausdorff_distance.py b/tests/test_handler_hausdorff_distance.py index bbc36cc2b5..ced9503499 100644 --- a/tests/test_handler_hausdorff_distance.py +++ b/tests/test_handler_hausdorff_distance.py @@ -20,9 +20,7 @@ def create_spherical_seg_3d( - radius: float = 20.0, - centre: Tuple[int, int, int] = (49, 49, 49), - im_shape: Tuple[int, int, int] = (99, 99, 99), + radius: float = 20.0, centre: Tuple[int, int, int] = (49, 49, 49), im_shape: Tuple[int, int, int] = (99, 99, 99) ) -> np.ndarray: """ Return a 3D image with a sphere inside. Voxel values will be diff --git a/tests/test_handler_metrics_saver_dist.py b/tests/test_handler_metrics_saver_dist.py index 0a36a19c66..0b65b14886 100644 --- a/tests/test_handler_metrics_saver_dist.py +++ b/tests/test_handler_metrics_saver_dist.py @@ -52,10 +52,7 @@ def _val_func(engine, batch): @engine.on(Events.EPOCH_COMPLETED) def _save_metrics0(engine): engine.state.metrics = {"metric1": 1, "metric2": 2} - engine.state.metric_details = { - "metric3": torch.tensor([[1, 2]]), - "metric4": torch.tensor([[5, 6]]), - } + engine.state.metric_details = {"metric3": torch.tensor([[1, 2]]), "metric4": torch.tensor([[5, 6]])} if dist.get_rank() == 1: # different ranks have different data length diff --git a/tests/test_handler_mlflow.py b/tests/test_handler_mlflow.py new file mode 100644 index 0000000000..808ebffe33 --- /dev/null +++ b/tests/test_handler_mlflow.py @@ -0,0 +1,51 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import glob +import os +import tempfile +import unittest +from pathlib import Path + +from ignite.engine import Engine, Events + +from monai.handlers import MLFlowHandler + + +class TestHandlerMLFlow(unittest.TestCase): + def test_metrics_track(self): + with tempfile.TemporaryDirectory() as tempdir: + + # set up engine + def _train_func(engine, batch): + return [batch + 1.0] + + engine = Engine(_train_func) + + # set up dummy metric + @engine.on(Events.EPOCH_COMPLETED) + def _update_metric(engine): + current_metric = engine.state.metrics.get("acc", 0.1) + engine.state.metrics["acc"] = current_metric + 0.1 + engine.state.test = current_metric + + # set up testing handler + test_path = os.path.join(tempdir, "mlflow_test") + handler = MLFlowHandler(tracking_uri=Path(test_path).as_uri(), state_attributes=["test"]) + handler.attach(engine) + engine.run(range(3), max_epochs=2) + handler.close() + # check logging output + self.assertTrue(len(glob.glob(test_path)) > 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_handler_nvtx.py b/tests/test_handler_nvtx.py index fee29af344..6723e55892 100644 --- a/tests/test_handler_nvtx.py +++ b/tests/test_handler_nvtx.py @@ -22,42 +22,18 @@ _, has_nvtx = optional_import("torch._C._nvtx", descriptor="NVTX is not installed. 
Are you sure you have a CUDA build?") -TENSOR_0 = torch.tensor( - [ - [ - [[1.0], [2.0]], - [[3.0], [4.0]], - ] - ] -) +TENSOR_0 = torch.tensor([[[[1.0], [2.0]], [[3.0], [4.0]]]]) -TENSOR_1 = torch.tensor( - [ - [ - [[0.0], [-2.0]], - [[-3.0], [4.0]], - ] - ] -) +TENSOR_1 = torch.tensor([[[[0.0], [-2.0]], [[-3.0], [4.0]]]]) -TENSOR_1_EXPECTED = torch.tensor( - [ - [[1.0], [0.5]], - [[0.25], [5.0]], - ] -) +TENSOR_1_EXPECTED = torch.tensor([[[1.0], [0.5]], [[0.25], [5.0]]]) TEST_CASE_0 = [[{"image": TENSOR_0}], TENSOR_0[0] + 1.0] TEST_CASE_1 = [[{"image": TENSOR_1}], TENSOR_1_EXPECTED] class TestHandlerDecollateBatch(unittest.TestCase): - @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_1, - ] - ) + @parameterized.expand([TEST_CASE_0, TEST_CASE_1]) @unittest.skipUnless(has_nvtx, "CUDA is required for NVTX!") def test_compute(self, data, expected): # Set up handlers diff --git a/tests/test_handler_parameter_scheduler.py b/tests/test_handler_parameter_scheduler.py index 5b3e845ace..55ea6a4af2 100644 --- a/tests/test_handler_parameter_scheduler.py +++ b/tests/test_handler_parameter_scheduler.py @@ -9,7 +9,7 @@ class ToyNet(Module): def __init__(self, value): - super(ToyNet, self).__init__() + super().__init__() self.value = value def forward(self, input): diff --git a/tests/test_handler_prob_map_producer.py b/tests/test_handler_prob_map_producer.py index b21cf03171..316cd6f70a 100644 --- a/tests/test_handler_prob_map_producer.py +++ b/tests/test_handler_prob_map_producer.py @@ -32,12 +32,7 @@ class TestDataset(Dataset): def __init__(self, name, size): super().__init__( data=[ - { - "name": name, - "mask_shape": (size, size), - "mask_locations": [[i, i] for i in range(size)], - "level": 0, - } + {"name": name, "mask_shape": (size, size), "mask_locations": [[i, i] for i in range(size)], "level": 0} ] ) self.len = size @@ -46,11 +41,7 @@ def __len__(self): return self.len def __getitem__(self, index): - return { - "name": self.data[0]["name"], - "mask_location": self.data[0]["mask_locations"][index], - "pred": index + 1, - } + return {"name": self.data[0]["name"], "mask_location": self.data[0]["mask_locations"][index], "pred": index + 1} class TestEvaluator(Evaluator): @@ -59,13 +50,7 @@ def _iteration(self, engine, batchdata): class TestHandlerProbMapGenerator(unittest.TestCase): - @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_1, - TEST_CASE_2, - ] - ) + @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2]) def test_prob_map_generator(self, name, size): # set up dataset dataset = TestDataset(name, size) diff --git a/tests/test_handler_smartcache.py b/tests/test_handler_smartcache.py index b67f1226cd..23a6aa7500 100644 --- a/tests/test_handler_smartcache.py +++ b/tests/test_handler_smartcache.py @@ -24,13 +24,7 @@ class TestHandlerSmartCache(unittest.TestCase): def test_content(self): data = [0, 1, 2, 3, 4, 5, 6, 7, 8] - expected = [ - [0, 1, 2, 3, 4], - [1, 2, 3, 4, 5], - [2, 3, 4, 5, 6], - [3, 4, 5, 6, 7], - [4, 5, 6, 7, 8], - ] + expected = [[0, 1, 2, 3, 4], [1, 2, 3, 4, 5], [2, 3, 4, 5, 6], [3, 4, 5, 6, 7], [4, 5, 6, 7, 8]] # set up engine def _train_func(engine, batch): diff --git a/tests/test_handler_stats.py b/tests/test_handler_stats.py index 84cdef59a8..49433195c8 100644 --- a/tests/test_handler_stats.py +++ b/tests/test_handler_stats.py @@ -140,7 +140,7 @@ def _train_func(engine, batch): engine.run(range(3), max_epochs=2) handler.close() stats_handler.logger.removeHandler(handler) - with open(filename, "r") as f: + with open(filename) as f: output_str = f.read() 
grep = re.compile(f".*{key_to_handler}.*") has_key_word = re.compile(f".*{key_to_print}.*") @@ -163,6 +163,45 @@ def _train_func(engine, batch): with self.assertRaises(RuntimeError): engine.run(range(3), max_epochs=2) + def test_attributes_print(self): + log_stream = StringIO() + log_handler = logging.StreamHandler(log_stream) + log_handler.setLevel(logging.INFO) + key_to_handler = "test_logging" + + # set up engine + def _train_func(engine, batch): + return [torch.tensor(0.0)] + + engine = Engine(_train_func) + + # set up dummy metric + @engine.on(Events.EPOCH_COMPLETED) + def _update_metric(engine): + if not hasattr(engine.state, "test1"): + engine.state.test1 = 0.1 + engine.state.test2 = 0.2 + else: + engine.state.test1 += 0.1 + engine.state.test2 += 0.2 + + # set up testing handler + stats_handler = StatsHandler( + name=key_to_handler, state_attributes=["test1", "test2", "test3"], logger_handler=log_handler + ) + stats_handler.attach(engine) + + engine.run(range(3), max_epochs=2) + + # check logging output + output_str = log_stream.getvalue() + log_handler.close() + grep = re.compile(f".*{key_to_handler}.*") + has_key_word = re.compile(".*State values.*") + for idx, line in enumerate(output_str.split("\n")): + if grep.match(line) and idx in [5, 10]: + self.assertTrue(has_key_word.match(line)) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_handler_surface_distance.py b/tests/test_handler_surface_distance.py index 82cdb50d90..076f84eca9 100644 --- a/tests/test_handler_surface_distance.py +++ b/tests/test_handler_surface_distance.py @@ -20,9 +20,7 @@ def create_spherical_seg_3d( - radius: float = 20.0, - centre: Tuple[int, int, int] = (49, 49, 49), - im_shape: Tuple[int, int, int] = (99, 99, 99), + radius: float = 20.0, centre: Tuple[int, int, int] = (49, 49, 49), im_shape: Tuple[int, int, int] = (99, 99, 99) ) -> np.ndarray: """ Return a 3D image with a sphere inside. Voxel values will be diff --git a/tests/test_handler_tb_stats.py b/tests/test_handler_tb_stats.py index 1d722e7f66..f0c4d49fd0 100644 --- a/tests/test_handler_tb_stats.py +++ b/tests/test_handler_tb_stats.py @@ -57,11 +57,15 @@ def _train_func(engine, batch): def _update_metric(engine): current_metric = engine.state.metrics.get("acc", 0.1) engine.state.metrics["acc"] = current_metric + 0.1 + engine.state.test = current_metric # set up testing handler writer = SummaryWriter(log_dir=tempdir) stats_handler = TensorBoardStatsHandler( - writer, output_transform=lambda x: {"loss": x[0] * 2.0}, global_epoch_transform=lambda x: x * 3.0 + summary_writer=writer, + output_transform=lambda x: {"loss": x[0] * 2.0}, + global_epoch_transform=lambda x: x * 3.0, + state_attributes=["test"], ) stats_handler.attach(engine) engine.run(range(3), max_epochs=2) diff --git a/tests/test_handler_transform_inverter.py b/tests/test_handler_transform_inverter.py deleted file mode 100644 index 385311eba7..0000000000 --- a/tests/test_handler_transform_inverter.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright 2020 - 2021 MONAI Consortium -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import sys -import unittest - -import numpy as np -import torch -from ignite.engine import Engine - -from monai.data import CacheDataset, DataLoader, create_test_image_3d, decollate_batch -from monai.engines.utils import IterationEvents -from monai.handlers import TransformInverter -from monai.transforms import ( - AddChanneld, - CastToTyped, - Compose, - CopyItemsd, - LoadImaged, - Orientationd, - RandAffined, - RandAxisFlipd, - RandFlipd, - RandRotate90d, - RandRotated, - RandZoomd, - ResizeWithPadOrCropd, - ScaleIntensityd, - Spacingd, - ToTensord, -) -from monai.utils.misc import set_determinism -from tests.utils import make_nifti_image - -KEYS = ["image", "label"] - - -class TestTransformInverter(unittest.TestCase): - def test_invert(self): - set_determinism(seed=0) - im_fname, seg_fname = [make_nifti_image(i) for i in create_test_image_3d(101, 100, 107, noise_max=100)] - transform = Compose( - [ - LoadImaged(KEYS), - AddChanneld(KEYS), - Orientationd(KEYS, "RPS"), - Spacingd(KEYS, pixdim=(1.2, 1.01, 0.9), mode=["bilinear", "nearest"], dtype=np.float32), - ScaleIntensityd("image", minv=1, maxv=10), - RandFlipd(KEYS, prob=0.5, spatial_axis=[1, 2]), - RandAxisFlipd(KEYS, prob=0.5), - RandRotate90d(KEYS, spatial_axes=(1, 2)), - RandZoomd(KEYS, prob=0.5, min_zoom=0.5, max_zoom=1.1, keep_size=True), - RandRotated(KEYS, prob=0.5, range_x=np.pi, mode="bilinear", align_corners=True), - RandAffined(KEYS, prob=0.5, rotate_range=np.pi, mode="nearest"), - ResizeWithPadOrCropd(KEYS, 100), - ToTensord("image"), # test to support both Tensor and Numpy array when inverting - CastToTyped(KEYS, dtype=[torch.uint8, np.uint8]), - CopyItemsd("label", times=2, names=["label_inverted1", "label_inverted2"]), - CopyItemsd("image", times=2, names=["image_inverted1", "image_inverted2"]), - ] - ) - data = [{"image": im_fname, "label": seg_fname} for _ in range(12)] - - # num workers = 0 for mac or gpu transforms - num_workers = 0 if sys.platform == "darwin" or torch.cuda.is_available() else 2 - - dataset = CacheDataset(data, transform=transform, progress=False) - loader = DataLoader(dataset, num_workers=num_workers, batch_size=5) - - # set up engine - def _train_func(engine, batch): - self.assertTupleEqual(batch["image"].shape[1:], (1, 100, 100, 100)) - engine.state.output = engine.state.batch = decollate_batch(batch) - engine.fire_event(IterationEvents.MODEL_COMPLETED) - return engine.state.output - - engine = Engine(_train_func) - engine.register_events(*IterationEvents) - - # set up testing handler - TransformInverter( - transform=transform, - output_keys=["image_inverted1", "label_inverted1"], - batch_keys="label", - meta_keys=["image_inverted1_meta_dict", "label_inverted1_meta_dict"], - batch_meta_keys="label_meta_dict", - nearest_interp=True, - to_tensor=[True, False], - device="cpu", - ).attach(engine) - - # test different nearest interpolation values - TransformInverter( - transform=transform, - output_keys=["image_inverted2", "label_inverted2"], - batch_keys="image", - meta_keys=None, - batch_meta_keys="image_meta_dict", - meta_key_postfix="meta_dict", - nearest_interp=[True, False], - post_func=[lambda x: x + 10, lambda x: x], - ).attach(engine) - - engine.run(loader, max_epochs=1) - set_determinism(seed=None) - - for output in engine.state.output: - self.assertTupleEqual(output["image"].shape, (1, 100, 100, 100)) - self.assertTupleEqual(output["label"].shape, (1, 100, 100, 100)) - # check the 
nearest inerpolation mode - i = output["image_inverted1"] - torch.testing.assert_allclose(i.to(torch.uint8).to(torch.float), i.to(torch.float)) - self.assertTupleEqual(i.shape, (1, 100, 101, 107)) - i = output["label_inverted1"] - np.testing.assert_allclose(i.astype(np.uint8).astype(np.float32), i.astype(np.float32)) - self.assertTupleEqual(i.shape, (1, 100, 101, 107)) - - # check the case that different items use different interpolation mode to invert transforms - d = output["image_inverted2"] - # if the interpolation mode is nearest, accumulated diff should be smaller than 1 - self.assertLess(torch.sum(d.to(torch.float) - d.to(torch.uint8).to(torch.float)).item(), 1.0) - self.assertTupleEqual(d.shape, (1, 100, 101, 107)) - - d = output["label_inverted2"] - # if the interpolation mode is not nearest, accumulated diff should be greater than 10000 - self.assertGreater(torch.sum(d.to(torch.float) - d.to(torch.uint8).to(torch.float)).item(), 10000.0) - self.assertTupleEqual(d.shape, (1, 100, 101, 107)) - - # check labels match - reverted = engine.state.output[-1]["label_inverted1"].astype(np.int32) - original = LoadImaged(KEYS)(data[-1])["label"] - n_good = np.sum(np.isclose(reverted, original, atol=1e-3)) - reverted_name = engine.state.batch[-1]["label_inverted1_meta_dict"]["filename_or_obj"] - original_name = data[-1]["label"] - self.assertEqual(reverted_name, original_name) - print("invert diff", reverted.size - n_good) - # 25300: 2 workers (cpu, non-macos) - # 1812: 0 workers (gpu or macos) - # 1824: torch 1.5.1 - self.assertTrue((reverted.size - n_good) in (25300, 1812, 1824), "diff. in 3 possible values") - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_hausdorff_distance.py b/tests/test_hausdorff_distance.py index 0b313f722f..182c8cdaca 100644 --- a/tests/test_hausdorff_distance.py +++ b/tests/test_hausdorff_distance.py @@ -20,9 +20,7 @@ def create_spherical_seg_3d( - radius: float = 20.0, - centre: Tuple[int, int, int] = (49, 49, 49), - im_shape: Tuple[int, int, int] = (99, 99, 99), + radius: float = 20.0, centre: Tuple[int, int, int] = (49, 49, 49), im_shape: Tuple[int, int, int] = (99, 99, 99) ) -> np.ndarray: """ Return a 3D image with a sphere inside. 
Voxel values will be @@ -49,10 +47,7 @@ def create_spherical_seg_3d( TEST_CASES = [ - [ - [create_spherical_seg_3d(), create_spherical_seg_3d(), 1], - [0, 0, 0, 0, 0, 0], - ], + [[create_spherical_seg_3d(), create_spherical_seg_3d(), 1], [0, 0, 0, 0, 0, 0]], [ [ create_spherical_seg_3d(radius=20, centre=(20, 20, 20)), @@ -106,8 +101,8 @@ def create_spherical_seg_3d( # both pred and gt do not have foreground, metric and not_nans should be 0 np.zeros([99, 99, 99]), np.zeros([99, 99, 99]), - ], - ], + ] + ] ] diff --git a/tests/test_hilbert_transform.py b/tests/test_hilbert_transform.py index 82454c34d0..ffe5824034 100644 --- a/tests/test_hilbert_transform.py +++ b/tests/test_hilbert_transform.py @@ -28,10 +28,7 @@ def create_expected_numpy_output(input_datum, **kwargs): - x = np.fft.fft( - input_datum.cpu().numpy() if input_datum.device.type == "cuda" else input_datum.numpy(), - **kwargs, - ) + x = np.fft.fft(input_datum.cpu().numpy() if input_datum.device.type == "cuda" else input_datum.numpy(), **kwargs) f = np.fft.fftfreq(x.shape[kwargs["axis"]]) u = np.heaviside(f, 0.5) new_dims_before = kwargs["axis"] diff --git a/tests/test_histogram_normalize.py b/tests/test_histogram_normalize.py index b69fb1d927..e0178166d9 100644 --- a/tests/test_histogram_normalize.py +++ b/tests/test_histogram_normalize.py @@ -15,28 +15,37 @@ from parameterized import parameterized from monai.transforms import HistogramNormalize - -TEST_CASE_1 = [ - {"num_bins": 4, "min": 1, "max": 5, "mask": np.array([1, 1, 1, 1, 1, 0])}, - np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]), - np.array([1.0, 1.5, 2.5, 4.0, 5.0, 5.0]), -] - -TEST_CASE_2 = [ - {"num_bins": 4, "max": 4, "dtype": np.uint8}, - np.array([0.0, 1.0, 2.0, 3.0, 4.0]), - np.array([0, 0, 1, 3, 4]), -] - -TEST_CASE_3 = [ - {"num_bins": 256, "max": 255, "dtype": np.uint8}, - np.array([[[100.0, 200.0], [150.0, 250.0]]]), - np.array([[[0, 170], [70, 255]]]), -] +from tests.utils import TEST_NDARRAYS + +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + {"num_bins": 4, "min": 1, "max": 5, "mask": np.array([1, 1, 1, 1, 1, 0])}, + p(np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])), + np.array([1.0, 1.5, 2.5, 4.0, 5.0, 5.0]), + ] + ) + + TESTS.append( + [ + {"num_bins": 4, "max": 4, "dtype": np.uint8}, + p(np.array([0.0, 1.0, 2.0, 3.0, 4.0])), + np.array([0, 0, 1, 3, 4]), + ] + ) + + TESTS.append( + [ + {"num_bins": 256, "max": 255, "dtype": np.uint8}, + p(np.array([[[100.0, 200.0], [150.0, 250.0]]])), + np.array([[[0, 170], [70, 255]]]), + ] + ) class TestHistogramNormalize(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) + @parameterized.expand(TESTS) def test_value(self, argments, image, expected_data): result = HistogramNormalize(**argments)(image) np.testing.assert_allclose(result, expected_data) diff --git a/tests/test_histogram_normalized.py b/tests/test_histogram_normalized.py index 68647e82fb..314c7bd75b 100644 --- a/tests/test_histogram_normalized.py +++ b/tests/test_histogram_normalized.py @@ -15,28 +15,37 @@ from parameterized import parameterized from monai.transforms import HistogramNormalized - -TEST_CASE_1 = [ - {"keys": "img", "num_bins": 4, "min": 1, "max": 5, "mask_key": "mask"}, - {"img": np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]), "mask": np.array([1, 1, 1, 1, 1, 0])}, - np.array([1.0, 1.5, 2.5, 4.0, 5.0, 5.0]), -] - -TEST_CASE_2 = [ - {"keys": "img", "num_bins": 4, "max": 4, "dtype": np.uint8}, - {"img": np.array([0.0, 1.0, 2.0, 3.0, 4.0])}, - np.array([0, 0, 1, 3, 4]), -] - -TEST_CASE_3 = [ - {"keys": "img", 
"num_bins": 256, "max": 255, "dtype": np.uint8}, - {"img": np.array([[[100.0, 200.0], [150.0, 250.0]]])}, - np.array([[[0, 170], [70, 255]]]), -] +from tests.utils import TEST_NDARRAYS + +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + {"keys": "img", "num_bins": 4, "min": 1, "max": 5, "mask_key": "mask"}, + {"img": p(np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])), "mask": p(np.array([1, 1, 1, 1, 1, 0]))}, + np.array([1.0, 1.5, 2.5, 4.0, 5.0, 5.0]), + ] + ) + + TESTS.append( + [ + {"keys": "img", "num_bins": 4, "max": 4, "dtype": np.uint8}, + {"img": p(np.array([0.0, 1.0, 2.0, 3.0, 4.0]))}, + np.array([0, 0, 1, 3, 4]), + ] + ) + + TESTS.append( + [ + {"keys": "img", "num_bins": 256, "max": 255, "dtype": np.uint8}, + {"img": p(np.array([[[100.0, 200.0], [150.0, 250.0]]]))}, + np.array([[[0, 170], [70, 255]]]), + ] + ) class TestHistogramNormalized(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) + @parameterized.expand(TESTS) def test_value(self, argments, image, expected_data): result = HistogramNormalized(**argments)(image)["img"] np.testing.assert_allclose(result, expected_data) diff --git a/tests/test_integration_classification_2d.py b/tests/test_integration_classification_2d.py index 03b5571973..7a94780f82 100644 --- a/tests/test_integration_classification_2d.py +++ b/tests/test_integration_classification_2d.py @@ -197,7 +197,7 @@ def setUp(self): assert os.path.exists(data_dir) - class_names = sorted((x for x in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, x)))) + class_names = sorted(x for x in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, x))) image_files = [ [os.path.join(data_dir, class_name, x) for x in sorted(os.listdir(os.path.join(data_dir, class_name)))] for class_name in class_names diff --git a/tests/test_integration_determinism.py b/tests/test_integration_determinism.py index e077420420..6c858b7832 100644 --- a/tests/test_integration_determinism.py +++ b/tests/test_integration_determinism.py @@ -41,7 +41,7 @@ def __len__(self): return train_steps net = UNet( - dimensions=2, in_channels=1, out_channels=1, channels=(4, 8, 16, 32), strides=(2, 2, 2), num_res_units=2 + spatial_dims=2, in_channels=1, out_channels=1, channels=(4, 8, 16, 32), strides=(2, 2, 2), num_res_units=2 ).to(device) loss = DiceLoss(sigmoid=True) @@ -81,7 +81,7 @@ def test_training(self): loss, step = run_test(device=self.device) print(f"Deterministic loss {loss} at training step {step}") np.testing.assert_allclose(step, 4) - np.testing.assert_allclose(loss, 0.535927, rtol=1e-4) + np.testing.assert_allclose(loss, 0.536134, rtol=1e-4) if __name__ == "__main__": diff --git a/tests/test_integration_fast_train.py b/tests/test_integration_fast_train.py new file mode 100644 index 0000000000..9fd37a0897 --- /dev/null +++ b/tests/test_integration_fast_train.py @@ -0,0 +1,234 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import math +import os +import shutil +import tempfile +import time +import unittest +from glob import glob + +import nibabel as nib +import numpy as np +import torch + +import monai +from monai.data import CacheDataset, ThreadDataLoader, create_test_image_3d, decollate_batch +from monai.inferers import sliding_window_inference +from monai.losses import DiceCELoss +from monai.metrics import DiceMetric +from monai.networks.layers import Norm +from monai.networks.nets import UNet +from monai.optimizers import Novograd +from monai.transforms import ( + AsDiscrete, + Compose, + CropForegroundd, + EnsureChannelFirstd, + EnsureType, + EnsureTyped, + FgBgToIndicesd, + LoadImaged, + RandAffined, + RandAxisFlipd, + RandCropByPosNegLabeld, + RandFlipd, + RandGaussianNoised, + RandRotate90d, + RandRotated, + RandStdShiftIntensityd, + RandZoomd, + ScaleIntensityd, + Spacingd, + ToDeviced, +) +from monai.utils import set_determinism +from tests.utils import DistTestCase, SkipIfBeforePyTorchVersion, TimedCall, skip_if_no_cuda, skip_if_quick + + +@skip_if_no_cuda +@skip_if_quick +@SkipIfBeforePyTorchVersion((1, 7)) +class IntegrationFastTrain(DistTestCase): + def setUp(self): + set_determinism(seed=0) + monai.config.print_config() + + self.data_dir = tempfile.mkdtemp() + for i in range(41): + im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1) + n = nib.Nifti1Image(im, np.eye(4)) + nib.save(n, os.path.join(self.data_dir, f"img{i:d}.nii.gz")) + n = nib.Nifti1Image(seg, np.eye(4)) + nib.save(n, os.path.join(self.data_dir, f"seg{i:d}.nii.gz")) + + def tearDown(self): + set_determinism(seed=None) + shutil.rmtree(self.data_dir) + + # test the fast training speed is as expected + @TimedCall(seconds=100, daemon=False, force_quit=False) + def test_train_timing(self): + images = sorted(glob(os.path.join(self.data_dir, "img*.nii.gz"))) + segs = sorted(glob(os.path.join(self.data_dir, "seg*.nii.gz"))) + train_files = [{"image": img, "label": seg} for img, seg in zip(images[:32], segs[:32])] + val_files = [{"image": img, "label": seg} for img, seg in zip(images[-9:], segs[-9:])] + + device = torch.device("cuda:0") + # define transforms for train and validation + train_transforms = Compose( + [ + LoadImaged(keys=["image", "label"]), + EnsureChannelFirstd(keys=["image", "label"]), + Spacingd(keys=["image", "label"], pixdim=(1.0, 1.0, 1.0), mode=("bilinear", "nearest")), + ScaleIntensityd(keys="image"), + CropForegroundd(keys=["image", "label"], source_key="image"), + # pre-compute foreground and background indexes + # and cache them to accelerate training + FgBgToIndicesd(keys="label", fg_postfix="_fg", bg_postfix="_bg"), + # change to execute transforms with Tensor data + EnsureTyped(keys=["image", "label"]), + # move the data to GPU and cache to avoid CPU -> GPU sync in every epoch + ToDeviced(keys=["image", "label"], device=device), + # randomly crop out patch samples from big + # image based on pos / neg ratio + # the image centers of negative samples + # must be in valid image area + RandCropByPosNegLabeld( + keys=["image", "label"], + label_key="label", + spatial_size=(64, 64, 64), + pos=1, + neg=1, + num_samples=4, + fg_indices_key="label_fg", + bg_indices_key="label_bg", + ), + RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=[1, 2]), + RandAxisFlipd(keys=["image", "label"], prob=0.5), + RandRotate90d(keys=["image", "label"], prob=0.5, spatial_axes=(1, 2)), + RandZoomd(keys=["image", "label"], prob=0.5, min_zoom=0.8, max_zoom=1.2, keep_size=True), + RandRotated( + 
keys=["image", "label"], + prob=0.5, + range_x=np.pi / 4, + mode=("bilinear", "nearest"), + align_corners=True, + ), + RandAffined(keys=["image", "label"], prob=0.5, rotate_range=np.pi / 2, mode=("bilinear", "nearest")), + RandGaussianNoised(keys="image", prob=0.5), + RandStdShiftIntensityd(keys="image", prob=0.5, factors=0.05, nonzero=True), + ] + ) + + val_transforms = Compose( + [ + LoadImaged(keys=["image", "label"]), + EnsureChannelFirstd(keys=["image", "label"]), + Spacingd(keys=["image", "label"], pixdim=(1.0, 1.0, 1.0), mode=("bilinear", "nearest")), + ScaleIntensityd(keys="image"), + CropForegroundd(keys=["image", "label"], source_key="image"), + EnsureTyped(keys=["image", "label"]), + # move the data to GPU and cache to avoid CPU -> GPU sync in every epoch + ToDeviced(keys=["image", "label"], device=device), + ] + ) + + max_epochs = 5 + learning_rate = 2e-4 + val_interval = 1  # validate every epoch + + # set CacheDataset, ThreadDataLoader and DiceCE loss for MONAI fast training + train_ds = CacheDataset(data=train_files, transform=train_transforms, cache_rate=1.0, num_workers=8) + val_ds = CacheDataset(data=val_files, transform=val_transforms, cache_rate=1.0, num_workers=5) + # disable multi-process workers because `ThreadDataLoader` uses multiple threads + train_loader = ThreadDataLoader(train_ds, num_workers=0, batch_size=4, shuffle=True) + val_loader = ThreadDataLoader(val_ds, num_workers=0, batch_size=1) + + loss_function = DiceCELoss(to_onehot_y=True, softmax=True, squared_pred=True, batch=True) + model = UNet( + spatial_dims=3, + in_channels=1, + out_channels=2, + channels=(16, 32, 64, 128, 256), + strides=(2, 2, 2, 2), + num_res_units=2, + norm=Norm.BATCH, + ).to(device) + + # the Novograd paper suggests using a larger LR than Adam, + # because Adam normalizes by element-wise second moments + optimizer = Novograd(model.parameters(), learning_rate * 10) + scaler = torch.cuda.amp.GradScaler() + + post_pred = Compose([EnsureType(), AsDiscrete(argmax=True, to_onehot=True, num_classes=2)]) + post_label = Compose([EnsureType(), AsDiscrete(to_onehot=True, num_classes=2)]) + + dice_metric = DiceMetric(include_background=True, reduction="mean", get_not_nans=False) + + best_metric = -1 + total_start = time.time() + for epoch in range(max_epochs): + epoch_start = time.time() + print("-" * 10) + print(f"epoch {epoch + 1}/{max_epochs}") + model.train() + epoch_loss = 0 + step = 0 + for batch_data in train_loader: + step_start = time.time() + step += 1 + optimizer.zero_grad() + # set AMP for training + with torch.cuda.amp.autocast(): + outputs = model(batch_data["image"]) + loss = loss_function(outputs, batch_data["label"]) + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + epoch_loss += loss.item() + epoch_len = math.ceil(len(train_ds) / train_loader.batch_size) + print( + f"{step}/{epoch_len}, train_loss: {loss.item():.4f}" f" step time: {(time.time() - step_start):.4f}" + ) + epoch_loss /= step + print(f"epoch {epoch + 1} average loss: {epoch_loss:.4f}") + + if (epoch + 1) % val_interval == 0: + model.eval() + with torch.no_grad(): + for val_data in val_loader: + roi_size = (96, 96, 96) + sw_batch_size = 4 + # set AMP for validation + with torch.cuda.amp.autocast(): + val_outputs = sliding_window_inference(val_data["image"], roi_size, sw_batch_size, model) + + val_outputs = [post_pred(i) for i in decollate_batch(val_outputs)] + val_labels = [post_label(i) for i in decollate_batch(val_data["label"])] + dice_metric(y_pred=val_outputs, y=val_labels) +
+ metric = dice_metric.aggregate().item() + dice_metric.reset() + if metric > best_metric: + best_metric = metric + print(f"epoch: {epoch + 1} current mean dice: {metric:.4f}, best mean dice: {best_metric:.4f}") + print(f"time elapsed for epoch {epoch + 1}: {(time.time() - epoch_start):.4f}") + + total_time = time.time() - total_start + print(f"train completed, best_metric: {best_metric:.4f} total time: {total_time:.4f}") + # test expected metrics + self.assertGreater(best_metric, 0.95) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_integration_segmentation_3d.py b/tests/test_integration_segmentation_3d.py index d5eb69f7af..215a5b3f9a 100644 --- a/tests/test_integration_segmentation_3d.py +++ b/tests/test_integration_segmentation_3d.py @@ -100,7 +100,7 @@ def run_training_test(root_dir, device="cuda:0", cachedataset=0, readers=(None, # create UNet, DiceLoss and Adam optimizer model = monai.networks.nets.UNet( - dimensions=3, + spatial_dims=3, in_channels=1, out_channels=1, channels=(16, 32, 64, 128, 256), @@ -199,7 +199,7 @@ def run_inference_test(root_dir, device="cuda:0"): dice_metric = DiceMetric(include_background=True, reduction="mean", get_not_nans=False) model = UNet( - dimensions=3, + spatial_dims=3, in_channels=1, out_channels=1, channels=(16, 32, 64, 128, 256), diff --git a/tests/test_integration_sliding_window.py b/tests/test_integration_sliding_window.py index b63f331ba6..0522bf080e 100644 --- a/tests/test_integration_sliding_window.py +++ b/tests/test_integration_sliding_window.py @@ -34,7 +34,7 @@ def run_test(batch_size, img_name, seg_name, output_dir, device="cuda:0"): loader = DataLoader(ds, batch_size=1, pin_memory=torch.cuda.is_available()) net = UNet( - dimensions=3, in_channels=1, out_channels=1, channels=(4, 8, 16, 32), strides=(2, 2, 2), num_res_units=2 + spatial_dims=3, in_channels=1, out_channels=1, channels=(4, 8, 16, 32), strides=(2, 2, 2), num_res_units=2 ).to(device) roi_size = (16, 32, 48) sw_batch_size = batch_size diff --git a/tests/test_integration_stn.py b/tests/test_integration_stn.py index c1fcfe7a89..ae00892159 100644 --- a/tests/test_integration_stn.py +++ b/tests/test_integration_stn.py @@ -9,7 +9,6 @@ # See the License for the specific language governing permissions and # limitations under the License.
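The recurring edit in the two hunks above, and in several more test files below, is a pure keyword rename: the deprecated network constructor argument `dimensions` is now spelled `spatial_dims`, with identical behaviour. A minimal sketch of the updated call, reusing parameter values that appear in these tests:

from monai.networks.nets import UNet

# previously: UNet(dimensions=3, in_channels=1, out_channels=1, ...)
net = UNet(
    spatial_dims=3,  # number of spatial dimensions of the input volume
    in_channels=1,
    out_channels=1,
    channels=(16, 32, 64, 128, 256),
    strides=(2, 2, 2, 2),
    num_res_units=2,
)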
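For context on the new test_integration_fast_train.py added above: it bundles MONAI's fast-training recipe, namely caching deterministic transform outputs once with the cached tensors already resident on the GPU (`EnsureTyped` plus `ToDeviced`), iterating with `ThreadDataLoader` instead of multi-process workers, and training with AMP and the Novograd optimizer. Below is a condensed sketch of that recipe, assuming a CUDA device is available; the synthetic data and the tiny UNet are illustrative stand-ins, not the test's actual configuration.

import torch
from monai.data import CacheDataset, ThreadDataLoader
from monai.losses import DiceCELoss
from monai.networks.nets import UNet
from monai.optimizers import Novograd
from monai.transforms import Compose, EnsureTyped, ToDeviced

device = torch.device("cuda:0")
# synthetic stand-in data: one dict per sample, as in the test
data = [{"image": torch.rand(1, 64, 64, 64), "label": (torch.rand(1, 64, 64, 64) > 0.5).float()} for _ in range(4)]
# deterministic transforms run once at construction time; the cached items stay
# on the GPU, avoiding a CPU -> GPU copy in every epoch
cache_xform = Compose([EnsureTyped(keys=["image", "label"]), ToDeviced(keys=["image", "label"], device=device)])
train_ds = CacheDataset(data=data, transform=cache_xform, cache_rate=1.0)
# threads instead of worker processes, so the GPU-cached tensors are consumed directly
train_loader = ThreadDataLoader(train_ds, num_workers=0, batch_size=2, shuffle=True)

model = UNet(spatial_dims=3, in_channels=1, out_channels=2, channels=(8, 16), strides=(2,)).to(device)
loss_function = DiceCELoss(to_onehot_y=True, softmax=True)
optimizer = Novograd(model.parameters(), 2e-3)  # Novograd tolerates a larger LR than Adam
scaler = torch.cuda.amp.GradScaler()

for batch in train_loader:
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():  # mixed-precision forward pass
        loss = loss_function(model(batch["image"]), batch["label"])
    scaler.scale(loss).backward()  # scale the loss to avoid fp16 gradient underflow
    scaler.step(optimizer)
    scaler.update()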
-from __future__ import print_function import unittest @@ -104,7 +103,7 @@ def setUp(self): def tearDown(self): set_determinism(seed=None) - @TimedCall(seconds=60) + @TimedCall(seconds=100) def test_training(self): """ check that the quality AffineTransform backpropagation diff --git a/tests/test_integration_unet_2d.py b/tests/test_integration_unet_2d.py index a46a174dc9..88e6d7e795 100644 --- a/tests/test_integration_unet_2d.py +++ b/tests/test_integration_unet_2d.py @@ -32,10 +32,10 @@ def __len__(self): return train_steps if net_name == "basicunet": - net = BasicUNet(dimensions=2, in_channels=1, out_channels=1, features=(4, 8, 8, 16, 16, 32)) + net = BasicUNet(spatial_dims=2, in_channels=1, out_channels=1, features=(4, 8, 8, 16, 16, 32)) elif net_name == "unet": net = UNet( - dimensions=2, in_channels=1, out_channels=1, channels=(4, 8, 16, 32), strides=(2, 2, 2), num_res_units=2 + spatial_dims=2, in_channels=1, out_channels=1, channels=(4, 8, 16, 32), strides=(2, 2, 2), num_res_units=2 ) net.to(device) diff --git a/tests/test_integration_workflows.py b/tests/test_integration_workflows.py index 7fcc0b4064..0ffef2935b 100644 --- a/tests/test_integration_workflows.py +++ b/tests/test_integration_workflows.py @@ -98,7 +98,7 @@ def run_training_test(root_dir, device="cuda:0", amp=False, num_workers=4): # create UNet, DiceLoss and Adam optimizer net = monai.networks.nets.UNet( - dimensions=3, + spatial_dims=3, in_channels=1, out_channels=1, channels=(16, 32, 64, 128, 256), @@ -230,7 +230,7 @@ def run_inference_test(root_dir, model_file, device="cuda:0", amp=False, num_wor # create UNet, DiceLoss and Adam optimizer net = monai.networks.nets.UNet( - dimensions=3, + spatial_dims=3, in_channels=1, out_channels=1, channels=(16, 32, 64, 128, 256), @@ -245,12 +245,7 @@ def run_inference_test(root_dir, model_file, device="cuda:0", amp=False, num_wor AsDiscreted(keys="pred", threshold_values=True), KeepLargestConnectedComponentd(keys="pred", applied_labels=[1]), # test the case that `pred` in `engine.state.output`, while `image_meta_dict` in `engine.state.batch` - SaveImaged( - keys="pred", - meta_keys="image_meta_dict", - output_dir=root_dir, - output_postfix="seg_transform", - ), + SaveImaged(keys="pred", meta_keys="image_meta_dict", output_dir=root_dir, output_postfix="seg_transform"), ] ) val_handlers = [ @@ -357,11 +352,7 @@ def test_training(self): repeated.append(results) np.testing.assert_allclose(repeated[0], repeated[1]) - @TimedCall( - seconds=300, - skip_timing=not torch.cuda.is_available(), - daemon=False, - ) + @TimedCall(seconds=300, skip_timing=not torch.cuda.is_available(), daemon=False) def test_timing(self): if monai.utils.module.get_torch_version_tuple() >= (1, 6): self.train_and_infer(idx=2) diff --git a/tests/test_integration_workflows_gan.py b/tests/test_integration_workflows_gan.py index c54e8b01f2..d4b7d99b62 100644 --- a/tests/test_integration_workflows_gan.py +++ b/tests/test_integration_workflows_gan.py @@ -90,8 +90,7 @@ def generator_loss(gen_images): train_handlers = [ StatsHandler( - name="training_loss", - output_transform=lambda x: {Keys.GLOSS: x[Keys.GLOSS], Keys.DLOSS: x[Keys.DLOSS]}, + name="training_loss", output_transform=lambda x: {Keys.GLOSS: x[Keys.GLOSS], Keys.DLOSS: x[Keys.DLOSS]} ), TensorBoardStatsHandler( log_dir=root_dir, diff --git a/tests/test_intensity_stats.py b/tests/test_intensity_stats.py index 059271e442..2647efd7c2 100644 --- a/tests/test_intensity_stats.py +++ b/tests/test_intensity_stats.py @@ -23,15 +23,10 @@ {"orig_max": 3.0, "orig_mean": 
1.5}, ] -TEST_CASE_2 = [ - {"ops": "std", "key_prefix": "orig"}, - np.array([[[0.0, 1.0], [2.0, 3.0]]]), - None, - {"orig_std": 1.118034}, -] +TEST_CASE_2 = [{"ops": "std", "key_prefix": "orig"}, np.array([[[0.0, 1.0], [2.0, 3.0]]]), None, {"orig_std": 1.118034}] TEST_CASE_3 = [ - {"ops": [lambda x: np.mean(x), "max", lambda x: np.min(x)], "key_prefix": "orig"}, + {"ops": [np.mean, "max", np.min], "key_prefix": "orig"}, np.array([[[0.0, 1.0], [2.0, 3.0]]]), None, {"orig_custom_0": 1.5, "orig_max": 3.0, "orig_custom_1": 0.0}, diff --git a/tests/test_intensity_statsd.py b/tests/test_intensity_statsd.py index 8c8bc8795a..596c80deb5 100644 --- a/tests/test_intensity_statsd.py +++ b/tests/test_intensity_statsd.py @@ -34,7 +34,7 @@ ] TEST_CASE_3 = [ - {"keys": "img", "ops": [lambda x: np.mean(x), "max", lambda x: np.min(x)], "key_prefix": "orig"}, + {"keys": "img", "ops": [np.mean, "max", np.min], "key_prefix": "orig"}, {"img": np.array([[[0.0, 1.0], [2.0, 3.0]]])}, "img_meta_dict", {"orig_custom_0": 1.5, "orig_max": 3.0, "orig_custom_1": 0.0}, diff --git a/tests/test_inverse.py b/tests/test_inverse.py index f2470d47fd..d547fe7595 100644 --- a/tests/test_inverse.py +++ b/tests/test_inverse.py @@ -122,23 +122,9 @@ ) ) -TESTS.append( - ( - "SpatialPadd 3d", - "3D", - 0, - SpatialPadd(KEYS, spatial_size=[112, 113, 116]), - ) -) +TESTS.append(("SpatialPadd 3d", "3D", 0, SpatialPadd(KEYS, spatial_size=[112, 113, 116]))) -TESTS.append( - ( - "SpatialCropd 2d", - "2D", - 0, - SpatialCropd(KEYS, [49, 51], [90, 89]), - ) -) +TESTS.append(("SpatialCropd 2d", "2D", 0, SpatialCropd(KEYS, [49, 51], [90, 89]))) TESTS.append( ( @@ -149,91 +135,28 @@ ) ) -TESTS.append( - ( - "SpatialCropd 2d", - "2D", - 0, - SpatialCropd(KEYS, [49, 51], [390, 89]), - ) -) +TESTS.append(("SpatialCropd 2d", "2D", 0, SpatialCropd(KEYS, [49, 51], [390, 89]))) -TESTS.append( - ( - "SpatialCropd 3d", - "3D", - 0, - SpatialCropd(KEYS, [49, 51, 44], [90, 89, 93]), - ) -) +TESTS.append(("SpatialCropd 3d", "3D", 0, SpatialCropd(KEYS, [49, 51, 44], [90, 89, 93]))) TESTS.append(("RandSpatialCropd 2d", "2D", 0, RandSpatialCropd(KEYS, [96, 93], None, True, False))) TESTS.append(("RandSpatialCropd 3d", "3D", 0, RandSpatialCropd(KEYS, [96, 93, 92], None, False, False))) -TESTS.append( - ( - "BorderPadd 2d", - "2D", - 0, - BorderPadd(KEYS, [3, 7, 2, 5]), - ) -) +TESTS.append(("BorderPadd 2d", "2D", 0, BorderPadd(KEYS, [3, 7, 2, 5]))) -TESTS.append( - ( - "BorderPadd 2d", - "2D", - 0, - BorderPadd(KEYS, [3, 7]), - ) -) +TESTS.append(("BorderPadd 2d", "2D", 0, BorderPadd(KEYS, [3, 7]))) -TESTS.append( - ( - "BorderPadd 3d", - "3D", - 0, - BorderPadd(KEYS, [4]), - ) -) +TESTS.append(("BorderPadd 3d", "3D", 0, BorderPadd(KEYS, [4]))) -TESTS.append( - ( - "DivisiblePadd 2d", - "2D", - 0, - DivisiblePadd(KEYS, k=4), - ) -) +TESTS.append(("DivisiblePadd 2d", "2D", 0, DivisiblePadd(KEYS, k=4))) -TESTS.append( - ( - "DivisiblePadd 3d", - "3D", - 0, - DivisiblePadd(KEYS, k=[4, 8, 11]), - ) -) +TESTS.append(("DivisiblePadd 3d", "3D", 0, DivisiblePadd(KEYS, k=[4, 8, 11]))) -TESTS.append( - ( - "CenterSpatialCropd 2d", - "2D", - 0, - CenterSpatialCropd(KEYS, roi_size=95), - ) -) +TESTS.append(("CenterSpatialCropd 2d", "2D", 0, CenterSpatialCropd(KEYS, roi_size=95))) -TESTS.append( - ( - "CenterSpatialCropd 3d", - "3D", - 0, - CenterSpatialCropd(KEYS, roi_size=[95, 97, 98]), - ) -) +TESTS.append(("CenterSpatialCropd 3d", "3D", 0, CenterSpatialCropd(KEYS, roi_size=[95, 97, 98]))) TESTS.append(("CropForegroundd 2d", "2D", 0, CropForegroundd(KEYS, 
source_key="label", margin=2))) @@ -242,69 +165,20 @@ TESTS.append(("ResizeWithPadOrCropd 3d", "3D", 0, ResizeWithPadOrCropd(KEYS, [201, 150, 105]))) -TESTS.append( - ( - "Flipd 3d", - "3D", - 0, - Flipd(KEYS, [1, 2]), - ) -) +TESTS.append(("Flipd 3d", "3D", 0, Flipd(KEYS, [1, 2]))) -TESTS.append( - ( - "RandFlipd 3d", - "3D", - 0, - RandFlipd(KEYS, 1, [1, 2]), - ) -) +TESTS.append(("RandFlipd 3d", "3D", 0, RandFlipd(KEYS, 1, [1, 2]))) -TESTS.append( - ( - "RandAxisFlipd 3d", - "3D", - 0, - RandAxisFlipd(KEYS, 1), - ) -) +TESTS.append(("RandAxisFlipd 3d", "3D", 0, RandAxisFlipd(KEYS, 1))) for acc in [True, False]: - TESTS.append( - ( - "Orientationd 3d", - "3D", - 0, - Orientationd(KEYS, "RAS", as_closest_canonical=acc), - ) - ) + TESTS.append(("Orientationd 3d", "3D", 0, Orientationd(KEYS, "RAS", as_closest_canonical=acc))) -TESTS.append( - ( - "Rotate90d 2d", - "2D", - 0, - Rotate90d(KEYS), - ) -) +TESTS.append(("Rotate90d 2d", "2D", 0, Rotate90d(KEYS))) -TESTS.append( - ( - "Rotate90d 3d", - "3D", - 0, - Rotate90d(KEYS, k=2, spatial_axes=(1, 2)), - ) -) +TESTS.append(("Rotate90d 3d", "3D", 0, Rotate90d(KEYS, k=2, spatial_axes=(1, 2)))) -TESTS.append( - ( - "RandRotate90d 3d", - "3D", - 0, - RandRotate90d(KEYS, prob=1, spatial_axes=(1, 2)), - ) -) +TESTS.append(("RandRotate90d 3d", "3D", 0, RandRotate90d(KEYS, prob=1, spatial_axes=(1, 2)))) TESTS.append(("Spacingd 3d", "3D", 3e-2, Spacingd(KEYS, [0.5, 0.7, 0.9], diagonal=False))) @@ -327,51 +201,18 @@ ) ) -TESTS.append( - ( - "Zoomd 1d", - "1D odd", - 0, - Zoomd(KEYS, zoom=2, keep_size=False), - ) -) +TESTS.append(("Zoomd 1d", "1D odd", 0, Zoomd(KEYS, zoom=2, keep_size=False))) -TESTS.append( - ( - "Zoomd 2d", - "2D", - 2e-1, - Zoomd(KEYS, zoom=0.9), - ) -) +TESTS.append(("Zoomd 2d", "2D", 2e-1, Zoomd(KEYS, zoom=0.9))) -TESTS.append( - ( - "Zoomd 3d", - "3D", - 3e-2, - Zoomd(KEYS, zoom=[2.5, 1, 3], keep_size=False), - ) -) +TESTS.append(("Zoomd 3d", "3D", 3e-2, Zoomd(KEYS, zoom=[2.5, 1, 3], keep_size=False))) TESTS.append(("RandZoom 3d", "3D", 9e-2, RandZoomd(KEYS, 1, [0.5, 0.6, 0.9], [1.1, 1, 1.05], keep_size=True))) -TESTS.append( - ( - "RandRotated, prob 0", - "2D", - 0, - RandRotated(KEYS, prob=0), - ) -) +TESTS.append(("RandRotated, prob 0", "2D", 0, RandRotated(KEYS, prob=0))) TESTS.append( - ( - "Rotated 2d", - "2D", - 8e-2, - Rotated(KEYS, random.uniform(np.pi / 6, np.pi), keep_size=True, align_corners=False), - ) + ("Rotated 2d", "2D", 8e-2, Rotated(KEYS, random.uniform(np.pi / 6, np.pi), keep_size=True, align_corners=False)) ) TESTS.append( @@ -392,23 +233,9 @@ ) ) -TESTS.append( - ( - "Transposed 2d", - "2D", - 0, - Transposed(KEYS, [0, 2, 1]), # channel=0 - ) -) +TESTS.append(("Transposed 2d", "2D", 0, Transposed(KEYS, [0, 2, 1]))) # channel=0 -TESTS.append( - ( - "Transposed 3d", - "3D", - 0, - Transposed(KEYS, [0, 3, 1, 2]), # channel=0 - ) -) +TESTS.append(("Transposed 3d", "3D", 0, Transposed(KEYS, [0, 3, 1, 2]))) # channel=0 TESTS.append( ( @@ -444,14 +271,7 @@ ) ) -TESTS.append( - ( - "RandAffine 3d", - "3D", - 0, - RandAffined(KEYS, spatial_size=None, prob=0), - ) -) +TESTS.append(("RandAffine 3d", "3D", 0, RandAffined(KEYS, spatial_size=None, prob=0))) TESTS.append( ( @@ -462,32 +282,11 @@ ) ) -TESTS.append( - ( - "RandCropByPosNegLabeld 2d", - "2D", - 1e-7, - RandCropByPosNegLabeld(KEYS, "label", (99, 96), num_samples=10), - ) -) +TESTS.append(("RandCropByPosNegLabeld 2d", "2D", 1e-7, RandCropByPosNegLabeld(KEYS, "label", (99, 96), num_samples=10))) -TESTS.append( - ( - "RandSpatialCropSamplesd 2d", - "2D", - 1e-7, - 
RandSpatialCropSamplesd(KEYS, (90, 91), num_samples=10), - ) -) +TESTS.append(("RandSpatialCropSamplesd 2d", "2D", 1e-7, RandSpatialCropSamplesd(KEYS, (90, 91), num_samples=10))) -TESTS.append( - ( - "RandWeightedCropd 2d", - "2D", - 1e-7, - RandWeightedCropd(KEYS, "label", (90, 91), num_samples=10), - ) -) +TESTS.append(("RandWeightedCropd 2d", "2D", 1e-7, RandWeightedCropd(KEYS, "label", (90, 91), num_samples=10))) TESTS_COMPOSE_X2 = [(t[0] + " Compose", t[1], t[2], Compose(Compose(t[3:]))) for t in TESTS] @@ -566,8 +365,8 @@ def setUp(self): "other": np.array(im_1d, copy=True), } - im_2d_fname, seg_2d_fname = [make_nifti_image(i) for i in create_test_image_2d(101, 100)] - im_3d_fname, seg_3d_fname = [make_nifti_image(i, affine) for i in create_test_image_3d(100, 101, 107)] + im_2d_fname, seg_2d_fname = (make_nifti_image(i) for i in create_test_image_2d(101, 100)) + im_3d_fname, seg_3d_fname = (make_nifti_image(i, affine) for i in create_test_image_3d(100, 101, 107)) load_ims = Compose([LoadImaged(KEYS), AddChanneld(KEYS)]) self.all_data["2D"] = load_ims({"image": im_2d_fname, "label": seg_2d_fname}) @@ -652,26 +451,14 @@ def test_inverse_inferred_seg(self, extra_transform): batch_size = 10 # num workers = 0 for mac num_workers = 2 if sys.platform != "darwin" else 0 - transforms = Compose( - [ - AddChanneld(KEYS), - SpatialPadd(KEYS, (150, 153)), - extra_transform, - ] - ) + transforms = Compose([AddChanneld(KEYS), SpatialPadd(KEYS, (150, 153)), extra_transform]) num_invertible_transforms = sum(1 for i in transforms.transforms if isinstance(i, InvertibleTransform)) dataset = CacheDataset(test_data, transform=transforms, progress=False) loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) device = "cuda" if torch.cuda.is_available() else "cpu" - model = UNet( - dimensions=2, - in_channels=1, - out_channels=1, - channels=(2, 4), - strides=(2,), - ).to(device) + model = UNet(spatial_dims=2, in_channels=1, out_channels=1, channels=(2, 4), strides=(2,)).to(device) data = first(loader) self.assertEqual(len(data["label_transforms"]), num_invertible_transforms) diff --git a/tests/test_inverse_collation.py b/tests/test_inverse_collation.py index c302e04017..d04360a95d 100644 --- a/tests/test_inverse_collation.py +++ b/tests/test_inverse_collation.py @@ -48,15 +48,11 @@ for t in [ RandFlipd(keys=KEYS, prob=0.5, spatial_axis=[1, 2]), RandAxisFlipd(keys=KEYS, prob=0.5), - RandRotate90d(keys=KEYS, spatial_axes=(1, 2)), + Compose([RandRotate90d(keys=KEYS, spatial_axes=(1, 2)), ToTensord(keys=KEYS)]), RandZoomd(keys=KEYS, prob=0.5, min_zoom=0.5, max_zoom=1.1, keep_size=True), RandRotated(keys=KEYS, prob=0.5, range_x=np.pi), RandAffined( - keys=KEYS, - prob=0.5, - rotate_range=np.pi, - device=torch.device("cuda" if torch.cuda.is_available() else "cpu"), - as_tensor_output=False, + keys=KEYS, prob=0.5, rotate_range=np.pi, device=torch.device("cuda" if torch.cuda.is_available() else "cpu") ), ] ] @@ -67,15 +63,11 @@ for t in [ RandFlipd(keys=KEYS, prob=0.5, spatial_axis=[1]), RandAxisFlipd(keys=KEYS, prob=0.5), - RandRotate90d(keys=KEYS, prob=0.5, spatial_axes=(0, 1)), + Compose([RandRotate90d(keys=KEYS, prob=0.5, spatial_axes=(0, 1)), ToTensord(keys=KEYS)]), RandZoomd(keys=KEYS, prob=0.5, min_zoom=0.5, max_zoom=1.1, keep_size=True), RandRotated(keys=KEYS, prob=0.5, range_x=np.pi), RandAffined( - keys=KEYS, - prob=0.5, - rotate_range=np.pi, - device=torch.device("cuda" if torch.cuda.is_available() else "cpu"), - as_tensor_output=False, + keys=KEYS, prob=0.5, 
rotate_range=np.pi, device=torch.device("cuda" if torch.cuda.is_available() else "cpu") ), ] ] @@ -91,12 +83,12 @@ def setUp(self): set_determinism(seed=0) b_size = 11 - im_fname, seg_fname = [make_nifti_image(i) for i in create_test_image_3d(101, 100, 107)] + im_fname, seg_fname = (make_nifti_image(i) for i in create_test_image_3d(101, 100, 107)) load_ims = Compose([LoadImaged(KEYS), AddChanneld(KEYS)]) self.data_3d = [load_ims({"image": im_fname, "label": seg_fname}) for _ in range(b_size)] b_size = 8 - im_fname, seg_fname = [make_nifti_image(i) for i in create_test_image_2d(62, 37, rad_max=10)] + im_fname, seg_fname = (make_nifti_image(i) for i in create_test_image_2d(62, 37, rad_max=10)) load_ims = Compose([LoadImaged(KEYS), AddChanneld(KEYS)]) self.data_2d = [load_ims({"image": im_fname, "label": seg_fname}) for _ in range(b_size)] @@ -107,17 +99,14 @@ def tearDown(self): @parameterized.expand(TESTS_2D + TESTS_3D) def test_collation(self, _, transform, collate_fn, ndim): - if ndim == 3: - data = self.data_3d - else: - data = self.data_2d + data = self.data_3d if ndim == 3 else self.data_2d if collate_fn: modified_transform = transform else: modified_transform = Compose([transform, ResizeWithPadOrCropd(KEYS, 100), ToTensord(KEYS)]) # num workers = 0 for mac or gpu transforms - num_workers = 0 if sys.platform == "darwin" or torch.cuda.is_available() else 2 + num_workers = 0 if sys.platform != "linux" or torch.cuda.is_available() else 2 dataset = CacheDataset(data, transform=modified_transform, progress=False) loader = DataLoader(dataset, num_workers, batch_size=self.batch_size, collate_fn=collate_fn) diff --git a/tests/test_invertd.py b/tests/test_invertd.py index 5b98653f0a..a04064c315 100644 --- a/tests/test_invertd.py +++ b/tests/test_invertd.py @@ -34,8 +34,9 @@ ResizeWithPadOrCropd, ScaleIntensityd, Spacingd, + ToTensord, ) -from monai.utils.misc import set_determinism +from monai.utils import set_determinism from tests.utils import make_nifti_image KEYS = ["image", "label"] @@ -44,7 +45,7 @@ class TestInvertd(unittest.TestCase): def test_invert(self): set_determinism(seed=0) - im_fname, seg_fname = [make_nifti_image(i) for i in create_test_image_3d(101, 100, 107, noise_max=100)] + im_fname, seg_fname = (make_nifti_image(i) for i in create_test_image_3d(101, 100, 107, noise_max=100)) transform = Compose( [ LoadImaged(KEYS), @@ -63,47 +64,107 @@ def test_invert(self): CopyItemsd("image_meta_dict", times=1, names="test_dict"), # test to support Tensor, Numpy array and dictionary when inverting EnsureTyped(keys=["image", "test_dict"]), + ToTensord("image"), CastToTyped(KEYS, dtype=[torch.uint8, np.uint8]), - CopyItemsd("label", times=1, names="label_inverted"), + CopyItemsd("label", times=2, names=["label_inverted", "label_inverted1"]), + CopyItemsd("image", times=2, names=["image_inverted", "image_inverted1"]), ] ) data = [{"image": im_fname, "label": seg_fname} for _ in range(12)] # num workers = 0 for mac or gpu transforms - num_workers = 0 if sys.platform == "darwin" or torch.cuda.is_available() else 2 + num_workers = 0 if sys.platform != "linux" or torch.cuda.is_available() else 2 dataset = CacheDataset(data, transform=transform, progress=False) loader = DataLoader(dataset, num_workers=num_workers, batch_size=5) inverter = Invertd( # `image` was not copied, invert the original value directly - keys=["image", "label_inverted", "test_dict"], + keys=["image_inverted", "label_inverted", "test_dict"], transform=transform, orig_keys=["label", "label", "test_dict"], - 
meta_keys=["image_inverted_meta_dict", "label_inverted_meta_dict", None], orig_meta_keys=["label_meta_dict", "label_meta_dict", None], nearest_interp=True, to_tensor=[True, False, False], device="cpu", ) + inverter_1 = Invertd( + # `image` was not copied, invert the original value directly + keys=["image_inverted1", "label_inverted1"], + transform=transform, + orig_keys=["image", "image"], + meta_keys=["image_inverted1_meta_dict", "label_inverted1_meta_dict"], + orig_meta_keys=["image_meta_dict", "image_meta_dict"], + nearest_interp=[True, False], + to_tensor=[True, True], + device="cpu", + ) + + expected_keys = [ + "image", + "image_inverted", + "image_inverted1", + "image_inverted1_meta_dict", + "image_inverted_meta_dict", + "image_meta_dict", + "image_transforms", + "label", + "label_inverted", + "label_inverted1", + "label_inverted1_meta_dict", + "label_inverted_meta_dict", + "label_meta_dict", + "label_transforms", + "test_dict", + "test_dict_transforms", + ] # execute 1 epoch for d in loader: d = decollate_batch(d) for item in d: item = inverter(item) - # this unit test only covers basic function, test_handler_transform_inverter covers more + item = inverter_1(item) + + self.assertListEqual(sorted(item), expected_keys) + self.assertTupleEqual(item["image"].shape[1:], (100, 100, 100)) self.assertTupleEqual(item["label"].shape[1:], (100, 100, 100)) - # check the nearest inerpolation mode - i = item["image"] + # check the nearest interpolation mode + i = item["image_inverted"] torch.testing.assert_allclose(i.to(torch.uint8).to(torch.float), i.to(torch.float)) self.assertTupleEqual(i.shape[1:], (100, 101, 107)) i = item["label_inverted"] - np.testing.assert_allclose(i.astype(np.uint8).astype(np.float32), i.astype(np.float32)) + torch.testing.assert_allclose(i.to(torch.uint8).to(torch.float), i.to(torch.float)) self.assertTupleEqual(i.shape[1:], (100, 101, 107)) # test inverted test_dict self.assertTrue(isinstance(item["test_dict"]["affine"], np.ndarray)) self.assertTrue(isinstance(item["test_dict"]["filename_or_obj"], str)) + # check the case that different items use different interpolation modes to invert transforms + d = item["image_inverted1"] + # if the interpolation mode is nearest, accumulated diff should be smaller than 1 + self.assertLess(torch.sum(d.to(torch.float) - d.to(torch.uint8).to(torch.float)).item(), 1.0) + self.assertTupleEqual(d.shape, (1, 100, 101, 107)) + + d = item["label_inverted1"] + # if the interpolation mode is not nearest, accumulated diff should be greater than 10000 + self.assertGreater(torch.sum(d.to(torch.float) - d.to(torch.uint8).to(torch.float)).item(), 10000.0) + self.assertTupleEqual(d.shape, (1, 100, 101, 107)) + + # check labels match + reverted = item["label_inverted"].detach().cpu().numpy().astype(np.int32) + original = LoadImaged(KEYS)(data[-1])["label"] + n_good = np.sum(np.isclose(reverted, original, atol=1e-3)) + reverted_name = item["label_inverted_meta_dict"]["filename_or_obj"] + original_name = data[-1]["label"] + self.assertEqual(reverted_name, original_name) + print("invert diff", reverted.size - n_good) + # 34007: 2 workers (cpu, linux) + # 1812: 0 workers (gpu or macos) + # 1824: torch 1.5.1 + # 1821: windows torch 1.10.0 + self.assertTrue((reverted.size - n_good) in (34007, 1812, 1824, 1821), f"diff. 
{reverted.size - n_good}") + set_determinism(seed=None) diff --git a/tests/test_is_supported_format.py b/tests/test_is_supported_format.py index c0af8f4395..0008712f96 100644 --- a/tests/test_is_supported_format.py +++ b/tests/test_is_supported_format.py @@ -15,35 +15,17 @@ from monai.data import is_supported_format -TEST_CASE_1 = [ - {"filename": "testfile.nii.gz", "suffixes": ["nii", "nii.gz"]}, - True, -] - -TEST_CASE_2 = [ - {"filename": "./testfile.nii.gz", "suffixes": ["nii", "nii.gz"]}, - True, -] - -TEST_CASE_3 = [ - {"filename": "./test.data/file.nii.gz", "suffixes": ["nii", "nii.gz"]}, - True, -] - -TEST_CASE_4 = [ - {"filename": "./test.data/file.nii", "suffixes": ["nii", "nii.gz"]}, - True, -] - -TEST_CASE_5 = [ - {"filename": "C:\\documents\\testfile.nii.gz", "suffixes": ["nii", "nii.gz"]}, - True, -] - -TEST_CASE_6 = [ - {"filename": "1.3.12.2.1107.5.4.4.145.nii.gz", "suffixes": ["nii.gz"]}, - True, -] +TEST_CASE_1 = [{"filename": "testfile.nii.gz", "suffixes": ["nii", "nii.gz"]}, True] + +TEST_CASE_2 = [{"filename": "./testfile.nii.gz", "suffixes": ["nii", "nii.gz"]}, True] + +TEST_CASE_3 = [{"filename": "./test.data/file.nii.gz", "suffixes": ["nii", "nii.gz"]}, True] + +TEST_CASE_4 = [{"filename": "./test.data/file.nii", "suffixes": ["nii", "nii.gz"]}, True] + +TEST_CASE_5 = [{"filename": "C:\\documents\\testfile.nii.gz", "suffixes": ["nii", "nii.gz"]}, True] + +TEST_CASE_6 = [{"filename": "1.3.12.2.1107.5.4.4.145.nii.gz", "suffixes": ["nii.gz"]}, True] class TestIsSupportedFormat(unittest.TestCase): diff --git a/tests/test_iterable_dataset.py b/tests/test_iterable_dataset.py index 7b16eaf594..bf6946101e 100644 --- a/tests/test_iterable_dataset.py +++ b/tests/test_iterable_dataset.py @@ -38,12 +38,7 @@ def test_shape(self): nib.save(test_image, os.path.join(tempdir, f"test_image{str(i)}.nii.gz")) test_data.append({"image": os.path.join(tempdir, f"test_image{str(i)}.nii.gz")}) - test_transform = Compose( - [ - LoadImaged(keys="image"), - SimulateDelayd(keys="image", delay_time=1e-7), - ] - ) + test_transform = Compose([LoadImaged(keys="image"), SimulateDelayd(keys="image", delay_time=1e-7)]) data_iterator = _Stream(test_data) with self.assertRaises(TypeError): # Dataset doesn't work diff --git a/tests/test_k_space_spike_noise.py b/tests/test_k_space_spike_noise.py index bb6d05e676..66763f286f 100644 --- a/tests/test_k_space_spike_noise.py +++ b/tests/test_k_space_spike_noise.py @@ -20,17 +20,14 @@ from monai.data.synthetic import create_test_image_2d, create_test_image_3d from monai.transforms import KSpaceSpikeNoise from monai.utils.misc import set_determinism -from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule +from tests.utils import TEST_NDARRAYS -TEST_CASES = [] +TESTS = [] for shape in ((128, 64), (64, 48, 80)): - for as_tensor_output in (True, False): - for as_tensor_input in (True, False): - TEST_CASES.append((shape, as_tensor_output, as_tensor_input)) + for p in TEST_NDARRAYS: + TESTS.append((shape, p)) -@SkipIfBeforePyTorchVersion((1, 8)) -@SkipIfNoModule("torch.fft") class TestKSpaceSpikeNoise(unittest.TestCase): def setUp(self): set_determinism(0) @@ -40,34 +37,44 @@ def tearDown(self): set_determinism(None) @staticmethod - def get_data(im_shape, as_tensor_input): + def get_data(im_shape, im_type): create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d - im = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5)[0][None] - return torch.Tensor(im) if as_tensor_input else im + im, _ = 
create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5) + return im_type(im[None]) - @parameterized.expand(TEST_CASES) - def test_same_result(self, im_shape, as_tensor_output, as_tensor_input): + @parameterized.expand(TESTS) + def test_same_result(self, im_shape, im_type): - im = self.get_data(im_shape, as_tensor_input) + im = self.get_data(im_shape, im_type) loc = [0, int(im.shape[1] / 2), 0] if len(im_shape) == 2 else [0, int(im.shape[1] / 2), 0, 0] k_intensity = 10 - t = KSpaceSpikeNoise(loc, k_intensity, as_tensor_output) + t = KSpaceSpikeNoise(loc, k_intensity) out1 = t(deepcopy(im)) out2 = t(deepcopy(im)) + self.assertEqual(type(im), type(out1)) + if isinstance(out1, torch.Tensor): + self.assertEqual(im.device, out1.device) + out1 = out1.cpu() + out2 = out2.cpu() + np.testing.assert_allclose(out1, out2) - self.assertIsInstance(out1, torch.Tensor if as_tensor_output else np.ndarray) - @parameterized.expand(TEST_CASES) - def test_highlighted_kspace_pixel(self, im_shape, as_tensor_output, as_tensor_input): + @parameterized.expand(TESTS) + def test_highlighted_kspace_pixel(self, im_shape, as_tensor_input): im = self.get_data(im_shape, as_tensor_input) loc = [0, int(im.shape[1] / 2), 0] if len(im_shape) == 2 else [0, int(im.shape[1] / 2), 0, 0] k_intensity = 10 - t = KSpaceSpikeNoise(loc, k_intensity, as_tensor_output) + t = KSpaceSpikeNoise(loc, k_intensity) out = t(im) + self.assertEqual(type(im), type(out)) + if isinstance(out, torch.Tensor): + self.assertEqual(im.device, out.device) + out = out.cpu() + n_dims = len(im_shape) out_k = fftshift(fftn(out, axes=tuple(range(-n_dims, 0))), axes=tuple(range(-n_dims, 0))) log_mag = np.log(np.absolute(out_k)) diff --git a/tests/test_k_space_spike_noised.py b/tests/test_k_space_spike_noised.py index 616662b3cd..3fa6a394f3 100644 --- a/tests/test_k_space_spike_noised.py +++ b/tests/test_k_space_spike_noised.py @@ -20,19 +20,16 @@ from monai.data.synthetic import create_test_image_2d, create_test_image_3d from monai.transforms import KSpaceSpikeNoised from monai.utils.misc import set_determinism -from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule +from tests.utils import TEST_NDARRAYS -TEST_CASES = [] +TESTS = [] for shape in ((128, 64), (64, 48, 80)): - for as_tensor_output in (True, False): - for as_tensor_input in (True, False): - TEST_CASES.append((shape, as_tensor_output, as_tensor_input)) + for p in TEST_NDARRAYS: + TESTS.append((shape, p)) KEYS = ["image", "label"] -@SkipIfBeforePyTorchVersion((1, 8)) -@SkipIfNoModule("torch.fft") class TestKSpaceSpikeNoised(unittest.TestCase): def setUp(self): set_determinism(0) @@ -42,55 +39,69 @@ def tearDown(self): set_determinism(None) @staticmethod - def get_data(im_shape, as_tensor_input): + def get_data(im_shape, im_type): create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d ims = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5) - ims = [im[None] for im in ims] - ims = [torch.Tensor(im) for im in ims] if as_tensor_input else ims - return dict(zip(KEYS, ims)) + ims = [im_type(im[None]) for im in ims] + return {k: v for k, v in zip(KEYS, ims)} - @parameterized.expand(TEST_CASES) - def test_same_result(self, im_shape, as_tensor_output, as_tensor_input): + @parameterized.expand(TESTS) + def test_same_result(self, im_shape, im_type): - data = self.get_data(im_shape, as_tensor_input) + data = self.get_data(im_shape, im_type) loc = [0] + [int(im_shape[i] / 2) for i in range(len(im_shape))] k_intensity = 10 - t 
= KSpaceSpikeNoised(KEYS, loc, k_intensity, as_tensor_output) + t = KSpaceSpikeNoised(KEYS, loc, k_intensity) out1 = t(deepcopy(data)) out2 = t(deepcopy(data)) for k in KEYS: + self.assertEqual(type(out1[k]), type(data[k])) + if isinstance(out1[k], torch.Tensor): + self.assertEqual(out1[k].device, data[k].device) + out1[k] = out1[k].cpu() + out2[k] = out2[k].cpu() np.testing.assert_allclose(out1[k], out2[k]) - self.assertIsInstance(out1[k], torch.Tensor if as_tensor_output else np.ndarray) - @parameterized.expand(TEST_CASES) - def test_highlighted_kspace_pixel(self, im_shape, as_tensor_output, as_tensor_input): + @parameterized.expand(TESTS) + def test_highlighted_kspace_pixel(self, im_shape, im_type): - data = self.get_data(im_shape, as_tensor_input) + data = self.get_data(im_shape, im_type) loc = [0] + [int(im_shape[i] / 2) for i in range(len(im_shape))] k_intensity = 10 - t = KSpaceSpikeNoised(KEYS, loc, k_intensity, as_tensor_output) + t = KSpaceSpikeNoised(KEYS, loc, k_intensity) out = t(data) for k in KEYS: + self.assertEqual(type(out[k]), type(data[k])) + if isinstance(out[k], torch.Tensor): + self.assertEqual(out[k].device, data[k].device) + out[k] = out[k].cpu() + n_dims = len(im_shape) out_k = fftshift(fftn(out[k], axes=tuple(range(-n_dims, 0))), axes=tuple(range(-n_dims, 0))) log_mag = np.log(np.absolute(out_k)) np.testing.assert_allclose(k_intensity, log_mag[tuple(loc)], 1e-1) - @parameterized.expand(TEST_CASES) - def test_dict_matches(self, im_shape, _, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) + @parameterized.expand(TESTS) + def test_dict_matches(self, im_shape, im_type): + data = self.get_data(im_shape, im_type) # use same image for both dictionary entries to check same trans is applied to them data = {KEYS[0]: deepcopy(data[KEYS[0]]), KEYS[1]: deepcopy(data[KEYS[0]])} loc = [0] + [int(im_shape[i] / 2) for i in range(len(im_shape))] k_intensity = 10 - t = KSpaceSpikeNoised(KEYS, loc, k_intensity, as_tensor_output) + t = KSpaceSpikeNoised(KEYS, loc, k_intensity) out = t(deepcopy(data)) + for k in KEYS: + self.assertEqual(type(out[k]), type(data[k])) + if isinstance(out[k], torch.Tensor): + self.assertEqual(out[k].device, data[k].device) + out[k] = out[k].cpu() + np.testing.assert_allclose(out[KEYS[0]], out[KEYS[1]]) diff --git a/tests/test_keep_largest_connected_component.py b/tests/test_keep_largest_connected_component.py index 527d986614..50caa0bb31 100644 --- a/tests/test_keep_largest_connected_component.py +++ b/tests/test_keep_largest_connected_component.py @@ -42,7 +42,7 @@ [0.0, 1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0, 1.0], ], - ], + ] ) grid_4 = torch.tensor( [ @@ -67,7 +67,7 @@ [0.0, 0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0], ], - ], + ] ) @@ -168,7 +168,7 @@ [0.0, 1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0, 1.0], ], - ], + ] ), ] @@ -199,7 +199,7 @@ [0.0, 1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0, 1.0], ], - ], + ] ), ] @@ -230,7 +230,7 @@ [0.0, 1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0, 0.0], ], - ], + ] ), ] @@ -261,7 +261,7 @@ [0.0, 0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0], ], - ], + ] ), ] @@ -292,7 +292,7 @@ [0.0, 0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0], ], - ], + ] ), ] diff --git a/tests/test_keep_largest_connected_componentd.py b/tests/test_keep_largest_connected_componentd.py index 9478cfb965..96a8154b65 100644 --- a/tests/test_keep_largest_connected_componentd.py +++ b/tests/test_keep_largest_connected_componentd.py @@ -42,7 +42,7 @@ [0.0, 1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0, 1.0], ], - ], + ] ) } grid_4 = { 
@@ -69,7 +69,7 @@ [0.0, 0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0], ], - ], + ] ) } @@ -170,7 +170,7 @@ [0.0, 1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0, 1.0], ], - ], + ] ), ] @@ -201,7 +201,7 @@ [0.0, 1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0, 1.0], ], - ], + ] ), ] @@ -232,7 +232,7 @@ [0.0, 1.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 0.0, 0.0], ], - ], + ] ), ] @@ -263,7 +263,7 @@ [0.0, 0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0], ], - ], + ] ), ] @@ -294,7 +294,7 @@ [0.0, 0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0, 0.0], ], - ], + ] ), ] diff --git a/tests/test_label_filter.py b/tests/test_label_filter.py index c699fb31fd..05f64a5fd9 100644 --- a/tests/test_label_filter.py +++ b/tests/test_label_filter.py @@ -18,34 +18,14 @@ from monai.transforms import LabelFilter from tests.utils import assert_allclose, clone -grid_1 = torch.tensor( - [ - [ - [ - [1, 2, 3], - [4, 5, 6], - [7, 8, 9], - ] - ] - ] -) +grid_1 = torch.tensor([[[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]]) TEST_CASE_0 = [ "filter_single_label", {"applied_labels": 3}, grid_1, - torch.tensor( - [ - [ - [ - [0, 0, 3], - [0, 0, 0], - [0, 0, 0], - ] - ] - ] - ), + torch.tensor([[[[0, 0, 3], [0, 0, 0], [0, 0, 0]]]]), ] @@ -53,49 +33,19 @@ "filter_single_label_list", {"applied_labels": [3]}, grid_1, - torch.tensor( - [ - [ - [ - [0, 0, 3], - [0, 0, 0], - [0, 0, 0], - ] - ] - ] - ), + torch.tensor([[[[0, 0, 3], [0, 0, 0], [0, 0, 0]]]]), ] TEST_CASE_2 = [ "filter_multi_label", {"applied_labels": [3, 5, 8]}, grid_1, - torch.tensor( - [ - [ - [ - [0, 0, 3], - [0, 5, 0], - [0, 8, 0], - ] - ] - ] - ), + torch.tensor([[[[0, 0, 3], [0, 5, 0], [0, 8, 0]]]]), ] -TEST_CASE_3 = [ - "filter_all", - {"applied_labels": [1, 2, 3, 4, 5, 6, 7, 8, 9]}, - grid_1, - grid_1, -] +TEST_CASE_3 = ["filter_all", {"applied_labels": [1, 2, 3, 4, 5, 6, 7, 8, 9]}, grid_1, grid_1] -VALID_CASES = [ - TEST_CASE_0, - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, -] +VALID_CASES = [TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3] ITEST_CASE_1 = ["invalid_image_data_type", {"applied_labels": 1}, [[[[1, 1, 1]]]], NotImplementedError] diff --git a/tests/test_label_to_mask.py b/tests/test_label_to_mask.py index 9caa7252f3..6c8f935fbc 100644 --- a/tests/test_label_to_mask.py +++ b/tests/test_label_to_mask.py @@ -64,7 +64,7 @@ def test_value(self, argments, image, expected_data): self.assertEqual(type(result), type(image)) if isinstance(result, torch.Tensor): self.assertEqual(result.device, image.device) - assert_allclose(result, expected_data) + assert_allclose(result, expected_data, type_test=False) if __name__ == "__main__": diff --git a/tests/test_label_to_maskd.py b/tests/test_label_to_maskd.py index b8f0d3c171..b2073e8ac3 100644 --- a/tests/test_label_to_maskd.py +++ b/tests/test_label_to_maskd.py @@ -65,7 +65,7 @@ def test_value(self, argments, input_data, expected_data): self.assertEqual(type(r), type(i)) if isinstance(r, torch.Tensor): self.assertEqual(r.device, i.device) - assert_allclose(r, expected_data) + assert_allclose(r, expected_data, type_test=False) if __name__ == "__main__": diff --git a/tests/test_lesion_froc.py b/tests/test_lesion_froc.py index 2454de88fa..4a67c8d0b3 100644 --- a/tests/test_lesion_froc.py +++ b/tests/test_lesion_froc.py @@ -19,18 +19,19 @@ from monai.apps.pathology.metrics import LesionFROC from monai.utils import optional_import -_, has_cucim = optional_import("cucim") +_cucim, has_cucim = optional_import("cucim") +has_cucim = has_cucim and hasattr(_cucim, "CuImage") _, has_skimage = optional_import("skimage.measure") _, has_sp = 
optional_import("scipy.ndimage") -PILImage, has_pil = optional_import("PIL.Image") +imwrite, has_tif = optional_import("tifffile", name="imwrite") def save_as_tif(filename, array): array = array[::-1, ...] # Upside-down - img = PILImage.fromarray(array) if not filename.endswith(".tif"): filename += ".tif" - img.save(os.path.join("tests", "testing_data", filename)) + file_path = os.path.join("tests", "testing_data", filename) + imwrite(file_path, array, compress="jpeg", tile=(16, 16)) def around(val, interval=3): @@ -301,7 +302,7 @@ class TestEvaluateTumorFROC(unittest.TestCase): @skipUnless(has_cucim, "Requires cucim") @skipUnless(has_skimage, "Requires skimage") @skipUnless(has_sp, "Requires scipy") - @skipUnless(has_pil, "Requires PIL") + @skipUnless(has_tif, "Requires tifffile") def setUp(self): prepare_test_data() diff --git a/tests/test_list_to_dict.py b/tests/test_list_to_dict.py index 2f026f3e29..c366e8f3bd 100644 --- a/tests/test_list_to_dict.py +++ b/tests/test_list_to_dict.py @@ -15,25 +15,13 @@ from monai.utils import list_to_dict -TEST_CASE_1 = [ - ["a=1", "b=2", "c=3", "d=4"], - {"a": 1, "b": 2, "c": 3, "d": 4}, -] +TEST_CASE_1 = [["a=1", "b=2", "c=3", "d=4"], {"a": 1, "b": 2, "c": 3, "d": 4}] -TEST_CASE_2 = [ - ["a=a", "b=b", "c=c", "d=d"], - {"a": "a", "b": "b", "c": "c", "d": "d"}, -] +TEST_CASE_2 = [["a=a", "b=b", "c=c", "d=d"], {"a": "a", "b": "b", "c": "c", "d": "d"}] -TEST_CASE_3 = [ - ["a=0.1", "b=0.2", "c=0.3", "d=0.4"], - {"a": 0.1, "b": 0.2, "c": 0.3, "d": 0.4}, -] +TEST_CASE_3 = [["a=0.1", "b=0.2", "c=0.3", "d=0.4"], {"a": 0.1, "b": 0.2, "c": 0.3, "d": 0.4}] -TEST_CASE_4 = [ - ["a=True", "b=TRUE", "c=false", "d=FALSE"], - {"a": True, "b": True, "c": False, "d": False}, -] +TEST_CASE_4 = [["a=True", "b=TRUE", "c=false", "d=FALSE"], {"a": True, "b": True, "c": False, "d": False}] TEST_CASE_5 = [ ["a='1'", "b=2 ", " c = 3", "d='test'", "'e'=0", "f", "g=None"], diff --git a/tests/test_lltm.py b/tests/test_lltm.py index f1311379bc..4186c91246 100644 --- a/tests/test_lltm.py +++ b/tests/test_lltm.py @@ -15,7 +15,9 @@ from parameterized import parameterized from monai.networks.layers import LLTM -from tests.utils import SkipIfNoModule +from tests.utils import SkipIfNoModule, is_tf32_env + +_rtol = 0.001 if is_tf32_env() else 0.0001 TEST_CASE_1 = [ {"input_features": 32, "state_size": 2}, @@ -50,8 +52,8 @@ def test_value_cuda(self, input_param, expected_h, expected_c): new_h, new_c = lltm(x, (h, c)) (new_h.sum() + new_c.sum()).backward() - torch.testing.assert_allclose(new_h, expected_h.to(device), rtol=0.0001, atol=1e-04) - torch.testing.assert_allclose(new_c, expected_c.to(device), rtol=0.0001, atol=1e-04) + torch.testing.assert_allclose(new_h, expected_h.to(device), rtol=_rtol, atol=0.001) + torch.testing.assert_allclose(new_c, expected_c.to(device), rtol=_rtol, atol=0.001) if __name__ == "__main__": diff --git a/tests/test_load_image.py b/tests/test_load_image.py index 2aa6eced65..3f78d3892d 100644 --- a/tests/test_load_image.py +++ b/tests/test_load_image.py @@ -68,11 +68,7 @@ def get_data(self, _obj): (3, 128, 128, 128), ] -TEST_CASE_5 = [ - {"reader": NibabelReader(mmap=False), "image_only": False}, - ["test_image.nii.gz"], - (128, 128, 128), -] +TEST_CASE_5 = [{"reader": NibabelReader(mmap=False), "image_only": False}, ["test_image.nii.gz"], (128, 128, 128)] TEST_CASE_6 = [{"reader": ITKReader(), "image_only": True}, ["test_image.nii.gz"], (128, 128, 128)] diff --git a/tests/test_load_imaged.py b/tests/test_load_imaged.py index ca5b56a7d9..cfe85c7c9c 100644 --- 
a/tests/test_load_imaged.py +++ b/tests/test_load_imaged.py @@ -81,12 +81,7 @@ class TestConsistency(unittest.TestCase): def _cmp(self, filename, shape, ch_shape, reader_1, reader_2, outname, ext): data_dict = {"img": filename} keys = data_dict.keys() - xforms = Compose( - [ - LoadImaged(keys, reader=reader_1), - EnsureChannelFirstD(keys), - ] - ) + xforms = Compose([LoadImaged(keys, reader=reader_1), EnsureChannelFirstD(keys)]) img_dict = xforms(data_dict) # load dicom with itk self.assertTupleEqual(img_dict["img"].shape, ch_shape) self.assertTupleEqual(tuple(img_dict["img_meta_dict"]["spatial_shape"]), shape) @@ -97,12 +92,7 @@ def _cmp(self, filename, shape, ch_shape, reader_1, reader_2, outname, ext): ) save_xform(img_dict) # save to nifti - new_xforms = Compose( - [ - LoadImaged(keys, reader=reader_2), - EnsureChannelFirstD(keys), - ] - ) + new_xforms = Compose([LoadImaged(keys, reader=reader_2), EnsureChannelFirstD(keys)]) out = new_xforms({"img": os.path.join(tempdir, outname)}) # load nifti with itk self.assertTupleEqual(out["img"].shape, ch_shape) self.assertTupleEqual(tuple(out["img_meta_dict"]["spatial_shape"]), shape) diff --git a/tests/test_lr_scheduler.py b/tests/test_lr_scheduler.py index aa126f7848..acafc87131 100644 --- a/tests/test_lr_scheduler.py +++ b/tests/test_lr_scheduler.py @@ -19,7 +19,7 @@ class SchedulerTestNet(torch.nn.Module): def __init__(self): - super(SchedulerTestNet, self).__init__() + super().__init__() self.conv1 = torch.nn.Conv2d(1, 1, 1) self.conv2 = torch.nn.Conv2d(1, 1, 1) @@ -28,13 +28,7 @@ def forward(self, x): TEST_CASE_LRSCHEDULER = [ - [ - { - "warmup_steps": 2, - "t_total": 10, - }, - [0.000, 0.500, 1.00, 0.962, 0.854, 0.691, 0.500, 0.309, 0.146, 0.038], - ] + [{"warmup_steps": 2, "t_total": 10}, [0.000, 0.500, 1.00, 0.962, 0.854, 0.691, 0.500, 0.309, 0.146, 0.038]] ] @@ -47,11 +41,11 @@ def test_shape(self, input_param, expected_lr): self.assertEqual(len([scheduler.get_last_lr()[0]]), 1) lrs_1 = [] for _ in range(input_param["t_total"]): - lrs_1.append(float("{:.3f}".format(scheduler.get_last_lr()[0]))) + lrs_1.append(float(f"{scheduler.get_last_lr()[0]:.3f}")) optimizer.step() scheduler.step() for a, b in zip(lrs_1, expected_lr): - self.assertEqual(a, b, msg="LR is wrong ! expected {}, got {}".format(b, a)) + self.assertEqual(a, b, msg=f"LR is wrong ! 
expected {b}, got {a}") if __name__ == "__main__": diff --git a/tests/test_map_binary_to_indices.py b/tests/test_map_binary_to_indices.py index 1fafa6f446..2d29aa7c0d 100644 --- a/tests/test_map_binary_to_indices.py +++ b/tests/test_map_binary_to_indices.py @@ -15,50 +15,58 @@ from parameterized import parameterized from monai.transforms import map_binary_to_indices +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - {"label": np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), "image": None, "image_threshold": 0.0}, - np.array([1, 2, 3, 5, 6, 7]), - np.array([0, 4, 8]), -] - -TEST_CASE_2 = [ - { - "label": np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), - "image": np.array([[[1, 1, 1], [1, 0, 1], [1, 1, 1]]]), - "image_threshold": 0.0, - }, - np.array([1, 2, 3, 5, 6, 7]), - np.array([0, 8]), -] - -TEST_CASE_3 = [ - { - "label": np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), - "image": np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]), - "image_threshold": 1.0, - }, - np.array([1, 2, 3, 5, 6, 7]), - np.array([0, 8]), -] - -TEST_CASE_4 = [ - { - "label": np.array([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), - "image": np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]), - "image_threshold": 1.0, - }, - np.array([1, 2, 3, 5, 6, 7]), - np.array([0, 8]), -] +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + {"label": p(np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]])), "image": None, "image_threshold": 0.0}, + np.array([1, 2, 3, 5, 6, 7]), + np.array([0, 4, 8]), + ] + ) + TESTS.append( + [ + { + "label": p(np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]])), + "image": p(np.array([[[1, 1, 1], [1, 0, 1], [1, 1, 1]]])), + "image_threshold": 0.0, + }, + np.array([1, 2, 3, 5, 6, 7]), + np.array([0, 8]), + ] + ) + TESTS.append( + [ + { + "label": p(np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]])), + "image": p(np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])), + "image_threshold": 1.0, + }, + np.array([1, 2, 3, 5, 6, 7]), + np.array([0, 8]), + ] + ) + TESTS.append( + [ + { + "label": p(np.array([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]])), + "image": p(np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])), + "image_threshold": 1.0, + }, + np.array([1, 2, 3, 5, 6, 7]), + np.array([0, 8]), + ] + ) class TestMapBinaryToIndices(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4]) + @parameterized.expand(TESTS) def test_type_shape(self, input_data, expected_fg, expected_bg): fg_indices, bg_indices = map_binary_to_indices(**input_data) - np.testing.assert_allclose(fg_indices, expected_fg) - np.testing.assert_allclose(bg_indices, expected_bg) + assert_allclose(fg_indices, expected_fg, type_test=False) + assert_allclose(bg_indices, expected_bg, type_test=False) if __name__ == "__main__": diff --git a/tests/test_map_classes_to_indices.py b/tests/test_map_classes_to_indices.py index 2320954520..ae75b90c16 100644 --- a/tests/test_map_classes_to_indices.py +++ b/tests/test_map_classes_to_indices.py @@ -15,86 +15,117 @@ from parameterized import parameterized from monai.transforms import map_classes_to_indices +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - # test Argmax data - {"label": np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), "num_classes": 3, "image": None, "image_threshold": 0.0}, - [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])], -] +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + # test Argmax data + { + "label": p(np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])), + "num_classes": 3, + "image": None, + "image_threshold": 
0.0, + }, + [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])], + ] + ) -TEST_CASE_2 = [ - { - "label": np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), - "num_classes": 3, - "image": np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]), - "image_threshold": 60, - }, - [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])], -] + TESTS.append( + [ + { + "label": p(np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])), + "num_classes": 3, + "image": p(np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]])), + "image_threshold": 60, + }, + [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])], + ] + ) -TEST_CASE_3 = [ - # test One-Hot data - { - "label": np.array( - [ - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [[0, 1, 0], [0, 0, 1], [1, 0, 0]], - [[0, 0, 1], [1, 0, 0], [0, 1, 0]], - ] - ), - "image": None, - "image_threshold": 0.0, - }, - [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])], -] + TESTS.append( + [ + # test One-Hot data + { + "label": p( + np.array( + [ + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], + [[0, 1, 0], [0, 0, 1], [1, 0, 0]], + [[0, 0, 1], [1, 0, 0], [0, 1, 0]], + ] + ) + ), + "image": None, + "image_threshold": 0.0, + }, + [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])], + ] + ) -TEST_CASE_4 = [ - { - "label": np.array( - [ - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [[0, 1, 0], [0, 0, 1], [1, 0, 0]], - [[0, 0, 1], [1, 0, 0], [0, 1, 0]], - ] - ), - "num_classes": None, - "image": np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]), - "image_threshold": 60, - }, - [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])], -] + TESTS.append( + [ + { + "label": p( + np.array( + [ + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], + [[0, 1, 0], [0, 0, 1], [1, 0, 0]], + [[0, 0, 1], [1, 0, 0], [0, 1, 0]], + ] + ) + ), + "num_classes": None, + "image": p(np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]])), + "image_threshold": 60, + }, + [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])], + ] + ) -TEST_CASE_5 = [ - # test empty class - {"label": np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), "num_classes": 5, "image": None, "image_threshold": 0.0}, - [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7]), np.array([]), np.array([])], -] + TESTS.append( + [ + # test empty class + { + "label": p(np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])), + "num_classes": 5, + "image": None, + "image_threshold": 0.0, + }, + [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7]), np.array([]), np.array([])], + ] + ) -TEST_CASE_6 = [ - # test empty class - { - "label": np.array( - [ - [[1, 0, 0], [0, 1, 0], [0, 0, 1]], - [[0, 1, 0], [0, 0, 1], [1, 0, 0]], - [[0, 0, 1], [1, 0, 0], [0, 1, 0]], - [[0, 0, 0], [0, 0, 0], [0, 0, 0]], - [[0, 0, 0], [0, 0, 0], [0, 0, 0]], - ] - ), - "image": None, - "image_threshold": 0.0, - }, - [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7]), np.array([]), np.array([])], -] + TESTS.append( + [ + # test empty class + { + "label": p( + np.array( + [ + [[1, 0, 0], [0, 1, 0], [0, 0, 1]], + [[0, 1, 0], [0, 0, 1], [1, 0, 0]], + [[0, 0, 1], [1, 0, 0], [0, 1, 0]], + [[0, 0, 0], [0, 0, 0], [0, 0, 0]], + [[0, 0, 0], [0, 0, 0], [0, 0, 0]], + ] + ) + ), + "image": None, + "image_threshold": 0.0, + }, + [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7]), np.array([]), np.array([])], + ] + ) class TestMapClassesToIndices(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6]) + @parameterized.expand(TESTS) def test_value(self, input_data, 
expected_indices): indices = map_classes_to_indices(**input_data) for i, e in zip(indices, expected_indices): - np.testing.assert_allclose(i, e) + assert_allclose(i, e, type_test=False) if __name__ == "__main__": diff --git a/tests/test_map_label_value.py b/tests/test_map_label_value.py index ff1d7d1eef..388b6db973 100644 --- a/tests/test_map_label_value.py +++ b/tests/test_map_label_value.py @@ -28,11 +28,7 @@ np.array([[[0], [1], [1], [2]]]), ] -TEST_CASE_3 = [ - {"orig_labels": [1, 2, 3], "target_labels": [0, 1, 2]}, - np.array([3, 1, 1, 2]), - np.array([2, 0, 0, 1]), -] +TEST_CASE_3 = [{"orig_labels": [1, 2, 3], "target_labels": [0, 1, 2]}, np.array([3, 1, 1, 2]), np.array([2, 0, 0, 1])] TEST_CASE_4 = [ {"orig_labels": [1, 2, 3], "target_labels": [0.5, 1.5, 2.5]}, @@ -67,16 +63,7 @@ class TestMapLabelValue(unittest.TestCase): @parameterized.expand( - [ - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - TEST_CASE_4, - TEST_CASE_5, - TEST_CASE_6, - TEST_CASE_7, - TEST_CASE_8, - ] + [TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7, TEST_CASE_8] ) def test_shape(self, input_param, input_data, expected_value): result = MapLabelValue(**input_param)(input_data) diff --git a/tests/test_mask_intensity.py b/tests/test_mask_intensity.py index a3662eec49..c2f7d661d6 100644 --- a/tests/test_mask_intensity.py +++ b/tests/test_mask_intensity.py @@ -12,6 +12,7 @@ import unittest import numpy as np +import torch from parameterized import parameterized from monai.transforms import MaskIntensity @@ -43,9 +44,15 @@ np.array([[[0, 0, 0], [2, 2, 2], [0, 0, 0]], [[0, 0, 0], [5, 5, 5], [0, 0, 0]]]), ] +TEST_CASE_5 = [ + {"mask_data": np.array([[[0, 0, 0], [0, 1, 0], [0, 0, 0]], [[0, 1, 0], [0, 1, 0], [0, 1, 0]]])}, + torch.as_tensor([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), + torch.as_tensor([[[0, 0, 0], [0, 2, 0], [0, 0, 0]], [[0, 4, 0], [0, 5, 0], [0, 6, 0]]]), +] + class TestMaskIntensity(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4]) + @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5]) def test_value(self, argments, image, expected_data): result = MaskIntensity(**argments)(image) np.testing.assert_allclose(result, expected_data) diff --git a/tests/test_masked_dice_loss.py b/tests/test_masked_dice_loss.py index b8d69bc8f9..acfdb60ab0 100644 --- a/tests/test_masked_dice_loss.py +++ b/tests/test_masked_dice_loss.py @@ -94,26 +94,17 @@ ], [ # shape: (1, 1, 2, 2), (1, 1, 2, 2) {"include_background": True, "sigmoid": True, "smooth_nr": 1e-6, "smooth_dr": 1e-6}, - { - "input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), - "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]]), - }, + {"input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]])}, 0.307576, ], [ # shape: (1, 1, 2, 2), (1, 1, 2, 2) {"include_background": True, "sigmoid": True, "squared_pred": True, "smooth_nr": 1e-5, "smooth_dr": 1e-5}, - { - "input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), - "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]]), - }, + {"input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]])}, 0.178337, ], [ # shape: (1, 1, 2, 2), (1, 1, 2, 2) {"include_background": True, "sigmoid": True, "jaccard": True, "smooth_nr": 1e-5, "smooth_dr": 1e-5}, - { - "input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), - "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]]), - }, + 
{"input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]])}, 0.470451, ], ] diff --git a/tests/test_masked_inference_wsi_dataset.py b/tests/test_masked_inference_wsi_dataset.py index 361c17e106..f5b2611fd9 100644 --- a/tests/test_masked_inference_wsi_dataset.py +++ b/tests/test_masked_inference_wsi_dataset.py @@ -22,10 +22,10 @@ from monai.utils import optional_import from tests.utils import skip_if_quick -_, has_cim = optional_import("cucim") +_, has_cim = optional_import("cucim", name="CuImage") _, has_osl = optional_import("openslide") -FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff" +FILE_URL = "https://drive.google.com/uc?id=1sGTKZlJBIz53pfqTxoTqiIQzIoEzHLAe" base_name, extension = os.path.splitext(os.path.basename(FILE_URL)) FILE_NAME = "temp_" + base_name FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", FILE_NAME + extension) @@ -50,28 +50,12 @@ def prepare_data(): TEST_CASE_0 = [ - { - "data": [ - {"image": FILE_PATH, "mask": MASK1}, - ], - "patch_size": 1, - "image_reader_name": "cuCIM", - }, - [ - { - "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": FILE_NAME, - "mask_location": [100, 100], - }, - ], + {"data": [{"image": FILE_PATH, "mask": MASK1}], "patch_size": 1, "image_reader_name": "cuCIM"}, + [{"image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), "name": FILE_NAME, "mask_location": [100, 100]}], ] TEST_CASE_1 = [ - { - "data": [{"image": FILE_PATH, "mask": MASK2}], - "patch_size": 1, - "image_reader_name": "cuCIM", - }, + {"data": [{"image": FILE_PATH, "mask": MASK2}], "patch_size": 1, "image_reader_name": "cuCIM"}, [ { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), @@ -87,11 +71,7 @@ def prepare_data(): ] TEST_CASE_2 = [ - { - "data": [{"image": FILE_PATH, "mask": MASK4}], - "patch_size": 1, - "image_reader_name": "cuCIM", - }, + {"data": [{"image": FILE_PATH, "mask": MASK4}], "patch_size": 1, "image_reader_name": "cuCIM"}, [ { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), @@ -117,35 +97,21 @@ def prepare_data(): ] TEST_CASE_3 = [ - { - "data": [ - {"image": FILE_PATH, "mask": MASK1}, - ], - "patch_size": 2, - "image_reader_name": "cuCIM", - }, + {"data": [{"image": FILE_PATH, "mask": MASK1}], "patch_size": 2, "image_reader_name": "cuCIM"}, [ { "image": np.array( - [ - [[243, 243], [243, 243]], - [[243, 243], [243, 243]], - [[243, 243], [243, 243]], - ], - dtype=np.uint8, + [[[243, 243], [243, 243]], [[243, 243], [243, 243]], [[243, 243], [243, 243]]], dtype=np.uint8 ), "name": FILE_NAME, "mask_location": [100, 100], - }, + } ], ] TEST_CASE_4 = [ { - "data": [ - {"image": FILE_PATH, "mask": MASK1}, - {"image": FILE_PATH, "mask": MASK2}, - ], + "data": [{"image": FILE_PATH, "mask": MASK1}, {"image": FILE_PATH, "mask": MASK2}], "patch_size": 1, "image_reader_name": "cuCIM", }, @@ -170,28 +136,12 @@ def prepare_data(): TEST_CASE_OPENSLIDE_0 = [ - { - "data": [ - {"image": FILE_PATH, "mask": MASK1}, - ], - "patch_size": 1, - "image_reader_name": "OpenSlide", - }, - [ - { - "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), - "name": FILE_NAME, - "mask_location": [100, 100], - }, - ], + {"data": [{"image": FILE_PATH, "mask": MASK1}], "patch_size": 1, "image_reader_name": "OpenSlide"}, + [{"image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), "name": FILE_NAME, "mask_location": [100, 100]}], ] TEST_CASE_OPENSLIDE_1 = [ - { - "data": [{"image": FILE_PATH, "mask": MASK2}], - 
"patch_size": 1, - "image_reader_name": "OpenSlide", - }, + {"data": [{"image": FILE_PATH, "mask": MASK2}], "patch_size": 1, "image_reader_name": "OpenSlide"}, [ { "image": np.array([[[243]], [[243]], [[243]]], dtype=np.uint8), @@ -212,27 +162,14 @@ def setUp(self): prepare_data() download_url(FILE_URL, FILE_PATH, "5a3cfd4fd725c50578ddb80b517b759f") - @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - TEST_CASE_4, - ] - ) + @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4]) @skipUnless(has_cim, "Requires CuCIM") @skip_if_quick def test_read_patches_cucim(self, input_parameters, expected): dataset = MaskedInferenceWSIDataset(**input_parameters) self.compare_samples_expected(dataset, expected) - @parameterized.expand( - [ - TEST_CASE_OPENSLIDE_0, - TEST_CASE_OPENSLIDE_1, - ] - ) + @parameterized.expand([TEST_CASE_OPENSLIDE_0, TEST_CASE_OPENSLIDE_1]) @skipUnless(has_osl, "Requires OpenSlide") @skip_if_quick def test_read_patches_openslide(self, input_parameters, expected): diff --git a/tests/test_masked_loss.py b/tests/test_masked_loss.py index 225e3d9668..b56fcbbcdb 100644 --- a/tests/test_masked_loss.py +++ b/tests/test_masked_loss.py @@ -33,7 +33,7 @@ "reduction": "sum", }, [(14.538666, 20.191753), (13.17672, 8.251623)], - ], + ] ] diff --git a/tests/test_mean_ensemble.py b/tests/test_mean_ensemble.py index 7e08846beb..532a6c21c4 100644 --- a/tests/test_mean_ensemble.py +++ b/tests/test_mean_ensemble.py @@ -17,23 +17,11 @@ from monai.transforms import MeanEnsemble -TEST_CASE_1 = [ - {"weights": None}, - [torch.ones(2, 2, 2), torch.ones(2, 2, 2) + 2], - torch.ones(2, 2, 2) + 1, -] +TEST_CASE_1 = [{"weights": None}, [torch.ones(2, 2, 2), torch.ones(2, 2, 2) + 2], torch.ones(2, 2, 2) + 1] -TEST_CASE_2 = [ - {"weights": None}, - torch.stack([torch.ones(2, 2, 2), torch.ones(2, 2, 2) + 2]), - torch.ones(2, 2, 2) + 1, -] +TEST_CASE_2 = [{"weights": None}, torch.stack([torch.ones(2, 2, 2), torch.ones(2, 2, 2) + 2]), torch.ones(2, 2, 2) + 1] -TEST_CASE_3 = [ - {"weights": [1, 3]}, - [torch.ones(2, 2, 2), torch.ones(2, 2, 2) + 2], - torch.ones(2, 2, 2) * 2.5, -] +TEST_CASE_3 = [{"weights": [1, 3]}, [torch.ones(2, 2, 2), torch.ones(2, 2, 2) + 2], torch.ones(2, 2, 2) * 2.5] TEST_CASE_4 = [ {"weights": [[1, 3], [3, 1]]}, diff --git a/tests/test_mlp.py b/tests/test_mlp.py index 7a93f81ec3..b6e78c9a66 100644 --- a/tests/test_mlp.py +++ b/tests/test_mlp.py @@ -24,11 +24,7 @@ for mlp_dim in [512, 1028, 2048, 3072]: test_case = [ - { - "hidden_size": hidden_size, - "mlp_dim": mlp_dim, - "dropout_rate": dropout_rate, - }, + {"hidden_size": hidden_size, "mlp_dim": mlp_dim, "dropout_rate": dropout_rate}, (2, 512, hidden_size), (2, 512, hidden_size), ] diff --git a/tests/test_mmar_download.py b/tests/test_mmar_download.py index 6952e62c3c..725a6a8823 100644 --- a/tests/test_mmar_download.py +++ b/tests/test_mmar_download.py @@ -138,7 +138,7 @@ def test_load_ckpt(self, input_args, expected_name, expected_val): def test_unique(self): # model ids are unique - keys = sorted([m["id"] for m in MODEL_DESC]) + keys = sorted(m["id"] for m in MODEL_DESC) self.assertTrue(keys == sorted(set(keys))) @SkipIfAtLeastPyTorchVersion((1, 6)) diff --git a/tests/test_net_adapter.py b/tests/test_net_adapter.py index b2d55129a7..198de8d142 100644 --- a/tests/test_net_adapter.py +++ b/tests/test_net_adapter.py @@ -19,23 +19,11 @@ device = "cuda" if torch.cuda.is_available() else "cpu" -TEST_CASE_0 = [ - {"num_classes": 1, "use_conv": True, "dim": 2}, - (2, 3, 
224, 224), - (2, 1, 8, 1), -] +TEST_CASE_0 = [{"num_classes": 1, "use_conv": True, "dim": 2}, (2, 3, 224, 224), (2, 1, 8, 1)] -TEST_CASE_1 = [ - {"num_classes": 1, "use_conv": True, "dim": 3, "pool": None}, - (2, 3, 32, 32, 32), - (2, 1, 1, 1, 1), -] +TEST_CASE_1 = [{"num_classes": 1, "use_conv": True, "dim": 3, "pool": None}, (2, 3, 32, 32, 32), (2, 1, 1, 1, 1)] -TEST_CASE_2 = [ - {"num_classes": 5, "use_conv": True, "dim": 3, "pool": None}, - (2, 3, 32, 32, 32), - (2, 5, 1, 1, 1), -] +TEST_CASE_2 = [{"num_classes": 5, "use_conv": True, "dim": 3, "pool": None}, (2, 3, 32, 32, 32), (2, 5, 1, 1, 1)] TEST_CASE_3 = [ {"num_classes": 5, "use_conv": True, "pool": ("avg", {"kernel_size": 4, "stride": 1}), "dim": 3}, diff --git a/tests/test_nifti_rw.py b/tests/test_nifti_rw.py index f16d80659c..ff7f11e47f 100644 --- a/tests/test_nifti_rw.py +++ b/tests/test_nifti_rw.py @@ -19,54 +19,66 @@ from monai.data import write_nifti from monai.transforms import LoadImage, Orientation, Spacing -from tests.utils import make_nifti_image - -TEST_IMAGE = np.arange(24).reshape((2, 4, 3)) -TEST_AFFINE = np.array( - [[-5.3, 0.0, 0.0, 102.01], [0.0, 0.52, 2.17, -7.50], [-0.0, 1.98, -0.26, -23.12], [0.0, 0.0, 0.0, 1.0]] -) - -TEST_CASES = [ - [ - TEST_IMAGE, - TEST_AFFINE, - dict(reader="NibabelReader", image_only=False, as_closest_canonical=True), - np.arange(24).reshape((2, 4, 3)), - ], - [ - TEST_IMAGE, - TEST_AFFINE, - dict(reader="NibabelReader", image_only=True, as_closest_canonical=True), - np.array( +from tests.utils import TEST_NDARRAYS, assert_allclose, make_nifti_image + +TESTS = [] +for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + TEST_IMAGE = p(np.arange(24).reshape((2, 4, 3))) + TEST_AFFINE = q( + np.array( + [[-5.3, 0.0, 0.0, 102.01], [0.0, 0.52, 2.17, -7.50], [-0.0, 1.98, -0.26, -23.12], [0.0, 0.0, 0.0, 1.0]] + ) + ) + TESTS.append( + [ + TEST_IMAGE, + TEST_AFFINE, + dict(reader="NibabelReader", image_only=False, as_closest_canonical=True), + np.arange(24).reshape((2, 4, 3)), + ] + ) + TESTS.append( + [ + TEST_IMAGE, + TEST_AFFINE, + dict(reader="NibabelReader", image_only=True, as_closest_canonical=True), + np.array( + [ + [[12.0, 15.0, 18.0, 21.0], [13.0, 16.0, 19.0, 22.0], [14.0, 17.0, 20.0, 23.0]], + [[0.0, 3.0, 6.0, 9.0], [1.0, 4.0, 7.0, 10.0], [2.0, 5.0, 8.0, 11.0]], + ] + ), + ] + ) + TESTS.append( [ - [[12.0, 15.0, 18.0, 21.0], [13.0, 16.0, 19.0, 22.0], [14.0, 17.0, 20.0, 23.0]], - [[0.0, 3.0, 6.0, 9.0], [1.0, 4.0, 7.0, 10.0], [2.0, 5.0, 8.0, 11.0]], + TEST_IMAGE, + TEST_AFFINE, + dict(reader="NibabelReader", image_only=True, as_closest_canonical=False), + np.arange(24).reshape((2, 4, 3)), ] - ), - ], - [ - TEST_IMAGE, - TEST_AFFINE, - dict(reader="NibabelReader", image_only=True, as_closest_canonical=False), - np.arange(24).reshape((2, 4, 3)), - ], - [ - TEST_IMAGE, - TEST_AFFINE, - dict(reader="NibabelReader", image_only=False, as_closest_canonical=False), - np.arange(24).reshape((2, 4, 3)), - ], - [ - TEST_IMAGE, - None, - dict(reader="NibabelReader", image_only=False, as_closest_canonical=False), - np.arange(24).reshape((2, 4, 3)), - ], -] + ) + TESTS.append( + [ + TEST_IMAGE, + TEST_AFFINE, + dict(reader="NibabelReader", image_only=False, as_closest_canonical=False), + np.arange(24).reshape((2, 4, 3)), + ] + ) + TESTS.append( + [ + TEST_IMAGE, + None, + dict(reader="NibabelReader", image_only=False, as_closest_canonical=False), + np.arange(24).reshape((2, 4, 3)), + ] + ) class TestNiftiLoadRead(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def 
test_orientation(self, array, affine, reader_param, expected): test_image = make_nifti_image(array, affine) @@ -93,8 +105,8 @@ def test_orientation(self, array, affine, reader_param, expected): os.remove(test_image) if affine is not None: - np.testing.assert_allclose(saved_affine, affine) - np.testing.assert_allclose(saved_data, expected) + assert_allclose(saved_affine, affine, type_test=False) + assert_allclose(saved_data, expected, type_test=False) def test_consistency(self): np.set_printoptions(suppress=True, precision=3) @@ -140,69 +152,81 @@ def test_consistency(self): def test_write_2d(self): with tempfile.TemporaryDirectory() as out_dir: image_name = os.path.join(out_dir, "test.nii.gz") - img = np.arange(6).reshape((2, 3)) - write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4])) - out = nib.load(image_name) - np.testing.assert_allclose(out.get_fdata(), [[0, 1, 2], [3.0, 4, 5]]) - np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1])) - - image_name = os.path.join(out_dir, "test1.nii.gz") - img = np.arange(5).reshape((1, 5)) - write_nifti(img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 1, 3, 5])) - out = nib.load(image_name) - np.testing.assert_allclose(out.get_fdata(), [[0, 2, 4]]) - np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 1, 1])) + for p in TEST_NDARRAYS: + img = p(np.arange(6).reshape((2, 3))) + write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4])) + out = nib.load(image_name) + np.testing.assert_allclose(out.get_fdata(), [[0, 1, 2], [3.0, 4, 5]]) + np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1])) + + image_name = os.path.join(out_dir, "test1.nii.gz") + img = np.arange(5).reshape((1, 5)) + write_nifti( + img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 1, 3, 5]) + ) + out = nib.load(image_name) + np.testing.assert_allclose(out.get_fdata(), [[0, 2, 4]]) + np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 1, 1])) def test_write_3d(self): with tempfile.TemporaryDirectory() as out_dir: image_name = os.path.join(out_dir, "test.nii.gz") - img = np.arange(6).reshape((1, 2, 3)) - write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4])) - out = nib.load(image_name) - np.testing.assert_allclose(out.get_fdata(), [[[0, 1, 2], [3, 4, 5]]]) - np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1])) - - image_name = os.path.join(out_dir, "test1.nii.gz") - img = np.arange(5).reshape((1, 1, 5)) - write_nifti(img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 2, 3, 5])) - out = nib.load(image_name) - np.testing.assert_allclose(out.get_fdata(), [[[0, 2, 4]]]) - np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 2, 1])) + for p in TEST_NDARRAYS: + img = p(np.arange(6).reshape((1, 2, 3))) + write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4])) + out = nib.load(image_name) + np.testing.assert_allclose(out.get_fdata(), [[[0, 1, 2], [3, 4, 5]]]) + np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1])) + + image_name = os.path.join(out_dir, "test1.nii.gz") + img = p(np.arange(5).reshape((1, 1, 5))) + write_nifti( + img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 2, 3, 5]) + ) + out = nib.load(image_name) + np.testing.assert_allclose(out.get_fdata(), [[[0, 2, 4]]]) + np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 2, 1])) def test_write_4d(self): with tempfile.TemporaryDirectory() as out_dir: 
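#         # The rewritten write_nifti tests all follow the same TEST_NDARRAYS pattern;
#         # a minimal sketch of it, assuming TEST_NDARRAYS is the tuple of array
#         # constructors from tests/utils.py (numpy and torch variants):
#         #
#         #     for p in TEST_NDARRAYS:  # p wraps a numpy array as the container under test
#         #         img = p(np.arange(6).reshape((1, 2, 3)))
#         #         write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4]))
#         #         out = nib.load(image_name)  # the saved file should not depend on the input type
#         #         np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1]))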
image_name = os.path.join(out_dir, "test.nii.gz") - img = np.arange(6).reshape((1, 1, 3, 2)) - write_nifti(img, image_name, affine=np.diag([1.4, 1]), target_affine=np.diag([1, 1.4, 1])) - out = nib.load(image_name) - np.testing.assert_allclose(out.get_fdata(), [[[[0, 1], [2, 3], [4, 5]]]]) - np.testing.assert_allclose(out.affine, np.diag([1, 1.4, 1, 1])) - - image_name = os.path.join(out_dir, "test1.nii.gz") - img = np.arange(5).reshape((1, 1, 5, 1)) - write_nifti(img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 2, 3, 5])) - out = nib.load(image_name) - np.testing.assert_allclose(out.get_fdata(), [[[[0], [2], [4]]]]) - np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 2, 1])) + for p in TEST_NDARRAYS: + img = p(np.arange(6).reshape((1, 1, 3, 2))) + write_nifti(img, image_name, affine=np.diag([1.4, 1]), target_affine=np.diag([1, 1.4, 1])) + out = nib.load(image_name) + np.testing.assert_allclose(out.get_fdata(), [[[[0, 1], [2, 3], [4, 5]]]]) + np.testing.assert_allclose(out.affine, np.diag([1, 1.4, 1, 1])) + + image_name = os.path.join(out_dir, "test1.nii.gz") + img = p(np.arange(5).reshape((1, 1, 5, 1))) + write_nifti( + img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 2, 3, 5]) + ) + out = nib.load(image_name) + np.testing.assert_allclose(out.get_fdata(), [[[[0], [2], [4]]]]) + np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 2, 1])) def test_write_5d(self): with tempfile.TemporaryDirectory() as out_dir: image_name = os.path.join(out_dir, "test.nii.gz") - img = np.arange(12).reshape((1, 1, 3, 2, 2)) - write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4])) - out = nib.load(image_name) - np.testing.assert_allclose( - out.get_fdata(), - np.array([[[[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]], [[8.0, 9.0], [10.0, 11.0]]]]]), - ) - np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1])) - - image_name = os.path.join(out_dir, "test1.nii.gz") - img = np.arange(10).reshape((1, 1, 5, 1, 2)) - write_nifti(img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 2, 3, 5])) - out = nib.load(image_name) - np.testing.assert_allclose(out.get_fdata(), np.array([[[[[0.0, 1.0]], [[4.0, 5.0]], [[8.0, 9.0]]]]])) - np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 2, 1])) + for p in TEST_NDARRAYS: + img = p(np.arange(12).reshape((1, 1, 3, 2, 2))) + write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4])) + out = nib.load(image_name) + np.testing.assert_allclose( + out.get_fdata(), + np.array([[[[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]], [[8.0, 9.0], [10.0, 11.0]]]]]), + ) + np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1])) + + image_name = os.path.join(out_dir, "test1.nii.gz") + img = p(np.arange(10).reshape((1, 1, 5, 1, 2))) + write_nifti( + img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 2, 3, 5]) + ) + out = nib.load(image_name) + np.testing.assert_allclose(out.get_fdata(), np.array([[[[[0.0, 1.0]], [[4.0, 5.0]], [[8.0, 9.0]]]]])) + np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 2, 1])) if __name__ == "__main__": diff --git a/tests/test_nifti_saver.py b/tests/test_nifti_saver.py index c07084172f..e22a6e6620 100644 --- a/tests/test_nifti_saver.py +++ b/tests/test_nifti_saver.py @@ -36,12 +36,7 @@ def test_saved_content(self): def test_saved_resize_content(self): with tempfile.TemporaryDirectory() as tempdir: - saver = NiftiSaver( - output_dir=tempdir, - 
output_postfix="seg", - output_ext=".nii.gz", - dtype=np.float32, - ) + saver = NiftiSaver(output_dir=tempdir, output_postfix="seg", output_ext=".nii.gz", dtype=np.float32) meta_data = { "filename_or_obj": ["testfile" + str(i) + ".nii" for i in range(8)], @@ -56,12 +51,7 @@ def test_saved_resize_content(self): def test_saved_3d_resize_content(self): with tempfile.TemporaryDirectory() as tempdir: - saver = NiftiSaver( - output_dir=tempdir, - output_postfix="seg", - output_ext=".nii.gz", - dtype=np.float32, - ) + saver = NiftiSaver(output_dir=tempdir, output_postfix="seg", output_ext=".nii.gz", dtype=np.float32) meta_data = { "filename_or_obj": ["testfile" + str(i) + ".nii.gz" for i in range(8)], diff --git a/tests/test_normalize_intensity.py b/tests/test_normalize_intensity.py index 2755eb4c25..41c6b053ec 100644 --- a/tests/test_normalize_intensity.py +++ b/tests/test_normalize_intensity.py @@ -31,51 +31,51 @@ "divisor": u(np.array([0.5, 0.5, 0.5, 0.5])), "nonzero": True, }, - np.array([0.0, 3.0, 0.0, 4.0]), - np.array([0.0, -1.0, 0.0, 1.0]), + p(np.array([0.0, 3.0, 0.0, 4.0])), + p(np.array([0.0, -1.0, 0.0, 1.0])), ] ) - TESTS.append([p, {"nonzero": True}, np.array([0.0, 0.0, 0.0, 0.0]), np.array([0.0, 0.0, 0.0, 0.0])]) - TESTS.append([p, {"nonzero": False}, np.array([0.0, 0.0, 0.0, 0.0]), np.array([0.0, 0.0, 0.0, 0.0])]) - TESTS.append([p, {"nonzero": False}, np.array([1, 1, 1, 1]), np.array([0.0, 0.0, 0.0, 0.0])]) + TESTS.append([p, {"nonzero": True}, p(np.array([0.0, 0.0, 0.0, 0.0])), p(np.array([0.0, 0.0, 0.0, 0.0]))]) + TESTS.append([p, {"nonzero": False}, p(np.array([0.0, 0.0, 0.0, 0.0])), p(np.array([0.0, 0.0, 0.0, 0.0]))]) + TESTS.append([p, {"nonzero": False}, p(np.array([1, 1, 1, 1])), p(np.array([0.0, 0.0, 0.0, 0.0]))]) TESTS.append( [ p, {"nonzero": False, "channel_wise": True, "subtrahend": [1, 2, 3]}, - np.ones((3, 2, 2)), - np.array([[[0.0, 0.0], [0.0, 0.0]], [[-1.0, -1.0], [-1.0, -1.0]], [[-2.0, -2.0], [-2.0, -2.0]]]), + p(np.ones((3, 2, 2))), + p(np.array([[[0.0, 0.0], [0.0, 0.0]], [[-1.0, -1.0], [-1.0, -1.0]], [[-2.0, -2.0], [-2.0, -2.0]]])), ] ) TESTS.append( [ p, {"nonzero": True, "channel_wise": True, "subtrahend": [1, 2, 3], "divisor": [0, 0, 2]}, - np.ones((3, 2, 2)), - np.array([[[0.0, 0.0], [0.0, 0.0]], [[-1.0, -1.0], [-1.0, -1.0]], [[-1.0, -1.0], [-1.0, -1.0]]]), + p(np.ones((3, 2, 2))), + p(np.array([[[0.0, 0.0], [0.0, 0.0]], [[-1.0, -1.0], [-1.0, -1.0]], [[-1.0, -1.0], [-1.0, -1.0]]])), ] ) TESTS.append( [ p, {"nonzero": True, "channel_wise": False, "subtrahend": 2, "divisor": 0}, - np.ones((3, 2, 2)), - np.ones((3, 2, 2)) * -1.0, + p(np.ones((3, 2, 2))), + p(np.ones((3, 2, 2)) * -1.0), ] ) TESTS.append( [ p, {"nonzero": True, "channel_wise": False, "subtrahend": np.ones((3, 2, 2)) * 0.5, "divisor": 0}, - np.ones((3, 2, 2)), - np.ones((3, 2, 2)) * 0.5, + p(np.ones((3, 2, 2))), + p(np.ones((3, 2, 2)) * 0.5), ] ) TESTS.append( [ p, {"nonzero": True, "channel_wise": True, "subtrahend": np.ones((3, 2, 2)) * 0.5, "divisor": [0, 1, 0]}, - np.ones((3, 2, 2)), - np.ones((3, 2, 2)) * 0.5, + p(np.ones((3, 2, 2))), + p(np.ones((3, 2, 2)) * 0.5), ] ) @@ -91,17 +91,14 @@ def test_default(self, im_type): self.assertEqual(im.device, normalized.device) self.assertTrue(normalized.dtype in (np.float32, torch.float32)) expected = (self.imt - np.mean(self.imt)) / np.std(self.imt) - assert_allclose(expected, normalized, rtol=1e-3) + assert_allclose(normalized, expected, type_test=False, rtol=1e-3) @parameterized.expand(TESTS) def test_nonzero(self, in_type, input_param, input_data, 
expected_data): normalizer = NormalizeIntensity(**input_param) im = in_type(input_data) normalized = normalizer(im) - self.assertEqual(type(im), type(normalized)) - if isinstance(normalized, torch.Tensor): - self.assertEqual(im.device, normalized.device) - assert_allclose(expected_data, normalized) + assert_allclose(normalized, in_type(expected_data)) @parameterized.expand([[p] for p in TEST_NDARRAYS]) def test_channel_wise(self, im_type): @@ -109,10 +106,7 @@ def test_channel_wise(self, im_type): input_data = im_type(np.array([[0.0, 3.0, 0.0, 4.0], [0.0, 4.0, 0.0, 5.0]])) expected = np.array([[0.0, -1.0, 0.0, 1.0], [0.0, -1.0, 0.0, 1.0]]) normalized = normalizer(input_data) - self.assertEqual(type(input_data), type(normalized)) - if isinstance(normalized, torch.Tensor): - self.assertEqual(input_data.device, normalized.device) - assert_allclose(expected, normalized) + assert_allclose(normalized, im_type(expected)) @parameterized.expand([[p] for p in TEST_NDARRAYS]) def test_value_errors(self, im_type): diff --git a/tests/test_normalize_intensityd.py b/tests/test_normalize_intensityd.py index e2cec5407a..60b1d05456 100644 --- a/tests/test_normalize_intensityd.py +++ b/tests/test_normalize_intensityd.py @@ -25,7 +25,7 @@ [ {"keys": ["img"], "nonzero": True}, {"img": p(np.array([0.0, 3.0, 0.0, 4.0]))}, - np.array([0.0, -1.0, 0.0, 1.0]), + p(np.array([0.0, -1.0, 0.0, 1.0])), ] ) TESTS.append( @@ -37,14 +37,14 @@ "nonzero": True, }, {"img": p(np.array([0.0, 3.0, 0.0, 4.0]))}, - np.array([0.0, -1.0, 0.0, 1.0]), + p(np.array([0.0, -1.0, 0.0, 1.0])), ] ) TESTS.append( [ {"keys": ["img"], "nonzero": True}, {"img": p(np.array([0.0, 0.0, 0.0, 0.0]))}, - np.array([0.0, 0.0, 0.0, 0.0]), + p(np.array([0.0, 0.0, 0.0, 0.0])), ] ) @@ -60,7 +60,7 @@ def test_image_normalize_intensityd(self, im_type): self.assertEqual(type(im), type(normalized)) if isinstance(normalized, torch.Tensor): self.assertEqual(im.device, normalized.device) - assert_allclose(normalized, expected, rtol=1e-3) + assert_allclose(normalized, im_type(expected), rtol=1e-3) @parameterized.expand(TESTS) def test_nonzero(self, input_param, input_data, expected_data): @@ -82,7 +82,7 @@ def test_channel_wise(self, im_type): if isinstance(normalized, torch.Tensor): self.assertEqual(input_data[key].device, normalized.device) expected = np.array([[0.0, -1.0, 0.0, 1.0], [0.0, -1.0, 0.0, 1.0]]) - assert_allclose(normalized, expected) + assert_allclose(normalized, im_type(expected)) if __name__ == "__main__": diff --git a/tests/test_nvtx_decorator.py b/tests/test_nvtx_decorator.py index e2a9ad67b8..0955fbb712 100644 --- a/tests/test_nvtx_decorator.py +++ b/tests/test_nvtx_decorator.py @@ -31,38 +31,21 @@ _, has_nvtx = optional_import("torch._C._nvtx", descriptor="NVTX is not installed. 
Are you sure you have a CUDA build?") -TEST_CASE_ARRAY_0 = [ - np.random.randn(3, 3), -] -TEST_CASE_ARRAY_1 = [ - np.random.randn(3, 10, 10), -] - -TEST_CASE_DICT_0 = [ - {"image": np.random.randn(3, 3)}, -] -TEST_CASE_DICT_1 = [ - {"image": np.random.randn(3, 10, 10)}, -] - -TEST_CASE_TORCH_0 = [ - torch.randn(3, 3), -] -TEST_CASE_TORCH_1 = [ - torch.randn(3, 10, 10), -] +TEST_CASE_ARRAY_0 = [np.random.randn(3, 3)] +TEST_CASE_ARRAY_1 = [np.random.randn(3, 10, 10)] + +TEST_CASE_DICT_0 = [{"image": np.random.randn(3, 3)}] +TEST_CASE_DICT_1 = [{"image": np.random.randn(3, 10, 10)}] + +TEST_CASE_TORCH_0 = [torch.randn(3, 3)] +TEST_CASE_TORCH_1 = [torch.randn(3, 10, 10)] class TestNVTXRangeDecorator(unittest.TestCase): @parameterized.expand([TEST_CASE_ARRAY_0, TEST_CASE_ARRAY_1]) @unittest.skipUnless(has_nvtx, "CUDA is required for NVTX Range!") def test_tranform_array(self, input): - transforms = Compose( - [ - Range("random flip")(Flip()), - Range()(ToTensor()), - ] - ) + transforms = Compose([Range("random flip")(Flip()), Range()(ToTensor())]) # Apply transforms output = transforms(input) @@ -88,12 +71,7 @@ def test_tranform_array(self, input): @parameterized.expand([TEST_CASE_DICT_0, TEST_CASE_DICT_1]) @unittest.skipUnless(has_nvtx, "CUDA is required for NVTX Range!") def test_tranform_dict(self, input): - transforms = Compose( - [ - Range("random flip dict")(FlipD(keys="image")), - Range()(ToTensorD("image")), - ] - ) + transforms = Compose([Range("random flip dict")(FlipD(keys="image")), Range()(ToTensorD("image"))]) # Apply transforms output = transforms(input)["image"] @@ -161,10 +139,7 @@ def test_tranform_randomized(self, input): @unittest.skipUnless(has_nvtx, "CUDA is required for NVTX Range!") def test_network(self, input): # Create a network - model = torch.nn.Sequential( - torch.nn.ReLU(), - torch.nn.Sigmoid(), - ) + model = torch.nn.Sequential(torch.nn.ReLU(), torch.nn.Sigmoid()) # Forward output = model(input) diff --git a/tests/test_nvtx_transform.py b/tests/test_nvtx_transform.py index 6bcfe00078..36a924dd1c 100644 --- a/tests/test_nvtx_transform.py +++ b/tests/test_nvtx_transform.py @@ -35,29 +35,14 @@ _, has_nvtx = optional_import("torch._C._nvtx", descriptor="NVTX is not installed. 
Are you sure you have a CUDA build?") -TEST_CASE_ARRAY_0 = [ - np.random.randn(3, 3), -] -TEST_CASE_ARRAY_1 = [ - np.random.randn(3, 10, 10), -] -TEST_CASE_DICT_0 = [ - {"image": np.random.randn(3, 3)}, -] -TEST_CASE_DICT_1 = [ - {"image": np.random.randn(3, 10, 10)}, -] +TEST_CASE_ARRAY_0 = [np.random.randn(3, 3)] +TEST_CASE_ARRAY_1 = [np.random.randn(3, 10, 10)] +TEST_CASE_DICT_0 = [{"image": np.random.randn(3, 3)}] +TEST_CASE_DICT_1 = [{"image": np.random.randn(3, 10, 10)}] class TestNVTXTransforms(unittest.TestCase): - @parameterized.expand( - [ - TEST_CASE_ARRAY_0, - TEST_CASE_ARRAY_1, - TEST_CASE_DICT_0, - TEST_CASE_DICT_1, - ] - ) + @parameterized.expand([TEST_CASE_ARRAY_0, TEST_CASE_ARRAY_1, TEST_CASE_DICT_0, TEST_CASE_DICT_1]) @unittest.skipUnless(has_nvtx, "CUDA is required for NVTX!") def test_nvtx_transfroms_alone(self, input): transforms = Compose( diff --git a/tests/test_occlusion_sensitivity.py b/tests/test_occlusion_sensitivity.py index d58359a598..18da4057ab 100644 --- a/tests/test_occlusion_sensitivity.py +++ b/tests/test_occlusion_sensitivity.py @@ -29,47 +29,27 @@ # 2D w/ bounding box TEST_CASE_0 = [ - { - "nn_module": model_2d, - }, - { - "x": torch.rand(1, 1, 48, 64).to(device), - "b_box": [-1, -1, 2, 40, 1, 62], - }, + {"nn_module": model_2d}, + {"x": torch.rand(1, 1, 48, 64).to(device), "b_box": [-1, -1, 2, 40, 1, 62]}, (1, 1, 39, 62, out_channels_2d), (1, 1, 39, 62), ] # 3D w/ bounding box and stride TEST_CASE_1 = [ {"nn_module": model_3d, "n_batch": 10, "stride": (2, 1, 2), "mask_size": (16, 15, 14)}, - { - "x": torch.rand(1, 1, 6, 6, 6).to(device), - "b_box": [-1, -1, 2, 3, -1, -1, -1, -1], - }, + {"x": torch.rand(1, 1, 6, 6, 6).to(device), "b_box": [-1, -1, 2, 3, -1, -1, -1, -1]}, (1, 1, 2, 6, 6, out_channels_3d), (1, 1, 2, 6, 6), ] TEST_CASE_FAIL_0 = [ # 2D should fail, since 3 stride values given - { - "nn_module": model_2d, - "n_batch": 10, - "stride": (2, 2, 2), - }, - { - "x": torch.rand(1, 1, 48, 64).to(device), - "b_box": [-1, -1, 2, 3, -1, -1], - }, + {"nn_module": model_2d, "n_batch": 10, "stride": (2, 2, 2)}, + {"x": torch.rand(1, 1, 48, 64).to(device), "b_box": [-1, -1, 2, 3, -1, -1]}, ] TEST_CASE_FAIL_1 = [ # 2D should fail, since stride is not a factor of image size - { - "nn_module": model_2d, - "stride": 3, - }, - { - "x": torch.rand(1, 1, 48, 64).to(device), - }, + {"nn_module": model_2d, "stride": 3}, + {"x": torch.rand(1, 1, 48, 64).to(device)}, ] diff --git a/tests/test_one_of.py b/tests/test_one_of.py index d45d0f3f61..9fe9f193a3 100644 --- a/tests/test_one_of.py +++ b/tests/test_one_of.py @@ -93,9 +93,7 @@ def __init__(self, keys): self.inv_fn = lambda x: x - 100 -TESTS = [ - ((X(), Y(), X()), (1, 2, 1), (0.25, 0.5, 0.25)), -] +TESTS = [((X(), Y(), X()), (1, 2, 1), (0.25, 0.5, 0.25))] KEYS = ["x", "y"] TEST_INVERSES = [ diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py index c0b395fd02..cf092158f2 100644 --- a/tests/test_openslide_reader.py +++ b/tests/test_openslide_reader.py @@ -24,7 +24,7 @@ _, has_osl = optional_import("openslide") -FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff" +FILE_URL = "https://drive.google.com/uc?id=1sGTKZlJBIz53pfqTxoTqiIQzIoEzHLAe" FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + os.path.basename(FILE_URL)) HEIGHT = 32914 @@ -46,13 +46,7 @@ TEST_CASE_3 = [ FILE_PATH, - { - "location": (0, 0), - "size": (8, 8), - "level": 2, - "grid_shape": (2, 1), - "patch_size": 2, - }, + {"location": (0, 0), "size": (8, 8), 
"level": 2, "grid_shape": (2, 1), "patch_size": 2}, np.array( [ [[[239, 239], [239, 239]], [[239, 239], [239, 239]], [[239, 239], [239, 239]]], @@ -63,13 +57,7 @@ TEST_CASE_4 = [ FILE_PATH, - { - "location": (0, 0), - "size": (8, 8), - "level": 2, - "grid_shape": (2, 1), - "patch_size": 1, - }, + {"location": (0, 0), "size": (8, 8), "level": 2, "grid_shape": (2, 1), "patch_size": 1}, np.array([[[[239]], [[239]], [[239]]], [[[243]], [[243]], [[243]]]]), ] diff --git a/tests/test_optim_novograd.py b/tests/test_optim_novograd.py index c76501cd6f..35f54d67ae 100644 --- a/tests/test_optim_novograd.py +++ b/tests/test_optim_novograd.py @@ -38,37 +38,19 @@ def build_test_cases(data): return test_cases -TEST_CASES_ALL = build_test_cases( # normal parameters - [ - torch.randn(10, 5), - torch.randn(10), - torch.randn(5), - ] -) +TEST_CASES_ALL = build_test_cases([torch.randn(10, 5), torch.randn(10), torch.randn(5)]) # normal parameters TEST_CASES_ALL += build_test_cases( # non-contiguous parameters - [ - torch.randn(10, 5, 2)[..., 0], - torch.randn(10, 2)[..., 0], - torch.randn(5), - ] + [torch.randn(10, 5, 2)[..., 0], torch.randn(10, 2)[..., 0], torch.randn(5)] ) if torch.cuda.is_available(): TEST_CASES_ALL += build_test_cases( # gpu parameters - [ - torch.randn(10, 5).cuda(), - torch.randn(10).cuda(), - torch.randn(5).cuda(), - ] + [torch.randn(10, 5).cuda(), torch.randn(10).cuda(), torch.randn(5).cuda()] ) if torch.cuda.device_count() > 1: TEST_CASES_ALL += build_test_cases( # multi-gpu parameters - [ - torch.randn(10, 5).cuda(0), - torch.randn(10).cuda(1), - torch.randn(5).cuda(0), - ] + [torch.randn(10, 5).cuda(0), torch.randn(10).cuda(1), torch.randn(5).cuda(0)] ) diff --git a/tests/test_pad_collation.py b/tests/test_pad_collation.py index a8c544558f..eda36f4761 100644 --- a/tests/test_pad_collation.py +++ b/tests/test_pad_collation.py @@ -20,6 +20,7 @@ from monai.data import CacheDataset, DataLoader from monai.data.utils import decollate_batch, pad_list_data_collate from monai.transforms import ( + Compose, PadListDataCollate, RandRotate, RandRotate90, @@ -29,24 +30,26 @@ RandSpatialCropd, RandZoom, RandZoomd, + ToTensor, + ToTensord, ) from monai.utils import set_determinism TESTS: List[Tuple] = [] for pad_collate in [ - lambda x: pad_list_data_collate(batch=x, method="end", mode="constant", constant_values=1), - PadListDataCollate(method="end", mode="constant", constant_values=1), + lambda x: pad_list_data_collate(batch=x, method="end", mode="constant"), + PadListDataCollate(method="end", mode="constant"), ]: TESTS.append((dict, pad_collate, RandSpatialCropd("image", roi_size=[8, 7], random_size=True))) TESTS.append((dict, pad_collate, RandRotated("image", prob=1, range_x=np.pi, keep_size=False))) TESTS.append((dict, pad_collate, RandZoomd("image", prob=1, min_zoom=1.1, max_zoom=2.0, keep_size=False))) - TESTS.append((dict, pad_collate, RandRotate90d("image", prob=1, max_k=2))) + TESTS.append((dict, pad_collate, Compose([RandRotate90d("image", prob=1, max_k=2), ToTensord("image")]))) TESTS.append((list, pad_collate, RandSpatialCrop(roi_size=[8, 7], random_size=True))) TESTS.append((list, pad_collate, RandRotate(prob=1, range_x=np.pi, keep_size=False))) TESTS.append((list, pad_collate, RandZoom(prob=1, min_zoom=1.1, max_zoom=2.0, keep_size=False))) - TESTS.append((list, pad_collate, RandRotate90(prob=1, max_k=2))) + TESTS.append((list, pad_collate, Compose([RandRotate90(prob=1, max_k=2), ToTensor()]))) class _Dataset(torch.utils.data.Dataset): diff --git a/tests/test_partition_dataset.py 
b/tests/test_partition_dataset.py index a954bfae91..b036cd6827 100644 --- a/tests/test_partition_dataset.py +++ b/tests/test_partition_dataset.py @@ -117,16 +117,7 @@ class TestPartitionDataset(unittest.TestCase): @parameterized.expand( - [ - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - TEST_CASE_4, - TEST_CASE_5, - TEST_CASE_6, - TEST_CASE_7, - TEST_CASE_8, - ] + [TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7, TEST_CASE_8] ) def test_value(self, input_param, result): self.assertListEqual(partition_dataset(**input_param), result) diff --git a/tests/test_patch_dataset.py b/tests/test_patch_dataset.py index 4f6e9a25fd..40e8bbb20a 100644 --- a/tests/test_patch_dataset.py +++ b/tests/test_patch_dataset.py @@ -59,7 +59,7 @@ def test_loading_array(self): np.testing.assert_allclose( item[0], np.array( - [[[1.779992, 2.779992, 3.779992], [5.779992, 6.779992, 7.779992], [9.779992, 10.779992, 11.779992]]] + [[[1.338681, 2.338681, 3.338681], [5.338681, 6.338681, 7.338681], [9.338681, 10.338681, 11.338681]]] ), rtol=1e-5, ) @@ -71,9 +71,9 @@ def test_loading_array(self): np.array( [ [ - [5.025618, 6.025618, 7.025618], - [9.025618, 10.025618, 11.025618], - [13.025618, 14.025618, 15.025618], + [4.957847, 5.957847, 6.957847], + [8.957847, 9.957847, 10.957847], + [12.957847, 13.957847, 14.957847], ] ] ), diff --git a/tests/test_patch_wsi_dataset.py b/tests/test_patch_wsi_dataset.py index f775f28376..9259cd0e7b 100644 --- a/tests/test_patch_wsi_dataset.py +++ b/tests/test_patch_wsi_dataset.py @@ -21,25 +21,22 @@ from monai.apps.utils import download_url from monai.utils import optional_import -_, has_cim = optional_import("cucim") +_cucim, has_cim = optional_import("cucim") +has_cim = has_cim and hasattr(_cucim, "CuImage") _, has_osl = optional_import("openslide") -FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff" +FILE_URL = "https://drive.google.com/uc?id=1sGTKZlJBIz53pfqTxoTqiIQzIoEzHLAe" FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + os.path.basename(FILE_URL)) TEST_CASE_0 = [ { - "data": [ - {"image": FILE_PATH, "location": [0, 0], "label": [1]}, - ], + "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "region_size": (1, 1), "grid_shape": (1, 1), "patch_size": 1, "image_reader_name": "cuCIM", }, - [ - {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}, - ], + [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}], ] TEST_CASE_1 = [ @@ -60,47 +57,35 @@ TEST_CASE_2 = [ { - "data": [ - {"image": FILE_PATH, "location": [0, 0], "label": [1]}, - ], + "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "region_size": 1, "grid_shape": 1, "patch_size": 1, "image_reader_name": "cuCIM", }, - [ - {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}, - ], + [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}], ] TEST_CASE_3 = [ { - "data": [ - {"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}, - ], + "data": [{"image": FILE_PATH, "location": [0, 0], "label": [[[0, 1], [1, 0]]]}], "region_size": 1, "grid_shape": 1, "patch_size": 1, "image_reader_name": "cuCIM", }, - [ - {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}, - ], + [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[0, 1], [1, 0]]])}], ] 
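# The two-step cucim check at the top of this file only enables the cuCIM cases
# when the installed cucim actually exposes CuImage; a self-contained sketch of
# the pattern (the skip message is illustrative):

from unittest import skipUnless

from monai.utils import optional_import

_cucim, has_cim = optional_import("cucim")        # (module or stub, import-succeeded flag)
has_cim = has_cim and hasattr(_cucim, "CuImage")  # some builds import fine but lack CuImage


@skipUnless(has_cim, "Requires cuCIM with CuImage")
def test_read_patches_cucim():
    ...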
TEST_CASE_OPENSLIDE_0 = [ { - "data": [ - {"image": FILE_PATH, "location": [0, 0], "label": [1]}, - ], + "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}], "region_size": (1, 1), "grid_shape": (1, 1), "patch_size": 1, "image_reader_name": "OpenSlide", }, - [ - {"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}, - ], + [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}], ] TEST_CASE_OPENSLIDE_1 = [ @@ -124,14 +109,7 @@ class TestPatchWSIDataset(unittest.TestCase): def setUp(self): download_url(FILE_URL, FILE_PATH, "5a3cfd4fd725c50578ddb80b517b759f") - @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - ] - ) + @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) @skipUnless(has_cim, "Requires CuCIM") def test_read_patches_cucim(self, input_parameters, expected): dataset = PatchWSIDataset(**input_parameters) @@ -142,12 +120,7 @@ def test_read_patches_cucim(self, input_parameters, expected): self.assertIsNone(assert_array_equal(samples[i]["label"], expected[i]["label"])) self.assertIsNone(assert_array_equal(samples[i]["image"], expected[i]["image"])) - @parameterized.expand( - [ - TEST_CASE_OPENSLIDE_0, - TEST_CASE_OPENSLIDE_1, - ] - ) + @parameterized.expand([TEST_CASE_OPENSLIDE_0, TEST_CASE_OPENSLIDE_1]) @skipUnless(has_osl, "Requires OpenSlide") def test_read_patches_openslide(self, input_parameters, expected): dataset = PatchWSIDataset(**input_parameters) diff --git a/tests/test_pathology_he_stain.py b/tests/test_pathology_he_stain.py index 1d74f485e9..7f76c3f03e 100644 --- a/tests/test_pathology_he_stain.py +++ b/tests/test_pathology_he_stain.py @@ -73,12 +73,7 @@ class TestExtractHEStains(unittest.TestCase): @parameterized.expand( - [ - NEGATIVE_VALUE_TEST_CASE, - INVALID_VALUE_TEST_CASE, - EXTRACT_STAINS_TEST_CASE_0, - EXTRACT_STAINS_TEST_CASE_1, - ] + [NEGATIVE_VALUE_TEST_CASE, INVALID_VALUE_TEST_CASE, EXTRACT_STAINS_TEST_CASE_0, EXTRACT_STAINS_TEST_CASE_1] ) def test_transparent_image(self, image): """ @@ -112,13 +107,7 @@ def test_identical_result_vectors(self, image): result = ExtractHEStains()(image) np.testing.assert_array_equal(result[:, 0], result[:, 1]) - @parameterized.expand( - [ - EXTRACT_STAINS_TEST_CASE_00, - EXTRACT_STAINS_TEST_CASE_4, - EXTRACT_STAINS_TEST_CASE_5, - ] - ) + @parameterized.expand([EXTRACT_STAINS_TEST_CASE_00, EXTRACT_STAINS_TEST_CASE_4, EXTRACT_STAINS_TEST_CASE_5]) def test_result_value(self, image, expected_data): """ Test that an input image returns an expected stain matrix. 
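# The test_result_value cases above compare extracted stain matrices against
# precomputed values; a minimal usage sketch, assuming the import path
# monai.apps.pathology.transforms, an RGB image shape in the style of the
# surrounding cases, and a 3x2 return (RGB rows, one column each for
# haematoxylin and eosin). All three are assumptions, not confirmed by this diff:

import numpy as np

from monai.apps.pathology.transforms import ExtractHEStains  # import path assumed

# uniform RGB patch, dark enough not to be rejected as transparent
# (the transparent-image cases above expect an error for near-white input)
image = np.full((3, 2, 3), 100, dtype=np.uint8)
stain_matrix = ExtractHEStains()(image)
print(stain_matrix.shape)  # expected (3, 2): one unit-norm stain vector per column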
@@ -156,12 +145,7 @@ def test_result_value(self, image, expected_data): class TestNormalizeHEStains(unittest.TestCase): @parameterized.expand( - [ - NEGATIVE_VALUE_TEST_CASE, - INVALID_VALUE_TEST_CASE, - NORMALIZE_STAINS_TEST_CASE_0, - NORMALIZE_STAINS_TEST_CASE_1, - ] + [NEGATIVE_VALUE_TEST_CASE, INVALID_VALUE_TEST_CASE, NORMALIZE_STAINS_TEST_CASE_0, NORMALIZE_STAINS_TEST_CASE_1] ) def test_transparent_image(self, image): """ diff --git a/tests/test_pathology_he_stain_dict.py b/tests/test_pathology_he_stain_dict.py index 8d51579cb2..2ba2c3f71b 100644 --- a/tests/test_pathology_he_stain_dict.py +++ b/tests/test_pathology_he_stain_dict.py @@ -100,13 +100,7 @@ def test_identical_result_vectors(self, image): result = ExtractHEStainsD([key])({key: image}) np.testing.assert_array_equal(result[key][:, 0], result[key][:, 1]) - @parameterized.expand( - [ - EXTRACT_STAINS_TEST_CASE_00, - EXTRACT_STAINS_TEST_CASE_4, - EXTRACT_STAINS_TEST_CASE_5, - ] - ) + @parameterized.expand([EXTRACT_STAINS_TEST_CASE_00, EXTRACT_STAINS_TEST_CASE_4, EXTRACT_STAINS_TEST_CASE_5]) def test_result_value(self, image, expected_data): """ Test that an input image returns an expected stain matrix. diff --git a/tests/test_pathology_prob_nms.py b/tests/test_pathology_prob_nms.py index 223b136ea7..879ca88821 100644 --- a/tests/test_pathology_prob_nms.py +++ b/tests/test_pathology_prob_nms.py @@ -41,12 +41,7 @@ class TestPathologyProbNMS(unittest.TestCase): - @parameterized.expand( - [ - TEST_CASES_2D, - TEST_CASES_3D, - ] - ) + @parameterized.expand([TEST_CASES_2D, TEST_CASES_3D]) def test_output(self, class_args, call_args, probs_map, expected): nms = PathologyProbNMS(**class_args) output = nms(probs_map, **call_args) diff --git a/tests/test_phl_cpu.py b/tests/test_phl_cpu.py index 31e28bd39d..3583c4e996 100644 --- a/tests/test_phl_cpu.py +++ b/tests/test_phl_cpu.py @@ -42,7 +42,7 @@ # Batch 0 [ # Channel 0 - [1, 0.2, 0.5, 0, 1], + [1, 0.2, 0.5, 0, 1] ], # Batch 1 [ @@ -79,15 +79,15 @@ [0, 0, 0, 0, 1], # Channel 2 [0, 0, 1, 0, 0], - ], + ] ], # Features [ # Batch 0 [ # Channel 0 - [1, 0.2, 0.5, 0.2, 1], - ], + [1, 0.2, 0.5, 0.2, 1] + ] ], # Expected [ @@ -99,7 +99,7 @@ [0.229572, 0.182884, 0.202637, 0.182884, 0.229572], # Channel 2 [0.201235, 0.208194, 0.205409, 0.208194, 0.201235], - ], + ] ], ], [ @@ -113,7 +113,7 @@ [ # Channel 0 [[9, 9, 0, 0, 0], [9, 9, 0, 0, 0], [9, 9, 0, 0, 0], [9, 9, 6, 6, 6], [9, 9, 6, 6, 6]] - ], + ] ], # Features [ @@ -125,7 +125,7 @@ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], # Channel 2 [[0, 0, 0, 0, 0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4, 4, 4, 4, 4]], - ], + ] ], # Expected [ @@ -139,7 +139,7 @@ [7.613517, 7.359183, 5.846500, 5.638952, 5.350098], [7.598255, 7.458446, 5.912375, 5.583625, 5.233126], ] - ], + ] ], ], [ @@ -164,7 +164,7 @@ # Frame 4 [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], ] - ], + ] ], # Features [ @@ -183,7 +183,7 @@ # Frame 4 [[0, 0, 5, 5, 5], [0, 0, 5, 5, 5], [0, 0, 5, 5, 5], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], ] - ], + ] ], # Expected [ @@ -232,7 +232,7 @@ [0.284234, 0.284234, 0.284234, 0.284234, 0.284234], ], ] - ], + ] ], ], ] diff --git a/tests/test_phl_cuda.py b/tests/test_phl_cuda.py index 8f7fc6fc3d..4ba47e4fca 100644 --- a/tests/test_phl_cuda.py +++ b/tests/test_phl_cuda.py @@ -42,7 +42,7 @@ # Batch 0 [ # Channel 0 - [1, 0.2, 0.5, 0, 1], + [1, 0.2, 0.5, 0, 1] ], # Batch 1 [ @@ -79,15 +79,15 @@ [0, 0, 0, 0, 1], # Channel 2 [0, 0, 1, 0, 0], - ], + ] ], # Features [ 
# Batch 0 [ # Channel 0 - [1, 0.2, 0.5, 0.2, 1], - ], + [1, 0.2, 0.5, 0.2, 1] + ] ], # Expected [ @@ -99,7 +99,7 @@ [0.229572, 0.182884, 0.202637, 0.182884, 0.229572], # Channel 2 [0.201235, 0.208194, 0.205409, 0.208194, 0.201235], - ], + ] ], ], [ @@ -113,7 +113,7 @@ [ # Channel 0 [[9, 9, 0, 0, 0], [9, 9, 0, 0, 0], [9, 9, 0, 0, 0], [9, 9, 6, 6, 6], [9, 9, 6, 6, 6]] - ], + ] ], # Features [ @@ -125,7 +125,7 @@ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], # Channel 2 [[0, 0, 0, 0, 0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 3, 3, 3, 3], [4, 4, 4, 4, 4]], - ], + ] ], # Expected [ @@ -139,7 +139,7 @@ [7.712976, 7.429060, 5.789552, 5.594258, 5.371737], [7.701185, 7.492719, 5.860026, 5.538241, 5.281656], ] - ], + ] ], ], ] diff --git a/tests/test_png_saver.py b/tests/test_png_saver.py index f8ea1df54b..e807cf2927 100644 --- a/tests/test_png_saver.py +++ b/tests/test_png_saver.py @@ -60,11 +60,7 @@ def test_saved_specified_root(self): with tempfile.TemporaryDirectory() as tempdir: saver = PNGSaver( - output_dir=tempdir, - output_postfix="seg", - output_ext=".png", - scale=255, - data_root_dir="test", + output_dir=tempdir, output_postfix="seg", output_ext=".png", scale=255, data_root_dir="test" ) meta_data = { diff --git a/tests/test_probnms.py b/tests/test_probnms.py index e51d1017d8..cc055d883c 100644 --- a/tests/test_probnms.py +++ b/tests/test_probnms.py @@ -24,32 +24,20 @@ probs_map_2[33, 33] = 0.7 probs_map_2[66, 66] = 0.9 expected_2 = [[0.9, 66, 66], [0.7, 33, 33]] -TEST_CASES_2D_2 = [ - {"spatial_dims": 2, "prob_threshold": 0.5, "box_size": [10, 10]}, - probs_map_2, - expected_2, -] +TEST_CASES_2D_2 = [{"spatial_dims": 2, "prob_threshold": 0.5, "box_size": [10, 10]}, probs_map_2, expected_2] probs_map_3 = np.random.rand(100, 100).clip(0, 0.5) probs_map_3[56, 58] = 0.7 probs_map_3[60, 66] = 0.8 probs_map_3[66, 66] = 0.9 expected_3 = [[0.9, 66, 66], [0.8, 60, 66]] -TEST_CASES_2D_3 = [ - {"spatial_dims": 2, "prob_threshold": 0.5, "box_size": (10, 20)}, - probs_map_3, - expected_3, -] +TEST_CASES_2D_3 = [{"spatial_dims": 2, "prob_threshold": 0.5, "box_size": (10, 20)}, probs_map_3, expected_3] probs_map_4 = np.random.rand(100, 100).clip(0, 0.5) probs_map_4[33, 33] = 0.7 probs_map_4[66, 66] = 0.9 expected_4 = [[0.9, 66, 66]] -TEST_CASES_2D_4 = [ - {"spatial_dims": 2, "prob_threshold": 0.8, "box_size": 10}, - probs_map_4, - expected_4, -] +TEST_CASES_2D_4 = [{"spatial_dims": 2, "prob_threshold": 0.8, "box_size": 10}, probs_map_4, expected_4] probs_map_5 = np.random.rand(100, 100).clip(0, 0.5) TEST_CASES_2D_5 = [{"spatial_dims": 2, "prob_threshold": 0.5, "sigma": 0.1}, probs_map_5, []] @@ -63,21 +51,13 @@ if torch.cuda.is_available(): probs_map_7 = probs_map_7.cuda() expected_7 = [[0.9, 66, 66], [0.7, 33, 33]] -TEST_CASES_2D_7 = [ - {"spatial_dims": 2, "prob_threshold": 0.5, "sigma": 0.1}, - probs_map_7, - expected_7, -] +TEST_CASES_2D_7 = [{"spatial_dims": 2, "prob_threshold": 0.5, "sigma": 0.1}, probs_map_7, expected_7] probs_map_3d = torch.rand([50, 50, 50]).uniform_(0, 0.5) probs_map_3d[25, 25, 25] = 0.7 probs_map_3d[45, 45, 45] = 0.9 expected_3d = [[0.9, 45, 45, 45], [0.7, 25, 25, 25]] -TEST_CASES_3D = [ - {"spatial_dims": 3, "prob_threshold": 0.5, "box_size": (10, 10, 10)}, - probs_map_3d, - expected_3d, -] +TEST_CASES_3D = [{"spatial_dims": 3, "prob_threshold": 0.5, "box_size": (10, 10, 10)}, probs_map_3d, expected_3d] class TestProbNMS(unittest.TestCase): diff --git a/tests/test_probnmsd.py b/tests/test_probnmsd.py index 5b75d4310f..b0b3227531 
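The collapsed cases above keep the same [transform kwargs, probability map, expected peaks] triple on one line each; for orientation, a self-contained sketch of the call pattern these cases drive. The ProbNMS import path is an assumption, since the test's import block is outside this hunk.

    import numpy as np
    from monai.transforms import ProbNMS  # import path assumed

    probs_map = np.random.rand(100, 100).clip(0, 0.5)
    probs_map[33, 33] = 0.7
    probs_map[66, 66] = 0.9

    # peaks above prob_threshold, suppressed within a box_size window,
    # returned as [probability, *coordinates], highest probability first
    nms = ProbNMS(spatial_dims=2, prob_threshold=0.5, box_size=[10, 10])
    print(nms(probs_map))  # -> [[0.9, 66, 66], [0.7, 33, 33]]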
100644 --- a/tests/test_probnmsd.py +++ b/tests/test_probnmsd.py @@ -45,11 +45,7 @@ probs_map_4[33, 33] = 0.7 probs_map_4[66, 66] = 0.9 expected_4 = [[0.9, 66, 66]] -TEST_CASES_2D_4 = [ - {"spatial_dims": 2, "prob_threshold": 0.8, "box_size": 10}, - {"prob_map": probs_map_4}, - expected_4, -] +TEST_CASES_2D_4 = [{"spatial_dims": 2, "prob_threshold": 0.8, "box_size": 10}, {"prob_map": probs_map_4}, expected_4] probs_map_5 = np.random.rand(100, 100).clip(0, 0.5) TEST_CASES_2D_5 = [{"spatial_dims": 2, "prob_threshold": 0.5, "sigma": 0.1}, {"prob_map": probs_map_5}, []] @@ -63,11 +59,7 @@ if torch.cuda.is_available(): probs_map_7 = probs_map_7.cuda() expected_7 = [[0.9, 66, 66], [0.7, 33, 33]] -TEST_CASES_2D_7 = [ - {"spatial_dims": 2, "prob_threshold": 0.5, "sigma": 0.1}, - {"prob_map": probs_map_7}, - expected_7, -] +TEST_CASES_2D_7 = [{"spatial_dims": 2, "prob_threshold": 0.5, "sigma": 0.1}, {"prob_map": probs_map_7}, expected_7] probs_map_3d = torch.rand([50, 50, 50]).uniform_(0, 0.5) probs_map_3d[25, 25, 25] = 0.7 diff --git a/tests/test_rand_adjust_contrast.py b/tests/test_rand_adjust_contrast.py index d7d750957d..db408dda42 100644 --- a/tests/test_rand_adjust_contrast.py +++ b/tests/test_rand_adjust_contrast.py @@ -15,7 +15,7 @@ from parameterized import parameterized from monai.transforms import RandAdjustContrast -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose TEST_CASE_1 = [(0.5, 4.5)] @@ -26,14 +26,16 @@ class TestRandAdjustContrast(NumpyImageTestCase2D): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_correct_results(self, gamma): adjuster = RandAdjustContrast(prob=1.0, gamma=gamma) - result = adjuster(self.imt) - epsilon = 1e-7 - img_min = self.imt.min() - img_range = self.imt.max() - img_min - expected = ( - np.power(((self.imt - img_min) / float(img_range + epsilon)), adjuster.gamma_value) * img_range + img_min - ) - np.testing.assert_allclose(expected, result, rtol=1e-05) + for p in TEST_NDARRAYS: + result = adjuster(p(self.imt)) + epsilon = 1e-7 + img_min = self.imt.min() + img_range = self.imt.max() - img_min + expected = ( + np.power(((self.imt - img_min) / float(img_range + epsilon)), adjuster.gamma_value) * img_range + + img_min + ) + assert_allclose(expected, result, rtol=1e-05, type_test=False) if __name__ == "__main__": diff --git a/tests/test_rand_adjust_contrastd.py b/tests/test_rand_adjust_contrastd.py index e4b61293bb..87a3752b26 100644 --- a/tests/test_rand_adjust_contrastd.py +++ b/tests/test_rand_adjust_contrastd.py @@ -15,7 +15,7 @@ from parameterized import parameterized from monai.transforms import RandAdjustContrastd -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose TEST_CASE_1 = [(0.5, 4.5)] @@ -26,14 +26,16 @@ class TestRandAdjustContrastd(NumpyImageTestCase2D): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_correct_results(self, gamma): adjuster = RandAdjustContrastd("img", prob=1.0, gamma=gamma) - result = adjuster({"img": self.imt}) - epsilon = 1e-7 - img_min = self.imt.min() - img_range = self.imt.max() - img_min - expected = ( - np.power(((self.imt - img_min) / float(img_range + epsilon)), adjuster.gamma_value) * img_range + img_min - ) - np.testing.assert_allclose(expected, result["img"], rtol=1e-05) + for p in TEST_NDARRAYS: + result = adjuster({"img": p(self.imt)}) + epsilon = 1e-7 + img_min = self.imt.min() + img_range = self.imt.max() - img_min + expected = ( + 
np.power(((self.imt - img_min) / float(img_range + epsilon)), adjuster.adjuster.gamma_value) * img_range + + img_min + ) + assert_allclose(expected, result["img"], rtol=1e-05, type_test=False) if __name__ == "__main__": diff --git a/tests/test_rand_affine.py b/tests/test_rand_affine.py index 1e1a23bc09..4cef6b4d44 100644 --- a/tests/test_rand_affine.py +++ b/tests/test_rand_affine.py @@ -16,114 +16,130 @@ from parameterized import parameterized from monai.transforms import RandAffine +from monai.utils.type_conversion import convert_data_type +from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env -TEST_CASES = [ - [ - dict(as_tensor_output=False, device=None), - {"img": torch.arange(27).reshape((3, 3, 3))}, - np.arange(27).reshape((3, 3, 3)), - ], - [ - dict(as_tensor_output=False, device=None, spatial_size=-1), - {"img": torch.arange(27).reshape((3, 3, 3))}, - np.arange(27).reshape((3, 3, 3)), - ], - [ - dict(as_tensor_output=False, device=None), - {"img": torch.arange(27).reshape((3, 3, 3)), "spatial_size": (2, 2)}, - np.array([[[2.0, 3.0], [5.0, 6.0]], [[11.0, 12.0], [14.0, 15.0]], [[20.0, 21.0], [23.0, 24.0]]]), - ], - [ - dict(as_tensor_output=True, device=None), - {"img": torch.ones((1, 3, 3, 3)), "spatial_size": (2, 2, 2)}, - torch.ones((1, 2, 2, 2)), - ], - [ - dict(as_tensor_output=True, device=None, spatial_size=(2, 2, 2), cache_grid=True), - {"img": torch.ones((1, 3, 3, 3))}, - torch.ones((1, 2, 2, 2)), - ], - [ - dict( - prob=0.9, - rotate_range=(np.pi / 2,), - shear_range=[1, 2], - translate_range=[2, 1], - as_tensor_output=True, - padding_mode="zeros", - spatial_size=(2, 2, 2), - device=None, - ), - {"img": torch.ones((1, 3, 3, 3)), "mode": "bilinear"}, - torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]]), - ], - [ - dict( - prob=0.9, - rotate_range=(np.pi / 2,), - shear_range=[1, 2], - translate_range=[2, 1], - as_tensor_output=True, - padding_mode="zeros", - spatial_size=(2, 2, 2), - cache_grid=True, - device=None, - ), - {"img": torch.ones((1, 3, 3, 3)), "mode": "bilinear"}, - torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]]), - ], - [ - dict( - prob=0.9, - rotate_range=(np.pi / 2,), - shear_range=[1, 2], - translate_range=[2, 1], - scale_range=[0.1, 0.2], - as_tensor_output=True, - device=None, - ), - {"img": torch.arange(64).reshape((1, 8, 8)), "spatial_size": (3, 3)}, - torch.tensor([[[18.7362, 15.5820, 12.4278], [27.3988, 24.2446, 21.0904], [36.0614, 32.9072, 29.7530]]]), - ], - [ - dict( - prob=0.9, - rotate_range=(np.pi / 2,), - shear_range=[1, 2], - translate_range=[2, 1], - scale_range=[0.1, 0.2], - spatial_size=(3, 3), - cache_grid=True, - as_tensor_output=True, - device=None, - ), - {"img": torch.arange(64).reshape((1, 8, 8))}, - torch.tensor([[[18.7362, 15.5820, 12.4278], [27.3988, 24.2446, 21.0904], [36.0614, 32.9072, 29.7530]]]), - ], -] +_rtol = 1e-3 if is_tf32_env() else 1e-4 + +TESTS = [] +for p in TEST_NDARRAYS: + for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]: + TESTS.append( + [dict(device=device), {"img": p(torch.arange(27).reshape((3, 3, 3)))}, p(np.arange(27).reshape((3, 3, 3)))] + ) + TESTS.append( + [ + dict(device=device, spatial_size=-1), + {"img": p(torch.arange(27).reshape((3, 3, 3)))}, + p(np.arange(27).reshape((3, 3, 3))), + ] + ) + TESTS.append( + [ + dict(device=device), + {"img": p(torch.arange(27).reshape((3, 3, 3))), "spatial_size": (2, 2)}, + p(np.array([[[2.0, 3.0], [5.0, 6.0]], [[11.0, 12.0], [14.0, 
15.0]], [[20.0, 21.0], [23.0, 24.0]]])), + ] + ) + TESTS.append( + [ + dict(device=device), + {"img": p(torch.ones((1, 3, 3, 3))), "spatial_size": (2, 2, 2)}, + p(torch.ones((1, 2, 2, 2))), + ] + ) + TESTS.append( + [ + dict(device=device, spatial_size=(2, 2, 2), cache_grid=True), + {"img": p(torch.ones((1, 3, 3, 3)))}, + p(torch.ones((1, 2, 2, 2))), + ] + ) + TESTS.append( + [ + dict( + prob=0.9, + rotate_range=(np.pi / 2,), + shear_range=[1, 2], + translate_range=[2, 1], + padding_mode="zeros", + spatial_size=(2, 2, 2), + device=device, + ), + {"img": p(torch.ones((1, 3, 3, 3))), "mode": "bilinear"}, + p(torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]])), + ] + ) + TESTS.append( + [ + dict( + prob=0.9, + rotate_range=(np.pi / 2,), + shear_range=[1, 2], + translate_range=[2, 1], + padding_mode="zeros", + spatial_size=(2, 2, 2), + cache_grid=True, + device=device, + ), + {"img": p(torch.ones((1, 3, 3, 3))), "mode": "bilinear"}, + p(torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]])), + ] + ) + TESTS.append( + [ + dict( + prob=0.9, + rotate_range=(np.pi / 2,), + shear_range=[1, 2], + translate_range=[2, 1], + scale_range=[0.1, 0.2], + device=device, + ), + {"img": p(torch.arange(64).reshape((1, 8, 8))), "spatial_size": (3, 3)}, + p( + torch.tensor( + [[[18.7362, 15.5820, 12.4278], [27.3988, 24.2446, 21.0904], [36.0614, 32.9072, 29.7530]]] + ) + ), + ] + ) + TESTS.append( + [ + dict( + prob=0.9, + rotate_range=(np.pi / 2,), + shear_range=[1, 2], + translate_range=[2, 1], + scale_range=[0.1, 0.2], + spatial_size=(3, 3), + cache_grid=True, + device=device, + ), + {"img": p(torch.arange(64).reshape((1, 8, 8)))}, + p( + torch.tensor( + [[[18.7362, 15.5820, 12.4278], [27.3988, 24.2446, 21.0904], [36.0614, 32.9072, 29.7530]]] + ) + ), + ] + ) -ARR_NUMPY = np.arange(9 * 10).reshape(1, 9, 10) -ARR_TORCH = torch.Tensor(ARR_NUMPY) TEST_CASES_SKIPPED_CONSISTENCY = [] -for im in (ARR_NUMPY, ARR_TORCH): - for as_tensor_output in (True, False): - for in_dtype_is_int in (True, False): - TEST_CASES_SKIPPED_CONSISTENCY.append((im, as_tensor_output, in_dtype_is_int)) +for p in TEST_NDARRAYS: + for in_dtype in (np.int32, np.float32): + TEST_CASES_SKIPPED_CONSISTENCY.append((p(np.arange(9 * 10).reshape(1, 9, 10)), in_dtype)) class TestRandAffine(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_rand_affine(self, input_param, input_data, expected_val): g = RandAffine(**input_param) g.set_random_state(123) result = g(**input_data) if input_param.get("cache_grid", False): self.assertTrue(g._cached_grid is not None) - self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor)) - if isinstance(result, torch.Tensor): - np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4) - else: - np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected_val, rtol=_rtol, atol=1e-4) def test_ill_cache(self): with self.assertWarns(UserWarning): @@ -132,15 +148,11 @@ def test_ill_cache(self): RandAffine(cache_grid=True, spatial_size=(1, 1, -1)) @parameterized.expand(TEST_CASES_SKIPPED_CONSISTENCY) - def test_skipped_transform_consistency(self, im, as_tensor_output, in_dtype_is_int): - t1 = RandAffine(prob=0, as_tensor_output=as_tensor_output) - t2 = RandAffine(prob=1, spatial_size=(10, 11), as_tensor_output=as_tensor_output) + def test_skipped_transform_consistency(self, im, in_dtype): 
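The per-backend loops above, and the convert_data_type call in the body that follows, rely on helpers defined outside this diff; a minimal sketch of their assumed shape:

    import numpy as np
    import torch

    from monai.utils.type_conversion import convert_data_type

    # tests.utils.TEST_NDARRAYS is assumed to be roughly this: a tuple of
    # array constructors (plus a CUDA variant when available) so that each
    # case is exercised once per backend.
    TEST_NDARRAYS = (np.array, torch.as_tensor)

    im = np.arange(9 * 10).reshape(1, 9, 10)
    # convert_data_type returns (converted_data, original_type, original_device)
    im_f32, orig_type, _ = convert_data_type(im, dtype=np.float32)
    assert im_f32.dtype == np.float32 and orig_type is np.ndarray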
+ t1 = RandAffine(prob=0) + t2 = RandAffine(prob=1, spatial_size=(10, 11)) - # change dtype to int32 or float32 - if in_dtype_is_int: - im = im.astype("int32") if isinstance(im, np.ndarray) else im.int() - else: - im = im.astype("float32") if isinstance(im, np.ndarray) else im.float() + im, *_ = convert_data_type(im, dtype=in_dtype) out1 = t1(im) out2 = t2(im) diff --git a/tests/test_rand_affine_grid.py b/tests/test_rand_affine_grid.py index 605d0a30ba..ade615cd65 100644 --- a/tests/test_rand_affine_grid.py +++ b/tests/test_rand_affine_grid.py @@ -16,182 +16,194 @@ from parameterized import parameterized from monai.transforms import RandAffineGrid +from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env -TEST_CASES = [ - [{"as_tensor_output": False, "device": None}, {"grid": torch.ones((3, 3, 3))}, np.ones((3, 3, 3))], - [ - {"rotate_range": (1, 2), "translate_range": (3, 3, 3)}, - {"grid": torch.arange(0, 27).reshape((3, 3, 3))}, - torch.tensor( - np.array( - [ - [ - [-32.81998, -33.910976, -35.001972], - [-36.092968, -37.183964, -38.27496], - [-39.36596, -40.456955, -41.54795], - ], - [[2.1380205, 3.1015975, 4.0651755], [5.028752, 5.9923296, 6.955907], [7.919484, 8.883063, 9.84664]], - [[18.0, 19.0, 20.0], [21.0, 22.0, 23.0], [24.0, 25.0, 26.0]], - ] - ) - ), - ], - [ - {"translate_range": (3, 3, 3), "as_tensor_output": False, "device": torch.device("cpu:0")}, - {"spatial_size": (3, 3, 3)}, - np.array( +_rtol = 1e-1 if is_tf32_env() else 1e-4 + +TESTS = [] +for p in TEST_NDARRAYS: + for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]: + TESTS.append([{"device": device}, {"grid": p(torch.ones((3, 3, 3)))}, p(np.ones((3, 3, 3)))]) + TESTS.append( [ + {"rotate_range": (1, 2), "translate_range": (3, 3, 3)},
+ {"grid": p(torch.arange(0, 27).reshape((3, 3, 3)))}, + p( + np.array( [ - [1.3132e01, 1.4385e01, 1.5638e01], - [1.6891e01, 1.8144e01, 1.9397e01], - [2.0650e01, 2.1902e01, 2.3155e01], - ], - ], - [ - [ - [9.9383e-02, -4.8845e-01, -1.0763e00], - [-1.6641e00, -2.2519e00, -2.8398e00], - [-3.4276e00, -4.0154e00, -4.6032e00], - ], - [ - [-5.1911e00, -5.7789e00, -6.3667e00], - [-6.9546e00, -7.5424e00, -8.1302e00], - [-8.7180e00, -9.3059e00, -9.8937e00], - ], - [ - [-1.0482e01, -1.1069e01, -1.1657e01], - [-1.2245e01, -1.2833e01, -1.3421e01], - [-1.4009e01, -1.4596e01, -1.5184e01], - ], - ], + [ + [-32.81998, -33.910976, -35.001972], + [-36.092968, -37.183964, -38.27496], + [-39.36596, -40.456955, -41.54795], + ], + [ + [2.1380205, 3.1015975, 4.0651755], + [5.028752, 5.9923296, 6.955907], + [7.919484, 8.883063, 9.84664], + ], + [[18.0, 19.0, 20.0], [21.0, 22.0, 23.0], [24.0, 25.0, 26.0]], + ] + ) + ), + ] + ) + TESTS.append( + [ + {"translate_range": (3, 3, 3), "device": device}, + {"spatial_size": (3, 3, 3)}, + np.array( [ [ - [5.9635e01, 6.1199e01, 6.2764e01], - [6.4328e01, 6.5892e01, 6.7456e01], - [6.9021e01, 7.0585e01, 7.2149e01], - ], - [ - [7.3714e01, 7.5278e01, 7.6842e01], - [7.8407e01, 7.9971e01, 8.1535e01], - [8.3099e01, 8.4664e01, 8.6228e01], + [ + [0.17881513, 0.17881513, 0.17881513], + [0.17881513, 0.17881513, 0.17881513], + [0.17881513, 0.17881513, 0.17881513], + ], + [ + [1.1788151, 1.1788151, 1.1788151], + [1.1788151, 1.1788151, 1.1788151], + [1.1788151, 1.1788151, 1.1788151], + ], + [ + [2.1788151, 2.1788151, 2.1788151], + [2.1788151, 2.1788151, 2.1788151], + [2.1788151, 2.1788151, 2.1788151], + ], ], [ - [8.7792e01, 8.9357e01, 9.0921e01], - [9.2485e01, 9.4049e01, 9.5614e01], - [9.7178e01, 9.8742e01, 1.0031e02], + [ + [-2.283164, -2.283164, -2.283164], + [-1.283164, -1.283164, -1.283164], + [-0.28316402, -0.28316402, -0.28316402], + ], + [ + [-2.283164, -2.283164, -2.283164], + [-1.283164, -1.283164, -1.283164], + [-0.28316402, -0.28316402, -0.28316402], + ], + [ + [-2.283164, -2.283164, -2.283164], + [-1.283164, -1.283164, -1.283164], + [-0.28316402, -0.28316402, -0.28316402], + ], ], - ], - [ [ - [8.1000e01, 8.2000e01, 8.3000e01], - [8.4000e01, 8.5000e01, 8.6000e01], - [8.7000e01, 8.8000e01, 8.9000e01], + [ + [-2.6388912, -1.6388912, -0.6388912], + [-2.6388912, -1.6388912, -0.6388912], + [-2.6388912, -1.6388912, -0.6388912], + ], + [ + [-2.6388912, -1.6388912, -0.6388912], + [-2.6388912, -1.6388912, -0.6388912], + [-2.6388912, -1.6388912, -0.6388912], + ], + [ + [-2.6388912, -1.6388912, -0.6388912], + [-2.6388912, -1.6388912, -0.6388912], + [-2.6388912, -1.6388912, -0.6388912], + ], ], [ - [9.0000e01, 9.1000e01, 9.2000e01], - [9.3000e01, 9.4000e01, 9.5000e01], - [9.6000e01, 9.7000e01, 9.8000e01], + [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], + [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], + [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]], ], + ] + ), + ] + ) + TESTS.append( + [ + {"device": device, "rotate_range": (1.0, 1.0, 1.0), "shear_range": (0.1,), "scale_range": (1.2,)}, + {"grid": p(torch.arange(0, 108).reshape((4, 3, 3, 3)))}, + p( + np.array( [ - [9.9000e01, 1.0000e02, 1.0100e02], - [1.0200e02, 1.0300e02, 1.0400e02], - [1.0500e02, 1.0600e02, 1.0700e02], - ], - ], - ] - ) - ), - ], -] + [ + [ + [-9.4201e00, -8.1672e00, -6.9143e00], + [-5.6614e00, -4.4085e00, -3.1556e00], + [-1.9027e00, -6.4980e-01, 6.0310e-01], + ], + [ + [1.8560e00, 3.1089e00, 4.3618e00], + [5.6147e00, 6.8676e00, 8.1205e00], + [9.3734e00, 1.0626e01, 1.1879e01], + ], + [ + [1.3132e01, 
1.4385e01, 1.5638e01], + [1.6891e01, 1.8144e01, 1.9397e01], + [2.0650e01, 2.1902e01, 2.3155e01], + ], + ], + [ + [ + [9.9383e-02, -4.8845e-01, -1.0763e00], + [-1.6641e00, -2.2519e00, -2.8398e00], + [-3.4276e00, -4.0154e00, -4.6032e00], + ], + [ + [-5.1911e00, -5.7789e00, -6.3667e00], + [-6.9546e00, -7.5424e00, -8.1302e00], + [-8.7180e00, -9.3059e00, -9.8937e00], + ], + [ + [-1.0482e01, -1.1069e01, -1.1657e01], + [-1.2245e01, -1.2833e01, -1.3421e01], + [-1.4009e01, -1.4596e01, -1.5184e01], + ], + ], + [ + [ + [5.9635e01, 6.1199e01, 6.2764e01], + [6.4328e01, 6.5892e01, 6.7456e01], + [6.9021e01, 7.0585e01, 7.2149e01], + ], + [ + [7.3714e01, 7.5278e01, 7.6842e01], + [7.8407e01, 7.9971e01, 8.1535e01], + [8.3099e01, 8.4664e01, 8.6228e01], + ], + [ + [8.7792e01, 8.9357e01, 9.0921e01], + [9.2485e01, 9.4049e01, 9.5614e01], + [9.7178e01, 9.8742e01, 1.0031e02], + ], + ], + [ + [ + [8.1000e01, 8.2000e01, 8.3000e01], + [8.4000e01, 8.5000e01, 8.6000e01], + [8.7000e01, 8.8000e01, 8.9000e01], + ], + [ + [9.0000e01, 9.1000e01, 9.2000e01], + [9.3000e01, 9.4000e01, 9.5000e01], + [9.6000e01, 9.7000e01, 9.8000e01], + ], + [ + [9.9000e01, 1.0000e02, 1.0100e02], + [1.0200e02, 1.0300e02, 1.0400e02], + [1.0500e02, 1.0600e02, 1.0700e02], + ], + ], + ] + ) + ), + ] + ) class TestRandAffineGrid(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_rand_affine_grid(self, input_param, input_data, expected_val): g = RandAffineGrid(**input_param) g.set_random_state(123) result = g(**input_data) - self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor)) - if isinstance(result, torch.Tensor): - np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4) - else: - np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + if "device" in input_data: + self.assertEqual(result.device, input_data["device"]) + assert_allclose(result, expected_val, type_test=False, rtol=_rtol, atol=1e-4) if __name__ == "__main__": diff --git a/tests/test_rand_affined.py b/tests/test_rand_affined.py index d2f8a60665..e59a345d0d 100644 --- a/tests/test_rand_affined.py +++ b/tests/test_rand_affined.py @@ -17,179 +17,190 @@ from monai.transforms import RandAffined from monai.utils import GridSampleMode +from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env -TEST_CASES = [ - [ - dict(as_tensor_output=False, device=None, spatial_size=None, keys=("img", "seg")), - {"img": torch.arange(27).reshape((3, 3, 3)), "seg": torch.arange(27).reshape((3, 3, 3))}, - np.arange(27).reshape((3, 3, 3)), - ], - [ - dict(as_tensor_output=False, device=None, spatial_size=(2, 2), keys=("img", "seg")), - {"img": torch.ones((3, 3, 3)), "seg": torch.ones((3, 3, 3))}, - np.ones((3, 2, 2)), - ], - [ - dict(as_tensor_output=False, device=None, spatial_size=(2, 2), cache_grid=True, keys=("img", "seg")), - {"img": torch.ones((3, 3, 3)), "seg": torch.ones((3, 3, 3))}, - np.ones((3, 2, 2)), - ], - [ - dict(as_tensor_output=True, device=None, spatial_size=(2, 2, 2), keys=("img", "seg")), - {"img": torch.ones((1, 3, 3, 3)), "seg": torch.ones((1, 3, 3, 3))}, - torch.ones((1, 2, 2, 2)), - ], - [ - dict( - prob=0.9, - rotate_range=(np.pi / 2,), - shear_range=[1, 2], - translate_range=[2, 1], - as_tensor_output=True, - spatial_size=(2, 2, 2), - padding_mode="zeros", - device=None, - keys=("img", "seg"), - mode="bilinear", - ), - {"img": torch.ones((1, 3, 3, 3)), "seg": torch.ones((1, 3, 3, 3))}, - torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]],
[[1.0000, 1.0000], [1.0000, 0.9333]]]]), - ], - [ - dict( - prob=0.9, - rotate_range=(np.pi / 2,), - shear_range=[1, 2], - translate_range=[2, 1], - as_tensor_output=False, - spatial_size=(2, 2, 2), - padding_mode="zeros", - device=None, - cache_grid=True, - keys=("img", "seg"), - mode="bilinear", - ), - {"img": torch.ones((1, 3, 3, 3)), "seg": torch.ones((1, 3, 3, 3))}, - np.array([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]]), - ], - [ - dict( - prob=0.9, - rotate_range=(np.pi / 2,), - shear_range=[1, 2], - translate_range=[2, 1], - scale_range=[0.1, 0.2], - as_tensor_output=True, - spatial_size=(3, 3), - keys=("img", "seg"), - device=None, - ), - {"img": torch.arange(64).reshape((1, 8, 8)), "seg": torch.arange(64).reshape((1, 8, 8))}, - torch.tensor([[[18.7362, 15.5820, 12.4278], [27.3988, 24.2446, 21.0904], [36.0614, 32.9072, 29.7530]]]), - ], - [ - dict( - prob=0.9, - mode=("bilinear", "nearest"), - rotate_range=(np.pi / 2,), - shear_range=[1, 2], - translate_range=[2, 1], - scale_range=[0.1, 0.2], - as_tensor_output=False, - spatial_size=(3, 3), - keys=("img", "seg"), - device=torch.device("cpu:0"), - ), - {"img": torch.arange(64).reshape((1, 8, 8)), "seg": torch.arange(64).reshape((1, 8, 8))}, - { - "img": np.array( - [ - [ - [18.736153, 15.581954, 12.4277525], - [27.398798, 24.244598, 21.090399], - [36.061443, 32.90724, 29.753046], - ] - ] - ), - "seg": np.array([[[19.0, 20.0, 12.0], [27.0, 28.0, 20.0], [35.0, 36.0, 29.0]]]), - }, - ], - [ - dict( - prob=0.9, - rotate_range=(np.pi / 2,), - shear_range=[1, 2], - translate_range=[2, 1], - as_tensor_output=True, - spatial_size=(2, 2, 2), - padding_mode="zeros", - device=None, - keys=("img", "seg"), - mode=GridSampleMode.BILINEAR, - ), - {"img": torch.ones((1, 3, 3, 3)), "seg": torch.ones((1, 3, 3, 3))}, - torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]]), - ], - [ - dict( - prob=0.9, - mode=(GridSampleMode.BILINEAR, GridSampleMode.NEAREST), - rotate_range=(np.pi / 2,), - shear_range=[1, 2], - translate_range=[2, 1], - scale_range=[0.1, 0.2], - as_tensor_output=False, - spatial_size=(3, 3), - keys=("img", "seg"), - device=torch.device("cpu:0"), - ), - {"img": torch.arange(64).reshape((1, 8, 8)), "seg": torch.arange(64).reshape((1, 8, 8))}, - { - "img": np.array( - [ - [ - [18.736153, 15.581954, 12.4277525], - [27.398798, 24.244598, 21.090399], - [36.061443, 32.90724, 29.753046], - ] - ] - ), - "seg": np.array([[[19.0, 20.0, 12.0], [27.0, 28.0, 20.0], [35.0, 36.0, 29.0]]]), - }, - ], - [ - dict( - prob=0.9, - mode=(GridSampleMode.BILINEAR, GridSampleMode.NEAREST), - rotate_range=(np.pi / 2,), - shear_range=[1, 2], - translate_range=[2, 1], - scale_range=[0.1, 0.2], - as_tensor_output=False, - spatial_size=(3, 3), - cache_grid=True, - keys=("img", "seg"), - device=torch.device("cpu:0"), - ), - {"img": torch.arange(64).reshape((1, 8, 8)), "seg": torch.arange(64).reshape((1, 8, 8))}, - { - "img": np.array( - [ - [ - [18.736153, 15.581954, 12.4277525], - [27.398798, 24.244598, 21.090399], - [36.061443, 32.90724, 29.753046], - ] - ] - ), - "seg": np.array([[[19.0, 20.0, 12.0], [27.0, 28.0, 20.0], [35.0, 36.0, 29.0]]]), - }, - ], -] +_rtol = 1e-3 if is_tf32_env() else 1e-4 + +TESTS = [] +for p in TEST_NDARRAYS: + for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]: + TESTS.append( + [ + dict(device=device, spatial_size=None, keys=("img", "seg")), + {"img": p(torch.arange(27).reshape((3, 3, 3))), "seg": p(torch.arange(27).reshape((3, 3, 
3)))}, + p(np.arange(27).reshape((3, 3, 3))), + ] + ) + TESTS.append( + [ + dict(device=device, spatial_size=(2, 2), keys=("img", "seg")), + {"img": p(torch.ones((3, 3, 3))), "seg": p(torch.ones((3, 3, 3)))}, + p(np.ones((3, 2, 2))), + ] + ) + TESTS.append( + [ + dict(device=device, spatial_size=(2, 2), cache_grid=True, keys=("img", "seg")), + {"img": p(torch.ones((3, 3, 3))), "seg": p(torch.ones((3, 3, 3)))}, + p(np.ones((3, 2, 2))), + ] + ) + TESTS.append( + [ + dict(device=device, spatial_size=(2, 2, 2), keys=("img", "seg")), + {"img": p(torch.ones((1, 3, 3, 3))), "seg": p(torch.ones((1, 3, 3, 3)))}, + p(torch.ones((1, 2, 2, 2))), + ] + ) + TESTS.append( + [ + dict( + prob=0.9, + rotate_range=(np.pi / 2,), + shear_range=[1, 2], + translate_range=[2, 1], + spatial_size=(2, 2, 2), + padding_mode="zeros", + device=device, + keys=("img", "seg"), + mode="bilinear", + ), + {"img": p(torch.ones((1, 3, 3, 3))), "seg": p(torch.ones((1, 3, 3, 3)))}, + p(torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]])), + ] + ) + TESTS.append( + [ + dict( + prob=0.9, + rotate_range=(np.pi / 2,), + shear_range=[1, 2], + translate_range=[2, 1], + scale_range=[0.1, 0.2], + spatial_size=(3, 3), + keys=("img", "seg"), + device=device, + ), + {"img": p(torch.arange(64).reshape((1, 8, 8))), "seg": p(torch.arange(64).reshape((1, 8, 8)))}, + p( + torch.tensor( + [[[18.7362, 15.5820, 12.4278], [27.3988, 24.2446, 21.0904], [36.0614, 32.9072, 29.7530]]] + ) + ), + ] + ) + TESTS.append( + [ + dict( + prob=0.9, + mode=("bilinear", "nearest"), + rotate_range=(np.pi / 2,), + shear_range=[1, 2], + translate_range=[2, 1], + scale_range=[0.1, 0.2], + spatial_size=(3, 3), + keys=("img", "seg"), + device=device, + ), + {"img": p(torch.arange(64).reshape((1, 8, 8))), "seg": p(torch.arange(64).reshape((1, 8, 8)))}, + { + "img": p( + np.array( + [ + [ + [18.736153, 15.581954, 12.4277525], + [27.398798, 24.244598, 21.090399], + [36.061443, 32.90724, 29.753046], + ] + ] + ) + ), + "seg": p(np.array([[[19.0, 20.0, 12.0], [27.0, 28.0, 20.0], [35.0, 36.0, 29.0]]])), + }, + ] + ) + TESTS.append( + [ + dict( + prob=0.9, + rotate_range=(np.pi / 2,), + shear_range=[1, 2], + translate_range=[2, 1], + spatial_size=(2, 2, 2), + padding_mode="zeros", + device=device, + keys=("img", "seg"), + mode=GridSampleMode.BILINEAR, + ), + {"img": p(torch.ones((1, 3, 3, 3))), "seg": p(torch.ones((1, 3, 3, 3)))}, + p(torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]])), + ] + ) + TESTS.append( + [ + dict( + prob=0.9, + mode=(GridSampleMode.BILINEAR, GridSampleMode.NEAREST), + rotate_range=(np.pi / 2,), + shear_range=[1, 2], + translate_range=[2, 1], + scale_range=[0.1, 0.2], + spatial_size=(3, 3), + keys=("img", "seg"), + device=device, + ), + {"img": p(torch.arange(64).reshape((1, 8, 8))), "seg": p(torch.arange(64).reshape((1, 8, 8)))}, + { + "img": p( + np.array( + [ + [ + [18.736153, 15.581954, 12.4277525], + [27.398798, 24.244598, 21.090399], + [36.061443, 32.90724, 29.753046], + ] + ] + ) + ), + "seg": p(np.array([[[19.0, 20.0, 12.0], [27.0, 28.0, 20.0], [35.0, 36.0, 29.0]]])), + }, + ] + ) + TESTS.append( + [ + dict( + prob=0.9, + mode=(GridSampleMode.BILINEAR, GridSampleMode.NEAREST), + rotate_range=(np.pi / 2,), + shear_range=[1, 2], + translate_range=[2, 1], + scale_range=[0.1, 0.2], + spatial_size=(3, 3), + cache_grid=True, + keys=("img", "seg"), + device=device, + ), + {"img": p(torch.arange(64).reshape((1, 8, 8))), "seg": p(torch.arange(64).reshape((1, 8, 8)))}, + { 
+ "img": p( + np.array( + [ + [ + [18.736153, 15.581954, 12.4277525], + [27.398798, 24.244598, 21.090399], + [36.061443, 32.90724, 29.753046], + ] + ] + ) + ), + "seg": p(np.array([[[19.0, 20.0, 12.0], [27.0, 28.0, 20.0], [35.0, 36.0, 29.0]]])), + }, + ] + ) class TestRandAffined(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_rand_affined(self, input_param, input_data, expected_val): g = RandAffined(**input_param).set_random_state(123) res = g(input_data) @@ -200,28 +211,20 @@ def test_rand_affined(self, input_param, input_data, expected_val): if "_transforms" in key: continue expected = expected_val[key] if isinstance(expected_val, dict) else expected_val - self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected, torch.Tensor)) - if isinstance(result, torch.Tensor): - np.testing.assert_allclose(result.cpu().numpy(), expected.cpu().numpy(), rtol=1e-4, atol=1e-4) - else: - np.testing.assert_allclose(result, expected, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected, rtol=_rtol, atol=1e-3) + + g.set_random_state(4) + res = g(input_data) + # affine should be tensor because the resampler only supports pytorch backend + self.assertTrue(isinstance(res["img_transforms"][0]["extra_info"]["affine"], torch.Tensor)) def test_ill_cache(self): with self.assertWarns(UserWarning): # spatial size is None - RandAffined( - as_tensor_output=False, device=None, spatial_size=None, prob=1.0, cache_grid=True, keys=("img", "seg") - ) + RandAffined(device=device, spatial_size=None, prob=1.0, cache_grid=True, keys=("img", "seg")) with self.assertWarns(UserWarning): # spatial size is dynamic - RandAffined( - as_tensor_output=False, - device=None, - spatial_size=(2, -1), - prob=1.0, - cache_grid=True, - keys=("img", "seg"), - ) + RandAffined(device=device, spatial_size=(2, -1), prob=1.0, cache_grid=True, keys=("img", "seg")) if __name__ == "__main__": diff --git a/tests/test_rand_axis_flip.py b/tests/test_rand_axis_flip.py index c05c3a1e0d..1772ef4987 100644 --- a/tests/test_rand_axis_flip.py +++ b/tests/test_rand_axis_flip.py @@ -22,10 +22,8 @@ def test_correct_results(self): for p in TEST_NDARRAYS: flip = RandAxisFlip(prob=1.0) result = flip(p(self.imt[0])) - expected = [] - for channel in self.imt[0]: - expected.append(np.flip(channel, flip._axis)) - assert_allclose(np.stack(expected), result) + expected = [np.flip(channel, flip._axis) for channel in self.imt[0]] + assert_allclose(result, p(np.stack(expected))) if __name__ == "__main__": diff --git a/tests/test_rand_axis_flipd.py b/tests/test_rand_axis_flipd.py index 7bef0baa63..8ccc9b35d7 100644 --- a/tests/test_rand_axis_flipd.py +++ b/tests/test_rand_axis_flipd.py @@ -23,10 +23,8 @@ def test_correct_results(self): flip = RandAxisFlipd(keys="img", prob=1.0) result = flip({"img": p(self.imt[0])})["img"] - expected = [] - for channel in self.imt[0]: - expected.append(np.flip(channel, flip._axis)) - assert_allclose(np.stack(expected), result) + expected = [np.flip(channel, flip.flipper._axis) for channel in self.imt[0]] + assert_allclose(result, p(np.stack(expected))) if __name__ == "__main__": diff --git a/tests/test_random_bias_field.py b/tests/test_rand_bias_field.py similarity index 66% rename from tests/test_random_bias_field.py rename to tests/test_rand_bias_field.py index 5aeeb79874..ba755337d4 100644 --- a/tests/test_random_bias_field.py +++ b/tests/test_rand_bias_field.py @@ -12,29 +12,34 @@ import unittest import numpy as np +import torch from parameterized import parameterized from 
monai.transforms import RandBiasField -TEST_CASES_2D = [{}, (3, 32, 32)] -TEST_CASES_3D = [{}, (3, 32, 32, 32)] -TEST_CASES_2D_ZERO_RANGE = [{"coeff_range": (0.0, 0.0)}, (2, 3, 3)] -TEST_CASES_2D_ONES = [{"coeff_range": (1.0, 1.0)}, np.asarray([[[7.389056, 0.1353353], [7.389056, 22026.46]]])] +TEST_CASES_2D = [{"prob": 1.0}, (3, 32, 32)] +TEST_CASES_3D = [{"prob": 1.0}, (3, 32, 32, 32)] +TEST_CASES_2D_ZERO_RANGE = [{"prob": 1.0, "coeff_range": (0.0, 0.0)}, (2, 3, 3)] +TEST_CASES_2D_ONES = [ + {"prob": 1.0, "coeff_range": (1.0, 1.0)}, + np.asarray([[[7.389056, 0.1353353], [7.389056, 22026.46]]]), +] class TestRandBiasField(unittest.TestCase): @parameterized.expand([TEST_CASES_2D, TEST_CASES_3D]) def test_output_shape(self, class_args, img_shape): - for degree in [1, 2, 3]: - bias_field = RandBiasField(degree=degree, **class_args) - img = np.random.rand(*img_shape) - output = bias_field(img) - np.testing.assert_equal(output.shape, img_shape) - np.testing.assert_equal(output.dtype, bias_field.dtype) - - img_zero = np.zeros([*img_shape]) - output_zero = bias_field(img_zero) - np.testing.assert_equal(output_zero, img_zero) + for fn in (np.random, torch): + for degree in [1, 2, 3]: + bias_field = RandBiasField(degree=degree, **class_args) + img = fn.rand(*img_shape) + output = bias_field(img) + np.testing.assert_equal(output.shape, img_shape) + self.assertTrue(output.dtype in (np.float32, torch.float32)) + + img_zero = np.zeros([*img_shape]) + output_zero = bias_field(img_zero) + np.testing.assert_equal(output_zero, img_zero) @parameterized.expand([TEST_CASES_2D_ZERO_RANGE]) def test_zero_range(self, class_args, img_shape): diff --git a/tests/test_random_bias_fieldd.py b/tests/test_rand_bias_fieldd.py similarity index 91% rename from tests/test_random_bias_fieldd.py rename to tests/test_rand_bias_fieldd.py index aa2e206de9..b82d435f40 100644 --- a/tests/test_random_bias_fieldd.py +++ b/tests/test_rand_bias_fieldd.py @@ -16,11 +16,11 @@ from monai.transforms import RandBiasFieldd -TEST_CASES_2D = [{}, (3, 32, 32)] -TEST_CASES_3D = [{}, (3, 32, 32, 32)] -TEST_CASES_2D_ZERO_RANGE = [{"coeff_range": (0.0, 0.0)}, (3, 32, 32)] +TEST_CASES_2D = [{"prob": 1.0}, (3, 32, 32)] +TEST_CASES_3D = [{"prob": 1.0}, (3, 32, 32, 32)] +TEST_CASES_2D_ZERO_RANGE = [{"prob": 1.0, "coeff_range": (0.0, 0.0)}, (3, 32, 32)] TEST_CASES_2D_ONES = [ - {"coeff_range": (1.0, 1.0)}, + {"prob": 1.0, "coeff_range": (1.0, 1.0)}, np.asarray([[[7.3890562e00, 1.3533528e-01], [7.3890562e00, 2.2026465e04]]]), ] diff --git a/tests/test_rand_coarse_dropout.py b/tests/test_rand_coarse_dropout.py index 830832c2a5..db26ea3c7a 100644 --- a/tests/test_rand_coarse_dropout.py +++ b/tests/test_rand_coarse_dropout.py @@ -12,6 +12,7 @@ import unittest import numpy as np +import torch from parameterized import parameterized from monai.transforms import RandCoarseDropout @@ -52,12 +53,20 @@ np.random.randint(0, 2, size=[3, 3, 3, 4]), ] +TEST_CASE_7 = [ + {"holes": 2, "spatial_size": [2, 2, 2], "dropout_holes": False, "fill_value": (3, 6), "prob": 1.0}, + torch.randint(0, 2, size=[3, 3, 3, 4]), +] + class TestRandCoarseDropout(unittest.TestCase): - @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6]) + @parameterized.expand( + [TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7] + ) def test_value(self, input_param, input_data): dropout = RandCoarseDropout(**input_param) result = dropout(input_data) + self.assertEqual(type(result), 
type(input_data)) holes = input_param.get("holes") max_holes = input_param.get("max_holes") spatial_size = fall_back_tuple(input_param.get("spatial_size"), input_data.shape[1:]) diff --git a/tests/test_rand_coarse_dropoutd.py b/tests/test_rand_coarse_dropoutd.py index fc898a9fca..ebb090e378 100644 --- a/tests/test_rand_coarse_dropoutd.py +++ b/tests/test_rand_coarse_dropoutd.py @@ -28,14 +28,7 @@ ] TEST_CASE_2 = [ - { - "keys": "img", - "holes": 2, - "spatial_size": [2, 2, 2], - "fill_value": 5, - "max_spatial_size": [4, 4, 3], - "prob": 1.0, - }, + {"keys": "img", "holes": 2, "spatial_size": [2, 2, 2], "fill_value": 5, "max_spatial_size": [4, 4, 3], "prob": 1.0}, {"img": np.random.randint(0, 2, size=[3, 3, 3, 4])}, ] diff --git a/tests/test_rand_coarse_shuffle.py b/tests/test_rand_coarse_shuffle.py new file mode 100644 index 0000000000..0262fe2b3a --- /dev/null +++ b/tests/test_rand_coarse_shuffle.py @@ -0,0 +1,62 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import torch +from parameterized import parameterized + +from monai.transforms import RandCoarseShuffle + +TEST_CASES = [ + [ + {"holes": 5, "spatial_size": 1, "max_spatial_size": -1, "prob": 0.0}, + {"img": np.arange(8).reshape((1, 2, 2, 2))}, + np.arange(8).reshape((1, 2, 2, 2)), + ], + [ + {"holes": 10, "spatial_size": 1, "max_spatial_size": -1, "prob": 1.0}, + {"img": np.arange(27).reshape((1, 3, 3, 3))}, + np.asarray( + [ + [ + [[8, 19, 26], [24, 6, 15], [0, 13, 25]], + [[17, 3, 5], [10, 1, 12], [22, 4, 11]], + [[21, 20, 23], [14, 2, 16], [18, 9, 7]], + ] + ] + ), + ], + [ + {"holes": 2, "spatial_size": 1, "max_spatial_size": -1, "prob": 1.0}, + {"img": np.arange(16).reshape((2, 2, 2, 2))}, + np.asarray([[[[6, 1], [4, 3]], [[0, 2], [7, 5]]], [[[14, 10], [9, 8]], [[12, 15], [13, 11]]]]), + ], + [ + {"holes": 2, "spatial_size": 1, "max_spatial_size": -1, "prob": 1.0}, + {"img": torch.arange(16).reshape((2, 2, 2, 2))}, + torch.as_tensor([[[[6, 1], [4, 3]], [[0, 2], [7, 5]]], [[[14, 10], [9, 8]], [[12, 15], [13, 11]]]]), + ], +] + + +class TestRandCoarseShuffle(unittest.TestCase): + @parameterized.expand(TEST_CASES) + def test_shuffle(self, input_param, input_data, expected_val): + g = RandCoarseShuffle(**input_param) + g.set_random_state(seed=12) + result = g(**input_data) + np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_rand_coarse_shuffled.py b/tests/test_rand_coarse_shuffled.py new file mode 100644 index 0000000000..ad49c8d02d --- /dev/null +++ b/tests/test_rand_coarse_shuffled.py @@ -0,0 +1,56 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
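The dictionary version whose header starts here mirrors the array transform tested above; a usage sketch of the array version, reusing the fixture values from the new test:

    import numpy as np
    from monai.transforms import RandCoarseShuffle

    shuffle = RandCoarseShuffle(holes=10, spatial_size=1, max_spatial_size=-1, prob=1.0)
    shuffle.set_random_state(seed=12)  # fixed seed gives the deterministic result asserted above
    # voxels inside each randomly placed hole are shuffled in place;
    # with prob=0.0 the input passes through unchanged
    out = shuffle(img=np.arange(27).reshape((1, 3, 3, 3)))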
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +from parameterized import parameterized + +from monai.transforms import RandCoarseShuffled + +TEST_CASES = [ + [ + {"keys": "img", "holes": 5, "spatial_size": 1, "max_spatial_size": -1, "prob": 0.0}, + {"img": np.arange(8).reshape((1, 2, 2, 2))}, + np.arange(8).reshape((1, 2, 2, 2)), + ], + [ + {"keys": "img", "holes": 10, "spatial_size": 1, "max_spatial_size": -1, "prob": 1.0}, + {"img": np.arange(27).reshape((1, 3, 3, 3))}, + np.asarray( + [ + [ + [[8, 19, 26], [24, 6, 15], [0, 13, 25]], + [[17, 3, 5], [10, 1, 12], [22, 4, 11]], + [[21, 20, 23], [14, 2, 16], [18, 9, 7]], + ] + ] + ), + ], + [ + {"keys": "img", "holes": 2, "spatial_size": 1, "max_spatial_size": -1, "prob": 1.0}, + {"img": np.arange(16).reshape((2, 2, 2, 2))}, + np.asarray([[[[6, 1], [4, 3]], [[0, 2], [7, 5]]], [[[14, 10], [9, 8]], [[12, 15], [13, 11]]]]), + ], +] + + +class TestRandCoarseShuffled(unittest.TestCase): + @parameterized.expand(TEST_CASES) + def test_shuffle(self, input_param, input_data, expected_val): + g = RandCoarseShuffled(**input_param) + g.set_random_state(seed=12) + result = g(input_data) + np.testing.assert_allclose(result["img"], expected_val, rtol=1e-4, atol=1e-4) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_rand_crop_by_label_classes.py b/tests/test_rand_crop_by_label_classes.py index b21f971042..c987c3f0fd 100644 --- a/tests/test_rand_crop_by_label_classes.py +++ b/tests/test_rand_crop_by_label_classes.py @@ -15,68 +15,121 @@ from parameterized import parameterized from monai.transforms import ClassesToIndices, RandCropByLabelClasses +from tests.utils import TEST_NDARRAYS -TEST_CASE_0 = [ +TESTS_INDICES, TESTS_SHAPE = [], [] +for p in TEST_NDARRAYS: # One-Hot label - { - "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "num_classes": None, - "spatial_size": [2, 2, -1], - "ratios": [1, 1, 1], - "num_samples": 2, - "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "image_threshold": 0, - }, - {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, - list, - (3, 2, 2, 3), -] + TESTS_INDICES.append( + [ + { + "label": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "num_classes": None, + "spatial_size": [2, 2, -1], + "ratios": [1, 1, 1], + "num_samples": 2, + "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "image_threshold": 0, + }, + {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))}, + list, + (3, 2, 2, 3), + ] + ) -TEST_CASE_1 = [ - # Argmax label - { - "label": np.random.randint(0, 2, size=[1, 3, 3, 3]), - "num_classes": 2, - "spatial_size": [2, 2, 2], - "ratios": [1, 1], - "num_samples": 2, - "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "image_threshold": 0, - }, - {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, - list, - (3, 2, 2, 2), -] + TESTS_INDICES.append( + [ + # Argmax label + { + "label": p(np.random.randint(0, 2, size=[1, 3, 3, 3])), + "num_classes": 2, + "spatial_size": [2, 2, 2], + "ratios": [1, 1], + "num_samples": 2, + "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "image_threshold": 0, + }, + {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))}, + list, + (3, 
2, 2, 2), + ] + ) -TEST_CASE_2 = [ - # provide label at runtime - { - "label": None, - "num_classes": 2, - "spatial_size": [2, 2, 2], - "ratios": [1, 1], - "num_samples": 2, - "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "image_threshold": 0, - }, - { - "img": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "label": np.random.randint(0, 2, size=[1, 3, 3, 3]), - "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), - }, - list, - (3, 2, 2, 2), -] + TESTS_SHAPE.append( + [ + # provide label at runtime + { + "label": None, + "num_classes": 2, + "spatial_size": [2, 2, 2], + "ratios": [1, 1], + "num_samples": 2, + "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "image_threshold": 0, + }, + { + "img": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "label": p(np.random.randint(0, 2, size=[1, 3, 3, 3])), + "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + }, + list, + (3, 2, 2, 2), + ] + ) + TESTS_SHAPE.append( + [ + # provide label at runtime + { + "label": None, + "num_classes": 2, + "spatial_size": [4, 4, 2], + "ratios": [1, 1], + "num_samples": 2, + "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "image_threshold": 0, + "allow_smaller": True, + }, + { + "img": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "label": p(np.random.randint(0, 2, size=[1, 3, 3, 3])), + "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + }, + list, + (3, 3, 3, 2), + ] + ) + TESTS_SHAPE.append( + [ + # provide label at runtime + { + "label": None, + "num_classes": 2, + "spatial_size": [4, 4, 4], + "ratios": [1, 1], + "num_samples": 2, + "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "image_threshold": 0, + "allow_smaller": True, + }, + { + "img": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "label": p(np.random.randint(0, 2, size=[1, 3, 3, 3])), + "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + }, + list, + (3, 3, 3, 3), + ] + ) class TestRandCropByLabelClasses(unittest.TestCase): - @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2]) + @parameterized.expand(TESTS_INDICES + TESTS_SHAPE) def test_type_shape(self, input_param, input_data, expected_type, expected_shape): result = RandCropByLabelClasses(**input_param)(**input_data) self.assertIsInstance(result, expected_type) self.assertTupleEqual(result[0].shape, expected_shape) - @parameterized.expand([TEST_CASE_0, TEST_CASE_1]) + @parameterized.expand(TESTS_INDICES) def test_indices(self, input_param, input_data, expected_type, expected_shape): input_param["indices"] = ClassesToIndices(num_classes=input_param["num_classes"])(input_param["label"]) result = RandCropByLabelClasses(**input_param)(**input_data) diff --git a/tests/test_rand_crop_by_label_classesd.py b/tests/test_rand_crop_by_label_classesd.py index 829096953b..e51413a8d0 100644 --- a/tests/test_rand_crop_by_label_classesd.py +++ b/tests/test_rand_crop_by_label_classesd.py @@ -15,52 +15,107 @@ from parameterized import parameterized from monai.transforms import ClassesToIndicesd, RandCropByLabelClassesd +from tests.utils import TEST_NDARRAYS -TEST_CASE_0 = [ - # One-Hot label - { - "keys": "img", - "label_key": "label", - "num_classes": None, - "spatial_size": [2, 2, -1], - "ratios": [1, 1, 1], - "num_samples": 2, - "image_key": "image", - "image_threshold": 0, - }, - { - "img": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), - }, - list, - (3, 2, 2, 3), -] +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + # One-Hot 
label + { + "keys": "img", + "label_key": "label", + "num_classes": None, + "spatial_size": [2, 2, -1], + "ratios": [1, 1, 1], + "num_samples": 2, + "image_key": "image", + "image_threshold": 0, + }, + { + "img": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "label": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + }, + list, + (3, 2, 2, 3), + ] + ) -TEST_CASE_1 = [ - # Argmax label - { - "keys": "img", - "label_key": "label", - "num_classes": 2, - "spatial_size": [2, 2, 2], - "ratios": [1, 1], - "num_samples": 2, - "image_key": "image", - "image_threshold": 0, - }, - { - "img": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "label": np.random.randint(0, 2, size=[1, 3, 3, 3]), - }, - list, - (3, 2, 2, 2), -] + TESTS.append( + [ + # Argmax label + { + "keys": "img", + "label_key": "label", + "num_classes": 2, + "spatial_size": [2, 2, 2], + "ratios": [1, 1], + "num_samples": 2, + "image_key": "image", + "image_threshold": 0, + }, + { + "img": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "label": p(np.random.randint(0, 2, size=[1, 3, 3, 3])), + }, + list, + (3, 2, 2, 2), + ] + ) + + TESTS.append( + [ + # Argmax label + { + "keys": "img", + "label_key": "label", + "num_classes": 2, + "spatial_size": [4, 4, 2], + "ratios": [1, 1], + "num_samples": 2, + "image_key": "image", + "image_threshold": 0, + "allow_smaller": True, + }, + { + "img": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "label": p(np.random.randint(0, 2, size=[1, 3, 3, 3])), + }, + list, + (3, 3, 3, 2), + ] + ) + + TESTS.append( + [ + # Argmax label + { + "keys": "img", + "label_key": "label", + "num_classes": 2, + "spatial_size": [4, 4, 4], + "ratios": [1, 1], + "num_samples": 2, + "image_key": "image", + "image_threshold": 0, + "allow_smaller": True, + }, + { + "img": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])), + "label": p(np.random.randint(0, 2, size=[1, 3, 3, 3])), + }, + list, + (3, 3, 3, 3), + ] + ) class TestRandCropByLabelClassesd(unittest.TestCase): - @parameterized.expand([TEST_CASE_0, TEST_CASE_1]) + @parameterized.expand(TESTS) def test_type_shape(self, input_param, input_data, expected_type, expected_shape): result = RandCropByLabelClassesd(**input_param)(input_data) self.assertIsInstance(result, expected_type) diff --git a/tests/test_rand_crop_by_pos_neg_label.py b/tests/test_rand_crop_by_pos_neg_label.py index e0f669ab3f..42a72ccf2b 100644 --- a/tests/test_rand_crop_by_pos_neg_label.py +++ b/tests/test_rand_crop_by_pos_neg_label.py @@ -10,68 +10,123 @@ # limitations under the License. 
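The allow_smaller cases added above (and in the pos/neg cropper whose diff starts here) pin down the fall-back behaviour: a requested ROI larger than the image is clipped to the image extent rather than rejected, which is why spatial_size [4, 4, 2] on a 3x3x3 volume yields (3, 3, 3, 2) patches. A sketch with the same fixture shapes, argument plumbing assumed from the cases above:

    import numpy as np
    from monai.transforms import RandCropByLabelClasses

    cropper = RandCropByLabelClasses(
        spatial_size=[4, 4, 2],  # exceeds the 3x3x3 volume in two dimensions
        num_classes=2,
        ratios=[1, 1],
        num_samples=2,
        allow_smaller=True,  # clip the ROI to the image extent instead of erroring
    )
    samples = cropper(
        img=np.random.randint(0, 2, size=[3, 3, 3, 3]),
        label=np.random.randint(0, 2, size=[1, 3, 3, 3]),
    )
    assert samples[0].shape == (3, 3, 3, 2)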
import unittest +from copy import deepcopy import numpy as np from parameterized import parameterized from monai.transforms import RandCropByPosNegLabel +from tests.utils import TEST_NDARRAYS -TEST_CASE_0 = [ - { - "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "spatial_size": [2, 2, -1], - "pos": 1, - "neg": 1, - "num_samples": 2, - "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "image_threshold": 0, - }, - {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, - list, - (3, 2, 2, 3), -] +TESTS = [] +TESTS.append( + [ + { + "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "spatial_size": [2, 2, -1], + "pos": 1, + "neg": 1, + "num_samples": 2, + "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "image_threshold": 0, + }, + {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, + (3, 2, 2, 3), + ] +) +TESTS.append( + [ + { + "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "spatial_size": [2, 2, 2], + "pos": 1, + "neg": 1, + "num_samples": 2, + "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "image_threshold": 0, + }, + {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, + (3, 2, 2, 2), + ] +) +TESTS.append( + [ + { + "label": None, + "spatial_size": [2, 2, 2], + "pos": 1, + "neg": 1, + "num_samples": 2, + "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "image_threshold": 0, + }, + { + "img": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), + }, + (3, 2, 2, 2), + ] +) +TESTS.append( + [ + { + "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "spatial_size": [4, 4, 2], + "pos": 1, + "neg": 1, + "num_samples": 2, + "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "allow_smaller": True, + }, + {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, + (3, 3, 3, 2), + ] +) +TESTS.append( + [ + { + "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "spatial_size": [4, 4, 4], + "pos": 1, + "neg": 1, + "num_samples": 2, + "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "allow_smaller": True, + }, + {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, + (3, 3, 3, 3), + ] +) -TEST_CASE_1 = [ - { - "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "spatial_size": [2, 2, 2], - "pos": 1, - "neg": 1, - "num_samples": 2, - "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "image_threshold": 0, - }, - {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, - list, - (3, 2, 2, 2), -] -TEST_CASE_2 = [ - { - "label": None, - "spatial_size": [2, 2, 2], - "pos": 1, - "neg": 1, - "num_samples": 2, - "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "image_threshold": 0, - }, - { - "img": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), - }, - list, - (3, 2, 2, 2), -] +class TestRandCropByPosNegLabel(unittest.TestCase): + @staticmethod + def convert_data_type(im_type, d, keys=("img", "image", "label")): + out = deepcopy(d) + for k, v in out.items(): + if k in keys and isinstance(v, np.ndarray): + out[k] = im_type(v) + return out + @parameterized.expand(TESTS) + def test_type_shape(self, input_param, input_data, expected_shape): + results = [] + for p in TEST_NDARRAYS: + input_param_mod = self.convert_data_type(p, input_param) + input_data_mod = self.convert_data_type(p, input_data) + cropper = RandCropByPosNegLabel(**input_param_mod) + cropper.set_random_state(0) + result = cropper(**input_data_mod) -class 
TestRandCropByPosNegLabel(unittest.TestCase): - @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2]) - def test_type_shape(self, input_param, input_data, expected_type, expected_shape): - result = RandCropByPosNegLabel(**input_param)(**input_data) - self.assertIsInstance(result, expected_type) - self.assertTupleEqual(result[0].shape, expected_shape) + self.assertIsInstance(result, list) + self.assertTupleEqual(result[0].shape, expected_shape) + + # check for same results across numpy, torch.Tensor and torch.cuda.Tensor + result = np.asarray([i if isinstance(i, np.ndarray) else i.cpu().numpy() for i in result]) + results.append(np.asarray(result)) + if len(results) > 1: + np.testing.assert_allclose(results[0], results[-1]) if __name__ == "__main__": diff --git a/tests/test_rand_crop_by_pos_neg_labeld.py b/tests/test_rand_crop_by_pos_neg_labeld.py index 17a3e117bb..c200b8acac 100644 --- a/tests/test_rand_crop_by_pos_neg_labeld.py +++ b/tests/test_rand_crop_by_pos_neg_labeld.py @@ -10,90 +10,141 @@ # limitations under the License. import unittest +from copy import deepcopy import numpy as np from parameterized import parameterized from monai.transforms import RandCropByPosNegLabeld +from tests.utils import TEST_NDARRAYS -TEST_CASE_0 = [ - { - "keys": ["image", "extra", "label"], - "label_key": "label", - "spatial_size": [-1, 2, 2], - "pos": 1, - "neg": 1, - "num_samples": 2, - "image_key": None, - "image_threshold": 0, - }, - { - "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "extra": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "image_meta_dict": {"affine": np.eye(3), "shape": "CHWD"}, - }, - list, - (3, 3, 2, 2), +TESTS = [ + [ + { + "keys": ["image", "extra", "label"], + "label_key": "label", + "spatial_size": [-1, 2, 2], + "pos": 1, + "neg": 1, + "num_samples": 2, + "image_key": None, + "image_threshold": 0, + }, + { + "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "extra": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "image_meta_dict": {"affine": np.eye(3), "shape": "CHWD"}, + }, + (3, 3, 2, 2), + ], + [ + { + "keys": ["image", "extra", "label"], + "label_key": "label", + "spatial_size": [2, 2, 2], + "pos": 1, + "neg": 1, + "num_samples": 2, + "image_key": None, + "image_threshold": 0, + }, + { + "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "extra": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), + "label_meta_dict": {"affine": np.eye(3), "shape": "CHWD"}, + }, + (3, 2, 2, 2), + ], + [ + { + "keys": ["image", "extra", "label"], + "label_key": "label", + "spatial_size": [2, 2, 2], + "pos": 1, + "neg": 1, + "num_samples": 2, + "image_key": None, + "image_threshold": 0, + }, + { + "image": np.zeros([3, 3, 3, 3]) - 1, + "extra": np.zeros([3, 3, 3, 3]), + "label": np.ones([3, 3, 3, 3]), + "extra_meta_dict": {"affine": np.eye(3), "shape": "CHWD"}, + }, + (3, 2, 2, 2), + ], + [ + { + "keys": ["image", "extra", "label"], + "label_key": "label", + "spatial_size": [4, 4, 2], + "pos": 1, + "neg": 1, + "num_samples": 2, + "image_key": None, + "image_threshold": 0, + "allow_smaller": True, + }, + { + "image": np.zeros([3, 3, 3, 3]) - 1, + "extra": np.zeros([3, 3, 3, 3]), + "label": np.ones([3, 3, 3, 3]), + "extra_meta_dict": {"affine": np.eye(3), "shape": "CHWD"}, + }, + (3, 3, 3, 2), + ], + [ + { + "keys": ["image", "extra", "label"], + "label_key": "label", + "spatial_size": [4, 4, 4], + "pos": 1, + 
"neg": 1, + "num_samples": 2, + "image_key": None, + "image_threshold": 0, + "allow_smaller": True, + }, + { + "image": np.zeros([3, 3, 3, 3]) - 1, + "extra": np.zeros([3, 3, 3, 3]), + "label": np.ones([3, 3, 3, 3]), + "extra_meta_dict": {"affine": np.eye(3), "shape": "CHWD"}, + }, + (3, 3, 3, 3), + ], ] -TEST_CASE_1 = [ - { - "keys": ["image", "extra", "label"], - "label_key": "label", - "spatial_size": [2, 2, 2], - "pos": 1, - "neg": 1, - "num_samples": 2, - "image_key": None, - "image_threshold": 0, - }, - { - "image": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "extra": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "label": np.random.randint(0, 2, size=[3, 3, 3, 3]), - "label_meta_dict": {"affine": np.eye(3), "shape": "CHWD"}, - }, - list, - (3, 2, 2, 2), -] -TEST_CASE_2 = [ - { - "keys": ["image", "extra", "label"], - "label_key": "label", - "spatial_size": [2, 2, 2], - "pos": 1, - "neg": 1, - "num_samples": 2, - "image_key": None, - "image_threshold": 0, - }, - { - "image": np.zeros([3, 3, 3, 3]) - 1, - "extra": np.zeros([3, 3, 3, 3]), - "label": np.ones([3, 3, 3, 3]), - "extra_meta_dict": {"affine": np.eye(3), "shape": "CHWD"}, - }, - list, - (3, 2, 2, 2), -] +class TestRandCropByPosNegLabeld(unittest.TestCase): + @staticmethod + def convert_data_type(im_type, d, keys=("img", "image", "label")): + out = deepcopy(d) + for k, v in out.items(): + if k in keys and isinstance(v, np.ndarray): + out[k] = im_type(v) + return out + @parameterized.expand(TESTS) + def test_type_shape(self, input_param, input_data, expected_shape): + for p in TEST_NDARRAYS: + input_param_mod = self.convert_data_type(p, input_param) + input_data_mod = self.convert_data_type(p, input_data) + cropper = RandCropByPosNegLabeld(**input_param_mod) + cropper.set_random_state(0) + result = cropper(input_data_mod) -class TestRandCropByPosNegLabeld(unittest.TestCase): - @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2]) - def test_type_shape(self, input_param, input_data, expected_type, expected_shape): - result = RandCropByPosNegLabeld(**input_param)(input_data) - self.assertIsInstance(result, expected_type) - self.assertTupleEqual(result[0]["image"].shape, expected_shape) - self.assertTupleEqual(result[0]["extra"].shape, expected_shape) - self.assertTupleEqual(result[0]["label"].shape, expected_shape) - _len = len(tuple(input_data.keys())) - self.assertTupleEqual(tuple(result[0].keys())[:_len], tuple(input_data.keys())) - for i, item in enumerate(result): - self.assertEqual(item["image_meta_dict"]["patch_index"], i) - self.assertEqual(item["label_meta_dict"]["patch_index"], i) - self.assertEqual(item["extra_meta_dict"]["patch_index"], i) + self.assertIsInstance(result, list) + + _len = len(tuple(input_data.keys())) + self.assertTupleEqual(tuple(result[0].keys())[:_len], tuple(input_data.keys())) + for k in ("image", "extra", "label"): + self.assertTupleEqual(result[0][k].shape, expected_shape) + for i, item in enumerate(result): + self.assertEqual(item[k + "_meta_dict"]["patch_index"], i) if __name__ == "__main__": diff --git a/tests/test_rand_cucim_dict_transform.py b/tests/test_rand_cucim_dict_transform.py new file mode 100644 index 0000000000..c084331e0e --- /dev/null +++ b/tests/test_rand_cucim_dict_transform.py @@ -0,0 +1,185 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +from parameterized import parameterized + +from monai.transforms import RandCuCIMd +from monai.utils import optional_import, set_determinism +from tests.utils import skip_if_no_cuda + +_, has_cut = optional_import("cucim.core.operations.expose.transform") +cp, has_cp = optional_import("cupy") + +set_determinism(seed=0) + +TEST_CASE_COLOR_JITTER_1 = [ + {"name": "color_jitter", "brightness": 0.0, "contrast": 0.0, "saturation": 0.0, "hue": 0.0}, + np.array([[[0, 1], [2, 3]], [[0, 10], [20, 30]], [[0, 50], [100, 150]]], dtype=np.uint8), + np.array([[[0, 1], [2, 3]], [[0, 10], [20, 30]], [[0, 50], [100, 150]]], dtype=np.uint8), +] + +TEST_CASE_FLIP_1 = [ + {"name": "image_flip", "spatial_axis": -1}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]]], dtype=np.float32), +] + +TEST_CASE_RAND_ROTATE_1 = [ + {"name": "rand_image_rotate_90", "prob": 1.0, "max_k": 1, "spatial_axis": (-2, -1)}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]]], dtype=np.float32), +] + + +TEST_CASE_RAND_ROTATE_2 = [ + {"name": "rand_image_rotate_90", "prob": 0.0, "max_k": 1, "spatial_axis": (-2, -1)}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), +] + +TEST_CASE_SCALE_INTENSITY_1 = [ + {"name": "scale_intensity_range", "a_min": 0.0, "a_max": 4.0, "b_min": 0.0, "b_max": 1.0, "clip": False}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]]], dtype=np.float32), +] + +TEST_CASE_ZOOM_1 = [ + {"name": "zoom", "zoom_factor": (0.5, 0.5)}, + np.mgrid[:3, 1:4].astype(dtype=np.float32), + np.concatenate([np.ones((1, 3, 3), dtype=np.float32) * 1.0, np.ones((1, 3, 3), dtype=np.float32) * 2.0]), +] + +TEST_CASE_RAND_ZOOM_1 = [ + {"name": "rand_zoom", "prob": 1.0, "min_zoom": 0.5, "max_zoom": 0.5}, + np.mgrid[:3, 1:4].astype(dtype=np.float32), + np.concatenate([np.ones((1, 3, 3), dtype=np.float32) * 1.0, np.ones((1, 3, 3), dtype=np.float32) * 2.0]), +] + +TEST_CASE_RAND_ZOOM_2 = [ + {"name": "rand_zoom", "prob": 0.0, "min_zoom": 0.5, "max_zoom": 0.5}, + np.mgrid[:3, 1:4].astype(dtype=np.float32), + np.mgrid[:3, 1:4].astype(dtype=np.float32), +] + + +@skip_if_no_cuda +@unittest.skipUnless(has_cp, "CuPy is required.") +@unittest.skipUnless(has_cut, "cuCIM transforms are required.") +class TestRandCuCIMDict(unittest.TestCase): + @parameterized.expand( + [ + TEST_CASE_COLOR_JITTER_1, + TEST_CASE_FLIP_1, + TEST_CASE_RAND_ROTATE_1, + TEST_CASE_RAND_ROTATE_2, + TEST_CASE_SCALE_INTENSITY_1, + TEST_CASE_ZOOM_1, + TEST_CASE_RAND_ZOOM_1, + TEST_CASE_RAND_ZOOM_2, + ] + ) + def 
test_transforms_numpy_single(self, params, input, expected):
+        input = {"image": input}
+        # apply_prob=1.0
+        output = RandCuCIMd(keys="image", apply_prob=1.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIMd(keys="image", apply_prob=0.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == input["image"].dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, input["image"])
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_transforms_numpy_batch(self, params, input, expected):
+        input = {"image": input[cp.newaxis, ...]}
+        expected = expected[cp.newaxis, ...]
+        # apply_prob=1.0
+        output = RandCuCIMd(keys="image", apply_prob=1.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIMd(keys="image", apply_prob=0.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == input["image"].dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, input["image"])
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_transforms_cupy_single(self, params, input, expected):
+        input = {"image": cp.asarray(input)}
+        expected = cp.asarray(expected)
+        # apply_prob=1.0
+        output = RandCuCIMd(keys="image", apply_prob=1.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIMd(keys="image", apply_prob=0.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == input["image"].dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, input["image"])
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_transforms_cupy_batch(self, params, input, expected):
+        input = {"image": cp.asarray(input)[cp.newaxis, ...]}
+        expected = cp.asarray(expected)[cp.newaxis, ...]
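+        # note (editorial): the first call below omits apply_prob; this assumes
+        # RandCuCIMd defaults to apply_prob=1.0, so the "# apply_prob=1.0" comment
+        # on that call documents the assumed default rather than an explicit argument.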
+ # apply_prob=1.0 + output = RandCuCIMd(keys="image", **params)(input)["image"] + self.assertTrue(output.dtype == expected.dtype) + self.assertTrue(isinstance(output, cp.ndarray)) + cp.testing.assert_allclose(output, expected) + # apply_prob=0.0 + output = RandCuCIMd(keys="image", apply_prob=0.0, **params)(input)["image"] + self.assertTrue(output.dtype == input["image"].dtype) + self.assertTrue(isinstance(output, cp.ndarray)) + cp.testing.assert_allclose(output, input["image"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_rand_cucim_transform.py b/tests/test_rand_cucim_transform.py new file mode 100644 index 0000000000..907bc35e01 --- /dev/null +++ b/tests/test_rand_cucim_transform.py @@ -0,0 +1,184 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +from parameterized import parameterized + +from monai.transforms import RandCuCIM +from monai.utils import optional_import, set_determinism +from tests.utils import skip_if_no_cuda + +_, has_cut = optional_import("cucim.core.operations.expose.transform") +cp, has_cp = optional_import("cupy") + +set_determinism(seed=0) + +TEST_CASE_COLOR_JITTER_1 = [ + {"name": "color_jitter", "brightness": 0.0, "contrast": 0.0, "saturation": 0.0, "hue": 0.0}, + np.array([[[0, 1], [2, 3]], [[0, 10], [20, 30]], [[0, 50], [100, 150]]], dtype=np.uint8), + np.array([[[0, 1], [2, 3]], [[0, 10], [20, 30]], [[0, 50], [100, 150]]], dtype=np.uint8), +] + +TEST_CASE_FLIP_1 = [ + {"name": "image_flip", "spatial_axis": -1}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]]], dtype=np.float32), +] + +TEST_CASE_RAND_ROTATE_1 = [ + {"name": "rand_image_rotate_90", "prob": 1.0, "max_k": 1, "spatial_axis": (-2, -1)}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]]], dtype=np.float32), +] + + +TEST_CASE_RAND_ROTATE_2 = [ + {"name": "rand_image_rotate_90", "prob": 0.0, "max_k": 1, "spatial_axis": (-2, -1)}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), +] + +TEST_CASE_SCALE_INTENSITY_1 = [ + {"name": "scale_intensity_range", "a_min": 0.0, "a_max": 4.0, "b_min": 0.0, "b_max": 1.0, "clip": False}, + np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32), + np.array([[[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]]], dtype=np.float32), +] + +TEST_CASE_ZOOM_1 = [ + {"name": "zoom", "zoom_factor": (0.5, 0.5)}, + np.mgrid[:3, 1:4].astype(dtype=np.float32), + np.concatenate([np.ones((1, 3, 3), dtype=np.float32) * 1.0, np.ones((1, 3, 3), 
dtype=np.float32) * 2.0]),
+]
+
+TEST_CASE_RAND_ZOOM_1 = [
+    {"name": "rand_zoom", "prob": 1.0, "min_zoom": 0.5, "max_zoom": 0.5},
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+    np.concatenate([np.ones((1, 3, 3), dtype=np.float32) * 1.0, np.ones((1, 3, 3), dtype=np.float32) * 2.0]),
+]
+
+TEST_CASE_RAND_ZOOM_2 = [
+    {"name": "rand_zoom", "prob": 0.0, "min_zoom": 0.5, "max_zoom": 0.5},
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+]
+
+
+@skip_if_no_cuda
+@unittest.skipUnless(has_cp, "CuPy is required.")
+@unittest.skipUnless(has_cut, "cuCIM transforms are required.")
+class TestRandCuCIM(unittest.TestCase):
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_transforms_numpy_single(self, params, input, expected):
+        # apply_prob=1.0
+        output = RandCuCIM(apply_prob=1.0, **params)(input)
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIM(apply_prob=0.0, **params)(input)
+        self.assertTrue(output.dtype == input.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, input)
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_transforms_numpy_batch(self, params, input, expected):
+        input = input[cp.newaxis, ...]
+        expected = expected[cp.newaxis, ...]
+        # apply_prob=1.0
+        output = RandCuCIM(apply_prob=1.0, **params)(input)
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIM(apply_prob=0.0, **params)(input)
+        self.assertTrue(output.dtype == input.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, input)
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_transforms_cupy_single(self, params, input, expected):
+        input = cp.asarray(input)
+        expected = cp.asarray(expected)
+        # apply_prob=1.0
+        output = RandCuCIM(apply_prob=1.0, **params)(input)
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIM(apply_prob=0.0, **params)(input)
+        self.assertTrue(output.dtype == input.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, input)
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_transforms_cupy_batch(self, params, input, expected):
+        input = cp.asarray(input)[cp.newaxis, ...]
+        expected = cp.asarray(expected)[cp.newaxis, ...]
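+        # note (editorial): the first call below omits apply_prob; this assumes
+        # RandCuCIM, like its dictionary counterpart, defaults to apply_prob=1.0,
+        # so the transform is expected to always fire in the first assertion block.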
+ # apply_prob=1.0 + output = RandCuCIM(**params)(input) + self.assertTrue(output.dtype == expected.dtype) + self.assertTrue(isinstance(output, cp.ndarray)) + cp.testing.assert_allclose(output, expected) + # apply_prob=0.0 + output = RandCuCIM(apply_prob=0.0, **params)(input) + self.assertTrue(output.dtype == input.dtype) + self.assertTrue(isinstance(output, cp.ndarray)) + cp.testing.assert_allclose(output, input) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_rand_deform_grid.py b/tests/test_rand_deform_grid.py index 7c12c263d2..4725e28339 100644 --- a/tests/test_rand_deform_grid.py +++ b/tests/test_rand_deform_grid.py @@ -12,10 +12,10 @@ import unittest import numpy as np -import torch from parameterized import parameterized from monai.transforms import RandDeformGrid +from tests.utils import assert_allclose TEST_CASES = [ [ @@ -129,11 +129,7 @@ def test_rand_deform_grid(self, input_param, input_data, expected_val): g = RandDeformGrid(**input_param) g.set_random_state(123) result = g(**input_data) - self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor)) - if isinstance(result, torch.Tensor): - np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4) - else: - np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected_val, type_test=False, rtol=1e-3, atol=1e-3) if __name__ == "__main__": diff --git a/tests/test_rand_elastic_2d.py b/tests/test_rand_elastic_2d.py index fbfb7d5761..22920d0f35 100644 --- a/tests/test_rand_elastic_2d.py +++ b/tests/test_rand_elastic_2d.py @@ -16,90 +16,103 @@ from parameterized import parameterized from monai.transforms import Rand2DElastic +from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env -TEST_CASES = [ - [ - {"spacing": (0.3, 0.3), "magnitude_range": (1.0, 2.0), "prob": 0.0, "as_tensor_output": False, "device": None}, - {"img": torch.ones((3, 3, 3)), "spatial_size": (2, 2)}, - np.ones((3, 2, 2)), - ], - [ - {"spacing": (0.3, 0.3), "magnitude_range": (1.0, 2.0), "prob": 0.0, "as_tensor_output": False, "device": None}, - {"img": torch.arange(27).reshape((3, 3, 3))}, - np.arange(27).reshape((3, 3, 3)), - ], - [ - { - "spacing": (0.3, 0.3), - "magnitude_range": (1.0, 2.0), - "prob": 0.9, - "as_tensor_output": False, - "device": None, - "padding_mode": "zeros", - }, - {"img": torch.ones((3, 3, 3)), "spatial_size": (2, 2), "mode": "bilinear"}, - np.array( +_rtol = 5e-3 if is_tf32_env() else 1e-4 + +TESTS = [] +for p in TEST_NDARRAYS: + for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]: + TESTS.append( + [ + {"spacing": (0.3, 0.3), "magnitude_range": (1.0, 2.0), "prob": 0.0, "device": device}, + {"img": p(torch.ones((3, 3, 3))), "spatial_size": (2, 2)}, + p(np.ones((3, 2, 2))), + ] + ) + TESTS.append( + [ + {"spacing": (0.3, 0.3), "magnitude_range": (1.0, 2.0), "prob": 0.0, "device": device}, + {"img": p(torch.arange(27).reshape((3, 3, 3)))}, + p(np.arange(27).reshape((3, 3, 3))), + ] + ) + TESTS.append( [ - [[0.45531988, 0.0], [0.0, 0.71558857]], - [[0.45531988, 0.0], [0.0, 0.71558857]], - [[0.45531988, 0.0], [0.0, 0.71558857]], + { + "spacing": (0.3, 0.3), + "magnitude_range": (1.0, 2.0), + "prob": 0.9, + "device": device, + "padding_mode": "zeros", + }, + {"img": p(torch.ones((3, 3, 3))), "spatial_size": (2, 2), "mode": "bilinear"}, + p( + np.array( + [ + [[0.45531988, 0.0], [0.0, 0.71558857]], + [[0.45531988, 0.0], [0.0, 0.71558857]], + [[0.45531988, 0.0], 
[0.0, 0.71558857]], + ] + ) + ), ] - ), - ], - [ - { - "spacing": (1.0, 1.0), - "magnitude_range": (1.0, 1.0), - "scale_range": [1.2, 2.2], - "prob": 0.9, - "padding_mode": "border", - "as_tensor_output": True, - "device": None, - "spatial_size": (2, 2), - }, - {"img": torch.arange(27).reshape((3, 3, 3))}, - torch.tensor( + ) + TESTS.append( [ - [[3.0793, 2.6141], [4.0568, 5.9978]], - [[12.0793, 11.6141], [13.0568, 14.9978]], - [[21.0793, 20.6141], [22.0568, 23.9978]], + { + "spacing": (1.0, 1.0), + "magnitude_range": (1.0, 1.0), + "scale_range": [1.2, 2.2], + "prob": 0.9, + "padding_mode": "border", + "device": device, + "spatial_size": (2, 2), + }, + {"img": p(torch.arange(27).reshape((3, 3, 3)))}, + p( + torch.tensor( + [ + [[3.0793, 2.6141], [4.0568, 5.9978]], + [[12.0793, 11.6141], [13.0568, 14.9978]], + [[21.0793, 20.6141], [22.0568, 23.9978]], + ] + ) + ), ] - ), - ], - [ - { - "spacing": (0.3, 0.3), - "magnitude_range": (0.1, 0.2), - "translate_range": [-0.01, 0.01], - "scale_range": [0.01, 0.02], - "prob": 0.9, - "as_tensor_output": False, - "device": "cuda" if torch.cuda.is_available() else "cpu", - "spatial_size": (2, 2), - }, - {"img": torch.arange(27).reshape((3, 3, 3))}, - np.array( + ) + TESTS.append( [ - [[1.3584113, 1.9251312], [5.626623, 6.642721]], - [[10.358411, 10.925131], [14.626623, 15.642721]], - [[19.358412, 19.92513], [23.626623, 24.642721]], + { + "spacing": (0.3, 0.3), + "magnitude_range": (0.1, 0.2), + "translate_range": [-0.01, 0.01], + "scale_range": [0.01, 0.02], + "prob": 0.9, + "device": device, + "spatial_size": (2, 2), + }, + {"img": p(torch.arange(27).reshape((3, 3, 3)))}, + p( + np.array( + [ + [[1.3584113, 1.9251312], [5.626623, 6.642721]], + [[10.358411, 10.925131], [14.626623, 15.642721]], + [[19.358412, 19.92513], [23.626623, 24.642721]], + ] + ) + ), ] - ), - ], -] + ) class TestRand2DElastic(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_rand_2d_elastic(self, input_param, input_data, expected_val): g = Rand2DElastic(**input_param) g.set_random_state(123) result = g(**input_data) - self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor)) - if isinstance(result, torch.Tensor): - np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4) - else: - np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected_val, rtol=_rtol, atol=1e-4) if __name__ == "__main__": diff --git a/tests/test_rand_elastic_3d.py b/tests/test_rand_elastic_3d.py index c63282d571..712049ec1a 100644 --- a/tests/test_rand_elastic_3d.py +++ b/tests/test_rand_elastic_3d.py @@ -16,69 +16,79 @@ from parameterized import parameterized from monai.transforms import Rand3DElastic +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASES = [ - [ - { - "magnitude_range": (0.3, 2.3), - "sigma_range": (1.0, 20.0), - "prob": 0.0, - "as_tensor_output": False, - "device": None, - "spatial_size": -1, - }, - {"img": torch.arange(72).reshape((2, 3, 3, 4))}, - np.arange(72).reshape((2, 3, 3, 4)), - ], - [ - { - "magnitude_range": (0.3, 2.3), - "sigma_range": (1.0, 20.0), - "prob": 0.0, - "as_tensor_output": False, - "device": None, - }, - {"img": torch.ones((2, 3, 3, 3)), "spatial_size": (2, 2, 2)}, - np.ones((2, 2, 2, 2)), - ], - [ - { - "magnitude_range": (0.3, 0.3), - "sigma_range": (1.0, 2.0), - "prob": 0.9, - "as_tensor_output": False, - "device": None, - }, - {"img": torch.arange(27).reshape((1, 3, 3, 3)), 
"spatial_size": (2, 2, 2)}, - np.array([[[[6.4939356, 7.50289], [9.518351, 10.522849]], [[15.512375, 16.523542], [18.531467, 19.53646]]]]), - ], - [ - { - "magnitude_range": (0.3, 0.3), - "sigma_range": (1.0, 2.0), - "prob": 0.9, - "rotate_range": [1, 1, 1], - "as_tensor_output": False, - "device": "cuda" if torch.cuda.is_available() else "cpu", - "spatial_size": (2, 2, 2), - }, - {"img": torch.arange(27).reshape((1, 3, 3, 3)), "mode": "bilinear"}, - np.array([[[[5.0069294, 9.463932], [9.287769, 13.739735]], [[12.319424, 16.777205], [16.594296, 21.045748]]]]), - ], -] +TESTS = [] +for p in TEST_NDARRAYS: + for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]: + TESTS.append( + [ + { + "magnitude_range": (0.3, 2.3), + "sigma_range": (1.0, 20.0), + "prob": 0.0, + "device": device, + "spatial_size": -1, + }, + {"img": p(torch.arange(72).reshape((2, 3, 3, 4)))}, + p(np.arange(72).reshape((2, 3, 3, 4))), + ] + ) + TESTS.append( + [ + {"magnitude_range": (0.3, 2.3), "sigma_range": (1.0, 20.0), "prob": 0.0, "device": device}, + {"img": p(torch.ones((2, 3, 3, 3))), "spatial_size": (2, 2, 2)}, + p(np.ones((2, 2, 2, 2))), + ] + ) + TESTS.append( + [ + {"magnitude_range": (0.3, 0.3), "sigma_range": (1.0, 2.0), "prob": 0.9, "device": device}, + {"img": p(torch.arange(27).reshape((1, 3, 3, 3))), "spatial_size": (2, 2, 2)}, + p( + np.array( + [ + [ + [[6.4939356, 7.50289], [9.518351, 10.522849]], + [[15.512375, 16.523542], [18.531467, 19.53646]], + ] + ] + ) + ), + ] + ) + TESTS.append( + [ + { + "magnitude_range": (0.3, 0.3), + "sigma_range": (1.0, 2.0), + "prob": 0.9, + "rotate_range": [1, 1, 1], + "device": device, + "spatial_size": (2, 2, 2), + }, + {"img": p(torch.arange(27).reshape((1, 3, 3, 3))), "mode": "bilinear"}, + p( + np.array( + [ + [ + [[5.0069294, 9.463932], [9.287769, 13.739735]], + [[12.319424, 16.777205], [16.594296, 21.045748]], + ] + ] + ) + ), + ] + ) class TestRand3DElastic(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_rand_3d_elastic(self, input_param, input_data, expected_val): g = Rand3DElastic(**input_param) g.set_random_state(123) result = g(**input_data) - self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor)) - if isinstance(result, torch.Tensor): - np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4) - else: - np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) if __name__ == "__main__": diff --git a/tests/test_rand_elasticd_2d.py b/tests/test_rand_elasticd_2d.py index f8eb026088..77e6489d50 100644 --- a/tests/test_rand_elasticd_2d.py +++ b/tests/test_rand_elasticd_2d.py @@ -16,127 +16,149 @@ from parameterized import parameterized from monai.transforms import Rand2DElasticd +from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env -TEST_CASES = [ - [ - { - "keys": ("img", "seg"), - "spacing": (0.3, 0.3), - "magnitude_range": (1.0, 2.0), - "prob": 0.0, - "as_tensor_output": False, - "device": None, - "spatial_size": (2, 2), - }, - {"img": torch.ones((3, 3, 3)), "seg": torch.ones((3, 3, 3))}, - np.ones((3, 2, 2)), - ], - [ - { - "keys": ("img", "seg"), - "spacing": (0.3, 0.3), - "magnitude_range": (0.3, 0.3), - "prob": 0.0, - "as_tensor_output": False, - "device": None, - "spatial_size": -1, - }, - {"img": torch.arange(4).reshape((1, 2, 2)), "seg": torch.arange(4).reshape((1, 2, 2))}, - np.arange(4).reshape((1, 2, 2)), - ], 
- [ - { - "keys": ("img", "seg"), - "spacing": (0.3, 0.3), - "magnitude_range": (1.0, 2.0), - "prob": 0.9, - "as_tensor_output": False, - "padding_mode": "zeros", - "device": None, - "spatial_size": (2, 2), - "mode": "bilinear", - }, - {"img": torch.ones((3, 3, 3)), "seg": torch.ones((3, 3, 3))}, - np.array( +_rtol = 5e-3 if is_tf32_env() else 1e-4 + +TESTS = [] +for p in TEST_NDARRAYS: + for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]: + TESTS.append( + [ + { + "keys": ("img", "seg"), + "spacing": (0.3, 0.3), + "magnitude_range": (1.0, 2.0), + "prob": 0.0, + "device": device, + "spatial_size": (2, 2), + }, + {"img": p(torch.ones((3, 3, 3))), "seg": p(torch.ones((3, 3, 3)))}, + p(np.ones((3, 2, 2))), + ] + ) + TESTS.append( + [ + { + "keys": ("img", "seg"), + "spacing": (0.3, 0.3), + "magnitude_range": (0.3, 0.3), + "prob": 0.0, + "device": device, + "spatial_size": -1, + }, + {"img": p(torch.arange(4).reshape((1, 2, 2))), "seg": p(torch.arange(4).reshape((1, 2, 2)))}, + p(np.arange(4).reshape((1, 2, 2))), + ] + ) + TESTS.append( + [ + { + "keys": ("img", "seg"), + "spacing": (0.3, 0.3), + "magnitude_range": (1.0, 2.0), + "prob": 0.9, + "padding_mode": "zeros", + "device": device, + "spatial_size": (2, 2), + "mode": "bilinear", + }, + {"img": p(torch.ones((3, 3, 3))), "seg": p(torch.ones((3, 3, 3)))}, + p( + np.array( + [ + [[0.45531988, 0.0], [0.0, 0.71558857]], + [[0.45531988, 0.0], [0.0, 0.71558857]], + [[0.45531988, 0.0], [0.0, 0.71558857]], + ] + ) + ), + ] + ) + TESTS.append( [ - [[0.45531988, 0.0], [0.0, 0.71558857]], - [[0.45531988, 0.0], [0.0, 0.71558857]], - [[0.45531988, 0.0], [0.0, 0.71558857]], + { + "keys": ("img", "seg"), + "spacing": (1.0, 1.0), + "magnitude_range": (1.0, 1.0), + "scale_range": [1.2, 2.2], + "prob": 0.9, + "padding_mode": "border", + "device": device, + "spatial_size": (2, 2), + }, + {"img": p(torch.arange(27).reshape((3, 3, 3))), "seg": p(torch.arange(27).reshape((3, 3, 3)))}, + p( + torch.tensor( + [ + [[3.0793, 2.6141], [4.0568, 5.9978]], + [[12.0793, 11.6141], [13.0568, 14.9978]], + [[21.0793, 20.6141], [22.0568, 23.9978]], + ] + ) + ), ] - ), - ], - [ - { - "keys": ("img", "seg"), - "spacing": (1.0, 1.0), - "magnitude_range": (1.0, 1.0), - "scale_range": [1.2, 2.2], - "prob": 0.9, - "padding_mode": "border", - "as_tensor_output": True, - "device": None, - "spatial_size": (2, 2), - }, - {"img": torch.arange(27).reshape((3, 3, 3)), "seg": torch.arange(27).reshape((3, 3, 3))}, - torch.tensor( + ) + TESTS.append( [ - [[3.0793, 2.6141], [4.0568, 5.9978]], - [[12.0793, 11.6141], [13.0568, 14.9978]], - [[21.0793, 20.6141], [22.0568, 23.9978]], + { + "keys": ("img", "seg"), + "spacing": (0.3, 0.3), + "magnitude_range": (0.1, 0.2), + "translate_range": [-0.01, 0.01], + "scale_range": [0.01, 0.02], + "prob": 0.9, + "device": device, + "spatial_size": (2, 2), + }, + {"img": p(torch.arange(27).reshape((3, 3, 3))), "seg": p(torch.arange(27).reshape((3, 3, 3)))}, + p( + np.array( + [ + [[1.3584113, 1.9251312], [5.626623, 6.642721]], + [[10.358411, 10.925131], [14.626623, 15.642721]], + [[19.358412, 19.92513], [23.626623, 24.642721]], + ] + ) + ), ] - ), - ], - [ - { - "keys": ("img", "seg"), - "spacing": (0.3, 0.3), - "magnitude_range": (0.1, 0.2), - "translate_range": [-0.01, 0.01], - "scale_range": [0.01, 0.02], - "prob": 0.9, - "as_tensor_output": False, - "device": None, - "spatial_size": (2, 2), - }, - {"img": torch.arange(27).reshape((3, 3, 3)), "seg": torch.arange(27).reshape((3, 3, 3))}, - np.array( + ) + TESTS.append( [ - 
[[1.3584113, 1.9251312], [5.626623, 6.642721]], - [[10.358411, 10.925131], [14.626623, 15.642721]], - [[19.358412, 19.92513], [23.626623, 24.642721]], + { + "keys": ("img", "seg"), + "mode": ("bilinear", "nearest"), + "spacing": (0.3, 0.3), + "magnitude_range": (0.1, 0.2), + "translate_range": [-0.01, 0.01], + "scale_range": [0.01, 0.02], + "prob": 0.9, + "device": device, + "spatial_size": (2, 2), + }, + {"img": p(torch.arange(27).reshape((3, 3, 3))), "seg": p(torch.arange(27).reshape((3, 3, 3)))}, + { + "img": p( + torch.tensor( + [ + [[1.3584, 1.9251], [5.6266, 6.6427]], + [[10.3584, 10.9251], [14.6266, 15.6427]], + [[19.3584, 19.9251], [23.6266, 24.6427]], + ] + ) + ), + "seg": p( + torch.tensor( + [[[0.0, 2.0], [6.0, 8.0]], [[9.0, 11.0], [15.0, 17.0]], [[18.0, 20.0], [24.0, 26.0]]] + ) + ), + }, ] - ), - ], - [ - { - "keys": ("img", "seg"), - "mode": ("bilinear", "nearest"), - "spacing": (0.3, 0.3), - "magnitude_range": (0.1, 0.2), - "translate_range": [-0.01, 0.01], - "scale_range": [0.01, 0.02], - "prob": 0.9, - "as_tensor_output": True, - "device": None, - "spatial_size": (2, 2), - }, - {"img": torch.arange(27).reshape((3, 3, 3)), "seg": torch.arange(27).reshape((3, 3, 3))}, - { - "img": torch.tensor( - [ - [[1.3584, 1.9251], [5.6266, 6.6427]], - [[10.3584, 10.9251], [14.6266, 15.6427]], - [[19.3584, 19.9251], [23.6266, 24.6427]], - ] - ), - "seg": torch.tensor([[[0.0, 2.0], [6.0, 8.0]], [[9.0, 11.0], [15.0, 17.0]], [[18.0, 20.0], [24.0, 26.0]]]), - }, - ], -] + ) class TestRand2DElasticd(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_rand_2d_elasticd(self, input_param, input_data, expected_val): g = Rand2DElasticd(**input_param) g.set_random_state(123) @@ -144,11 +166,7 @@ def test_rand_2d_elasticd(self, input_param, input_data, expected_val): for key in res: result = res[key] expected = expected_val[key] if isinstance(expected_val, dict) else expected_val - self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected, torch.Tensor)) - if isinstance(result, torch.Tensor): - np.testing.assert_allclose(result.cpu().numpy(), expected.cpu().numpy(), rtol=1e-4, atol=1e-4) - else: - np.testing.assert_allclose(result, expected, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected, rtol=_rtol, atol=5e-3) if __name__ == "__main__": diff --git a/tests/test_rand_elasticd_3d.py b/tests/test_rand_elasticd_3d.py index 47ab814882..5f8a5f47ed 100644 --- a/tests/test_rand_elasticd_3d.py +++ b/tests/test_rand_elasticd_3d.py @@ -16,98 +16,128 @@ from parameterized import parameterized from monai.transforms import Rand3DElasticd +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASES = [ - [ - { - "keys": ("img", "seg"), - "magnitude_range": (0.3, 2.3), - "sigma_range": (1.0, 20.0), - "prob": 0.0, - "as_tensor_output": False, - "device": None, - "spatial_size": (2, 2, 2), - }, - {"img": torch.ones((2, 3, 3, 3)), "seg": torch.ones((2, 3, 3, 3))}, - np.ones((2, 2, 2, 2)), - ], - [ - { - "keys": ("img", "seg"), - "magnitude_range": (0.3, 2.3), - "sigma_range": (1.0, 20.0), - "prob": 0.0, - "as_tensor_output": False, - "device": None, - "spatial_size": (2, -1, -1), - }, - {"img": torch.ones((2, 3, 3, 3)), "seg": torch.ones((2, 3, 3, 3))}, - np.ones((2, 2, 3, 3)), - ], - [ - { - "keys": ("img", "seg"), - "magnitude_range": (0.3, 2.3), - "sigma_range": (1.0, 20.0), - "prob": 0.0, - "as_tensor_output": False, - "device": None, - "spatial_size": -1, - }, - {"img": torch.arange(8).reshape((1, 2, 2, 2)), "seg": 
torch.arange(8).reshape((1, 2, 2, 2))}, - np.arange(8).reshape((1, 2, 2, 2)), - ], - [ - { - "keys": ("img", "seg"), - "magnitude_range": (0.3, 0.3), - "sigma_range": (1.0, 2.0), - "prob": 0.9, - "as_tensor_output": False, - "device": None, - "spatial_size": (2, 2, 2), - }, - {"img": torch.arange(27).reshape((1, 3, 3, 3)), "seg": torch.arange(27).reshape((1, 3, 3, 3))}, - np.array([[[[6.4939356, 7.50289], [9.518351, 10.522849]], [[15.512375, 16.523542], [18.531467, 19.53646]]]]), - ], - [ - { - "keys": ("img", "seg"), - "magnitude_range": (0.3, 0.3), - "sigma_range": (1.0, 2.0), - "prob": 0.9, - "rotate_range": [1, 1, 1], - "as_tensor_output": False, - "device": None, - "spatial_size": (2, 2, 2), - "mode": "bilinear", - }, - {"img": torch.arange(27).reshape((1, 3, 3, 3)), "seg": torch.arange(27).reshape((1, 3, 3, 3))}, - np.array([[[[5.0069294, 9.463932], [9.287769, 13.739735]], [[12.319424, 16.777205], [16.594296, 21.045748]]]]), - ], - [ - { - "keys": ("img", "seg"), - "mode": ("bilinear", "nearest"), - "magnitude_range": (0.3, 0.3), - "sigma_range": (1.0, 2.0), - "prob": 0.9, - "rotate_range": [1, 1, 1], - "as_tensor_output": True, - "device": torch.device("cpu:0"), - "spatial_size": (2, 2, 2), - }, - {"img": torch.arange(27).reshape((1, 3, 3, 3)), "seg": torch.arange(27).reshape((1, 3, 3, 3))}, - { - "img": torch.tensor([[[[5.0069, 9.4639], [9.2878, 13.7397]], [[12.3194, 16.7772], [16.5943, 21.0457]]]]), - "seg": torch.tensor([[[[4.0, 14.0], [7.0, 14.0]], [[9.0, 19.0], [12.0, 22.0]]]]), - }, - ], -] +TESTS = [] +for p in TEST_NDARRAYS: + for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]: + TESTS.append( + [ + { + "keys": ("img", "seg"), + "magnitude_range": (0.3, 2.3), + "sigma_range": (1.0, 20.0), + "prob": 0.0, + "device": device, + "spatial_size": (2, 2, 2), + }, + {"img": p(torch.ones((2, 3, 3, 3))), "seg": p(torch.ones((2, 3, 3, 3)))}, + p(np.ones((2, 2, 2, 2))), + ] + ) + TESTS.append( + [ + { + "keys": ("img", "seg"), + "magnitude_range": (0.3, 2.3), + "sigma_range": (1.0, 20.0), + "prob": 0.0, + "device": device, + "spatial_size": (2, -1, -1), + }, + {"img": p(torch.ones((2, 3, 3, 3))), "seg": p(torch.ones((2, 3, 3, 3)))}, + p(np.ones((2, 2, 3, 3))), + ] + ) + TESTS.append( + [ + { + "keys": ("img", "seg"), + "magnitude_range": (0.3, 2.3), + "sigma_range": (1.0, 20.0), + "prob": 0.0, + "device": device, + "spatial_size": -1, + }, + {"img": p(torch.arange(8).reshape((1, 2, 2, 2))), "seg": p(torch.arange(8).reshape((1, 2, 2, 2)))}, + p(np.arange(8).reshape((1, 2, 2, 2))), + ] + ) + TESTS.append( + [ + { + "keys": ("img", "seg"), + "magnitude_range": (0.3, 0.3), + "sigma_range": (1.0, 2.0), + "prob": 0.9, + "device": device, + "spatial_size": (2, 2, 2), + }, + {"img": p(torch.arange(27).reshape((1, 3, 3, 3))), "seg": p(torch.arange(27).reshape((1, 3, 3, 3)))}, + p( + np.array( + [ + [ + [[6.4939356, 7.50289], [9.518351, 10.522849]], + [[15.512375, 16.523542], [18.531467, 19.53646]], + ] + ] + ) + ), + ] + ) + TESTS.append( + [ + { + "keys": ("img", "seg"), + "magnitude_range": (0.3, 0.3), + "sigma_range": (1.0, 2.0), + "prob": 0.9, + "rotate_range": [1, 1, 1], + "device": device, + "spatial_size": (2, 2, 2), + "mode": "bilinear", + }, + {"img": p(torch.arange(27).reshape((1, 3, 3, 3))), "seg": p(torch.arange(27).reshape((1, 3, 3, 3)))}, + p( + np.array( + [ + [ + [[5.0069294, 9.463932], [9.287769, 13.739735]], + [[12.319424, 16.777205], [16.594296, 21.045748]], + ] + ] + ) + ), + ] + ) + TESTS.append( + [ + { + "keys": ("img", "seg"), + "mode": 
("bilinear", "nearest"), + "magnitude_range": (0.3, 0.3), + "sigma_range": (1.0, 2.0), + "prob": 0.9, + "rotate_range": [1, 1, 1], + "device": device, + "spatial_size": (2, 2, 2), + }, + {"img": p(torch.arange(27).reshape((1, 3, 3, 3))), "seg": p(torch.arange(27).reshape((1, 3, 3, 3)))}, + { + "img": p( + torch.tensor( + [[[[5.0069, 9.4639], [9.2878, 13.7397]], [[12.3194, 16.7772], [16.5943, 21.0457]]]] + ) + ), + "seg": p(torch.tensor([[[[4.0, 14.0], [7.0, 14.0]], [[9.0, 19.0], [12.0, 22.0]]]])), + }, + ] + ) class TestRand3DElasticd(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_rand_3d_elasticd(self, input_param, input_data, expected_val): g = Rand3DElasticd(**input_param) g.set_random_state(123) @@ -115,11 +145,7 @@ def test_rand_3d_elasticd(self, input_param, input_data, expected_val): for key in res: result = res[key] expected = expected_val[key] if isinstance(expected_val, dict) else expected_val - self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected, torch.Tensor)) - if isinstance(result, torch.Tensor): - np.testing.assert_allclose(result.cpu().numpy(), expected.cpu().numpy(), rtol=1e-4, atol=1e-4) - else: - np.testing.assert_allclose(result, expected, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected, rtol=1e-4, atol=1e-4) if __name__ == "__main__": diff --git a/tests/test_rand_flip.py b/tests/test_rand_flip.py index b3c514cb1f..df49d60861 100644 --- a/tests/test_rand_flip.py +++ b/tests/test_rand_flip.py @@ -34,12 +34,10 @@ def test_correct_results(self, _, spatial_axis): for p in TEST_NDARRAYS: im = p(self.imt[0]) flip = RandFlip(prob=1.0, spatial_axis=spatial_axis) - expected = [] - for channel in self.imt[0]: - expected.append(np.flip(channel, spatial_axis)) + expected = [np.flip(channel, spatial_axis) for channel in self.imt[0]] expected = np.stack(expected) result = flip(im) - assert_allclose(expected, result) + assert_allclose(result, p(expected)) if __name__ == "__main__": diff --git a/tests/test_rand_flipd.py b/tests/test_rand_flipd.py index 8972024fd8..c2869537cb 100644 --- a/tests/test_rand_flipd.py +++ b/tests/test_rand_flipd.py @@ -26,11 +26,9 @@ def test_correct_results(self, _, spatial_axis): for p in TEST_NDARRAYS: flip = RandFlipd(keys="img", prob=1.0, spatial_axis=spatial_axis) result = flip({"img": p(self.imt[0])})["img"] - expected = [] - for channel in self.imt[0]: - expected.append(np.flip(channel, spatial_axis)) + expected = [np.flip(channel, spatial_axis) for channel in self.imt[0]] expected = np.stack(expected) - assert_allclose(expected, result) + assert_allclose(result, p(expected)) if __name__ == "__main__": diff --git a/tests/test_rand_gaussian_noised.py b/tests/test_rand_gaussian_noised.py index 4b0d2a311a..d9fd5d023d 100644 --- a/tests/test_rand_gaussian_noised.py +++ b/tests/test_rand_gaussian_noised.py @@ -34,9 +34,11 @@ def test_correct_results(self, _, im_type, keys, mean, std): im = im_type(self.imt) noised = gaussian_fn({k: im for k in keys}) np.random.seed(seed) + # simulate the randomize() of transform np.random.random() + noise = np.random.normal(mean, np.random.uniform(0, std), size=self.imt.shape) for k in keys: - expected = self.imt + np.random.normal(mean, np.random.uniform(0, std), size=self.imt.shape) + expected = self.imt + noise self.assertEqual(type(im), type(noised[k])) if isinstance(noised[k], torch.Tensor): noised[k] = noised[k].cpu() diff --git a/tests/test_rand_gaussian_sharpen.py b/tests/test_rand_gaussian_sharpen.py index 909f96f56b..4804fc2422 100644 
--- a/tests/test_rand_gaussian_sharpen.py +++ b/tests/test_rand_gaussian_sharpen.py @@ -11,88 +11,127 @@ import unittest -import numpy as np from parameterized import parameterized from monai.transforms import RandGaussianSharpen +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - {"prob": 1.0}, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( +TESTS = [] + +for p in TEST_NDARRAYS: + TESTS.append( [ - [[5.2919216, 5.5854445, 5.29192], [11.3982, 12.62332, 11.398202], [14.870525, 17.323769, 14.870527]], - [[20.413757, 22.767355, 20.413757], [28.495504, 31.558315, 28.495499], [29.99236, 34.505676, 29.992361]], + {"prob": 1.0}, + p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), + p( + [ + [ + [5.2919216, 5.5854445, 5.29192], + [11.3982, 12.62332, 11.398202], + [14.870525, 17.323769, 14.870527], + ], + [ + [20.413757, 22.767355, 20.413757], + [28.495504, 31.558315, 28.495499], + [29.99236, 34.505676, 29.992361], + ], + ] + ), ] - ), -] + ) -TEST_CASE_2 = [ - { - "sigma1_x": (0.5, 0.75), - "sigma1_y": (0.5, 0.75), - "sigma1_z": (0.5, 0.75), - "sigma2_x": 0.4, - "sigma2_y": 0.4, - "sigma2_z": 0.4, - "prob": 1.0, - }, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( + TESTS.append( [ - [[4.1071496, 3.597953, 4.1071477], [10.062014, 9.825114, 10.0620165], [14.698058, 15.818766, 14.698058]], - [[18.211048, 18.16049, 18.211048], [25.155039, 24.56279, 25.155039], [28.801964, 30.381308, 28.801964]], + { + "sigma1_x": (0.5, 0.75), + "sigma1_y": (0.5, 0.75), + "sigma1_z": (0.5, 0.75), + "sigma2_x": 0.4, + "sigma2_y": 0.4, + "sigma2_z": 0.4, + "prob": 1.0, + }, + p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), + p( + [ + [ + [4.1071496, 3.597953, 4.1071477], + [10.062014, 9.825114, 10.0620165], + [14.698058, 15.818766, 14.698058], + ], + [ + [18.211048, 18.16049, 18.211048], + [25.155039, 24.56279, 25.155039], + [28.801964, 30.381308, 28.801964], + ], + ] + ), ] - ), -] + ) -TEST_CASE_3 = [ - { - "sigma1_x": (0.5, 0.75), - "sigma1_y": (0.5, 0.75), - "sigma1_z": (0.5, 0.75), - "sigma2_x": (0.5, 0.75), - "sigma2_y": (0.5, 0.75), - "sigma2_z": (0.5, 0.75), - "prob": 1.0, - }, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( + TESTS.append( [ - [[4.81077, 4.4237204, 4.81077], [12.061236, 12.298177, 12.061236], [17.362553, 19.201174, 17.362553]], - [[21.440754, 22.142393, 21.440754], [30.15308, 30.745445, 30.153086], [33.99255, 36.919838, 33.99255]], + { + "sigma1_x": (0.5, 0.75), + "sigma1_y": (0.5, 0.75), + "sigma1_z": (0.5, 0.75), + "sigma2_x": (0.5, 0.75), + "sigma2_y": (0.5, 0.75), + "sigma2_z": (0.5, 0.75), + "prob": 1.0, + }, + p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), + p( + [ + [ + [4.81077, 4.4237204, 4.81077], + [12.061236, 12.298177, 12.061236], + [17.362553, 19.201174, 17.362553], + ], + [ + [21.440754, 22.142393, 21.440754], + [30.15308, 30.745445, 30.153086], + [33.99255, 36.919838, 33.99255], + ], + ] + ), ] - ), -] + ) -TEST_CASE_4 = [ - { - "sigma1_x": (0.5, 0.75), - "sigma1_y": (0.5, 0.75), - "sigma1_z": (0.5, 0.75), - "sigma2_x": (0.5, 0.75), - "sigma2_y": (0.5, 0.75), - "sigma2_z": (0.5, 0.75), - "approx": "scalespace", - "prob": 1.0, - }, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( + TESTS.append( [ - [[4.430213, 3.2278745, 4.4302144], [10.325399, 8.507457, 10.325399], [17.494898, 16.5609, 
17.494894]], - [[20.87405, 18.06946, 20.87405], [25.813503, 21.268656, 25.8135], [33.93874, 31.402481, 33.938725]], + { + "sigma1_x": (0.5, 0.75), + "sigma1_y": (0.5, 0.75), + "sigma1_z": (0.5, 0.75), + "sigma2_x": (0.5, 0.75), + "sigma2_y": (0.5, 0.75), + "sigma2_z": (0.5, 0.75), + "approx": "scalespace", + "prob": 1.0, + }, + p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), + p( + [ + [ + [4.430213, 3.2278745, 4.4302144], + [10.325399, 8.507457, 10.325399], + [17.494898, 16.5609, 17.494894], + ], + [[20.87405, 18.06946, 20.87405], [25.813503, 21.268656, 25.8135], [33.93874, 31.402481, 33.938725]], + ] + ), ] - ), -] + ) class TestRandGaussianSharpen(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4]) + @parameterized.expand(TESTS) def test_value(self, argments, image, expected_data): converter = RandGaussianSharpen(**argments) converter.set_random_state(seed=0) result = converter(image) - np.testing.assert_allclose(result, expected_data, rtol=1e-4) + assert_allclose(result, expected_data, atol=0, rtol=1e-4, type_test=False) if __name__ == "__main__": diff --git a/tests/test_rand_gaussian_sharpend.py b/tests/test_rand_gaussian_sharpend.py index 9ba29ee71b..3508ebaa19 100644 --- a/tests/test_rand_gaussian_sharpend.py +++ b/tests/test_rand_gaussian_sharpend.py @@ -15,87 +15,126 @@ from parameterized import parameterized from monai.transforms import RandGaussianSharpend +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - {"keys": "img", "prob": 1.0}, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( [ - [[5.2919216, 5.5854445, 5.29192], [11.3982, 12.62332, 11.398202], [14.870525, 17.323769, 14.870527]], - [[20.413757, 22.767355, 20.413757], [28.495504, 31.558315, 28.495499], [29.99236, 34.505676, 29.992361]], + {"keys": "img", "prob": 1.0}, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [5.2919216, 5.5854445, 5.29192], + [11.3982, 12.62332, 11.398202], + [14.870525, 17.323769, 14.870527], + ], + [ + [20.413757, 22.767355, 20.413757], + [28.495504, 31.558315, 28.495499], + [29.99236, 34.505676, 29.992361], + ], + ] + ), ] - ), -] + ) -TEST_CASE_2 = [ - { - "keys": "img", - "sigma1_x": (0.5, 0.75), - "sigma1_y": (0.5, 0.75), - "sigma1_z": (0.5, 0.75), - "sigma2_x": 0.4, - "sigma2_y": 0.4, - "sigma2_z": 0.4, - "prob": 1.0, - }, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( + TESTS.append( [ - [[4.1071496, 3.597953, 4.1071477], [10.062014, 9.825114, 10.0620165], [14.698058, 15.818766, 14.698058]], - [[18.211048, 18.16049, 18.211048], [25.155039, 24.56279, 25.155039], [28.801964, 30.381308, 28.801964]], + { + "keys": "img", + "sigma1_x": (0.5, 0.75), + "sigma1_y": (0.5, 0.75), + "sigma1_z": (0.5, 0.75), + "sigma2_x": 0.4, + "sigma2_y": 0.4, + "sigma2_z": 0.4, + "prob": 1.0, + }, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [4.1071496, 3.597953, 4.1071477], + [10.062014, 9.825114, 10.0620165], + [14.698058, 15.818766, 14.698058], + ], + [ + [18.211048, 18.16049, 18.211048], + [25.155039, 24.56279, 25.155039], + [28.801964, 30.381308, 28.801964], + ], + ] + ), ] - ), -] + ) -TEST_CASE_3 = [ - { - "keys": "img", - "sigma1_x": (0.5, 0.75), - "sigma1_y": (0.5, 0.75), - "sigma1_z": (0.5, 0.75), - 
"sigma2_x": (0.5, 0.75), - "sigma2_y": (0.5, 0.75), - "sigma2_z": (0.5, 0.75), - "prob": 1.0, - }, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( + TESTS.append( [ - [[4.81077, 4.4237204, 4.81077], [12.061236, 12.298177, 12.061236], [17.362553, 19.201174, 17.362553]], - [[21.440754, 22.142393, 21.440754], [30.15308, 30.745445, 30.153086], [33.99255, 36.919838, 33.99255]], + { + "keys": "img", + "sigma1_x": (0.5, 0.75), + "sigma1_y": (0.5, 0.75), + "sigma1_z": (0.5, 0.75), + "sigma2_x": (0.5, 0.75), + "sigma2_y": (0.5, 0.75), + "sigma2_z": (0.5, 0.75), + "prob": 1.0, + }, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [4.81077, 4.4237204, 4.81077], + [12.061236, 12.298177, 12.061236], + [17.362553, 19.201174, 17.362553], + ], + [ + [21.440754, 22.142393, 21.440754], + [30.15308, 30.745445, 30.153086], + [33.99255, 36.919838, 33.99255], + ], + ] + ), ] - ), -] + ) -TEST_CASE_4 = [ - { - "keys": "img", - "sigma1_x": (0.5, 0.75), - "sigma1_y": (0.5, 0.75), - "sigma1_z": (0.5, 0.75), - "sigma2_x": (0.5, 0.75), - "sigma2_y": (0.5, 0.75), - "sigma2_z": (0.5, 0.75), - "approx": "scalespace", - "prob": 1.0, - }, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( + TESTS.append( [ - [[4.430213, 3.2278745, 4.4302144], [10.325399, 8.507457, 10.325399], [17.494898, 16.5609, 17.494894]], - [[20.87405, 18.06946, 20.87405], [25.813503, 21.268656, 25.8135], [33.93874, 31.402481, 33.938725]], + { + "keys": "img", + "sigma1_x": (0.5, 0.75), + "sigma1_y": (0.5, 0.75), + "sigma1_z": (0.5, 0.75), + "sigma2_x": (0.5, 0.75), + "sigma2_y": (0.5, 0.75), + "sigma2_z": (0.5, 0.75), + "approx": "scalespace", + "prob": 1.0, + }, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [4.430213, 3.2278745, 4.4302144], + [10.325399, 8.507457, 10.325399], + [17.494898, 16.5609, 17.494894], + ], + [[20.87405, 18.06946, 20.87405], [25.813503, 21.268656, 25.8135], [33.93874, 31.402481, 33.938725]], + ] + ), ] - ), -] + ) class TestRandGaussianSharpend(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4]) + @parameterized.expand(TESTS) def test_value(self, argments, image, expected_data): converter = RandGaussianSharpend(**argments) converter.set_random_state(seed=0) result = converter(image) - np.testing.assert_allclose(result["img"], expected_data, rtol=1e-4) + assert_allclose(result["img"], expected_data, rtol=1e-4, type_test=False) if __name__ == "__main__": diff --git a/tests/test_rand_gaussian_smooth.py b/tests/test_rand_gaussian_smooth.py index 889ed7d6d5..b4d4304b67 100644 --- a/tests/test_rand_gaussian_smooth.py +++ b/tests/test_rand_gaussian_smooth.py @@ -15,48 +15,81 @@ from parameterized import parameterized from monai.transforms import RandGaussianSmooth +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - {"sigma_x": (0.5, 1.5), "prob": 1.0}, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( [ - [[0.71806467, 0.9074683, 0.71806467], [1.0718315, 1.3545481, 1.0718315], [1.0337002, 1.306359, 1.0337002]], - [[2.0318885, 2.5678391, 2.0318885], [2.6795788, 3.3863702, 2.6795788], [2.3475242, 2.9667296, 2.3475242]], + {"sigma_x": (0.5, 1.5), "prob": 1.0}, + p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], 
[5, 5, 5], [6, 6, 6]]])), + np.array( + [ + [ + [0.71806467, 0.9074683, 0.71806467], + [1.0718315, 1.3545481, 1.0718315], + [1.0337002, 1.306359, 1.0337002], + ], + [ + [2.0318885, 2.5678391, 2.0318885], + [2.6795788, 3.3863702, 2.6795788], + [2.3475242, 2.9667296, 2.3475242], + ], + ] + ), ] - ), -] + ) -TEST_CASE_2 = [ - {"sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "prob": 1.0}, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( + TESTS.append( [ - [[0.7686928, 0.9848021, 0.7686928], [1.1474025, 1.4699818, 1.1474024], [1.1065826, 1.4176859, 1.1065826]], - [[2.1751494, 2.7866683, 2.1751497], [2.8685062, 3.6749542, 2.8685062], [2.5130394, 3.219552, 2.5130394]], + {"sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "prob": 1.0}, + p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])), + np.array( + [ + [ + [0.7686928, 0.9848021, 0.7686928], + [1.1474025, 1.4699818, 1.1474024], + [1.1065826, 1.4176859, 1.1065826], + ], + [ + [2.1751494, 2.7866683, 2.1751497], + [2.8685062, 3.6749542, 2.8685062], + [2.5130394, 3.219552, 2.5130394], + ], + ] + ), ] - ), -] + ) -TEST_CASE_3 = [ - {"sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "approx": "scalespace", "prob": 1.0}, - np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]), - np.array( + TESTS.append( [ - [[0.8128456, 0.96736777, 0.8128456], [1.2742369, 1.5164697, 1.2742369], [1.2800367, 1.5233722, 1.2800368]], - [[2.3825073, 2.8354228, 2.3825073], [3.1855922, 3.7911744, 3.1855922], [2.8496985, 3.391427, 2.8496985]], + {"sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "approx": "scalespace", "prob": 1.0}, + p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])), + np.array( + [ + [ + [0.8128456, 0.96736777, 0.8128456], + [1.2742369, 1.5164697, 1.2742369], + [1.2800367, 1.5233722, 1.2800368], + ], + [ + [2.3825073, 2.8354228, 2.3825073], + [3.1855922, 3.7911744, 3.1855922], + [2.8496985, 3.391427, 2.8496985], + ], + ] + ), ] - ), -] + ) class TestRandGaussianSmooth(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) + @parameterized.expand(TESTS) def test_value(self, argments, image, expected_data): converter = RandGaussianSmooth(**argments) converter.set_random_state(seed=0) result = converter(image) - np.testing.assert_allclose(result, expected_data, rtol=1e-4) + assert_allclose(result, expected_data, rtol=1e-4, type_test=False) if __name__ == "__main__": diff --git a/tests/test_rand_gaussian_smoothd.py b/tests/test_rand_gaussian_smoothd.py index 2eedc9071c..2c80b978f2 100644 --- a/tests/test_rand_gaussian_smoothd.py +++ b/tests/test_rand_gaussian_smoothd.py @@ -15,48 +15,81 @@ from parameterized import parameterized from monai.transforms import RandGaussianSmoothd +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [ - {"keys": "img", "sigma_x": (0.5, 1.5), "prob": 1.0}, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( [ - [[0.71806467, 0.9074683, 0.71806467], [1.0718315, 1.3545481, 1.0718315], [1.0337002, 1.306359, 1.0337002]], - [[2.0318885, 2.5678391, 2.0318885], [2.6795788, 3.3863702, 2.6795788], [2.3475242, 2.9667296, 2.3475242]], + {"keys": "img", "sigma_x": (0.5, 1.5), "prob": 1.0}, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [0.71806467, 0.9074683, 0.71806467], + [1.0718315, 1.3545481, 
1.0718315], + [1.0337002, 1.306359, 1.0337002], + ], + [ + [2.0318885, 2.5678391, 2.0318885], + [2.6795788, 3.3863702, 2.6795788], + [2.3475242, 2.9667296, 2.3475242], + ], + ] + ), ] - ), -] + ) -TEST_CASE_2 = [ - {"keys": "img", "sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "prob": 1.0}, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( + TESTS.append( [ - [[0.7686928, 0.9848021, 0.7686928], [1.1474025, 1.4699818, 1.1474024], [1.1065826, 1.4176859, 1.1065826]], - [[2.1751494, 2.7866683, 2.1751497], [2.8685062, 3.6749542, 2.8685062], [2.5130394, 3.219552, 2.5130394]], + {"keys": "img", "sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "prob": 1.0}, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [0.7686928, 0.9848021, 0.7686928], + [1.1474025, 1.4699818, 1.1474024], + [1.1065826, 1.4176859, 1.1065826], + ], + [ + [2.1751494, 2.7866683, 2.1751497], + [2.8685062, 3.6749542, 2.8685062], + [2.5130394, 3.219552, 2.5130394], + ], + ] + ), ] - ), -] + ) -TEST_CASE_3 = [ - {"keys": "img", "sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "approx": "scalespace", "prob": 1.0}, - {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])}, - np.array( + TESTS.append( [ - [[0.8128456, 0.96736777, 0.8128456], [1.2742369, 1.5164697, 1.2742369], [1.2800367, 1.5233722, 1.2800368]], - [[2.3825073, 2.8354228, 2.3825073], [3.1855922, 3.7911744, 3.1855922], [2.8496985, 3.391427, 2.8496985]], + {"keys": "img", "sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "approx": "scalespace", "prob": 1.0}, + {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))}, + np.array( + [ + [ + [0.8128456, 0.96736777, 0.8128456], + [1.2742369, 1.5164697, 1.2742369], + [1.2800367, 1.5233722, 1.2800368], + ], + [ + [2.3825073, 2.8354228, 2.3825073], + [3.1855922, 3.7911744, 3.1855922], + [2.8496985, 3.391427, 2.8496985], + ], + ] + ), ] - ), -] + ) class TestRandGaussianSmoothd(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) + @parameterized.expand(TESTS) def test_value(self, argments, image, expected_data): converter = RandGaussianSmoothd(**argments) converter.set_random_state(seed=0) result = converter(image) - np.testing.assert_allclose(result["img"], expected_data, rtol=1e-4) + assert_allclose(result["img"], expected_data, rtol=1e-4, type_test=False) if __name__ == "__main__": diff --git a/tests/test_rand_gibbs_noise.py b/tests/test_rand_gibbs_noise.py index a0701d09c3..15cadea0e2 100644 --- a/tests/test_rand_gibbs_noise.py +++ b/tests/test_rand_gibbs_noise.py @@ -19,17 +19,17 @@ from monai.data.synthetic import create_test_image_2d, create_test_image_3d from monai.transforms import RandGibbsNoise from monai.utils.misc import set_determinism -from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule +from monai.utils.module import optional_import +from tests.utils import TEST_NDARRAYS + +_, has_torch_fft = optional_import("torch.fft", name="fftshift") TEST_CASES = [] for shape in ((128, 64), (64, 48, 80)): - for as_tensor_output in (True, False): - for as_tensor_input in (True, False): - TEST_CASES.append((shape, as_tensor_output, as_tensor_input)) + for input_type in TEST_NDARRAYS if has_torch_fft else [np.array]: + TEST_CASES.append((shape, input_type)) -@SkipIfBeforePyTorchVersion((1, 8)) -@SkipIfNoModule("torch.fft") class TestRandGibbsNoise(unittest.TestCase): def setUp(self): set_determinism(0) @@ 
-39,50 +39,50 @@ def tearDown(self): set_determinism(None) @staticmethod - def get_data(im_shape, as_tensor_input): + def get_data(im_shape, input_type): create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d im = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5)[0][None] - return torch.Tensor(im) if as_tensor_input else im + return input_type(im) @parameterized.expand(TEST_CASES) - def test_0_prob(self, im_shape, as_tensor_output, as_tensor_input): - im = self.get_data(im_shape, as_tensor_input) + def test_0_prob(self, im_shape, input_type): + im = self.get_data(im_shape, input_type) alpha = [0.5, 1.0] - t = RandGibbsNoise(0.0, alpha, as_tensor_output) + t = RandGibbsNoise(0.0, alpha) out = t(im) - np.testing.assert_allclose(im, out) + torch.testing.assert_allclose(im, out, rtol=1e-7, atol=0) @parameterized.expand(TEST_CASES) - def test_same_result(self, im_shape, as_tensor_output, as_tensor_input): - im = self.get_data(im_shape, as_tensor_input) + def test_same_result(self, im_shape, input_type): + im = self.get_data(im_shape, input_type) alpha = [0.5, 0.8] - t = RandGibbsNoise(1.0, alpha, as_tensor_output) + t = RandGibbsNoise(1.0, alpha) t.set_random_state(42) out1 = t(deepcopy(im)) t.set_random_state(42) out2 = t(deepcopy(im)) - np.testing.assert_allclose(out1, out2) - self.assertIsInstance(out1, torch.Tensor if as_tensor_output else np.ndarray) + torch.testing.assert_allclose(out1, out2, rtol=1e-7, atol=0) + self.assertIsInstance(out1, type(im)) @parameterized.expand(TEST_CASES) - def test_identity(self, im_shape, _, as_tensor_input): - im = self.get_data(im_shape, as_tensor_input) + def test_identity(self, im_shape, input_type): + im = self.get_data(im_shape, input_type) alpha = [0.0, 0.0] t = RandGibbsNoise(1.0, alpha) out = t(deepcopy(im)) - np.testing.assert_allclose(im, out, atol=1e-2) + torch.testing.assert_allclose(im, out, atol=1e-2, rtol=1e-7) @parameterized.expand(TEST_CASES) - def test_alpha_1(self, im_shape, _, as_tensor_input): - im = self.get_data(im_shape, as_tensor_input) + def test_alpha_1(self, im_shape, input_type): + im = self.get_data(im_shape, input_type) alpha = [1.0, 1.0] t = RandGibbsNoise(1.0, alpha) out = t(deepcopy(im)) - np.testing.assert_allclose(0 * im, out) + torch.testing.assert_allclose(0 * im, out, rtol=1e-7, atol=0) @parameterized.expand(TEST_CASES) - def test_alpha(self, im_shape, _, as_tensor_input): - im = self.get_data(im_shape, as_tensor_input) + def test_alpha(self, im_shape, input_type): + im = self.get_data(im_shape, input_type) alpha = [0.5, 0.51] t = RandGibbsNoise(1.0, alpha) _ = t(deepcopy(im)) diff --git a/tests/test_rand_gibbs_noised.py b/tests/test_rand_gibbs_noised.py index b778bffdda..ac5fc164e2 100644 --- a/tests/test_rand_gibbs_noised.py +++ b/tests/test_rand_gibbs_noised.py @@ -19,19 +19,19 @@ from monai.data.synthetic import create_test_image_2d, create_test_image_3d from monai.transforms import RandGibbsNoised from monai.utils.misc import set_determinism -from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule +from monai.utils.module import optional_import +from tests.utils import TEST_NDARRAYS + +_, has_torch_fft = optional_import("torch.fft", name="fftshift") TEST_CASES = [] for shape in ((128, 64), (64, 48, 80)): - for as_tensor_output in (True, False): - for as_tensor_input in (True, False): - TEST_CASES.append((shape, as_tensor_output, as_tensor_input)) + for input_type in TEST_NDARRAYS if has_torch_fft else [np.array]: + TEST_CASES.append((shape, 
input_type)) KEYS = ["im", "label"] -@SkipIfBeforePyTorchVersion((1, 8)) -@SkipIfNoModule("torch.fft") class TestRandGibbsNoised(unittest.TestCase): def setUp(self): set_determinism(0) @@ -41,70 +41,76 @@ def tearDown(self): set_determinism(None) @staticmethod - def get_data(im_shape, as_tensor_input): + def get_data(im_shape, input_type): create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d ims = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5) - ims = [torch.Tensor(im) for im in ims] if as_tensor_input else ims - return dict(zip(KEYS, ims)) + return {k: input_type(v) for k, v in zip(KEYS, ims)} @parameterized.expand(TEST_CASES) - def test_0_prob(self, im_shape, as_tensor_output, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) + def test_0_prob(self, im_shape, input_type): + data = self.get_data(im_shape, input_type) alpha = [0.5, 1.0] - t = RandGibbsNoised(KEYS, 0.0, alpha, as_tensor_output) + t = RandGibbsNoised(KEYS, 0.0, alpha) out = t(data) for k in KEYS: - np.testing.assert_allclose(data[k], out[k]) + torch.testing.assert_allclose(data[k], out[k], rtol=1e-7, atol=0) @parameterized.expand(TEST_CASES) - def test_same_result(self, im_shape, as_tensor_output, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) + def test_same_result(self, im_shape, input_type): + data = self.get_data(im_shape, input_type) alpha = [0.5, 0.8] - t = RandGibbsNoised(KEYS, 1.0, alpha, as_tensor_output) + t = RandGibbsNoised(KEYS, 1.0, alpha) t.set_random_state(42) out1 = t(deepcopy(data)) t.set_random_state(42) out2 = t(deepcopy(data)) for k in KEYS: - np.testing.assert_allclose(out1[k], out2[k]) - self.assertIsInstance(out1[k], torch.Tensor if as_tensor_output else np.ndarray) + torch.testing.assert_allclose(out1[k], out2[k], rtol=1e-7, atol=0) + self.assertIsInstance(out1[k], type(data[k])) @parameterized.expand(TEST_CASES) - def test_identity(self, im_shape, _, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) + def test_identity(self, im_shape, input_type): + data = self.get_data(im_shape, input_type) alpha = [0.0, 0.0] t = RandGibbsNoised(KEYS, 1.0, alpha) out = t(deepcopy(data)) for k in KEYS: + self.assertEqual(type(out[k]), type(data[k])) + if isinstance(out[k], torch.Tensor): + self.assertEqual(out[k].device, data[k].device) + out[k], data[k] = out[k].cpu(), data[k].cpu() np.testing.assert_allclose(data[k], out[k], atol=1e-2) @parameterized.expand(TEST_CASES) - def test_alpha_1(self, im_shape, _, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) + def test_alpha_1(self, im_shape, input_type): + data = self.get_data(im_shape, input_type) alpha = [1.0, 1.0] t = RandGibbsNoised(KEYS, 1.0, alpha) out = t(deepcopy(data)) for k in KEYS: - np.testing.assert_allclose(0 * data[k], out[k]) + self.assertEqual(type(out[k]), type(data[k])) + if isinstance(out[k], torch.Tensor): + self.assertEqual(out[k].device, data[k].device) + out[k], data[k] = out[k].cpu(), data[k].cpu() + np.testing.assert_allclose(0.0 * data[k], out[k], atol=1e-2) @parameterized.expand(TEST_CASES) - def test_dict_matches(self, im_shape, _, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) + def test_dict_matches(self, im_shape, input_type): + data = self.get_data(im_shape, input_type) # use same image for both dictionary entries to check same trans is applied to them data = {KEYS[0]: deepcopy(data[KEYS[0]]), KEYS[1]: deepcopy(data[KEYS[0]])} alpha = [0.5, 1.0] t = RandGibbsNoised(KEYS, 1.0, 
alpha) out = t(deepcopy(data)) - np.testing.assert_allclose(out[KEYS[0]], out[KEYS[1]]) + torch.testing.assert_allclose(out[KEYS[0]], out[KEYS[1]], rtol=1e-7, atol=0) @parameterized.expand(TEST_CASES) - def test_alpha(self, im_shape, _, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) + def test_alpha(self, im_shape, input_type): + data = self.get_data(im_shape, input_type) alpha = [0.5, 0.51] t = RandGibbsNoised(KEYS, 1.0, alpha) _ = t(deepcopy(data)) - self.assertGreaterEqual(t.sampled_alpha, 0.5) - self.assertLessEqual(t.sampled_alpha, 0.51) + self.assertTrue(0.5 <= t.rand_gibbs_noise.sampled_alpha <= 0.51) if __name__ == "__main__": diff --git a/tests/test_rand_grid_distortion.py b/tests/test_rand_grid_distortion.py new file mode 100644 index 0000000000..eabe6d34a1 --- /dev/null +++ b/tests/test_rand_grid_distortion.py @@ -0,0 +1,94 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +from parameterized import parameterized + +from monai.transforms import RandGridDistortion +from tests.utils import TEST_NDARRAYS, assert_allclose + +TESTS = [] +for p in TEST_NDARRAYS: + seed = 0 + TESTS.append( + [ + dict(num_cells=2, prob=1.0, distort_limit=0.5, mode="nearest", padding_mode="zeros"), + seed, + p(np.indices([6, 6]).astype(np.float32)), + p( + np.array( + [ + [ + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [2.0, 2.0, 2.0, 2.0, 2.0, 0.0], + [4.0, 4.0, 4.0, 4.0, 4.0, 0.0], + [4.0, 4.0, 4.0, 4.0, 4.0, 0.0], + [5.0, 5.0, 5.0, 5.0, 5.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 1.0, 3.0, 3.0, 4.0, 0.0], + [0.0, 1.0, 3.0, 3.0, 4.0, 0.0], + [0.0, 1.0, 3.0, 3.0, 4.0, 0.0], + [0.0, 1.0, 3.0, 3.0, 4.0, 0.0], + [0.0, 1.0, 3.0, 3.0, 4.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ], + ] + ).astype(np.float32) + ), + ] + ) + seed = 1 + TESTS.append( + [ + dict(num_cells=(2, 2), prob=1.0, distort_limit=0.1, mode="bilinear", padding_mode="reflection"), + seed, + p(np.indices([6, 6]).astype(np.float32)), + p( + np.array( + [ + [ + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [1.5660975, 1.5660975, 1.5660975, 1.5660975, 1.5660974, 1.5660975], + [3.132195, 3.132195, 3.132195, 3.132195, 3.132195, 3.132195], + [3.132195, 3.132195, 3.132195, 3.132195, 3.132195, 3.132195], + [4.482229, 4.482229, 4.482229, 4.482229, 4.482229, 4.482229], + [4.167737, 4.167737, 4.167737, 4.167737, 4.167737, 4.167737], + ], + [ + [0.0, 1.3940268, 2.7880535, 2.7880535, 4.1657553, 4.4565434], + [0.0, 1.3940268, 2.7880535, 2.7880535, 4.1657553, 4.4565434], + [0.0, 1.3940268, 2.7880535, 2.7880535, 4.1657553, 4.4565434], + [0.0, 1.3940268, 2.7880535, 2.7880535, 4.1657553, 4.4565434], + [0.0, 1.3940268, 2.7880535, 2.7880535, 4.1657553, 4.4565434], + [0.0, 1.3940266, 2.7880538, 2.7880538, 4.1657557, 4.456543], + ], + ] + ).astype(np.float32) + ), + ] + ) + + +class TestRandGridDistortion(unittest.TestCase): + @parameterized.expand(TESTS) + def test_rand_grid_distortion(self, input_param, seed, input_data, expected_val): + g = 
RandGridDistortion(**input_param) + g.set_random_state(seed=seed) + result = g(input_data) + assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_rand_grid_distortiond.py b/tests/test_rand_grid_distortiond.py new file mode 100644 index 0000000000..835f38743c --- /dev/null +++ b/tests/test_rand_grid_distortiond.py @@ -0,0 +1,88 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +from parameterized import parameterized + +from monai.transforms import RandGridDistortiond +from tests.utils import TEST_NDARRAYS, assert_allclose + +TESTS = [] +num_cells = 2 +seed = 0 +for p in TEST_NDARRAYS: + img = np.indices([6, 6]).astype(np.float32) + TESTS.append( + [ + dict( + keys=["img", "mask"], + num_cells=num_cells, + prob=1.0, + distort_limit=(-0.1, 0.1), + mode=["bilinear", "nearest"], + padding_mode="zeros", + ), + seed, + {"img": p(img), "mask": p(np.ones_like(img[:1]))}, + p( + np.array( + [ + [ + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + [1.5645568, 1.5645568, 1.5645568, 1.5645568, 1.5645568, 0.0], + [3.1291137, 3.1291137, 3.1291137, 3.1291137, 3.1291137, 0.0], + [3.1291137, 3.1291137, 3.1291137, 3.1291137, 3.1291137, 0.0], + [4.6599426, 4.6599426, 4.6599426, 4.6599426, 4.6599426, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 1.4770963, 2.9541926, 2.9541926, 4.497961, 0.0], + [0.0, 1.4770963, 2.9541926, 2.9541926, 4.497961, 0.0], + [0.0, 1.4770963, 2.9541926, 2.9541926, 4.497961, 0.0], + [0.0, 1.4770963, 2.9541926, 2.9541926, 4.497961, 0.0], + [0.0, 1.4770963, 2.9541926, 2.9541926, 4.497961, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ], + ] + ).astype(np.float32) + ), + p( + np.array( + [ + [ + [1.0, 1.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 1.0, 0.0], + [1.0, 1.0, 1.0, 1.0, 1.0, 0.0], + [0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + ] + ] + ) + ), + ] + ) + + +class TestRandGridDistortiond(unittest.TestCase): + @parameterized.expand(TESTS) + def test_rand_grid_distortiond(self, input_param, seed, input_data, expected_val_img, expected_val_mask): + g = RandGridDistortiond(**input_param) + g.set_random_state(seed=seed) + result = g(input_data) + assert_allclose(result["img"], expected_val_img, rtol=1e-4, atol=1e-4) + assert_allclose(result["mask"], expected_val_mask, rtol=1e-4, atol=1e-4) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_rand_histogram_shift.py b/tests/test_rand_histogram_shift.py index b258cc5a7e..e38e2ea5f8 100644 --- a/tests/test_rand_histogram_shift.py +++ b/tests/test_rand_histogram_shift.py @@ -15,33 +15,40 @@ from parameterized import parameterized from monai.transforms import RandHistogramShift - -TEST_CASES = [ - [ - {"num_control_points": 5, "prob": 0.0}, - {"img": np.arange(8).reshape((1, 2, 2, 2))}, - np.arange(8).reshape((1, 2, 2, 2)), - ], - [ - {"num_control_points": 5, "prob": 0.9}, - {"img": np.arange(8).reshape((1, 2, 2, 
2)).astype(np.float32)}, - np.array([[[[0.0, 0.57227867], [1.1391707, 1.68990281]], [[2.75833219, 4.34445884], [5.70913743, 7.0]]]]), - ], - [ - {"num_control_points": (5, 20), "prob": 0.9}, - {"img": np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32)}, - np.array([[[[0.0, 1.17472492], [2.21553091, 2.88292011]], [[3.98407301, 5.01302123], [6.09275004, 7.0]]]]), - ], -] +from tests.utils import TEST_NDARRAYS, assert_allclose + +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + {"num_control_points": 5, "prob": 0.0}, + {"img": p(np.arange(8).reshape((1, 2, 2, 2)))}, + np.arange(8).reshape((1, 2, 2, 2)), + ] + ) + TESTS.append( + [ + {"num_control_points": 5, "prob": 0.9}, + {"img": p(np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32))}, + np.array([[[[0.0, 0.57227867], [1.1391707, 1.68990281]], [[2.75833219, 4.34445884], [5.70913743, 7.0]]]]), + ] + ) + TESTS.append( + [ + {"num_control_points": (5, 20), "prob": 0.9}, + {"img": p(np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32))}, + np.array([[[[0.0, 1.17472492], [2.21553091, 2.88292011]], [[3.98407301, 5.01302123], [6.09275004, 7.0]]]]), + ] + ) class TestRandHistogramShift(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_rand_histogram_shift(self, input_param, input_data, expected_val): g = RandHistogramShift(**input_param) g.set_random_state(123) result = g(**input_data) - np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4, type_test=False) if __name__ == "__main__": diff --git a/tests/test_rand_histogram_shiftd.py b/tests/test_rand_histogram_shiftd.py index 806e4f5cf2..2191e99518 100644 --- a/tests/test_rand_histogram_shiftd.py +++ b/tests/test_rand_histogram_shiftd.py @@ -14,47 +14,60 @@ import numpy as np from parameterized import parameterized -from monai.transforms import RandHistogramShiftD - -TEST_CASES = [ - [ - {"keys": ("img",), "num_control_points": 5, "prob": 0.0}, - {"img": np.arange(8).reshape((1, 2, 2, 2)), "seg": np.ones(8).reshape((1, 2, 2, 2))}, - {"img": np.arange(8).reshape((1, 2, 2, 2)), "seg": np.ones(8).reshape((1, 2, 2, 2))}, - ], - [ - {"keys": ("img",), "num_control_points": 5, "prob": 0.9}, - {"img": np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32), "seg": np.ones(8).reshape((1, 2, 2, 2))}, - { - "img": np.array( - [[[[0.0, 0.57227867], [1.1391707, 1.68990281]], [[2.75833219, 4.34445884], [5.70913743, 7.0]]]] - ), - "seg": np.ones(8).reshape((1, 2, 2, 2)), - }, - ], - [ - {"keys": ("img",), "num_control_points": (5, 20), "prob": 0.9}, - {"img": np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32), "seg": np.ones(8).reshape((1, 2, 2, 2))}, - { - "img": np.array( - [[[[0.0, 1.17472492], [2.21553091, 2.88292011]], [[3.98407301, 5.01302123], [6.09275004, 7.0]]]] - ), - "seg": np.ones(8).reshape((1, 2, 2, 2)), - }, - ], -] +from monai.transforms.intensity.dictionary import RandHistogramShiftd +from tests.utils import TEST_NDARRAYS, assert_allclose + +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + {"keys": ("img",), "num_control_points": 5, "prob": 0.0}, + {"img": p(np.arange(8).reshape((1, 2, 2, 2))), "seg": p(np.ones(8).reshape((1, 2, 2, 2)))}, + {"img": np.arange(8).reshape((1, 2, 2, 2)), "seg": np.ones(8).reshape((1, 2, 2, 2))}, + ] + ) + TESTS.append( + [ + {"keys": ("img",), "num_control_points": 5, "prob": 0.9}, + { + "img": p(np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32)), + "seg": p(np.ones(8).reshape((1, 2, 2, 2))), + }, + { + "img": 
np.array( + [[[[0.0, 0.57227867], [1.1391707, 1.68990281]], [[2.75833219, 4.34445884], [5.70913743, 7.0]]]] + ), + "seg": np.ones(8).reshape((1, 2, 2, 2)), + }, + ] + ) + TESTS.append( + [ + {"keys": ("img",), "num_control_points": (5, 20), "prob": 0.9}, + { + "img": p(np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32)), + "seg": p(np.ones(8).reshape((1, 2, 2, 2))), + }, + { + "img": np.array( + [[[[0.0, 1.17472492], [2.21553091, 2.88292011]], [[3.98407301, 5.01302123], [6.09275004, 7.0]]]] + ), + "seg": np.ones(8).reshape((1, 2, 2, 2)), + }, + ] + ) class TestRandHistogramShiftD(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_rand_histogram_shiftd(self, input_param, input_data, expected_val): - g = RandHistogramShiftD(**input_param) + g = RandHistogramShiftd(**input_param) g.set_random_state(123) res = g(input_data) for key in res: result = res[key] expected = expected_val[key] if isinstance(expected_val, dict) else expected_val - np.testing.assert_allclose(result, expected, rtol=1e-4, atol=1e-4) + assert_allclose(result, expected, rtol=1e-4, atol=1e-4, type_test=False) if __name__ == "__main__": diff --git a/tests/test_rand_k_space_spike_noise.py b/tests/test_rand_k_space_spike_noise.py index 71f7e36d9b..1c9ca9c1d5 100644 --- a/tests/test_rand_k_space_spike_noise.py +++ b/tests/test_rand_k_space_spike_noise.py @@ -19,18 +19,15 @@ from monai.data.synthetic import create_test_image_2d, create_test_image_3d from monai.transforms import KSpaceSpikeNoise, RandKSpaceSpikeNoise from monai.utils.misc import set_determinism -from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule +from tests.utils import TEST_NDARRAYS -TEST_CASES = [] +TESTS = [] for shape in ((128, 64), (64, 48, 80)): - for as_tensor_output in (True, False): - for as_tensor_input in (True, False): - for channel_wise in (True, False): - TEST_CASES.append((shape, as_tensor_output, as_tensor_input, channel_wise)) + for p in TEST_NDARRAYS: + for channel_wise in (True, False): + TESTS.append((shape, p, channel_wise)) -@SkipIfBeforePyTorchVersion((1, 8)) -@SkipIfNoModule("torch.fft") class TestRandKSpaceSpikeNoise(unittest.TestCase): def setUp(self): set_determinism(0) @@ -40,44 +37,55 @@ def tearDown(self): set_determinism(None) @staticmethod - def get_data(im_shape, as_tensor_input): + def get_data(im_shape, im_type): create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d im = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5)[0][None] - return torch.Tensor(im) if as_tensor_input else im + return im_type(im) - @parameterized.expand(TEST_CASES) - def test_0_prob(self, im_shape, as_tensor_output, as_tensor_input, channel_wise): - im = self.get_data(im_shape, as_tensor_input) + @parameterized.expand(TESTS) + def test_0_prob(self, im_shape, im_type, channel_wise): + im = self.get_data(im_shape, im_type) intensity_range = [14, 15] - t = RandKSpaceSpikeNoise(0.0, intensity_range, channel_wise, as_tensor_output) + t = RandKSpaceSpikeNoise(0.0, intensity_range, channel_wise) out = t(im) + self.assertEqual(type(im), type(out)) + if isinstance(out, torch.Tensor): + self.assertEqual(out.device, im.device) + im, out = im.cpu(), out.cpu() np.testing.assert_allclose(im, out) - @parameterized.expand(TEST_CASES) - def test_1_prob(self, im_shape, as_tensor_output, as_tensor_input, channel_wise): - im = self.get_data(im_shape, as_tensor_input) + @parameterized.expand(TESTS) + def test_1_prob(self, im_shape, im_type, channel_wise): + 
im = self.get_data(im_shape, im_type) intensity_range = [14, 14] - t = RandKSpaceSpikeNoise(1.0, intensity_range, channel_wise, as_tensor_output) + t = RandKSpaceSpikeNoise(1.0, intensity_range, channel_wise) out = t(im) - base_t = KSpaceSpikeNoise(t.sampled_locs, [14], as_tensor_output) + base_t = KSpaceSpikeNoise(t.sampled_locs, [14]) out = out - base_t(im) + self.assertEqual(type(im), type(out)) + if isinstance(out, torch.Tensor): + self.assertEqual(out.device, im.device) + im, out = im.cpu(), out.cpu() np.testing.assert_allclose(out, im * 0) - @parameterized.expand(TEST_CASES) - def test_same_result(self, im_shape, as_tensor_output, as_tensor_input, channel_wise): - im = self.get_data(im_shape, as_tensor_input) + @parameterized.expand(TESTS) + def test_same_result(self, im_shape, im_type, channel_wise): + im = self.get_data(im_shape, im_type) intensity_range = [14, 15] - t = RandKSpaceSpikeNoise(0.0, intensity_range, channel_wise, as_tensor_output) + t = RandKSpaceSpikeNoise(0.0, intensity_range, channel_wise) t.set_random_state(42) out1 = t(deepcopy(im)) t.set_random_state(42) out2 = t(deepcopy(im)) + self.assertEqual(type(im), type(out1)) + if isinstance(out1, torch.Tensor): + self.assertEqual(out1.device, im.device) + out1, out2 = out1.cpu(), out2.cpu() np.testing.assert_allclose(out1, out2) - self.assertIsInstance(out1, torch.Tensor if as_tensor_output else np.ndarray) - @parameterized.expand(TEST_CASES) - def test_intensity(self, im_shape, _, as_tensor_input, channel_wise): - im = self.get_data(im_shape, as_tensor_input) + @parameterized.expand(TESTS) + def test_intensity(self, im_shape, im_type, channel_wise): + im = self.get_data(im_shape, im_type) intensity_range = [14, 14.1] t = RandKSpaceSpikeNoise(1.0, intensity_range, channel_wise) _ = t(deepcopy(im)) diff --git a/tests/test_rand_k_space_spike_noised.py b/tests/test_rand_k_space_spike_noised.py index 1056ebf163..9036166b61 100644 --- a/tests/test_rand_k_space_spike_noised.py +++ b/tests/test_rand_k_space_spike_noised.py @@ -19,19 +19,16 @@ from monai.data.synthetic import create_test_image_2d, create_test_image_3d from monai.transforms import RandKSpaceSpikeNoised from monai.utils.misc import set_determinism -from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule +from tests.utils import TEST_NDARRAYS -TEST_CASES = [] +TESTS = [] for shape in ((128, 64), (64, 48, 80)): - for as_tensor_output in (True, False): - for as_tensor_input in (True, False): - TEST_CASES.append((shape, as_tensor_output, as_tensor_input)) + for p in TEST_NDARRAYS: + TESTS.append((shape, p)) KEYS = ["image", "label"] -@SkipIfBeforePyTorchVersion((1, 8)) -@SkipIfNoModule("torch.fft") class TestKSpaceSpikeNoised(unittest.TestCase): def setUp(self): set_determinism(0) @@ -41,107 +38,53 @@ def tearDown(self): set_determinism(None) @staticmethod - def get_data(im_shape, as_tensor_input): + def get_data(im_shape, im_type): create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d ims = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5) - ims = [im[None] for im in ims] - ims = [torch.Tensor(im) for im in ims] if as_tensor_input else ims - return dict(zip(KEYS, ims)) - - @parameterized.expand(TEST_CASES) - def test_same_result(self, im_shape, as_tensor_output, as_tensor_input): - - data = self.get_data(im_shape, as_tensor_input) - - intensity_ranges = {"image": (13, 15), "label": (13, 15)} - t = RandKSpaceSpikeNoised( - KEYS, - global_prob=1.0, - prob=1.0, - intensity_ranges=intensity_ranges, - 
channel_wise=True, - as_tensor_output=as_tensor_output, - ) - t.set_rand_state(42) + ims = [im_type(im[None]) for im in ims] + return {k: v for k, v in zip(KEYS, ims)} + + @parameterized.expand(TESTS) + def test_same_result(self, im_shape, im_type): + + data = self.get_data(im_shape, im_type) + + t = RandKSpaceSpikeNoised(KEYS, prob=1.0, intensity_range=(13, 15), channel_wise=True) + t.set_random_state(42) out1 = t(deepcopy(data)) - t.set_rand_state(42) + t.set_random_state(42) out2 = t(deepcopy(data)) for k in KEYS: + self.assertEqual(type(out1[k]), type(data[k])) + if isinstance(out1[k], torch.Tensor): + self.assertEqual(out1[k].device, data[k].device) + out1[k] = out1[k].cpu() + out2[k] = out2[k].cpu() np.testing.assert_allclose(out1[k], out2[k], atol=1e-10) - self.assertIsInstance(out1[k], torch.Tensor if as_tensor_output else np.ndarray) - - @parameterized.expand(TEST_CASES) - def test_0_prob(self, im_shape, as_tensor_output, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) - intensity_ranges = {"image": (13, 15), "label": (13, 15)} - t1 = RandKSpaceSpikeNoised( - KEYS, - global_prob=0.0, - prob=1.0, - intensity_ranges=intensity_ranges, - channel_wise=True, - as_tensor_output=as_tensor_output, - ) - - t2 = RandKSpaceSpikeNoised( - KEYS, - global_prob=0.0, - prob=1.0, - intensity_ranges=intensity_ranges, - channel_wise=True, - as_tensor_output=as_tensor_output, - ) + + @parameterized.expand(TESTS) + def test_0_prob(self, im_shape, im_type): + data = self.get_data(im_shape, im_type) + + t1 = RandKSpaceSpikeNoised(KEYS, prob=0.0, intensity_range=(13, 15), channel_wise=True) + + t2 = RandKSpaceSpikeNoised(KEYS, prob=0.0, intensity_range=(13, 15), channel_wise=True) out1 = t1(data) out2 = t2(data) for k in KEYS: + self.assertEqual(type(out1[k]), type(data[k])) + if isinstance(out1[k], torch.Tensor): + self.assertEqual(out1[k].device, data[k].device) + out1[k] = out1[k].cpu() + out2[k] = out2[k].cpu() + data[k] = data[k].cpu() + np.testing.assert_allclose(data[k], out1[k]) np.testing.assert_allclose(data[k], out2[k]) - @parameterized.expand(TEST_CASES) - def test_intensity(self, im_shape, as_tensor_output, as_tensor_input): - - data = self.get_data(im_shape, as_tensor_input) - intensity_ranges = {"image": (13, 13.1), "label": (13, 13.1)} - t = RandKSpaceSpikeNoised( - KEYS, - global_prob=1.0, - prob=1.0, - intensity_ranges=intensity_ranges, - channel_wise=True, - as_tensor_output=True, - ) - - _ = t(data) - self.assertGreaterEqual(t.transforms["image"].sampled_k_intensity[0], 13) - self.assertLessEqual(t.transforms["image"].sampled_k_intensity[0], 13.1) - self.assertGreaterEqual(t.transforms["label"].sampled_k_intensity[0], 13) - self.assertLessEqual(t.transforms["label"].sampled_k_intensity[0], 13.1) - - @parameterized.expand(TEST_CASES) - def test_same_transformation(self, im_shape, _, as_tensor_input): - data = self.get_data(im_shape, as_tensor_input) - # use same image for both dictionary entries to check same trans is applied to them - data = {KEYS[0]: deepcopy(data[KEYS[0]]), KEYS[1]: deepcopy(data[KEYS[0]])} - - intensity_ranges = {"image": (13, 15), "label": (13, 15)} - # use common_sampling = True to ask for the same transformation - t = RandKSpaceSpikeNoised( - KEYS, - global_prob=1.0, - prob=1.0, - intensity_ranges=intensity_ranges, - channel_wise=True, - common_sampling=True, - as_tensor_output=True, - ) - - out = t(deepcopy(data)) - - np.testing.assert_allclose(out[KEYS[0]], out[KEYS[1]]) - if __name__ == "__main__": unittest.main() diff --git 
a/tests/test_rand_local_patch_shuffle.py b/tests/test_rand_local_patch_shuffle.py deleted file mode 100644 index 8e2eefb5d1..0000000000 --- a/tests/test_rand_local_patch_shuffle.py +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright 2020 - 2021 MONAI Consortium -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -import numpy as np -from parameterized import parameterized - -from monai.transforms import LocalPatchShuffling - -TEST_CASES = [ - [ - {"number_blocks": 10, "blocksize_ratio": 1, "prob": 0.0}, - {"img": np.arange(8).reshape((1, 2, 2, 2))}, - np.arange(8).reshape((1, 2, 2, 2)), - ], - [ - {"number_blocks": 10, "blocksize_ratio": 1, "prob": 1.0}, - {"img": np.arange(27).reshape((1, 3, 3, 3))}, - [ - [ - [[9, 1, 2], [3, 4, 5], [6, 7, 8]], - [[0, 10, 11], [12, 4, 14], [15, 16, 17]], - [[18, 19, 20], [21, 22, 23], [24, 25, 26]], - ] - ], - ], -] - - -class TestLocalPatchShuffle(unittest.TestCase): - @parameterized.expand(TEST_CASES) - def test_local_patch_shuffle(self, input_param, input_data, expected_val): - g = LocalPatchShuffling(**input_param) - g.set_random_state(seed=12) - result = g(**input_data) - np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/test_rand_rician_noised.py b/tests/test_rand_rician_noised.py index 010bbcb310..e8cb84dc99 100644 --- a/tests/test_rand_rician_noised.py +++ b/tests/test_rand_rician_noised.py @@ -34,6 +34,7 @@ def test_correct_results(self, _, in_type, keys, mean, std): noised = rician_fn({k: in_type(self.imt) for k in keys}) np.random.seed(seed) for k in keys: + # simulate the `randomize` function of transform np.random.random() _std = np.random.uniform(0, std) expected = np.sqrt( diff --git a/tests/test_rand_rotate.py b/tests/test_rand_rotate.py index 0ff8508a0f..4817e81735 100644 --- a/tests/test_rand_rotate.py +++ b/tests/test_rand_rotate.py @@ -10,25 +10,60 @@ # limitations under the License. 
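# A minimal, self-contained sketch of the TEST_NDARRAYS parameterization that
# this patch rolls out across the tests, replacing the old as_tensor_input /
# as_tensor_output booleans. It assumes tests.utils.TEST_NDARRAYS is roughly a
# tuple of input constructors (numpy plus torch, with a CUDA variant when a
# GPU is visible); the definition below is illustrative, not the repository's
# actual implementation, and the name CASES is hypothetical.
import numpy as np
import torch

TEST_NDARRAYS = [np.array, torch.as_tensor]
if torch.cuda.is_available():  # assumed guard, mirroring the CPU/GPU split seen in these tests
    TEST_NDARRAYS.append(lambda x: torch.as_tensor(x, device="cuda"))

# each case carries its constructor `p`, so one test body covers numpy
# arrays, CPU tensors and (optionally) CUDA tensors
CASES = [(p, np.pi / 2) for p in TEST_NDARRAYS]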
import unittest +from typing import List, Tuple import numpy as np import scipy.ndimage +import torch from parameterized import parameterized from monai.transforms import RandRotate -from tests.utils import NumpyImageTestCase2D, NumpyImageTestCase3D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D +TEST_CASES_2D: List[Tuple] = [] +for p in TEST_NDARRAYS: + TEST_CASES_2D.append((p, np.pi / 2, True, "bilinear", "border", False)) + TEST_CASES_2D.append((p, np.pi / 4, True, "nearest", "border", False)) + TEST_CASES_2D.append((p, np.pi, False, "nearest", "zeros", True)) + TEST_CASES_2D.append((p, (-np.pi / 4, 0), False, "nearest", "zeros", True)) -class TestRandRotate2D(NumpyImageTestCase2D): - @parameterized.expand( - [ - (np.pi / 2, True, "bilinear", "border", False), - (np.pi / 4, True, "nearest", "border", False), - (np.pi, False, "nearest", "zeros", True), - ((-np.pi / 4, 0), False, "nearest", "zeros", True), - ] +TEST_CASES_3D: List[Tuple] = [] +for p in TEST_NDARRAYS: + TEST_CASES_3D.append( + (p, np.pi / 2, -np.pi / 6, (0.0, np.pi), False, "bilinear", "border", False, (1, 87, 104, 109)) + ) + TEST_CASES_3D.append( + ( + p, + np.pi / 4, + (-np.pi / 9, np.pi / 4.5), + (np.pi / 9, np.pi / 6), + False, + "nearest", + "border", + True, + (1, 89, 105, 104), + ) + ) + TEST_CASES_3D.append( + ( + p, + 0.0, + (2 * np.pi, 2.06 * np.pi), + (-np.pi / 180, np.pi / 180), + True, + "nearest", + "zeros", + True, + (1, 48, 64, 80), + ) ) - def test_correct_results(self, degrees, keep_size, mode, padding_mode, align_corners): + TEST_CASES_3D.append((p, (-np.pi / 4, 0), 0, 0, False, "nearest", "zeros", False, (1, 48, 77, 90))) + + +class TestRandRotate2D(NumpyImageTestCase2D): + @parameterized.expand(TEST_CASES_2D) + def test_correct_results(self, im_type, degrees, keep_size, mode, padding_mode, align_corners): rotate_fn = RandRotate( range_x=degrees, prob=1.0, @@ -38,7 +73,7 @@ def test_correct_results(self, degrees, keep_size, mode, padding_mode, align_cor align_corners=align_corners, ) rotate_fn.set_random_state(243) - rotated = rotate_fn(self.imt[0]) + rotated = rotate_fn(im_type(self.imt[0])) _order = 0 if mode == "nearest" else 1 if mode == "border": @@ -52,38 +87,14 @@ def test_correct_results(self, degrees, keep_size, mode, padding_mode, align_cor self.imt[0, 0], -np.rad2deg(angle), (0, 1), not keep_size, order=_order, mode=_mode, prefilter=False ) expected = np.stack(expected).astype(np.float32) + rotated = rotated.cpu() if isinstance(rotated, torch.Tensor) else rotated good = np.sum(np.isclose(expected, rotated[0], atol=1e-3)) self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 pixels") class TestRandRotate3D(NumpyImageTestCase3D): - @parameterized.expand( - [ - (np.pi / 2, -np.pi / 6, (0.0, np.pi), False, "bilinear", "border", False, (1, 87, 104, 109)), - ( - np.pi / 4, - (-np.pi / 9, np.pi / 4.5), - (np.pi / 9, np.pi / 6), - False, - "nearest", - "border", - True, - (1, 89, 105, 104), - ), - ( - 0.0, - (2 * np.pi, 2.06 * np.pi), - (-np.pi / 180, np.pi / 180), - True, - "nearest", - "zeros", - True, - (1, 48, 64, 80), - ), - ((-np.pi / 4, 0), 0, 0, False, "nearest", "zeros", False, (1, 48, 77, 90)), - ] - ) - def test_correct_results(self, x, y, z, keep_size, mode, padding_mode, align_corners, expected): + @parameterized.expand(TEST_CASES_3D) + def test_correct_results(self, im_type, x, y, z, keep_size, mode, padding_mode, align_corners, expected): rotate_fn = RandRotate( range_x=x, range_y=y, @@ -95,8 +106,8 @@ def test_correct_results(self, 
x, y, z, keep_size, mode, padding_mode, align_cor align_corners=align_corners, ) rotate_fn.set_random_state(243) - rotated = rotate_fn(self.imt[0]) - np.testing.assert_allclose(rotated.shape, expected) + rotated = rotate_fn(im_type(self.imt[0])) + torch.testing.assert_allclose(rotated.shape, expected, rtol=1e-7, atol=0) if __name__ == "__main__": diff --git a/tests/test_rand_rotate90.py b/tests/test_rand_rotate90.py index 50a1b28e53..8d4e591559 100644 --- a/tests/test_rand_rotate90.py +++ b/tests/test_rand_rotate90.py @@ -14,49 +14,45 @@ import numpy as np from monai.transforms import RandRotate90 -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose class TestRandRotate90(NumpyImageTestCase2D): def test_default(self): rotate = RandRotate90() - rotate.set_random_state(123) - rotated = rotate(self.imt[0]) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 0, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated, expected)) + for p in TEST_NDARRAYS: + rotate.set_random_state(123) + rotated = rotate(p(self.imt[0])) + expected = [np.rot90(channel, 0, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8) def test_k(self): rotate = RandRotate90(max_k=2) - rotate.set_random_state(234) - rotated = rotate(self.imt[0]) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 0, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated, expected)) + for p in TEST_NDARRAYS: + rotate.set_random_state(123) + rotated = rotate(p(self.imt[0])) + expected = [np.rot90(channel, 0, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8) def test_spatial_axes(self): rotate = RandRotate90(spatial_axes=(0, 1)) - rotate.set_random_state(234) - rotated = rotate(self.imt[0]) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 0, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated, expected)) + for p in TEST_NDARRAYS: + rotate.set_random_state(123) + rotated = rotate(p(self.imt[0])) + expected = [np.rot90(channel, 0, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8) def test_prob_k_spatial_axes(self): rotate = RandRotate90(prob=1.0, max_k=2, spatial_axes=(0, 1)) - rotate.set_random_state(234) - rotated = rotate(self.imt[0]) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 1, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated, expected)) + for p in TEST_NDARRAYS: + rotate.set_random_state(234) + rotated = rotate(p(self.imt[0])) + expected = [np.rot90(channel, 1, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8) if __name__ == "__main__": diff --git a/tests/test_rand_rotate90d.py b/tests/test_rand_rotate90d.py index a487b695f5..3071aa82c8 100644 --- a/tests/test_rand_rotate90d.py +++ b/tests/test_rand_rotate90d.py @@ -14,53 +14,49 @@ import numpy as np from monai.transforms import RandRotate90d -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose class TestRandRotate90d(NumpyImageTestCase2D): def test_default(self): key = None rotate = 
RandRotate90d(keys=key) - rotate.set_random_state(123) - rotated = rotate({key: self.imt[0]}) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 0, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated[key], expected)) + for p in TEST_NDARRAYS: + rotate.set_random_state(123) + rotated = rotate({key: p(self.imt[0])}) + expected = [np.rot90(channel, 0, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated[key], p(expected)) def test_k(self): key = "test" rotate = RandRotate90d(keys=key, max_k=2) - rotate.set_random_state(234) - rotated = rotate({key: self.imt[0]}) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 0, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated[key], expected)) + for p in TEST_NDARRAYS: + rotate.set_random_state(234) + rotated = rotate({key: p(self.imt[0])}) + expected = [np.rot90(channel, 0, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated[key], p(expected)) def test_spatial_axes(self): key = "test" rotate = RandRotate90d(keys=key, spatial_axes=(0, 1)) - rotate.set_random_state(234) - rotated = rotate({key: self.imt[0]}) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 0, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated[key], expected)) + for p in TEST_NDARRAYS: + rotate.set_random_state(234) + rotated = rotate({key: p(self.imt[0])}) + expected = [np.rot90(channel, 0, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated[key], p(expected)) def test_prob_k_spatial_axes(self): key = "test" rotate = RandRotate90d(keys=key, prob=1.0, max_k=2, spatial_axes=(0, 1)) - rotate.set_random_state(234) - rotated = rotate({key: self.imt[0]}) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 1, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated[key], expected)) + for p in TEST_NDARRAYS: + rotate.set_random_state(234) + rotated = rotate({key: p(self.imt[0])}) + expected = [np.rot90(channel, 1, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated[key], p(expected)) def test_no_key(self): key = "unknown" diff --git a/tests/test_rand_rotated.py b/tests/test_rand_rotated.py index 47b4b7107e..fb2038e8c3 100644 --- a/tests/test_rand_rotated.py +++ b/tests/test_rand_rotated.py @@ -10,26 +10,104 @@ # limitations under the License. 
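# The rewritten tests compare results through tests.utils.assert_allclose
# rather than np.testing.assert_allclose. A rough sketch of the assumed
# behaviour (not the repository's implementation): optionally check that the
# output type matches the expected type, move any tensors back to CPU, then
# defer to numpy's comparison.
import numpy as np
import torch

def assert_allclose(actual, desired, type_test=True, rtol=1e-7, atol=0.0):
    if type_test:
        # the refactored transforms should return the same container type they received
        assert isinstance(actual, type(desired)), f"{type(actual)} != {type(desired)}"
    if isinstance(actual, torch.Tensor):
        actual = actual.detach().cpu().numpy()
    if isinstance(desired, torch.Tensor):
        desired = desired.detach().cpu().numpy()
    np.testing.assert_allclose(actual, desired, rtol=rtol, atol=atol)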
import unittest +from typing import List, Tuple import numpy as np import scipy.ndimage +import torch from parameterized import parameterized from monai.transforms import RandRotated from monai.utils import GridSampleMode, GridSamplePadMode -from tests.utils import NumpyImageTestCase2D, NumpyImageTestCase3D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D +TEST_CASES_2D: List[Tuple] = [] +for p in TEST_NDARRAYS: + TEST_CASES_2D.append((p, np.pi / 2, True, "bilinear", "border", False)) + TEST_CASES_2D.append((p, np.pi / 4, True, "nearest", "border", False)) + TEST_CASES_2D.append((p, np.pi, False, "nearest", "zeros", True)) + TEST_CASES_2D.append((p, (-np.pi / 4, 0), False, "nearest", "zeros", True)) -class TestRandRotated2D(NumpyImageTestCase2D): - @parameterized.expand( - [ - (np.pi / 2, True, "bilinear", "border", False), - (np.pi / 4, True, "nearest", "border", False), - (np.pi, False, "nearest", "zeros", True), - ((-np.pi / 4, 0), False, "nearest", "zeros", True), - ] + +TEST_CASES_3D: List[Tuple] = [] +for p in TEST_NDARRAYS: + TEST_CASES_3D.append( + (p, np.pi / 2, -np.pi / 6, (0.0, np.pi), False, "bilinear", "border", False, (1, 87, 104, 109)) ) - def test_correct_results(self, degrees, keep_size, mode, padding_mode, align_corners): + TEST_CASES_3D.append( + ( + p, + np.pi / 2, + -np.pi / 6, + (0.0, np.pi), + False, + GridSampleMode.NEAREST, + GridSamplePadMode.BORDER, + False, + (1, 87, 104, 109), + ) + ) + TEST_CASES_3D.append( + ( + p, + np.pi / 4, + (-np.pi / 9, np.pi / 4.5), + (np.pi / 9, np.pi / 6), + False, + "nearest", + "border", + True, + (1, 89, 105, 104), + ) + ) + TEST_CASES_3D.append( + ( + p, + np.pi / 4, + (-np.pi / 9, np.pi / 4.5), + (np.pi / 9, np.pi / 6), + False, + GridSampleMode.NEAREST, + GridSamplePadMode.BORDER, + True, + (1, 89, 105, 104), + ) + ) + TEST_CASES_3D.append( + ( + p, + 0.0, + (2 * np.pi, 2.06 * np.pi), + (-np.pi / 180, np.pi / 180), + True, + "nearest", + "zeros", + True, + (1, 48, 64, 80), + ) + ) + TEST_CASES_3D.append( + ( + p, + 0.0, + (2 * np.pi, 2.06 * np.pi), + (-np.pi / 180, np.pi / 180), + True, + GridSampleMode.NEAREST, + GridSamplePadMode.ZEROS, + True, + (1, 48, 64, 80), + ) + ) + TEST_CASES_3D.append((p, (-np.pi / 4, 0), 0, 0, False, "nearest", "zeros", False, (1, 48, 77, 90))) + TEST_CASES_3D.append( + (p, (-np.pi / 4, 0), 0, 0, False, GridSampleMode.NEAREST, GridSamplePadMode.ZEROS, False, (1, 48, 77, 90)) + ) + + +class TestRandRotated2D(NumpyImageTestCase2D): + @parameterized.expand(TEST_CASES_2D) + def test_correct_results(self, im_type, degrees, keep_size, mode, padding_mode, align_corners): rotate_fn = RandRotated( "img", range_x=degrees, @@ -40,7 +118,7 @@ def test_correct_results(self, degrees, keep_size, mode, padding_mode, align_cor align_corners=align_corners, ) rotate_fn.set_random_state(243) - rotated = rotate_fn({"img": self.imt[0], "seg": self.segn[0]}) + rotated = rotate_fn({"img": im_type(self.imt[0]), "seg": im_type(self.segn[0])}) _order = 0 if mode == "nearest" else 1 if padding_mode == "border": @@ -49,74 +127,20 @@ def test_correct_results(self, degrees, keep_size, mode, padding_mode, align_cor _mode = "reflect" else: _mode = "constant" - angle = rotate_fn.x + angle = rotate_fn.rand_rotate.x expected = scipy.ndimage.rotate( self.imt[0, 0], -np.rad2deg(angle), (0, 1), not keep_size, order=_order, mode=_mode, prefilter=False ) + for k, v in rotated.items(): + rotated[k] = v.cpu() if isinstance(v, torch.Tensor) else v expected = np.stack(expected).astype(np.float32) good = 
np.sum(np.isclose(expected, rotated["img"][0], atol=1e-3)) self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 pixels") class TestRandRotated3D(NumpyImageTestCase3D): - @parameterized.expand( - [ - (np.pi / 2, -np.pi / 6, (0.0, np.pi), False, "bilinear", "border", False, (1, 87, 104, 109)), - ( - np.pi / 2, - -np.pi / 6, - (0.0, np.pi), - False, - GridSampleMode.NEAREST, - GridSamplePadMode.BORDER, - False, - (1, 87, 104, 109), - ), - ( - np.pi / 4, - (-np.pi / 9, np.pi / 4.5), - (np.pi / 9, np.pi / 6), - False, - "nearest", - "border", - True, - (1, 89, 105, 104), - ), - ( - np.pi / 4, - (-np.pi / 9, np.pi / 4.5), - (np.pi / 9, np.pi / 6), - False, - GridSampleMode.NEAREST, - GridSamplePadMode.BORDER, - True, - (1, 89, 105, 104), - ), - ( - 0.0, - (2 * np.pi, 2.06 * np.pi), - (-np.pi / 180, np.pi / 180), - True, - "nearest", - "zeros", - True, - (1, 48, 64, 80), - ), - ( - 0.0, - (2 * np.pi, 2.06 * np.pi), - (-np.pi / 180, np.pi / 180), - True, - GridSampleMode.NEAREST, - GridSamplePadMode.ZEROS, - True, - (1, 48, 64, 80), - ), - ((-np.pi / 4, 0), 0, 0, False, "nearest", "zeros", False, (1, 48, 77, 90)), - ((-np.pi / 4, 0), 0, 0, False, GridSampleMode.NEAREST, GridSamplePadMode.ZEROS, False, (1, 48, 77, 90)), - ] - ) - def test_correct_shapes(self, x, y, z, keep_size, mode, padding_mode, align_corners, expected): + @parameterized.expand(TEST_CASES_3D) + def test_correct_shapes(self, im_type, x, y, z, keep_size, mode, padding_mode, align_corners, expected): rotate_fn = RandRotated( "img", range_x=x, @@ -129,7 +153,7 @@ def test_correct_shapes(self, x, y, z, keep_size, mode, padding_mode, align_corn align_corners=align_corners, ) rotate_fn.set_random_state(243) - rotated = rotate_fn({"img": self.imt[0], "seg": self.segn[0]}) + rotated = rotate_fn({"img": im_type(self.imt[0]), "seg": im_type(self.segn[0])}) np.testing.assert_allclose(rotated["img"].shape, expected) diff --git a/tests/test_rand_scale_crop.py b/tests/test_rand_scale_crop.py index db5487ebff..a0c5471ffb 100644 --- a/tests/test_rand_scale_crop.py +++ b/tests/test_rand_scale_crop.py @@ -15,6 +15,7 @@ from parameterized import parameterized from monai.transforms import RandScaleCrop +from tests.utils import TEST_NDARRAYS, assert_allclose TEST_CASE_1 = [ {"roi_scale": [1.0, 1.0, -1.0], "random_center": True}, @@ -55,22 +56,25 @@ class TestRandScaleCrop(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_shape(self, input_param, input_data, expected_shape): - result = RandScaleCrop(**input_param)(input_data) - self.assertTupleEqual(result.shape, expected_shape) + for p in TEST_NDARRAYS: + result = RandScaleCrop(**input_param)(p(input_data)) + self.assertTupleEqual(result.shape, expected_shape) @parameterized.expand([TEST_CASE_3]) def test_value(self, input_param, input_data): - cropper = RandScaleCrop(**input_param) - result = cropper(input_data) - roi = [(2 - i // 2, 2 + i - i // 2) for i in cropper._size] - np.testing.assert_allclose(result, input_data[:, roi[0][0] : roi[0][1], roi[1][0] : roi[1][1]]) + for p in TEST_NDARRAYS: + cropper = RandScaleCrop(**input_param) + result = cropper(p(input_data)) + roi = [(2 - i // 2, 2 + i - i // 2) for i in cropper._size] + assert_allclose(result, input_data[:, roi[0][0] : roi[0][1], roi[1][0] : roi[1][1]], type_test=False) @parameterized.expand([TEST_CASE_4, TEST_CASE_5, TEST_CASE_6]) def test_random_shape(self, input_param, input_data, expected_shape): - cropper = RandScaleCrop(**input_param) - cropper.set_random_state(seed=123) - result = 
cropper(input_data) - self.assertTupleEqual(result.shape, expected_shape) + for p in TEST_NDARRAYS: + cropper = RandScaleCrop(**input_param) + cropper.set_random_state(seed=123) + result = cropper(p(input_data)) + self.assertTupleEqual(result.shape, expected_shape) if __name__ == "__main__": diff --git a/tests/test_rand_scale_cropd.py b/tests/test_rand_scale_cropd.py index 265c6c467d..f78a81d339 100644 --- a/tests/test_rand_scale_cropd.py +++ b/tests/test_rand_scale_cropd.py @@ -15,6 +15,7 @@ from parameterized import parameterized from monai.transforms import RandScaleCropd +from tests.utils import TEST_NDARRAYS, assert_allclose TEST_CASE_1 = [ {"keys": "img", "roi_scale": [1.0, 1.0, -1.0], "random_center": True}, @@ -66,10 +67,14 @@ def test_shape(self, input_param, input_data, expected_shape): @parameterized.expand([TEST_CASE_3]) def test_value(self, input_param, input_data): - cropper = RandScaleCropd(**input_param) - result = cropper(input_data) - roi = [(2 - i // 2, 2 + i - i // 2) for i in cropper._size] - np.testing.assert_allclose(result["img"], input_data["img"][:, roi[0][0] : roi[0][1], roi[1][0] : roi[1][1]]) + for p in TEST_NDARRAYS: + cropper = RandScaleCropd(**input_param) + input_data["img"] = p(input_data["img"]) + result = cropper(input_data) + roi = [(2 - i // 2, 2 + i - i // 2) for i in cropper._size] + assert_allclose( + result["img"], input_data["img"][:, roi[0][0] : roi[0][1], roi[1][0] : roi[1][1]], type_test=False + ) @parameterized.expand([TEST_CASE_4, TEST_CASE_5, TEST_CASE_6]) def test_random_shape(self, input_param, input_data, expected_shape): diff --git a/tests/test_rand_scale_intensity.py b/tests/test_rand_scale_intensity.py index 750d88bfad..c3d18330ea 100644 --- a/tests/test_rand_scale_intensity.py +++ b/tests/test_rand_scale_intensity.py @@ -24,8 +24,10 @@ def test_value(self): scaler.set_random_state(seed=0) result = scaler(p(self.imt)) np.random.seed(0) + # simulate the randomize() of transform + np.random.random() expected = p((self.imt * (1 + np.random.uniform(low=-0.5, high=0.5))).astype(np.float32)) - assert_allclose(result, expected, rtol=1e-7, atol=0) + assert_allclose(result, p(expected), rtol=1e-7, atol=0) if __name__ == "__main__": diff --git a/tests/test_rand_scale_intensityd.py b/tests/test_rand_scale_intensityd.py index a8d2e63f65..7c2392fded 100644 --- a/tests/test_rand_scale_intensityd.py +++ b/tests/test_rand_scale_intensityd.py @@ -19,14 +19,16 @@ class TestRandScaleIntensityd(NumpyImageTestCase2D): def test_value(self): + key = "img" for p in TEST_NDARRAYS: - key = "img" scaler = RandScaleIntensityd(keys=[key], factors=0.5, prob=1.0) scaler.set_random_state(seed=0) result = scaler({key: p(self.imt)}) np.random.seed(0) + # simulate the randomize function of transform + np.random.random() expected = (self.imt * (1 + np.random.uniform(low=-0.5, high=0.5))).astype(np.float32) - assert_allclose(result[key], expected) + assert_allclose(result[key], p(expected)) if __name__ == "__main__": diff --git a/tests/test_rand_shift_intensity.py b/tests/test_rand_shift_intensity.py index 4c4dd87dfe..7f5b278fd0 100644 --- a/tests/test_rand_shift_intensity.py +++ b/tests/test_rand_shift_intensity.py @@ -23,6 +23,8 @@ def test_value(self): shifter.set_random_state(seed=0) result = shifter(self.imt, factor=1.0) np.random.seed(0) + # simulate the randomize() of transform + np.random.random() expected = self.imt + np.random.uniform(low=-1.0, high=1.0) np.testing.assert_allclose(result, expected) diff --git a/tests/test_rand_shift_intensityd.py 
b/tests/test_rand_shift_intensityd.py index 6766236146..5950faac26 100644 --- a/tests/test_rand_shift_intensityd.py +++ b/tests/test_rand_shift_intensityd.py @@ -19,14 +19,16 @@ class TestRandShiftIntensityd(NumpyImageTestCase2D): def test_value(self): + key = "img" for p in TEST_NDARRAYS: - key = "img" shifter = RandShiftIntensityd(keys=[key], offsets=1.0, prob=1.0) shifter.set_random_state(seed=0) result = shifter({key: p(self.imt)}) np.random.seed(0) + # simulate the randomize() of transform + np.random.random() expected = self.imt + np.random.uniform(low=-1.0, high=1.0) - assert_allclose(result[key], expected) + assert_allclose(result[key], p(expected)) def test_factor(self): key = "img" @@ -36,6 +38,8 @@ def test_factor(self): shifter.set_random_state(seed=0) result = shifter(stats(data)) np.random.seed(0) + # simulate the randomize() of transform + np.random.random() expected = self.imt + np.random.uniform(low=-1.0, high=1.0) * np.nanmax(self.imt) np.testing.assert_allclose(result[key], expected) diff --git a/tests/test_rand_spatial_crop.py b/tests/test_rand_spatial_crop.py index 01e057e589..19b1841c6d 100644 --- a/tests/test_rand_spatial_crop.py +++ b/tests/test_rand_spatial_crop.py @@ -15,6 +15,7 @@ from parameterized import parameterized from monai.transforms import RandSpatialCrop +from tests.utils import TEST_NDARRAYS, assert_allclose TEST_CASE_0 = [ {"roi_size": [3, 3, -1], "random_center": True}, @@ -56,10 +57,11 @@ def test_shape(self, input_param, input_data, expected_shape): @parameterized.expand([TEST_CASE_3]) def test_value(self, input_param, input_data): - cropper = RandSpatialCrop(**input_param) - result = cropper(input_data) - roi = [(2 - i // 2, 2 + i - i // 2) for i in cropper._size] - np.testing.assert_allclose(result, input_data[:, roi[0][0] : roi[0][1], roi[1][0] : roi[1][1]]) + for p in TEST_NDARRAYS: + cropper = RandSpatialCrop(**input_param) + result = cropper(p(input_data)) + roi = [(2 - i // 2, 2 + i - i // 2) for i in cropper._size] + assert_allclose(result, input_data[:, roi[0][0] : roi[0][1], roi[1][0] : roi[1][1]], type_test=False) @parameterized.expand([TEST_CASE_4, TEST_CASE_5]) def test_random_shape(self, input_param, input_data, expected_shape): diff --git a/tests/test_rand_spatial_crop_samples.py b/tests/test_rand_spatial_crop_samples.py index 0ade9bbbba..eefe7d0e0a 100644 --- a/tests/test_rand_spatial_crop_samples.py +++ b/tests/test_rand_spatial_crop_samples.py @@ -15,6 +15,7 @@ from parameterized import parameterized from monai.transforms import RandSpatialCropSamples +from tests.utils import TEST_NDARRAYS, assert_allclose TEST_CASE_1 = [ {"roi_size": [3, 3, 3], "num_samples": 4, "random_center": True, "random_size": False}, @@ -70,14 +71,15 @@ class TestRandSpatialCropSamples(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) def test_shape(self, input_param, input_data, expected_shape, expected_last_item): - xform = RandSpatialCropSamples(**input_param) - xform.set_random_state(1234) - result = xform(input_data) + for p in TEST_NDARRAYS: + xform = RandSpatialCropSamples(**input_param) + xform.set_random_state(1234) + result = xform(p(input_data)) - np.testing.assert_equal(len(result), input_param["num_samples"]) - for item, expected in zip(result, expected_shape): - self.assertTupleEqual(item.shape, expected) - np.testing.assert_allclose(result[-1], expected_last_item) + np.testing.assert_equal(len(result), input_param["num_samples"]) + for item, expected in zip(result, expected_shape): + self.assertTupleEqual(item.shape, 
expected) + assert_allclose(result[-1], expected_last_item, type_test=False) if __name__ == "__main__": diff --git a/tests/test_rand_spatial_crop_samplesd.py b/tests/test_rand_spatial_crop_samplesd.py index 3f5eee7b27..a4e8bdb2e6 100644 --- a/tests/test_rand_spatial_crop_samplesd.py +++ b/tests/test_rand_spatial_crop_samplesd.py @@ -15,6 +15,7 @@ from parameterized import parameterized from monai.transforms import Compose, RandSpatialCropSamplesd, ToTensord +from tests.utils import TEST_NDARRAYS, assert_allclose TEST_CASE_1 = [ {"keys": ["img", "seg"], "num_samples": 4, "roi_size": [2, 2, 2], "random_center": True}, @@ -38,31 +39,48 @@ }, ] -TEST_CASE_2 = [ - {"keys": ["img", "seg"], "num_samples": 8, "roi_size": [2, 2, 3], "random_center": False}, - {"img": np.arange(81).reshape(3, 3, 3, 3), "seg": np.arange(81, 0, -1).reshape(3, 3, 3, 3)}, - [(3, 3, 3, 3), (3, 2, 3, 3), (3, 2, 2, 3), (3, 2, 3, 3), (3, 3, 3, 3), (3, 3, 3, 3), (3, 2, 2, 3), (3, 3, 2, 3)], - { - "img": np.array( +TEST_CASE_2 = [] +for p in TEST_NDARRAYS: + TEST_CASE_2.append( + [ + {"keys": ["img", "seg"], "num_samples": 8, "roi_size": [2, 2, 3], "random_center": False}, + {"img": p(np.arange(81).reshape(3, 3, 3, 3)), "seg": p(np.arange(81, 0, -1).reshape(3, 3, 3, 3))}, [ - [[[0, 1, 2], [3, 4, 5]], [[9, 10, 11], [12, 13, 14]], [[18, 19, 20], [21, 22, 23]]], - [[[27, 28, 29], [30, 31, 32]], [[36, 37, 38], [39, 40, 41]], [[45, 46, 47], [48, 49, 50]]], - [[[54, 55, 56], [57, 58, 59]], [[63, 64, 65], [66, 67, 68]], [[72, 73, 74], [75, 76, 77]]], - ] - ), - "seg": np.array( - [ - [[[81, 80, 79], [78, 77, 76]], [[72, 71, 70], [69, 68, 67]], [[63, 62, 61], [60, 59, 58]]], - [[[54, 53, 52], [51, 50, 49]], [[45, 44, 43], [42, 41, 40]], [[36, 35, 34], [33, 32, 31]]], - [[[27, 26, 25], [24, 23, 22]], [[18, 17, 16], [15, 14, 13]], [[9, 8, 7], [6, 5, 4]]], - ] - ), - }, -] + (3, 3, 3, 3), + (3, 2, 3, 3), + (3, 2, 2, 3), + (3, 2, 3, 3), + (3, 3, 3, 3), + (3, 3, 3, 3), + (3, 2, 2, 3), + (3, 3, 2, 3), + ], + { + "img": p( + np.array( + [ + [[[0, 1, 2], [3, 4, 5]], [[9, 10, 11], [12, 13, 14]], [[18, 19, 20], [21, 22, 23]]], + [[[27, 28, 29], [30, 31, 32]], [[36, 37, 38], [39, 40, 41]], [[45, 46, 47], [48, 49, 50]]], + [[[54, 55, 56], [57, 58, 59]], [[63, 64, 65], [66, 67, 68]], [[72, 73, 74], [75, 76, 77]]], + ] + ) + ), + "seg": p( + np.array( + [ + [[[81, 80, 79], [78, 77, 76]], [[72, 71, 70], [69, 68, 67]], [[63, 62, 61], [60, 59, 58]]], + [[[54, 53, 52], [51, 50, 49]], [[45, 44, 43], [42, 41, 40]], [[36, 35, 34], [33, 32, 31]]], + [[[27, 26, 25], [24, 23, 22]], [[18, 17, 16], [15, 14, 13]], [[9, 8, 7], [6, 5, 4]]], + ] + ) + ), + }, + ] + ) class TestRandSpatialCropSamplesd(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) + @parameterized.expand([TEST_CASE_1, *TEST_CASE_2]) def test_shape(self, input_param, input_data, expected_shape, expected_last): xform = RandSpatialCropSamplesd(**input_param) xform.set_random_state(1234) @@ -73,18 +91,14 @@ def test_shape(self, input_param, input_data, expected_shape, expected_last): for i, item in enumerate(result): self.assertEqual(item["img_meta_dict"]["patch_index"], i) self.assertEqual(item["seg_meta_dict"]["patch_index"], i) - np.testing.assert_allclose(item["img"], expected_last["img"]) - np.testing.assert_allclose(item["seg"], expected_last["seg"]) + assert_allclose(item["img"], expected_last["img"], type_test=True) + assert_allclose(item["seg"], expected_last["seg"], type_test=True) def test_deep_copy(self): data = {"img": np.ones((1, 10, 11, 12))} num_samples = 3 
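# (illustrative aside, hypothetical names) the deep-copy check below matters
# because all num_samples crops originate from the same input dict; each
# sample must own its data so that mutating one cannot leak into another,
# a property one could probe like this:
#
#     samples = transform(data)
#     samples[0]["img"] += 1.0                                   # mutate one sample
#     assert not np.array_equal(samples[0]["img"], samples[1]["img"])
#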
sampler = RandSpatialCropSamplesd( - keys=["img"], - roi_size=(3, 3, 3), - num_samples=num_samples, - random_center=True, - random_size=False, + keys=["img"], roi_size=(3, 3, 3), num_samples=num_samples, random_center=True, random_size=False ) transform = Compose([ToTensord(keys="img"), sampler]) samples = transform(data) diff --git a/tests/test_rand_spatial_cropd.py b/tests/test_rand_spatial_cropd.py index 610c1974aa..edcb61dc99 100644 --- a/tests/test_rand_spatial_cropd.py +++ b/tests/test_rand_spatial_cropd.py @@ -15,6 +15,7 @@ from parameterized import parameterized from monai.transforms import RandSpatialCropd +from tests.utils import TEST_NDARRAYS TEST_CASE_0 = [ {"keys": "img", "roi_size": [3, 3, -1], "random_center": True}, @@ -67,10 +68,12 @@ def test_value(self, input_param, input_data): @parameterized.expand([TEST_CASE_4, TEST_CASE_5]) def test_random_shape(self, input_param, input_data, expected_shape): - cropper = RandSpatialCropd(**input_param) - cropper.set_random_state(seed=123) - result = cropper(input_data) - self.assertTupleEqual(result["img"].shape, expected_shape) + for p in TEST_NDARRAYS: + cropper = RandSpatialCropd(**input_param) + cropper.set_random_state(seed=123) + input_data["img"] = p(input_data["img"]) + result = cropper(input_data) + self.assertTupleEqual(result["img"].shape, expected_shape) if __name__ == "__main__": diff --git a/tests/test_rand_std_shift_intensity.py b/tests/test_rand_std_shift_intensity.py index 0c6382555e..5b0db09063 100644 --- a/tests/test_rand_std_shift_intensity.py +++ b/tests/test_rand_std_shift_intensity.py @@ -22,6 +22,8 @@ class TestRandStdShiftIntensity(NumpyImageTestCase2D): def test_value(self): for p in TEST_NDARRAYS: np.random.seed(0) + # simulate the randomize() of transform + np.random.random() factor = np.random.uniform(low=-1.0, high=1.0) offset = factor * np.std(self.imt) expected = p(self.imt + offset) diff --git a/tests/test_rand_std_shift_intensityd.py b/tests/test_rand_std_shift_intensityd.py index 0ab017a42d..fbc71721d0 100644 --- a/tests/test_rand_std_shift_intensityd.py +++ b/tests/test_rand_std_shift_intensityd.py @@ -23,6 +23,8 @@ def test_value(self): for p in TEST_NDARRAYS: key = "img" np.random.seed(0) + # simulate the randomize() of transform + np.random.random() factor = np.random.uniform(low=-1.0, high=1.0) expected = self.imt + factor * np.std(self.imt) shifter = RandStdShiftIntensityd(keys=[key], factors=1.0, prob=1.0) diff --git a/tests/test_rand_weighted_crop.py b/tests/test_rand_weighted_crop.py index 39a9439122..eb0e4244f7 100644 --- a/tests/test_rand_weighted_crop.py +++ b/tests/test_rand_weighted_crop.py @@ -12,127 +12,159 @@ import unittest import numpy as np +import torch +from parameterized.parameterized import parameterized from monai.transforms.croppad.array import RandWeightedCrop -from tests.utils import NumpyImageTestCase2D, NumpyImageTestCase3D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D, assert_allclose -class TestRandWeightedCrop2D(NumpyImageTestCase2D): - def test_rand_weighted_crop_small_roi(self): - img = self.seg1[0] - n_samples = 3 - crop = RandWeightedCrop((10, 12), n_samples) - weight = np.zeros_like(img) - weight[0, 30, 17] = 1.1 - weight[0, 40, 31] = 1 - weight[0, 80, 21] = 1 - crop.set_random_state(10) - result = crop(img, weight) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0].shape, (1, 10, 12)) - np.testing.assert_allclose(np.asarray(crop.centers), [[80, 21], [30, 17], [40, 31]]) +def get_data(ndim): + 
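# Why the intensity tests above insert one np.random.random() call before
# drawing the expected factor: a randomized transform first consumes a sample
# to decide whether it fires (prob), then draws its parameters. A sketch of
# that pattern (hypothetical class, not the MONAI implementation):
import numpy as np

class RandShiftSketch:
    def __init__(self, offsets=1.0, prob=1.0):
        self.offsets, self.prob = offsets, prob
        self.R = np.random.RandomState()

    def set_random_state(self, seed):
        self.R = np.random.RandomState(seed)
        return self

    def __call__(self, img):
        apply = self.R.random_sample() < self.prob             # draw 1: fire or not
        offset = self.R.uniform(-self.offsets, self.offsets)   # draw 2: parameter
        return img + offset if apply else img

out = RandShiftSketch().set_random_state(0)(np.zeros(3))
np.random.seed(0)
np.random.random()                                     # mirror draw 1
assert np.allclose(out, np.random.uniform(-1.0, 1.0))  # mirror draw 2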
im_gen = NumpyImageTestCase2D() if ndim == 2 else NumpyImageTestCase3D() + im_gen.setUp() + return im_gen.imt[0], im_gen.seg1[0], im_gen.segn[0] + + +IMT_2D, SEG1_2D, SEGN_2D = get_data(ndim=2) +IMT_3D, SEG1_3D, SEGN_3D = get_data(ndim=3) + - def test_rand_weighted_crop_default_roi(self): - img = self.imt[0] - n_samples = 3 - crop = RandWeightedCrop((10, -1), n_samples) - weight = np.zeros_like(img) +TESTS = [] +for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + im = SEG1_2D + weight = np.zeros_like(im) weight[0, 30, 17] = 1.1 weight[0, 40, 31] = 1 weight[0, 80, 21] = 1 - crop.set_random_state(10) - result = crop(img, weight) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0].shape, (1, 10, 64)) - np.testing.assert_allclose(np.asarray(crop.centers), [[14, 32], [105, 32], [20, 32]]) - - def test_rand_weighted_crop_large_roi(self): - img = self.segn[0] - n_samples = 3 - crop = RandWeightedCrop((10000, 400), n_samples) - weight = np.zeros_like(img) + TESTS.append( + [ + "small roi 2d", + dict(spatial_size=(10, 12), num_samples=3), + p(im), + q(weight), + (1, 10, 12), + [[80, 21], [30, 17], [40, 31]], + ] + ) + im = IMT_2D + TESTS.append( + [ + "default roi 2d", + dict(spatial_size=(10, -1), num_samples=3), + p(im), + q(weight), + (1, 10, 64), + [[14, 32], [105, 32], [20, 32]], + ] + ) + im = SEGN_2D + weight = np.zeros_like(im) weight[0, 30, 17] = 1.1 weight[0, 10, 1] = 1 - crop.set_random_state(10) - result = crop(img, weight) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0].shape, (1, 128, 64)) - np.testing.assert_allclose(np.asarray(crop.centers), [[64, 32], [64, 32], [64, 32]]) - for res in result: - np.testing.assert_allclose(res, self.segn[0]) - - def test_rand_weighted_crop_bad_w(self): - img = self.imt[0] - n_samples = 3 - crop = RandWeightedCrop((20, 40), n_samples) - weight = np.zeros_like(img) + TESTS.append( + [ + "large roi 2d", + dict(spatial_size=(10000, 400), num_samples=3), + p(im), + q(weight), + (1, 128, 64), + [[64, 32], [64, 32], [64, 32]], + ] + ) + im = IMT_2D + weight = np.zeros_like(im) weight[0, 30, 17] = np.inf weight[0, 10, 1] = -np.inf weight[0, 10, 20] = -np.nan - crop.set_random_state(10) - result = crop(img, weight) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0].shape, (1, 20, 40)) - np.testing.assert_allclose(np.asarray(crop.centers), [[63, 37], [31, 43], [66, 20]]) - - -class TestRandWeightedCrop(NumpyImageTestCase3D): - def test_rand_weighted_crop_small_roi(self): - img = self.seg1[0] - n_samples = 3 - crop = RandWeightedCrop((8, 10, 12), n_samples) - weight = np.zeros_like(img) + TESTS.append( + [ + "bad w 2d", + dict(spatial_size=(20, 40), num_samples=3), + p(im), + q(weight), + (1, 20, 40), + [[63, 37], [31, 43], [66, 20]], + ] + ) + im = SEG1_3D + weight = np.zeros_like(im) weight[0, 5, 30, 17] = 1.1 weight[0, 8, 40, 31] = 1 weight[0, 11, 23, 21] = 1 - crop.set_random_state(10) - result = crop(img, weight) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0].shape, (1, 8, 10, 12)) - np.testing.assert_allclose(np.asarray(crop.centers), [[11, 23, 21], [5, 30, 17], [8, 40, 31]]) - - def test_rand_weighted_crop_default_roi(self): - img = self.imt[0] - n_samples = 3 - crop = RandWeightedCrop((10, -1, -1), n_samples) - weight = np.zeros_like(img) + TESTS.append( + [ + "small roi 3d", + dict(spatial_size=(8, 10, 12), num_samples=3), + p(im), + q(weight), + (1, 8, 10, 12), + [[11, 23, 21], [5, 30, 17], [8, 40, 31]], + ] + ) + im = 
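# Image type and weight type are varied independently below, so each case is
# one point in the (p, q) cross-product. itertools.product expresses the same
# nesting more compactly (equivalent loop, shown with a two-backend tuple):
from itertools import product

import numpy as np
import torch

backends = (np.array, torch.as_tensor)
for p, q in product(backends, repeat=2):
    img, weight = p(np.zeros((1, 4, 4))), q(np.zeros((1, 4, 4)))
    print(type(img).__name__, type(weight).__name__)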
IMT_3D + weight = np.zeros_like(im) weight[0, 7, 17] = 1.1 weight[0, 13, 31] = 1.1 weight[0, 24, 21] = 1 - crop.set_random_state(10) - result = crop(img, weight) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0].shape, (1, 10, 64, 80)) - np.testing.assert_allclose(np.asarray(crop.centers), [[14, 32, 40], [41, 32, 40], [20, 32, 40]]) - - def test_rand_weighted_crop_large_roi(self): - img = self.segn[0] - n_samples = 3 - crop = RandWeightedCrop((10000, 400, 80), n_samples) - weight = np.zeros_like(img) + TESTS.append( + [ + "default roi 3d", + dict(spatial_size=(10, -1, -1), num_samples=3), + p(im), + q(weight), + (1, 10, 64, 80), + [[14, 32, 40], [41, 32, 40], [20, 32, 40]], + ] + ) + im = SEGN_3D + weight = np.zeros_like(im) weight[0, 30, 17, 20] = 1.1 weight[0, 10, 1, 17] = 1 - crop.set_random_state(10) - result = crop(img, weight) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0].shape, (1, 48, 64, 80)) - np.testing.assert_allclose(np.asarray(crop.centers), [[24, 32, 40], [24, 32, 40], [24, 32, 40]]) - for res in result: - np.testing.assert_allclose(res, self.segn[0]) - - def test_rand_weighted_crop_bad_w(self): - img = self.imt[0] - n_samples = 3 - crop = RandWeightedCrop((48, 64, 80), n_samples) - weight = np.zeros_like(img) + TESTS.append( + [ + "large roi 3d", + dict(spatial_size=(10000, 400, 80), num_samples=3), + p(im), + q(weight), + (1, 48, 64, 80), + [[24, 32, 40], [24, 32, 40], [24, 32, 40]], + ] + ) + im = IMT_3D + weight = np.zeros_like(im) weight[0, 30, 17] = np.inf weight[0, 10, 1] = -np.inf weight[0, 10, 20] = -np.nan + TESTS.append( + [ + "bad w 3d", + dict(spatial_size=(48, 64, 80), num_samples=3), + p(im), + q(weight), + (1, 48, 64, 80), + [[24, 32, 40], [24, 32, 40], [24, 32, 40]], + ] + ) + + +class TestRandWeightedCrop(unittest.TestCase): + @parameterized.expand(TESTS) + def test_rand_weighted_crop(self, _, input_params, img, weight, expected_shape, expected_vals): + crop = RandWeightedCrop(**input_params) crop.set_random_state(10) result = crop(img, weight) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0].shape, (1, 48, 64, 80)) - np.testing.assert_allclose(np.asarray(crop.centers), [[24, 32, 40], [24, 32, 40], [24, 32, 40]]) + self.assertTrue(len(result) == input_params["num_samples"]) + assert_allclose(result[0].shape, expected_shape) + for c, e in zip(crop.centers, expected_vals): + assert_allclose(c, e, type_test=False) + # if desired ROI is larger than image, check image is unchanged + if all(s >= i for i, s in zip(img.shape[1:], input_params["spatial_size"])): + for res in result: + self.assertEqual(type(img), type(res)) + if isinstance(img, torch.Tensor): + self.assertEqual(res.device, img.device) + assert_allclose(res, img) if __name__ == "__main__": diff --git a/tests/test_rand_weighted_cropd.py b/tests/test_rand_weighted_cropd.py index 367ce3beb9..f53238d17c 100644 --- a/tests/test_rand_weighted_cropd.py +++ b/tests/test_rand_weighted_cropd.py @@ -14,148 +14,177 @@ import numpy as np from monai.transforms.croppad.dictionary import RandWeightedCropd -from tests.utils import NumpyImageTestCase2D, NumpyImageTestCase3D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D, assert_allclose class TestRandWeightedCrop(NumpyImageTestCase2D): def test_rand_weighted_crop_small_roi(self): - img = self.seg1[0] - n_samples = 3 - crop = RandWeightedCropd("img", "w", (10, 12), n_samples) - weight = np.zeros_like(img) - weight[0, 30, 17] = 1.1 - 
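# RandWeightedCrop draws crop centers with probability proportional to the
# weight map, so at a fixed seed the centers land on the few nonzero voxels.
# A sketch of the idea (not the MONAI source); note how inf/nan weights are
# sanitised, matching the "bad w" cases above:
import numpy as np

def sample_centers(weights, n_samples, rng):
    w = np.nan_to_num(weights.ravel(), nan=0.0, posinf=0.0, neginf=0.0)
    w = np.clip(w, 0, None)
    p = w / w.sum() if w.sum() > 0 else np.full(w.size, 1.0 / w.size)
    idx = rng.choice(w.size, size=n_samples, p=p)
    return [np.unravel_index(i, weights.shape) for i in idx]

rng = np.random.RandomState(10)
weight = np.zeros((1, 128, 64))
weight[0, 30, 17] = 1.1
weight[0, 40, 31] = 1.0
print(sample_centers(weight, 3, rng))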
weight[0, 40, 31] = 1 - weight[0, 80, 21] = 1 - crop.set_random_state(10) - d = {"img": img, "w": weight} - result = crop(d) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0]["img"].shape, (1, 10, 12)) - np.testing.assert_allclose(np.asarray(crop.centers), [[80, 21], [30, 17], [40, 31]]) + for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + img = self.seg1[0] + n_samples = 3 + crop = RandWeightedCropd("img", "w", (10, 12), n_samples) + weight = np.zeros_like(img) + weight[0, 30, 17] = 1.1 + weight[0, 40, 31] = 1 + weight[0, 80, 21] = 1 + crop.set_random_state(10) + d = {"img": p(img), "w": q(weight)} + result = crop(d) + self.assertTrue(len(result) == n_samples) + np.testing.assert_allclose(result[0]["img"].shape, (1, 10, 12)) + for c, e in zip(crop.centers, [[80, 21], [30, 17], [40, 31]]): + assert_allclose(c, e, type_test=False) def test_rand_weighted_crop_default_roi(self): - img = self.imt[0] - n_samples = 3 - crop = RandWeightedCropd("im", "weight", (10, -1), n_samples, "coords") - weight = np.zeros_like(img) - weight[0, 30, 17] = 1.1 - weight[0, 40, 31] = 1 - weight[0, 80, 21] = 1 - crop.set_random_state(10) - data = {"im": img, "weight": weight, "others": np.nan} - result = crop(data) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0]["im"].shape, (1, 10, 64)) - np.testing.assert_allclose(np.asarray(crop.centers), [[14, 32], [105, 32], [20, 32]]) - np.testing.assert_allclose(result[1]["coords"], [105, 32]) + for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + img = self.imt[0] + n_samples = 3 + crop = RandWeightedCropd("im", "weight", (10, -1), n_samples, "coords") + weight = np.zeros_like(img) + weight[0, 30, 17] = 1.1 + weight[0, 40, 31] = 1 + weight[0, 80, 21] = 1 + crop.set_random_state(10) + data = {"im": p(img), "weight": q(weight), "others": np.nan} + result = crop(data) + self.assertTrue(len(result) == n_samples) + np.testing.assert_allclose(result[0]["im"].shape, (1, 10, 64)) + for c, e in zip(crop.centers, [[14, 32], [105, 32], [20, 32]]): + assert_allclose(c, e, type_test=False) + assert_allclose(result[1]["coords"], [105, 32], type_test=False) def test_rand_weighted_crop_large_roi(self): - img = self.segn[0] - n_samples = 3 - crop = RandWeightedCropd(("img", "seg"), "weight", (10000, 400), n_samples, "location") - weight = np.zeros_like(img) - weight[0, 30, 17] = 1.1 - weight[0, 10, 1] = 1 - crop.set_random_state(10) - data = {"img": img, "seg": self.imt[0], "weight": weight} - result = crop(data) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0]["img"].shape, (1, 128, 64)) - np.testing.assert_allclose(result[0]["seg"].shape, (1, 128, 64)) - np.testing.assert_allclose(np.asarray(crop.centers), [[64, 32], [64, 32], [64, 32]]) - np.testing.assert_allclose(result[1]["location"], [64, 32]) + for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + img = self.segn[0] + n_samples = 3 + crop = RandWeightedCropd(("img", "seg"), "weight", (10000, 400), n_samples, "location") + weight = np.zeros_like(img) + weight[0, 30, 17] = 1.1 + weight[0, 10, 1] = 1 + crop.set_random_state(10) + data = {"img": p(img), "seg": p(self.imt[0]), "weight": q(weight)} + result = crop(data) + self.assertTrue(len(result) == n_samples) + np.testing.assert_allclose(result[0]["img"].shape, (1, 128, 64)) + np.testing.assert_allclose(result[0]["seg"].shape, (1, 128, 64)) + for c, e in zip(crop.centers, [[64, 32], [64, 32], [64, 32]]): + assert_allclose(c, e, type_test=False) + assert_allclose(result[1]["location"], 
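# Why the "large roi" cases expect every center at the image midpoint (e.g.
# [64, 32] for a (1, 128, 64) image): the requested ROI is capped at the
# image size, after which only one center keeps the crop in bounds. Sketch of
# that constraint (behaviour inferred from the expected values):
def center_range(requested_roi: int, img_size: int):
    roi = min(requested_roi, img_size)   # cap ROI at the image extent
    lo = roi // 2                        # smallest legal center coordinate
    hi = img_size - (roi - roi // 2)     # largest legal center coordinate
    return lo, hi

print(center_range(10000, 128))  # (64, 64) -> center forced to 64
print(center_range(400, 64))     # (32, 32) -> center forced to 32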
[64, 32], type_test=False) def test_rand_weighted_crop_bad_w(self): - img = self.imt[0] - n_samples = 3 - crop = RandWeightedCropd(("img", "seg"), "w", (20, 40), n_samples) - weight = np.zeros_like(img) - weight[0, 30, 17] = np.inf - weight[0, 10, 1] = -np.inf - weight[0, 10, 20] = -np.nan - crop.set_random_state(10) - result = crop({"img": img, "seg": self.segn[0], "w": weight}) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0]["img"].shape, (1, 20, 40)) - np.testing.assert_allclose(result[0]["seg"].shape, (1, 20, 40)) - np.testing.assert_allclose(np.asarray(crop.centers), [[63, 37], [31, 43], [66, 20]]) + for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + img = self.imt[0] + n_samples = 3 + crop = RandWeightedCropd(("img", "seg"), "w", (20, 40), n_samples) + weight = np.zeros_like(img) + weight[0, 30, 17] = np.inf + weight[0, 10, 1] = -np.inf + weight[0, 10, 20] = -np.nan + crop.set_random_state(10) + result = crop({"img": p(img), "seg": p(self.segn[0]), "w": q(weight)}) + self.assertTrue(len(result) == n_samples) + np.testing.assert_allclose(result[0]["img"].shape, (1, 20, 40)) + np.testing.assert_allclose(result[0]["seg"].shape, (1, 20, 40)) + for c, e in zip(crop.centers, [[63, 37], [31, 43], [66, 20]]): + assert_allclose(c, e, type_test=False) class TestRandWeightedCrop3D(NumpyImageTestCase3D): def test_rand_weighted_crop_small_roi(self): - img = self.seg1[0] - n_samples = 3 - crop = RandWeightedCropd("img", "w", (8, 10, 12), n_samples) - weight = np.zeros_like(img) - weight[0, 5, 30, 17] = 1.1 - weight[0, 8, 40, 31] = 1 - weight[0, 11, 23, 21] = 1 - crop.set_random_state(10) - result = crop({"img": img, "w": weight}) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0]["img"].shape, (1, 8, 10, 12)) - np.testing.assert_allclose(np.asarray(crop.centers), [[11, 23, 21], [5, 30, 17], [8, 40, 31]]) + for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + img = self.seg1[0] + n_samples = 3 + crop = RandWeightedCropd("img", "w", (8, 10, 12), n_samples) + weight = np.zeros_like(img) + weight[0, 5, 30, 17] = 1.1 + weight[0, 8, 40, 31] = 1 + weight[0, 11, 23, 21] = 1 + crop.set_random_state(10) + result = crop({"img": p(img), "w": q(weight)}) + self.assertTrue(len(result) == n_samples) + np.testing.assert_allclose(result[0]["img"].shape, (1, 8, 10, 12)) + for c, e in zip(crop.centers, [[11, 23, 21], [5, 30, 17], [8, 40, 31]]): + assert_allclose(c, e, type_test=False) def test_rand_weighted_crop_default_roi(self): - img = self.imt[0] - n_samples = 3 - crop = RandWeightedCropd(("img", "seg"), "w", (10, -1, -1), n_samples) - weight = np.zeros_like(img) - weight[0, 7, 17] = 1.1 - weight[0, 13, 31] = 1.1 - weight[0, 24, 21] = 1 - crop.set_random_state(10) - result = crop({"img": img, "seg": self.segn[0], "w": weight}) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0]["img"].shape, (1, 10, 64, 80)) - np.testing.assert_allclose(result[0]["seg"].shape, (1, 10, 64, 80)) - np.testing.assert_allclose(np.asarray(crop.centers), [[14, 32, 40], [41, 32, 40], [20, 32, 40]]) + for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + img = self.imt[0] + n_samples = 3 + crop = RandWeightedCropd(("img", "seg"), "w", (10, -1, -1), n_samples) + weight = np.zeros_like(img) + weight[0, 7, 17] = 1.1 + weight[0, 13, 31] = 1.1 + weight[0, 24, 21] = 1 + crop.set_random_state(10) + result = crop({"img": p(img), "seg": p(self.segn[0]), "w": q(weight)}) + self.assertTrue(len(result) == n_samples) + 
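# Usage of the dictionary transform as exercised above: the optional fifth
# argument (center_coord_key, e.g. "coords"/"location") records the sampled
# center in each returned sample. Runnable, assuming MONAI is installed:
import numpy as np
from monai.transforms.croppad.dictionary import RandWeightedCropd

img = np.zeros((1, 128, 64), dtype=np.float32)
weight = np.zeros_like(img)
weight[0, 30, 17] = 1.0

crop = RandWeightedCropd("img", "w", (10, 12), 2, "coords")
crop.set_random_state(10)
samples = crop({"img": img, "w": weight})
print(len(samples), samples[0]["img"].shape, samples[0]["coords"])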
np.testing.assert_allclose(result[0]["img"].shape, (1, 10, 64, 80)) + np.testing.assert_allclose(result[0]["seg"].shape, (1, 10, 64, 80)) + for c, e in zip(crop.centers, [[14, 32, 40], [41, 32, 40], [20, 32, 40]]): + assert_allclose(c, e, type_test=False) def test_rand_weighted_crop_large_roi(self): - img = self.segn[0] - n_samples = 3 - crop = RandWeightedCropd("img", "w", (10000, 400, 80), n_samples) - weight = np.zeros_like(img) - weight[0, 30, 17, 20] = 1.1 - weight[0, 10, 1, 17] = 1 - crop.set_random_state(10) - result = crop({"img": img, "w": weight}) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0]["img"].shape, (1, 48, 64, 80)) - np.testing.assert_allclose(np.asarray(crop.centers), [[24, 32, 40], [24, 32, 40], [24, 32, 40]]) + for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + img = self.segn[0] + n_samples = 3 + crop = RandWeightedCropd("img", "w", (10000, 400, 80), n_samples) + weight = np.zeros_like(img) + weight[0, 30, 17, 20] = 1.1 + weight[0, 10, 1, 17] = 1 + crop.set_random_state(10) + result = crop({"img": p(img), "w": q(weight)}) + self.assertTrue(len(result) == n_samples) + np.testing.assert_allclose(result[0]["img"].shape, (1, 48, 64, 80)) + for c, e in zip(crop.centers, [[24, 32, 40], [24, 32, 40], [24, 32, 40]]): + assert_allclose(c, e, type_test=False) def test_rand_weighted_crop_bad_w(self): - img = self.imt[0] - n_samples = 3 - crop = RandWeightedCropd(("img", "seg"), "w", (48, 64, 80), n_samples) - weight = np.zeros_like(img) - weight[0, 30, 17] = np.inf - weight[0, 10, 1] = -np.inf - weight[0, 10, 20] = -np.nan - crop.set_random_state(10) - result = crop({"img": img, "seg": self.segn[0], "w": weight}) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(result[0]["img"].shape, (1, 48, 64, 80)) - np.testing.assert_allclose(result[0]["seg"].shape, (1, 48, 64, 80)) - np.testing.assert_allclose(np.asarray(crop.centers), [[24, 32, 40], [24, 32, 40], [24, 32, 40]]) + for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + img = self.imt[0] + n_samples = 3 + crop = RandWeightedCropd(("img", "seg"), "w", (48, 64, 80), n_samples) + weight = np.zeros_like(img) + weight[0, 30, 17] = np.inf + weight[0, 10, 1] = -np.inf + weight[0, 10, 20] = -np.nan + crop.set_random_state(10) + result = crop({"img": p(img), "seg": p(self.segn[0]), "w": q(weight)}) + self.assertTrue(len(result) == n_samples) + np.testing.assert_allclose(result[0]["img"].shape, (1, 48, 64, 80)) + np.testing.assert_allclose(result[0]["seg"].shape, (1, 48, 64, 80)) + for c, e in zip(crop.centers, [[24, 32, 40], [24, 32, 40], [24, 32, 40]]): + assert_allclose(c, e, type_test=False) def test_rand_weighted_crop_patch_index(self): - img = self.imt[0] - n_samples = 3 - crop = RandWeightedCropd(("img", "seg"), "w", (10, -1, -1), n_samples) - weight = np.zeros_like(img) - weight[0, 7, 17] = 1.1 - weight[0, 13, 31] = 1.1 - weight[0, 24, 21] = 1 - crop.set_random_state(10) - result = crop({"img": img, "seg": self.segn[0], "w": weight, "img_meta_dict": {"affine": None}}) - self.assertTrue(len(result) == n_samples) - np.testing.assert_allclose(np.asarray(crop.centers), [[14, 32, 40], [41, 32, 40], [20, 32, 40]]) - for i in range(n_samples): - np.testing.assert_allclose(result[i]["img"].shape, (1, 10, 64, 80)) - np.testing.assert_allclose(result[i]["seg"].shape, (1, 10, 64, 80)) - np.testing.assert_allclose(result[i]["img_meta_dict"]["patch_index"], i) - np.testing.assert_allclose(result[i]["seg_meta_dict"]["patch_index"], i) + for p in TEST_NDARRAYS: + for q in 
TEST_NDARRAYS: + img = self.imt[0] + n_samples = 3 + crop = RandWeightedCropd(("img", "seg"), "w", (10, -1, -1), n_samples) + weight = np.zeros_like(img) + weight[0, 7, 17] = 1.1 + weight[0, 13, 31] = 1.1 + weight[0, 24, 21] = 1 + crop.set_random_state(10) + result = crop( + {"img": p(img), "seg": p(self.segn[0]), "w": q(weight), "img_meta_dict": {"affine": None}} + ) + self.assertTrue(len(result) == n_samples) + for c, e in zip(crop.centers, [[14, 32, 40], [41, 32, 40], [20, 32, 40]]): + assert_allclose(c, e, type_test=False) + for i in range(n_samples): + np.testing.assert_allclose(result[i]["img"].shape, (1, 10, 64, 80)) + np.testing.assert_allclose(result[i]["seg"].shape, (1, 10, 64, 80)) + np.testing.assert_allclose(result[i]["img_meta_dict"]["patch_index"], i) + np.testing.assert_allclose(result[i]["seg_meta_dict"]["patch_index"], i) if __name__ == "__main__": diff --git a/tests/test_rand_zoom.py b/tests/test_rand_zoom.py index c21bc8b9e9..da630853fe 100644 --- a/tests/test_rand_zoom.py +++ b/tests/test_rand_zoom.py @@ -17,7 +17,7 @@ from monai.transforms import RandZoom from monai.utils import GridSampleMode, InterpolateMode -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose VALID_CASES = [(0.8, 1.2, "nearest", False), (0.8, 1.2, InterpolateMode.NEAREST, False)] @@ -25,36 +25,28 @@ class TestRandZoom(NumpyImageTestCase2D): @parameterized.expand(VALID_CASES) def test_correct_results(self, min_zoom, max_zoom, mode, keep_size): - random_zoom = RandZoom( - prob=1.0, - min_zoom=min_zoom, - max_zoom=max_zoom, - mode=mode, - keep_size=keep_size, - ) - random_zoom.set_random_state(1234) - zoomed = random_zoom(self.imt[0]) - expected = [] - for channel in self.imt[0]: - expected.append(zoom_scipy(channel, zoom=random_zoom._zoom, mode="nearest", order=0, prefilter=False)) - expected = np.stack(expected).astype(np.float32) - np.testing.assert_allclose(zoomed, expected, atol=1.0) + for p in TEST_NDARRAYS: + random_zoom = RandZoom(prob=1.0, min_zoom=min_zoom, max_zoom=max_zoom, mode=mode, keep_size=keep_size) + random_zoom.set_random_state(1234) + zoomed = random_zoom(p(self.imt[0])) + expected = [ + zoom_scipy(channel, zoom=random_zoom._zoom, mode="nearest", order=0, prefilter=False) + for channel in self.imt[0] + ] + + expected = np.stack(expected).astype(np.float32) + assert_allclose(zoomed, p(expected), atol=1.0) def test_keep_size(self): - random_zoom = RandZoom( - prob=1.0, - min_zoom=0.6, - max_zoom=0.7, - keep_size=True, - padding_mode="constant", - constant_values=2, - ) - zoomed = random_zoom(self.imt[0]) - self.assertTrue(np.array_equal(zoomed.shape, self.imt.shape[1:])) - zoomed = random_zoom(self.imt[0]) - self.assertTrue(np.array_equal(zoomed.shape, self.imt.shape[1:])) - zoomed = random_zoom(self.imt[0]) - self.assertTrue(np.array_equal(zoomed.shape, self.imt.shape[1:])) + for p in TEST_NDARRAYS: + im = p(self.imt[0]) + random_zoom = RandZoom(prob=1.0, min_zoom=0.6, max_zoom=0.7, keep_size=True) + zoomed = random_zoom(im) + self.assertTrue(np.array_equal(zoomed.shape, self.imt.shape[1:])) + zoomed = random_zoom(im) + self.assertTrue(np.array_equal(zoomed.shape, self.imt.shape[1:])) + zoomed = random_zoom(im) + self.assertTrue(np.array_equal(zoomed.shape, self.imt.shape[1:])) @parameterized.expand( [ @@ -64,23 +56,19 @@ def test_keep_size(self): ] ) def test_invalid_inputs(self, _, min_zoom, max_zoom, mode, raises): - with self.assertRaises(raises): - random_zoom = RandZoom(prob=1.0, min_zoom=min_zoom, 
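# The zoom tests validate against scipy channel by channel, using the factor
# the transform actually drew (random_zoom._zoom). Standalone sketch of the
# reference computation, with a fixed factor standing in for the drawn one:
import numpy as np
from scipy.ndimage import zoom as zoom_scipy

img = np.random.rand(1, 8, 8).astype(np.float32)  # (channel, H, W)
factor = 1.1                                      # stands in for random_zoom._zoom
expected = np.stack(
    [zoom_scipy(c, zoom=factor, mode="nearest", order=0, prefilter=False) for c in img]
).astype(np.float32)
print(expected.shape)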
max_zoom=max_zoom, mode=mode) - random_zoom(self.imt[0]) + for p in TEST_NDARRAYS: + with self.assertRaises(raises): + random_zoom = RandZoom(prob=1.0, min_zoom=min_zoom, max_zoom=max_zoom, mode=mode) + random_zoom(p(self.imt[0])) def test_auto_expand_3d(self): - random_zoom = RandZoom( - prob=1.0, - min_zoom=[0.8, 0.7], - max_zoom=[1.2, 1.3], - mode="nearest", - keep_size=False, - ) - random_zoom.set_random_state(1234) - test_data = np.random.randint(0, 2, size=[2, 2, 3, 4]) - zoomed = random_zoom(test_data) - np.testing.assert_allclose(random_zoom._zoom, (1.048844, 1.048844, 0.962637), atol=1e-2) - np.testing.assert_allclose(zoomed.shape, (2, 2, 3, 3)) + for p in TEST_NDARRAYS: + random_zoom = RandZoom(prob=1.0, min_zoom=[0.8, 0.7], max_zoom=[1.2, 1.3], mode="nearest", keep_size=False) + random_zoom.set_random_state(1234) + test_data = p(np.random.randint(0, 2, size=[2, 2, 3, 4])) + zoomed = random_zoom(test_data) + assert_allclose(random_zoom._zoom, (1.048844, 1.048844, 0.962637), atol=1e-2) + assert_allclose(zoomed.shape, (2, 2, 3, 3)) if __name__ == "__main__": diff --git a/tests/test_rand_zoomd.py b/tests/test_rand_zoomd.py index 4ccb1aad64..89a997e925 100644 --- a/tests/test_rand_zoomd.py +++ b/tests/test_rand_zoomd.py @@ -16,7 +16,7 @@ from scipy.ndimage import zoom as zoom_scipy from monai.transforms import RandZoomd -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose VALID_CASES = [(0.8, 1.2, "nearest", None, False)] @@ -34,52 +34,47 @@ def test_correct_results(self, min_zoom, max_zoom, mode, align_corners, keep_siz align_corners=align_corners, keep_size=keep_size, ) - random_zoom.set_random_state(1234) + for p in TEST_NDARRAYS: + random_zoom.set_random_state(1234) - zoomed = random_zoom({key: self.imt[0]}) - expected = [] - for channel in self.imt[0]: - expected.append(zoom_scipy(channel, zoom=random_zoom._zoom, mode="nearest", order=0, prefilter=False)) - expected = np.stack(expected).astype(np.float32) - np.testing.assert_allclose(expected, zoomed[key], atol=1.0) + zoomed = random_zoom({key: p(self.imt[0])}) + expected = [ + zoom_scipy(channel, zoom=random_zoom.rand_zoom._zoom, mode="nearest", order=0, prefilter=False) + for channel in self.imt[0] + ] + + expected = np.stack(expected).astype(np.float32) + assert_allclose(zoomed[key], p(expected), atol=1.0) def test_keep_size(self): key = "img" random_zoom = RandZoomd( - keys=key, - prob=1.0, - min_zoom=0.6, - max_zoom=0.7, - keep_size=True, - padding_mode="constant", - constant_values=2, + keys=key, prob=1.0, min_zoom=0.6, max_zoom=0.7, keep_size=True, padding_mode="constant", constant_values=2 ) - zoomed = random_zoom({key: self.imt[0]}) - self.assertTrue(np.array_equal(zoomed[key].shape, self.imt.shape[1:])) + for p in TEST_NDARRAYS: + zoomed = random_zoom({key: p(self.imt[0])}) + np.testing.assert_array_equal(zoomed[key].shape, self.imt.shape[1:]) @parameterized.expand( [("no_min_zoom", None, 1.1, "bilinear", TypeError), ("invalid_order", 0.9, 1.1, "s", ValueError)] ) def test_invalid_inputs(self, _, min_zoom, max_zoom, mode, raises): key = "img" - with self.assertRaises(raises): - random_zoom = RandZoomd(key, prob=1.0, min_zoom=min_zoom, max_zoom=max_zoom, mode=mode) - random_zoom({key: self.imt[0]}) + for p in TEST_NDARRAYS: + with self.assertRaises(raises): + random_zoom = RandZoomd(key, prob=1.0, min_zoom=min_zoom, max_zoom=max_zoom, mode=mode) + random_zoom({key: p(self.imt[0])}) def test_auto_expand_3d(self): random_zoom = RandZoomd( - keys="img", 
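# test_auto_expand_3d: a 2-element zoom range applied to a 3-dim image yields
# a 3-element _zoom, with the first drawn factor repeated for the leading
# spatial dim -- hence (1.048844, 1.048844, 0.962637) above. A sketch of that
# padding rule (an inference from the expected values, not the MONAI code):
def expand_zoom(zoom, ndim):
    zoom = list(zoom)
    while len(zoom) < ndim:
        zoom.insert(0, zoom[0])  # duplicate the leading factor
    return tuple(zoom)

print(expand_zoom([1.048844, 0.962637], 3))  # (1.048844, 1.048844, 0.962637)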
- prob=1.0, - min_zoom=[0.8, 0.7], - max_zoom=[1.2, 1.3], - mode="nearest", - keep_size=False, + keys="img", prob=1.0, min_zoom=[0.8, 0.7], max_zoom=[1.2, 1.3], mode="nearest", keep_size=False ) - random_zoom.set_random_state(1234) - test_data = {"img": np.random.randint(0, 2, size=[2, 2, 3, 4])} - zoomed = random_zoom(test_data) - np.testing.assert_allclose(random_zoom._zoom, (1.048844, 1.048844, 0.962637), atol=1e-2) - np.testing.assert_allclose(zoomed["img"].shape, (2, 2, 3, 3)) + for p in TEST_NDARRAYS: + random_zoom.set_random_state(1234) + test_data = {"img": p(np.random.randint(0, 2, size=[2, 2, 3, 4]))} + zoomed = random_zoom(test_data) + assert_allclose(random_zoom.rand_zoom._zoom, (1.048844, 1.048844, 0.962637), atol=1e-2) + assert_allclose(zoomed["img"].shape, (2, 2, 3, 3)) if __name__ == "__main__": diff --git a/tests/test_randtorchvisiond.py b/tests/test_randtorchvisiond.py index d0485ce405..2dffe67994 100644 --- a/tests/test_randtorchvisiond.py +++ b/tests/test_randtorchvisiond.py @@ -29,19 +29,10 @@ {"img": torch.tensor([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]])}, torch.tensor( [ - [ - [0.1090, 0.6193], - [0.6193, 0.9164], - ], - [ - [0.1090, 0.6193], - [0.6193, 0.9164], - ], - [ - [0.1090, 0.6193], - [0.6193, 0.9164], - ], - ], + [[0.1090, 0.6193], [0.6193, 0.9164]], + [[0.1090, 0.6193], [0.6193, 0.9164]], + [[0.1090, 0.6193], [0.6193, 0.9164]], + ] ), ] @@ -50,24 +41,9 @@ {"img": torch.tensor([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]])}, torch.tensor( [ - [ - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.0], - [0.0, 1.0, 2.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - ], - [ - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.0], - [0.0, 1.0, 2.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - ], - [ - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.0], - [0.0, 1.0, 2.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - ], + [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 1.0, 2.0, 0.0], [0.0, 0.0, 0.0, 0.0]], + [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 1.0, 2.0, 0.0], [0.0, 0.0, 0.0, 0.0]], + [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 1.0, 2.0, 0.0], [0.0, 0.0, 0.0, 0.0]], ] ), ] diff --git a/tests/test_reg_loss_integration.py b/tests/test_reg_loss_integration.py index b864a64647..2949ee1519 100644 --- a/tests/test_reg_loss_integration.py +++ b/tests/test_reg_loss_integration.py @@ -17,25 +17,15 @@ from parameterized import parameterized from monai.losses import BendingEnergyLoss, GlobalMutualInformationLoss, LocalNormalizedCrossCorrelationLoss +from tests.utils import SkipIfBeforePyTorchVersion TEST_CASES = [ [BendingEnergyLoss, {}, ["pred"]], - [ - LocalNormalizedCrossCorrelationLoss, - {"kernel_size": 7, "kernel_type": "rectangular"}, - ["pred", "target"], - ], - [ - LocalNormalizedCrossCorrelationLoss, - {"kernel_size": 5, "kernel_type": "triangular"}, - ["pred", "target"], - ], - [ - LocalNormalizedCrossCorrelationLoss, - {"kernel_size": 3, "kernel_type": "gaussian"}, - ["pred", "target"], - ], + [LocalNormalizedCrossCorrelationLoss, {"kernel_size": 7, "kernel_type": "rectangular"}, ["pred", "target"]], + [LocalNormalizedCrossCorrelationLoss, {"kernel_size": 5, "kernel_type": "triangular"}, ["pred", "target"]], + [LocalNormalizedCrossCorrelationLoss, {"kernel_size": 3, "kernel_type": "gaussian"}, ["pred", "target"]], [GlobalMutualInformationLoss, {"num_bins": 10}, ["pred", "target"]], + [GlobalMutualInformationLoss, {"kernel_type": "b-spline", "num_bins": 10}, ["pred", "target"]], ] @@ -51,6 +41,7 @@ def tearDown(self): 
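# Note the dict test now reads random_zoom.rand_zoom._zoom: the dictionary
# transform delegates to an internal array-transform instance that owns the
# random parameters. A sketch of that delegation pattern (hypothetical names,
# zoom logic elided):
class RandZoomArraySketch:
    def __init__(self, prob=1.0):
        self.prob, self._zoom = prob, 1.0

    def __call__(self, img):
        return img  # zooming elided in this sketch

class RandZoomDictSketch:
    def __init__(self, keys, **kwargs):
        self.keys = (keys,) if isinstance(keys, str) else keys
        self.rand_zoom = RandZoomArraySketch(**kwargs)  # parameters live here

    def __call__(self, data):
        d = dict(data)
        for k in self.keys:
            d[k] = self.rand_zoom(d[k])  # same draw reused for every key
        return d

t = RandZoomDictSketch("img")
print(t.rand_zoom._zoom)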
torch.backends.cudnn.benchmark = True @parameterized.expand(TEST_CASES) + @SkipIfBeforePyTorchVersion((1, 9)) def test_convergence(self, loss_type, loss_args, forward_args): """ The goal of this test is to assess if the gradient of the loss function @@ -69,7 +60,7 @@ def test_convergence(self, loss_type, loss_args, forward_args): # define a one layer model class OnelayerNet(nn.Module): def __init__(self): - super(OnelayerNet, self).__init__() + super().__init__() self.layer = nn.Sequential( nn.Conv3d(in_channels=1, out_channels=1, kernel_size=3, padding=1), nn.ReLU(), diff --git a/tests/test_resampler.py b/tests/test_resampler.py index 2be94acebd..af23421ecc 100644 --- a/tests/test_resampler.py +++ b/tests/test_resampler.py @@ -17,69 +17,146 @@ from monai.transforms import Resample from monai.transforms.utils import create_grid +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASES = [ - [ - dict(padding_mode="zeros", as_tensor_output=False, device=None), - {"grid": create_grid((2, 2)), "img": np.arange(4).reshape((1, 2, 2))}, - np.array([[[0.0, 1.0], [2.0, 3.0]]]), - ], - [ - dict(padding_mode="zeros", as_tensor_output=False, device=None), - {"grid": create_grid((4, 4)), "img": np.arange(4).reshape((1, 2, 2))}, - np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]), - ], - [ - dict(padding_mode="border", as_tensor_output=False, device=None), - {"grid": create_grid((4, 4)), "img": np.arange(4).reshape((1, 2, 2))}, - np.array([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0], [2.0, 2.0, 3, 3.0], [2.0, 2.0, 3.0, 3.0]]]), - ], - [ - dict(padding_mode="reflection", as_tensor_output=False, device=None), - {"grid": create_grid((4, 4)), "img": np.arange(4).reshape((1, 2, 2)), "mode": "nearest"}, - np.array([[[3.0, 2.0, 3.0, 2.0], [1.0, 0.0, 1.0, 0.0], [3.0, 2.0, 3.0, 2.0], [1.0, 0.0, 1.0, 0.0]]]), - ], - [ - dict(padding_mode="zeros", as_tensor_output=False, device=None), - {"grid": create_grid((4, 4, 4)), "img": np.arange(8).reshape((1, 2, 2, 2)), "mode": "bilinear"}, - np.array( - [ +TESTS = [] +for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS: + for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]: + TESTS.append( [ - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 4.0, 5.0, 0.0], [0.0, 6.0, 7.0, 0.0], [0.0, 0.0, 0.0, 0.0]], - [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]], + dict(padding_mode="zeros", device=device), + {"grid": p(create_grid((2, 2))), "img": q(np.arange(4).reshape((1, 2, 2)))}, + q(np.array([[[0.0, 1.0], [2.0, 3.0]]])), ] - ] - ), - ], - [ - dict(padding_mode="border", as_tensor_output=False, device=None), - {"grid": create_grid((4, 4, 4)), "img": np.arange(8).reshape((1, 2, 2, 2)), "mode": "bilinear"}, - np.array( - [ + ) + TESTS.append( [ - [[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0], [2.0, 2.0, 3.0, 3.0], [2.0, 2.0, 3.0, 3.0]], - [[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0], [2.0, 2.0, 3.0, 3.0], [2.0, 2.0, 3.0, 3.0]], - [[4.0, 4.0, 5.0, 5.0], [4.0, 4.0, 5.0, 5.0], [6.0, 6.0, 7.0, 7.0], [6.0, 6.0, 7.0, 7.0]], - [[4.0, 4.0, 5.0, 5.0], [4.0, 4.0, 5.0, 5.0], [6.0, 6.0, 7.0, 7.0], [6.0, 6.0, 7.0, 7.0]], + dict(padding_mode="zeros", device=device), + {"grid": p(create_grid((4, 4))), "img": q(np.arange(4).reshape((1, 2, 2)))}, + q( + np.array( + [[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], 
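# Device parameterisation as used in the resampler TESTS: always cover the
# CPU, and add CUDA only when a GPU is present, so the same suite runs on
# both GPU and CPU-only runners.
import torch

devices = [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]
print(devices)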
[0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]]] + ) + ), ] - ] - ), - ], -] + ) + TESTS.append( + [ + dict(padding_mode="border", device=device), + {"grid": p(create_grid((4, 4))), "img": q(np.arange(4).reshape((1, 2, 2)))}, + q( + np.array( + [[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0], [2.0, 2.0, 3, 3.0], [2.0, 2.0, 3.0, 3.0]]] + ) + ), + ] + ) + TESTS.append( + [ + dict(padding_mode="reflection", device=device), + {"grid": p(create_grid((4, 4))), "img": q(np.arange(4).reshape((1, 2, 2))), "mode": "nearest"}, + q( + np.array( + [[[3.0, 2.0, 3.0, 2.0], [1.0, 0.0, 1.0, 0.0], [3.0, 2.0, 3.0, 2.0], [1.0, 0.0, 1.0, 0.0]]] + ) + ), + ] + ) + TESTS.append( + [ + dict(padding_mode="zeros", device=device), + { + "grid": p(create_grid((4, 4, 4))), + "img": q(np.arange(8).reshape((1, 2, 2, 2))), + "mode": "bilinear", + }, + q( + np.array( + [ + [ + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 1.0, 0.0], + [0.0, 2.0, 3.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 4.0, 5.0, 0.0], + [0.0, 6.0, 7.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + ] + ] + ) + ), + ] + ) + TESTS.append( + [ + dict(padding_mode="border", device=device), + { + "grid": p(create_grid((4, 4, 4))), + "img": q(np.arange(8).reshape((1, 2, 2, 2))), + "mode": "bilinear", + }, + q( + np.array( + [ + [ + [ + [0.0, 0.0, 1.0, 1.0], + [0.0, 0.0, 1.0, 1.0], + [2.0, 2.0, 3.0, 3.0], + [2.0, 2.0, 3.0, 3.0], + ], + [ + [0.0, 0.0, 1.0, 1.0], + [0.0, 0.0, 1.0, 1.0], + [2.0, 2.0, 3.0, 3.0], + [2.0, 2.0, 3.0, 3.0], + ], + [ + [4.0, 4.0, 5.0, 5.0], + [4.0, 4.0, 5.0, 5.0], + [6.0, 6.0, 7.0, 7.0], + [6.0, 6.0, 7.0, 7.0], + ], + [ + [4.0, 4.0, 5.0, 5.0], + [4.0, 4.0, 5.0, 5.0], + [6.0, 6.0, 7.0, 7.0], + [6.0, 6.0, 7.0, 7.0], + ], + ] + ] + ) + ), + ] + ) class TestResample(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_resample(self, input_param, input_data, expected_val): g = Resample(**input_param) result = g(**input_data) - self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor)) - if isinstance(result, torch.Tensor): - np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4) - else: - np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) + if "device" in input_data: + self.assertEqual(result.device, input_data["device"]) + assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4) if __name__ == "__main__": diff --git a/tests/test_resize.py b/tests/test_resize.py index e5ec5dd1a9..65d934afe6 100644 --- a/tests/test_resize.py +++ b/tests/test_resize.py @@ -16,7 +16,7 @@ from parameterized import parameterized from monai.transforms import Resize -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose TEST_CASE_0 = [{"spatial_size": 15}, (6, 10, 15)] @@ -45,16 +45,17 @@ def test_correct_results(self, spatial_size, mode): _order = 1 if spatial_size == (32, -1): spatial_size = (32, 64) - expected = [] - for channel in self.imt[0]: - expected.append( - skimage.transform.resize( - channel, spatial_size, order=_order, clip=False, preserve_range=False, anti_aliasing=False - ) + expected = [ + skimage.transform.resize( + channel, spatial_size, order=_order, clip=False, preserve_range=False, anti_aliasing=False ) + for channel in 
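# Device check in the spirit of the one in TestResample: output of a
# transform configured with device=... should land on that device. The
# configured device lives in the parameter dict (input_param) in the TESTS
# above, so a sketch of the assertion reading from there (an assumption
# about the intended check, not the merged test verbatim):
import torch

input_param = {"padding_mode": "zeros", "device": "cpu"}
result = torch.zeros(1, device=input_param["device"])
if input_param.get("device") is not None:
    assert result.device == torch.device(input_param["device"])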
self.imt[0] + ] + expected = np.stack(expected).astype(np.float32) - out = resize(self.imt[0]) - np.testing.assert_allclose(out, expected, atol=0.9) + for p in TEST_NDARRAYS: + out = resize(p(self.imt[0])) + assert_allclose(out, expected, type_test=False, atol=0.9) @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2]) def test_longest_shape(self, input_param, expected_shape): diff --git a/tests/test_resize_with_pad_or_crop.py b/tests/test_resize_with_pad_or_crop.py index 46f1fc86cc..262cd2ffdb 100644 --- a/tests/test_resize_with_pad_or_crop.py +++ b/tests/test_resize_with_pad_or_crop.py @@ -12,47 +12,38 @@ import unittest import numpy as np +import torch from parameterized import parameterized from monai.transforms import ResizeWithPadOrCrop +from tests.utils import TEST_NDARRAYS TEST_CASES = [ - [ - {"spatial_size": [15, 8, 8], "mode": "constant"}, - (3, 8, 8, 4), - (3, 15, 8, 8), - ], + [{"spatial_size": [15, 8, 8], "mode": "constant"}, (3, 8, 8, 4), (3, 15, 8, 8)], [ {"spatial_size": [15, 4, 8], "mode": "constant", "method": "end", "constant_values": 1}, (3, 8, 8, 4), (3, 15, 4, 8), ], - [ - {"spatial_size": [15, 4, -1], "mode": "constant"}, - (3, 8, 8, 4), - (3, 15, 4, 4), - ], - [ - {"spatial_size": [15, 4, -1], "mode": "reflect"}, - (3, 8, 8, 4), - (3, 15, 4, 4), - ], - [ - {"spatial_size": [-1, -1, -1], "mode": "reflect"}, - (3, 8, 8, 4), - (3, 8, 8, 4), - ], + [{"spatial_size": [15, 4, -1], "mode": "constant"}, (3, 8, 8, 4), (3, 15, 4, 4)], + [{"spatial_size": [15, 4, -1], "mode": "reflect"}, (3, 8, 8, 4), (3, 15, 4, 4)], + [{"spatial_size": [-1, -1, -1], "mode": "reflect"}, (3, 8, 8, 4), (3, 8, 8, 4)], ] class TestResizeWithPadOrCrop(unittest.TestCase): @parameterized.expand(TEST_CASES) def test_pad_shape(self, input_param, input_shape, expected_shape): - paddcroper = ResizeWithPadOrCrop(**input_param) - result = paddcroper(np.zeros(input_shape)) - np.testing.assert_allclose(result.shape, expected_shape) - result = paddcroper(np.zeros(input_shape), mode="constant") - np.testing.assert_allclose(result.shape, expected_shape) + for p in TEST_NDARRAYS: + if isinstance(p(0), torch.Tensor) and ( + "constant_values" in input_param or input_param["mode"] == "reflect" + ): + continue + paddcroper = ResizeWithPadOrCrop(**input_param) + result = paddcroper(p(np.zeros(input_shape))) + np.testing.assert_allclose(result.shape, expected_shape) + result = paddcroper(p(np.zeros(input_shape)), mode="constant") + np.testing.assert_allclose(result.shape, expected_shape) if __name__ == "__main__": diff --git a/tests/test_resize_with_pad_or_cropd.py b/tests/test_resize_with_pad_or_cropd.py index 32a62a9e16..91201bba53 100644 --- a/tests/test_resize_with_pad_or_cropd.py +++ b/tests/test_resize_with_pad_or_cropd.py @@ -12,45 +12,37 @@ import unittest import numpy as np +import torch from parameterized import parameterized from monai.transforms import ResizeWithPadOrCropd +from tests.utils import TEST_NDARRAYS TEST_CASES = [ - [ - {"keys": "img", "spatial_size": [15, 8, 8], "mode": "constant"}, - {"img": np.zeros((3, 8, 8, 4))}, - (3, 15, 8, 8), - ], + [{"keys": "img", "spatial_size": [15, 8, 8], "mode": "constant"}, {"img": np.zeros((3, 8, 8, 4))}, (3, 15, 8, 8)], [ {"keys": "img", "spatial_size": [15, 4, 8], "mode": "constant", "method": "end", "constant_values": 1}, {"img": np.zeros((3, 8, 8, 4))}, (3, 15, 4, 8), ], - [ - {"keys": "img", "spatial_size": [15, 4, -1], "mode": "constant"}, - {"img": np.zeros((3, 8, 8, 4))}, - (3, 15, 4, 4), - ], - [ - {"keys": "img", "spatial_size": [15, 4, -1], 
"mode": "reflect"}, - {"img": np.zeros((3, 8, 8, 4))}, - (3, 15, 4, 4), - ], - [ - {"keys": "img", "spatial_size": [-1, -1, -1], "mode": "reflect"}, - {"img": np.zeros((3, 8, 8, 4))}, - (3, 8, 8, 4), - ], + [{"keys": "img", "spatial_size": [15, 4, -1], "mode": "constant"}, {"img": np.zeros((3, 8, 8, 4))}, (3, 15, 4, 4)], + [{"keys": "img", "spatial_size": [15, 4, -1], "mode": "reflect"}, {"img": np.zeros((3, 8, 8, 4))}, (3, 15, 4, 4)], + [{"keys": "img", "spatial_size": [-1, -1, -1], "mode": "reflect"}, {"img": np.zeros((3, 8, 8, 4))}, (3, 8, 8, 4)], ] class TestResizeWithPadOrCropd(unittest.TestCase): @parameterized.expand(TEST_CASES) def test_pad_shape(self, input_param, input_data, expected_val): - paddcroper = ResizeWithPadOrCropd(**input_param) - result = paddcroper(input_data) - np.testing.assert_allclose(result["img"].shape, expected_val) + for p in TEST_NDARRAYS: + if isinstance(p(0), torch.Tensor) and ( + "constant_values" in input_param or input_param["mode"] == "reflect" + ): + continue + paddcroper = ResizeWithPadOrCropd(**input_param) + input_data["img"] = p(input_data["img"]) + result = paddcroper(input_data) + np.testing.assert_allclose(result["img"].shape, expected_val) if __name__ == "__main__": diff --git a/tests/test_resized.py b/tests/test_resized.py index 930faf00eb..7d09f13bad 100644 --- a/tests/test_resized.py +++ b/tests/test_resized.py @@ -16,7 +16,7 @@ from parameterized import parameterized from monai.transforms import Resized -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose TEST_CASE_0 = [{"keys": "img", "spatial_size": 15}, (6, 10, 15)] @@ -48,16 +48,17 @@ def test_correct_results(self, spatial_size, mode): _order = 1 if spatial_size == (32, -1): spatial_size = (32, 64) - expected = [] - for channel in self.imt[0]: - expected.append( - skimage.transform.resize( - channel, spatial_size, order=_order, clip=False, preserve_range=False, anti_aliasing=False - ) + expected = [ + skimage.transform.resize( + channel, spatial_size, order=_order, clip=False, preserve_range=False, anti_aliasing=False ) + for channel in self.imt[0] + ] + expected = np.stack(expected).astype(np.float32) - out = resize({"img": self.imt[0]})["img"] - np.testing.assert_allclose(out, expected, atol=0.9) + for p in TEST_NDARRAYS: + out = resize({"img": p(self.imt[0])})["img"] + assert_allclose(out, expected, type_test=False, atol=0.9) @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) def test_longest_shape(self, input_param, expected_shape): diff --git a/tests/test_resnet.py b/tests/test_resnet.py index c4ba5c2e16..16cd6f4865 100644 --- a/tests/test_resnet.py +++ b/tests/test_resnet.py @@ -42,14 +42,26 @@ (2, 3), ] +TEST_CASE_2_A = [ # 2D, batch 2, 1 input channel, shortcut type A + {"pretrained": False, "spatial_dims": 2, "n_input_channels": 1, "num_classes": 3, "shortcut_type": "A"}, + (2, 1, 32, 64), + (2, 3), +] + TEST_CASE_3 = [ # 1D, batch 1, 2 input channels {"pretrained": False, "spatial_dims": 1, "n_input_channels": 2, "num_classes": 3}, (1, 2, 32), (1, 3), ] +TEST_CASE_3_A = [ # 1D, batch 1, 2 input channels + {"pretrained": False, "spatial_dims": 1, "n_input_channels": 2, "num_classes": 3, "shortcut_type": "A"}, + (1, 2, 32), + (1, 3), +] + TEST_CASES = [] -for case in [TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]: +for case in [TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_2_A, TEST_CASE_3_A]: for model in [resnet10, resnet18, resnet34, resnet50, resnet101, resnet152, resnet200]: 
TEST_CASES.append([model, *case]) diff --git a/tests/test_rotate.py b/tests/test_rotate.py index 436c952d4b..411fed3d1d 100644 --- a/tests/test_rotate.py +++ b/tests/test_rotate.py @@ -10,42 +10,44 @@ # limitations under the License. import unittest +from typing import List, Tuple import numpy as np import scipy.ndimage +import torch from parameterized import parameterized from monai.transforms import Rotate -from tests.utils import NumpyImageTestCase2D, NumpyImageTestCase3D - -TEST_CASES_2D = [ - (np.pi / 6, False, "bilinear", "border", False), - (np.pi / 4, True, "bilinear", "border", False), - (-np.pi / 4.5, True, "nearest", "reflection", False), - (np.pi, False, "nearest", "zeros", False), - (-np.pi / 2, False, "bilinear", "zeros", True), -] - -TEST_CASES_3D = [ - (-np.pi / 2, True, "nearest", "border", False), - (np.pi / 4, True, "bilinear", "border", False), - (-np.pi / 4.5, True, "nearest", "reflection", False), - (np.pi, False, "nearest", "zeros", False), - (-np.pi / 2, False, "bilinear", "zeros", False), -] - -TEST_CASES_SHAPE_3D = [ - ([-np.pi / 2, 1.0, 2.0], "nearest", "border", False), - ([np.pi / 4, 0, 0], "bilinear", "border", False), - ([-np.pi / 4.5, -20, 20], "nearest", "reflection", False), -] +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D + +TEST_CASES_2D: List[Tuple] = [] +for p in TEST_NDARRAYS: + TEST_CASES_2D.append((p, np.pi / 6, False, "bilinear", "border", False)) + TEST_CASES_2D.append((p, np.pi / 4, True, "bilinear", "border", False)) + TEST_CASES_2D.append((p, -np.pi / 4.5, True, "nearest", "reflection", False)) + TEST_CASES_2D.append((p, np.pi, False, "nearest", "zeros", False)) + TEST_CASES_2D.append((p, -np.pi / 2, False, "bilinear", "zeros", True)) + +TEST_CASES_3D: List[Tuple] = [] +for p in TEST_NDARRAYS: + TEST_CASES_3D.append((p, -np.pi / 2, True, "nearest", "border", False)) + TEST_CASES_3D.append((p, np.pi / 4, True, "bilinear", "border", False)) + TEST_CASES_3D.append((p, -np.pi / 4.5, True, "nearest", "reflection", False)) + TEST_CASES_3D.append((p, np.pi, False, "nearest", "zeros", False)) + TEST_CASES_3D.append((p, -np.pi / 2, False, "bilinear", "zeros", False)) + +TEST_CASES_SHAPE_3D: List[Tuple] = [] +for p in TEST_NDARRAYS: + TEST_CASES_SHAPE_3D.append((p, [-np.pi / 2, 1.0, 2.0], "nearest", "border", False)) + TEST_CASES_SHAPE_3D.append((p, [np.pi / 4, 0, 0], "bilinear", "border", False)) + TEST_CASES_SHAPE_3D.append((p, [-np.pi / 4.5, -20, 20], "nearest", "reflection", False)) class TestRotate2D(NumpyImageTestCase2D): @parameterized.expand(TEST_CASES_2D) - def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corners): + def test_correct_results(self, im_type, angle, keep_size, mode, padding_mode, align_corners): rotate_fn = Rotate(angle, keep_size, mode, padding_mode, align_corners) - rotated = rotate_fn(self.imt[0]) + rotated = rotate_fn(im_type(self.imt[0])) if keep_size: np.testing.assert_allclose(self.imt[0].shape, rotated.shape) _order = 0 if mode == "nearest" else 1 @@ -60,25 +62,20 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne for channel in self.imt[0]: expected.append( scipy.ndimage.rotate( - channel, - -np.rad2deg(angle), - (0, 1), - not keep_size, - order=_order, - mode=_mode, - prefilter=False, + channel, -np.rad2deg(angle), (0, 1), not keep_size, order=_order, mode=_mode, prefilter=False ) ) expected = np.stack(expected).astype(np.float32) + rotated = rotated.cpu() if isinstance(rotated, torch.Tensor) else rotated good = 
np.sum(np.isclose(expected, rotated, atol=1e-3)) self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 pixels") class TestRotate3D(NumpyImageTestCase3D): @parameterized.expand(TEST_CASES_3D) - def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corners): + def test_correct_results(self, im_type, angle, keep_size, mode, padding_mode, align_corners): rotate_fn = Rotate([angle, 0, 0], keep_size, mode, padding_mode, align_corners) - rotated = rotate_fn(self.imt[0]) + rotated = rotate_fn(im_type(self.imt[0])) if keep_size: np.testing.assert_allclose(self.imt[0].shape, rotated.shape) _order = 0 if mode == "nearest" else 1 @@ -93,33 +90,29 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne for channel in self.imt[0]: expected.append( scipy.ndimage.rotate( - channel, - -np.rad2deg(angle), - (1, 2), - not keep_size, - order=_order, - mode=_mode, - prefilter=False, + channel, -np.rad2deg(angle), (1, 2), not keep_size, order=_order, mode=_mode, prefilter=False ) ) expected = np.stack(expected).astype(np.float32) + rotated = rotated.cpu() if isinstance(rotated, torch.Tensor) else rotated n_good = np.sum(np.isclose(expected, rotated, atol=1e-3)) self.assertLessEqual(expected.size - n_good, 5, "diff at most 5 pixels") @parameterized.expand(TEST_CASES_SHAPE_3D) - def test_correct_shape(self, angle, mode, padding_mode, align_corners): + def test_correct_shape(self, im_type, angle, mode, padding_mode, align_corners): rotate_fn = Rotate(angle, True, align_corners=align_corners) - rotated = rotate_fn(self.imt[0], mode=mode, padding_mode=padding_mode) + rotated = rotate_fn(im_type(self.imt[0]), mode=mode, padding_mode=padding_mode) np.testing.assert_allclose(self.imt[0].shape, rotated.shape) def test_ill_case(self): - rotate_fn = Rotate(10, True) - with self.assertRaises(ValueError): # wrong shape - rotate_fn(self.imt) - - rotate_fn = Rotate(10, keep_size=False) - with self.assertRaises(ValueError): # wrong mode - rotate_fn(self.imt[0], mode="trilinear") + for p in TEST_NDARRAYS: + rotate_fn = Rotate(10, True) + with self.assertRaises(ValueError): # wrong shape + rotate_fn(p(self.imt)) + + rotate_fn = Rotate(10, keep_size=False) + with self.assertRaises(ValueError): # wrong mode + rotate_fn(p(self.imt[0]), mode="trilinear") if __name__ == "__main__": diff --git a/tests/test_rotate90.py b/tests/test_rotate90.py index 4ab39d5cf6..9857b26fe8 100644 --- a/tests/test_rotate90.py +++ b/tests/test_rotate90.py @@ -14,45 +14,41 @@ import numpy as np from monai.transforms import Rotate90 -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose class TestRotate90(NumpyImageTestCase2D): def test_rotate90_default(self): rotate = Rotate90() - rotated = rotate(self.imt[0]) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 1, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated, expected)) + for p in TEST_NDARRAYS: + rotated = rotate(p(self.imt[0])) + expected = [np.rot90(channel, 1, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8) def test_k(self): rotate = Rotate90(k=2) - rotated = rotate(self.imt[0]) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 2, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated, expected)) + for p in TEST_NDARRAYS: + rotated = rotate(p(self.imt[0])) + expected = 
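# Interpolation backends disagree on a handful of border pixels, so the
# rotate tests count near-equal pixels rather than asserting exact equality;
# the pattern in isolation:
import numpy as np

expected = np.zeros((64, 64), dtype=np.float32)
rotated = expected.copy()
rotated[0, :3] = 1.0                                   # three deviant pixels
good = np.sum(np.isclose(expected, rotated, atol=1e-3))
assert expected.size - good <= 5, "diff at most 5 pixels"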
[np.rot90(channel, 2, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8) def test_spatial_axes(self): rotate = Rotate90(spatial_axes=(0, -1)) - rotated = rotate(self.imt[0]) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 1, (0, -1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated, expected)) + for p in TEST_NDARRAYS: + rotated = rotate(p(self.imt[0])) + expected = [np.rot90(channel, 1, (0, -1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8) def test_prob_k_spatial_axes(self): rotate = Rotate90(k=2, spatial_axes=(0, 1)) - rotated = rotate(self.imt[0]) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 2, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated, expected)) + for p in TEST_NDARRAYS: + rotated = rotate(p(self.imt[0])) + expected = [np.rot90(channel, 2, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8) if __name__ == "__main__": diff --git a/tests/test_rotate90d.py b/tests/test_rotate90d.py index 3d71ead82a..a2a4a27521 100644 --- a/tests/test_rotate90d.py +++ b/tests/test_rotate90d.py @@ -14,49 +14,45 @@ import numpy as np from monai.transforms import Rotate90d -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose class TestRotate90d(NumpyImageTestCase2D): def test_rotate90_default(self): key = "test" rotate = Rotate90d(keys=key) - rotated = rotate({key: self.imt[0]}) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 1, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated[key], expected)) + for p in TEST_NDARRAYS: + rotated = rotate({key: p(self.imt[0])}) + expected = [np.rot90(channel, 1, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated[key], p(expected)) def test_k(self): key = None rotate = Rotate90d(keys=key, k=2) - rotated = rotate({key: self.imt[0]}) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 2, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated[key], expected)) + for p in TEST_NDARRAYS: + rotated = rotate({key: p(self.imt[0])}) + expected = [np.rot90(channel, 2, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated[key], p(expected)) def test_spatial_axes(self): key = "test" rotate = Rotate90d(keys=key, spatial_axes=(0, 1)) - rotated = rotate({key: self.imt[0]}) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 1, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated[key], expected)) + for p in TEST_NDARRAYS: + rotated = rotate({key: p(self.imt[0])}) + expected = [np.rot90(channel, 1, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated[key], p(expected)) def test_prob_k_spatial_axes(self): key = "test" rotate = Rotate90d(keys=key, k=2, spatial_axes=(0, 1)) - rotated = rotate({key: self.imt[0]}) - expected = [] - for channel in self.imt[0]: - expected.append(np.rot90(channel, 2, (0, 1))) - expected = np.stack(expected) - self.assertTrue(np.allclose(rotated[key], expected)) + for p in TEST_NDARRAYS: + rotated = rotate({key: p(self.imt[0])}) + expected 
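# Reference values for Rotate90 come straight from numpy, applied per
# channel; self-contained:
import numpy as np

img = np.arange(2 * 3 * 3).reshape(2, 3, 3)                # (channel, H, W)
expected = np.stack([np.rot90(c, 1, (0, 1)) for c in img])
print(expected.shape)  # (2, 3, 3)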
= [np.rot90(channel, 2, (0, 1)) for channel in self.imt[0]] + expected = np.stack(expected) + assert_allclose(rotated[key], p(expected)) def test_no_key(self): key = "unknown" diff --git a/tests/test_rotated.py b/tests/test_rotated.py index 2ea421101b..91918513b8 100644 --- a/tests/test_rotated.py +++ b/tests/test_rotated.py @@ -10,36 +10,38 @@ # limitations under the License. import unittest +from typing import List, Tuple import numpy as np import scipy.ndimage +import torch from parameterized import parameterized from monai.transforms import Rotated -from tests.utils import NumpyImageTestCase2D, NumpyImageTestCase3D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D -TEST_CASES_2D = [ - (-np.pi / 6, False, "bilinear", "border", False), - (-np.pi / 4, True, "bilinear", "border", False), - (np.pi / 4.5, True, "nearest", "reflection", False), - (-np.pi, False, "nearest", "zeros", False), - (np.pi / 2, False, "bilinear", "zeros", True), -] +TEST_CASES_2D: List[Tuple] = [] +for p in TEST_NDARRAYS: + TEST_CASES_2D.append((p, -np.pi / 6, False, "bilinear", "border", False)) + TEST_CASES_2D.append((p, -np.pi / 4, True, "bilinear", "border", False)) + TEST_CASES_2D.append((p, np.pi / 4.5, True, "nearest", "reflection", False)) + TEST_CASES_2D.append((p, -np.pi, False, "nearest", "zeros", False)) + TEST_CASES_2D.append((p, np.pi / 2, False, "bilinear", "zeros", True)) -TEST_CASES_3D = [ - (-np.pi / 6, False, "bilinear", "border", False), - (-np.pi / 4, True, "bilinear", "border", False), - (np.pi / 4.5, True, "nearest", "reflection", False), - (-np.pi, False, "nearest", "zeros", False), - (np.pi / 2, False, "bilinear", "zeros", True), -] +TEST_CASES_3D: List[Tuple] = [] +for p in TEST_NDARRAYS: + TEST_CASES_3D.append((p, -np.pi / 6, False, "bilinear", "border", False)) + TEST_CASES_3D.append((p, -np.pi / 4, True, "bilinear", "border", False)) + TEST_CASES_3D.append((p, np.pi / 4.5, True, "nearest", "reflection", False)) + TEST_CASES_3D.append((p, -np.pi, False, "nearest", "zeros", False)) + TEST_CASES_3D.append((p, np.pi / 2, False, "bilinear", "zeros", True)) class TestRotated2D(NumpyImageTestCase2D): @parameterized.expand(TEST_CASES_2D) - def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corners): + def test_correct_results(self, im_type, angle, keep_size, mode, padding_mode, align_corners): rotate_fn = Rotated(("img", "seg"), angle, keep_size, (mode, "nearest"), padding_mode, align_corners) - rotated = rotate_fn({"img": self.imt[0], "seg": self.segn[0]}) + rotated = rotate_fn({"img": im_type(self.imt[0]), "seg": im_type(self.segn[0])}) if keep_size: np.testing.assert_allclose(self.imt[0].shape, rotated["img"].shape) _order = 0 if mode == "nearest" else 1 @@ -52,6 +54,8 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne expected = scipy.ndimage.rotate( self.imt[0, 0], -np.rad2deg(angle), (0, 1), not keep_size, order=_order, mode=_mode, prefilter=False ) + for k, v in rotated.items(): + rotated[k] = v.cpu() if isinstance(v, torch.Tensor) else v good = np.sum(np.isclose(expected, rotated["img"][0], atol=1e-3)) self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 pixels") @@ -64,9 +68,9 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne class TestRotated3D(NumpyImageTestCase3D): @parameterized.expand(TEST_CASES_3D) - def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corners): + def test_correct_results(self, im_type, angle, keep_size, 
mode, padding_mode, align_corners): rotate_fn = Rotated(("img", "seg"), [0, angle, 0], keep_size, (mode, "nearest"), padding_mode, align_corners) - rotated = rotate_fn({"img": self.imt[0], "seg": self.segn[0]}) + rotated = rotate_fn({"img": im_type(self.imt[0]), "seg": im_type(self.segn[0])}) if keep_size: np.testing.assert_allclose(self.imt[0].shape, rotated["img"].shape) _order = 0 if mode == "nearest" else 1 @@ -79,6 +83,8 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne expected = scipy.ndimage.rotate( self.imt[0, 0], np.rad2deg(angle), (0, 2), not keep_size, order=_order, mode=_mode, prefilter=False ) + for k, v in rotated.items(): + rotated[k] = v.cpu() if isinstance(v, torch.Tensor) else v good = np.sum(np.isclose(expected.astype(np.float32), rotated["img"][0], atol=1e-3)) self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 voxels.") @@ -86,14 +92,14 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne self.segn[0, 0], np.rad2deg(angle), (0, 2), not keep_size, order=0, mode=_mode, prefilter=False ) expected = np.stack(expected).astype(int) - self.assertLessEqual(np.count_nonzero(expected != rotated["seg"][0]), 130) + self.assertLessEqual(np.count_nonzero(expected != rotated["seg"][0]), 160) class TestRotated3DXY(NumpyImageTestCase3D): @parameterized.expand(TEST_CASES_3D) - def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corners): + def test_correct_results(self, im_type, angle, keep_size, mode, padding_mode, align_corners): rotate_fn = Rotated(("img", "seg"), [0, 0, angle], keep_size, (mode, "nearest"), padding_mode, align_corners) - rotated = rotate_fn({"img": self.imt[0], "seg": self.segn[0]}) + rotated = rotate_fn({"img": im_type(self.imt[0]), "seg": im_type(self.segn[0])}) if keep_size: np.testing.assert_allclose(self.imt[0].shape, rotated["img"].shape) _order = 0 if mode == "nearest" else 1 @@ -106,6 +112,8 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne expected = scipy.ndimage.rotate( self.imt[0, 0], -np.rad2deg(angle), (0, 1), not keep_size, order=_order, mode=_mode, prefilter=False ) + for k, v in rotated.items(): + rotated[k] = v.cpu() if isinstance(v, torch.Tensor) else v good = np.sum(np.isclose(expected, rotated["img"][0], atol=1e-3)) self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 voxels") @@ -113,7 +121,7 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne self.segn[0, 0], -np.rad2deg(angle), (0, 1), not keep_size, order=0, mode=_mode, prefilter=False ) expected = np.stack(expected).astype(int) - self.assertLessEqual(np.count_nonzero(expected != rotated["seg"][0]), 130) + self.assertLessEqual(np.count_nonzero(expected != rotated["seg"][0]), 160) if __name__ == "__main__": diff --git a/tests/test_save_classificationd.py b/tests/test_save_classificationd.py index 67dc0320a6..26ce3176e8 100644 --- a/tests/test_save_classificationd.py +++ b/tests/test_save_classificationd.py @@ -83,7 +83,7 @@ def test_saved_content(self): def _test_file(filename, count): filepath = os.path.join(tempdir, filename) self.assertTrue(os.path.exists(filepath)) - with open(filepath, "r") as f: + with open(filepath) as f: reader = csv.reader(f) i = 0 for row in reader: diff --git a/tests/test_save_image.py b/tests/test_save_image.py index f7c8e07f06..be77b12b8e 100644 --- a/tests/test_save_image.py +++ b/tests/test_save_image.py @@ -18,19 +18,9 @@ from monai.transforms import SaveImage -TEST_CASE_1 
= [ - torch.randint(0, 255, (1, 2, 3, 4)), - {"filename_or_obj": "testfile0.nii.gz"}, - ".nii.gz", - False, -] - -TEST_CASE_2 = [ - torch.randint(0, 255, (1, 2, 3, 4)), - None, - ".nii.gz", - False, -] +TEST_CASE_1 = [torch.randint(0, 255, (1, 2, 3, 4)), {"filename_or_obj": "testfile0.nii.gz"}, ".nii.gz", False] + +TEST_CASE_2 = [torch.randint(0, 255, (1, 2, 3, 4)), None, ".nii.gz", False] class TestSaveImage(unittest.TestCase): diff --git a/tests/test_save_imaged.py b/tests/test_save_imaged.py index 35bbea9628..f05a83dd9a 100644 --- a/tests/test_save_imaged.py +++ b/tests/test_save_imaged.py @@ -19,10 +19,7 @@ from monai.transforms import SaveImaged TEST_CASE_1 = [ - { - "img": torch.randint(0, 255, (1, 2, 3, 4)), - "img_meta_dict": {"filename_or_obj": "testfile0.nii.gz"}, - }, + {"img": torch.randint(0, 255, (1, 2, 3, 4)), "img_meta_dict": {"filename_or_obj": "testfile0.nii.gz"}}, ".nii.gz", False, ] diff --git a/tests/test_savitzky_golay_filter.py b/tests/test_savitzky_golay_filter.py index c9bcd9687e..fa38659acb 100644 --- a/tests/test_savitzky_golay_filter.py +++ b/tests/test_savitzky_golay_filter.py @@ -99,13 +99,7 @@ class TestSavitzkyGolayCPU(unittest.TestCase): @parameterized.expand( - [ - TEST_CASE_SINGLE_VALUE, - TEST_CASE_1D, - TEST_CASE_2D_AXIS_2, - TEST_CASE_2D_AXIS_3, - TEST_CASE_SINE_SMOOTH, - ] + [TEST_CASE_SINGLE_VALUE, TEST_CASE_1D, TEST_CASE_2D_AXIS_2, TEST_CASE_2D_AXIS_3, TEST_CASE_SINE_SMOOTH] ) def test_value(self, arguments, image, expected_data, atol): result = SavitzkyGolayFilter(**arguments)(image) @@ -124,13 +118,7 @@ def test_value(self, arguments, image, expected_data, atol): @skip_if_no_cuda class TestSavitzkyGolayGPU(unittest.TestCase): @parameterized.expand( - [ - TEST_CASE_SINGLE_VALUE, - TEST_CASE_1D, - TEST_CASE_2D_AXIS_2, - TEST_CASE_2D_AXIS_3, - TEST_CASE_SINE_SMOOTH, - ] + [TEST_CASE_SINGLE_VALUE, TEST_CASE_1D, TEST_CASE_2D_AXIS_2, TEST_CASE_2D_AXIS_3, TEST_CASE_SINE_SMOOTH] ) def test_value(self, arguments, image, expected_data, atol): result = SavitzkyGolayFilter(**arguments)(image.to(device="cuda")) @@ -140,12 +128,7 @@ def test_value(self, arguments, image, expected_data, atol): @skip_if_no_cuda class TestSavitzkyGolayGPUREP(unittest.TestCase): @parameterized.expand( - [ - TEST_CASE_SINGLE_VALUE_REP, - TEST_CASE_1D_REP, - TEST_CASE_2D_AXIS_2_REP, - TEST_CASE_2D_AXIS_3_REP, - ] + [TEST_CASE_SINGLE_VALUE_REP, TEST_CASE_1D_REP, TEST_CASE_2D_AXIS_2_REP, TEST_CASE_2D_AXIS_3_REP] ) def test_value(self, arguments, image, expected_data, atol): result = SavitzkyGolayFilter(**arguments)(image.to(device="cuda")) diff --git a/tests/test_savitzky_golay_smooth.py b/tests/test_savitzky_golay_smooth.py index 45d0ea3e4d..0f398bc48f 100644 --- a/tests/test_savitzky_golay_smooth.py +++ b/tests/test_savitzky_golay_smooth.py @@ -25,14 +25,14 @@ np.expand_dims(np.array([1.0]), 0), # Input data: Single value np.expand_dims(np.array([1 / 3]), 0), # Expected output: With a window length of 3 and polyorder 1 # output should be equal to mean of 0, 1 and 0 = 1/3 (because input will be zero-padded and a linear fit performed) - 1e-15, # absolute tolerance + 1e-5, # absolute tolerance ] TEST_CASE_2D_AXIS_2 = [ {"window_length": 3, "order": 1, "axis": 2}, # along axis 2 (second spatial dim) np.expand_dims(np.ones((2, 3)), 0), np.expand_dims(np.array([[2 / 3, 1.0, 2 / 3], [2 / 3, 1.0, 2 / 3]]), 0), - 1e-15, # absolute tolerance + 1e-5, # absolute tolerance ] # Replicated-padding trivial tests @@ -42,7 +42,7 @@ np.expand_dims(np.array([1.0]), 0), # Input data: Single value 
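Note on the relaxed tolerances in this hunk: once SavitzkyGolaySmooth runs in float32 (the tests below now cast inputs with astype(np.float32)), an absolute tolerance of 1e-15 is below what single precision can represent, so 1e-5 is the tightest bound that passes reliably. A quick, illustrative check of that assumption:

    import numpy as np

    # float32 carries ~7 decimal digits; atol=1e-15 is only meaningful
    # for float64 arithmetic, hence the relaxed tolerances in these cases
    print(np.finfo(np.float32).eps)  # 1.1920929e-07
    print(np.finfo(np.float64).eps)  # 2.220446049250313e-16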
np.expand_dims(np.array([1.0]), 0), # Expected output: With a window length of 3 and polyorder 1 # output will be equal to mean of [1, 1, 1] = 1 (input will be nearest-neighbour-padded and a linear fit performed) - 1e-15, # absolute tolerance + 1e-5, # absolute tolerance ] # Sine smoothing @@ -62,16 +62,16 @@ class TestSavitzkyGolaySmooth(unittest.TestCase): @parameterized.expand([TEST_CASE_SINGLE_VALUE, TEST_CASE_2D_AXIS_2, TEST_CASE_SINE_SMOOTH]) def test_value(self, arguments, image, expected_data, atol): for p in TEST_NDARRAYS: - result = SavitzkyGolaySmooth(**arguments)(p(image)) - torch.testing.assert_allclose(result, p(expected_data.astype(np.float32)), rtol=1e-7, atol=atol) + result = SavitzkyGolaySmooth(**arguments)(p(image.astype(np.float32))) + torch.testing.assert_allclose(result, p(expected_data.astype(np.float32)), rtol=1e-4, atol=atol) class TestSavitzkyGolaySmoothREP(unittest.TestCase): @parameterized.expand([TEST_CASE_SINGLE_VALUE_REP]) def test_value(self, arguments, image, expected_data, atol): for p in TEST_NDARRAYS: - result = SavitzkyGolaySmooth(**arguments)(p(image)) - torch.testing.assert_allclose(result, p(expected_data.astype(np.float32)), rtol=1e-7, atol=atol) + result = SavitzkyGolaySmooth(**arguments)(p(image.astype(np.float32))) + torch.testing.assert_allclose(result, p(expected_data.astype(np.float32)), rtol=1e-4, atol=atol) if __name__ == "__main__": diff --git a/tests/test_scale_intensity.py b/tests/test_scale_intensity.py index c2485af616..ddc2fb08e1 100644 --- a/tests/test_scale_intensity.py +++ b/tests/test_scale_intensity.py @@ -26,14 +26,26 @@ def test_range_scale(self): maxa = self.imt.max() norm = (self.imt - mina) / (maxa - mina) expected = p((norm * (2.0 - 1.0)) + 1.0) - assert_allclose(result, expected, rtol=1e-7, atol=0) + assert_allclose(result, expected, type_test=False, rtol=1e-7, atol=0) def test_factor_scale(self): for p in TEST_NDARRAYS: scaler = ScaleIntensity(minv=None, maxv=None, factor=0.1) result = scaler(p(self.imt)) expected = p((self.imt * (1 + 0.1)).astype(np.float32)) - assert_allclose(result, expected, rtol=1e-7, atol=0) + assert_allclose(result, p(expected), rtol=1e-7, atol=0) + + def test_channel_wise(self): + for p in TEST_NDARRAYS: + scaler = ScaleIntensity(minv=1.0, maxv=2.0, channel_wise=True) + data = p(self.imt) + result = scaler(data) + mina = self.imt.min() + maxa = self.imt.max() + for i, c in enumerate(data): + norm = (c - mina) / (maxa - mina) + expected = p((norm * (2.0 - 1.0)) + 1.0) + assert_allclose(result[i], expected, type_test=False, rtol=1e-7, atol=0) if __name__ == "__main__": diff --git a/tests/test_scale_intensity_range.py b/tests/test_scale_intensity_range.py index cba07d9157..d06bfd3596 100644 --- a/tests/test_scale_intensity_range.py +++ b/tests/test_scale_intensity_range.py @@ -11,19 +11,18 @@ import unittest -import numpy as np - from monai.transforms import ScaleIntensityRange -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose class IntensityScaleIntensityRange(NumpyImageTestCase2D): def test_image_scale_intensity_range(self): scaler = ScaleIntensityRange(a_min=20, a_max=108, b_min=50, b_max=80) - scaled = scaler(self.imt) - expected = (self.imt - 20) / 88 - expected = expected * 30 + 50 - self.assertTrue(np.allclose(scaled, expected)) + for p in TEST_NDARRAYS: + scaled = scaler(p(self.imt)) + expected = (self.imt - 20) / 88 + expected = expected * 30 + 50 + assert_allclose(scaled, p(expected)) if __name__ == "__main__": diff 
--git a/tests/test_scale_intensity_range_percentiles.py b/tests/test_scale_intensity_range_percentiles.py index 015162c8de..0024cb349d 100644 --- a/tests/test_scale_intensity_range_percentiles.py +++ b/tests/test_scale_intensity_range_percentiles.py @@ -14,7 +14,7 @@ import numpy as np from monai.transforms.intensity.array import ScaleIntensityRangePercentiles -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose class TestScaleIntensityRangePercentiles(NumpyImageTestCase2D): @@ -30,7 +30,9 @@ def test_scaling(self): expected = (img - a_min) / (a_max - a_min) expected = (expected * (b_max - b_min)) + b_min scaler = ScaleIntensityRangePercentiles(lower=lower, upper=upper, b_min=b_min, b_max=b_max) - self.assertTrue(np.allclose(expected, scaler(img))) + for p in TEST_NDARRAYS: + result = scaler(p(img)) + assert_allclose(result, p(expected), rtol=1e-4) def test_relative_scaling(self): img = self.imt @@ -47,7 +49,9 @@ def test_relative_scaling(self): expected_img = (img - expected_a_min) / (expected_a_max - expected_a_min) expected_img = (expected_img * (expected_b_max - expected_b_min)) + expected_b_min - self.assertTrue(np.allclose(expected_img, scaler(img))) + for p in TEST_NDARRAYS: + result = scaler(p(img)) + assert_allclose(result, p(expected_img), rtol=1e-4) def test_invalid_instantiation(self): self.assertRaises(ValueError, ScaleIntensityRangePercentiles, lower=-10, upper=99, b_min=0, b_max=255) diff --git a/tests/test_scale_intensity_ranged.py b/tests/test_scale_intensity_ranged.py index a8cac414e8..dc064a7708 100644 --- a/tests/test_scale_intensity_ranged.py +++ b/tests/test_scale_intensity_ranged.py @@ -11,20 +11,19 @@ import unittest -import numpy as np - from monai.transforms import ScaleIntensityRanged -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose class IntensityScaleIntensityRanged(NumpyImageTestCase2D): def test_image_scale_intensity_ranged(self): key = "img" scaler = ScaleIntensityRanged(keys=key, a_min=20, a_max=108, b_min=50, b_max=80) - scaled = scaler({key: self.imt}) - expected = (self.imt - 20) / 88 - expected = expected * 30 + 50 - self.assertTrue(np.allclose(scaled[key], expected)) + for p in TEST_NDARRAYS: + scaled = scaler({key: p(self.imt)}) + expected = (self.imt - 20) / 88 + expected = expected * 30 + 50 + assert_allclose(scaled[key], p(expected)) if __name__ == "__main__": diff --git a/tests/test_scale_intensityd.py b/tests/test_scale_intensityd.py index 6e13dbc272..93449b15e2 100644 --- a/tests/test_scale_intensityd.py +++ b/tests/test_scale_intensityd.py @@ -19,23 +19,36 @@ class TestScaleIntensityd(NumpyImageTestCase2D): def test_range_scale(self): + key = "img" for p in TEST_NDARRAYS: - key = "img" scaler = ScaleIntensityd(keys=[key], minv=1.0, maxv=2.0) result = scaler({key: p(self.imt)}) mina = np.min(self.imt) maxa = np.max(self.imt) norm = (self.imt - mina) / (maxa - mina) expected = (norm * (2.0 - 1.0)) + 1.0 - assert_allclose(result[key], expected) + assert_allclose(result[key], p(expected)) def test_factor_scale(self): + key = "img" for p in TEST_NDARRAYS: - key = "img" scaler = ScaleIntensityd(keys=[key], minv=None, maxv=None, factor=0.1) result = scaler({key: p(self.imt)}) expected = (self.imt * (1 + 0.1)).astype(np.float32) - assert_allclose(result[key], expected) + assert_allclose(result[key], p(expected)) + + def test_channel_wise(self): + key = "img" + for p in TEST_NDARRAYS: + scaler = 
ScaleIntensityd(keys=[key], minv=1.0, maxv=2.0, channel_wise=True) + data = p(self.imt) + result = scaler({key: data}) + mina = self.imt.min() + maxa = self.imt.max() + for i, c in enumerate(data): + norm = (c - mina) / (maxa - mina) + expected = p((norm * (2.0 - 1.0)) + 1.0) + assert_allclose(result[key][i], expected, type_test=False, rtol=1e-7, atol=0) if __name__ == "__main__": diff --git a/tests/test_seg_loss_integration.py b/tests/test_seg_loss_integration.py index d2f991f160..98d840afea 100644 --- a/tests/test_seg_loss_integration.py +++ b/tests/test_seg_loss_integration.py @@ -91,7 +91,7 @@ def test_convergence(self, loss_type, loss_args, forward_args): # define a one layer model class OnelayerNet(nn.Module): def __init__(self): - super(OnelayerNet, self).__init__() + super().__init__() self.layer_1 = nn.Linear(num_voxels, 200) self.acti = nn.ReLU() self.layer_2 = nn.Linear(200, num_voxels * num_classes) diff --git a/tests/test_selfattention.py b/tests/test_selfattention.py index 3d561aac2f..559e86487b 100644 --- a/tests/test_selfattention.py +++ b/tests/test_selfattention.py @@ -28,11 +28,7 @@ for num_heads in [4, 6, 8, 12]: test_case = [ - { - "hidden_size": hidden_size, - "num_heads": num_heads, - "dropout_rate": dropout_rate, - }, + {"hidden_size": hidden_size, "num_heads": num_heads, "dropout_rate": dropout_rate}, (2, 512, hidden_size), (2, 512, hidden_size), ] diff --git a/tests/test_senet.py b/tests/test_senet.py index 1c6222d6a0..9aae5e5e54 100644 --- a/tests/test_senet.py +++ b/tests/test_senet.py @@ -9,6 +9,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import unittest from typing import TYPE_CHECKING from unittest import skipUnless @@ -16,10 +17,11 @@ import torch from parameterized import parameterized +import monai.networks.nets.senet as se_mod from monai.networks import eval_mode from monai.networks.nets import SENet154, SEResNet50, SEResNet101, SEResNet152, SEResNext50, SEResNext101 from monai.utils import optional_import -from tests.utils import test_pretrained_networks, test_script_save +from tests.utils import test_is_quick, test_pretrained_networks, test_script_save if TYPE_CHECKING: import pretrainedmodels @@ -31,6 +33,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu" + NET_ARGS = {"spatial_dims": 3, "in_channels": 2, "num_classes": 2} TEST_CASE_1 = [SENet154, NET_ARGS] TEST_CASE_2 = [SEResNet50, NET_ARGS] @@ -60,6 +63,43 @@ def test_script(self, net, net_args): class TestPretrainedSENET(unittest.TestCase): + def setUp(self): + self.original_urls = se_mod.SE_NET_MODELS.copy() + if test_is_quick(): + testing_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "testing_data") + testing_data_urls = { + "senet154": { + "url": "https://drive.google.com/uc?id=1e10LFGVIV9L8_Q5Fhwi3X5nU6mDRrCDh", + "filename": "senet154-c7b49a05.pth", + }, + "se_resnet50": { + "url": "https://drive.google.com/uc?id=1WCeveS0tvjta4Wcp1wAGRi_uyXRfXAGA", + "filename": "se_resnet50-ce0d4300.pth", + }, + "se_resnet101": { + "url": "https://drive.google.com/uc?id=1Bh0PmLISUltsY8FevtlTbt6vT35clzWg", + "filename": "se_resnet101-7e38fcc6.pth", + }, + "se_resnet152": { + "url": "https://drive.google.com/uc?id=1fcqpP0ITOcALy_TZAcBdkyf7HcH687J-", + "filename": "se_resnet152-d17c99b7.pth", + }, + "se_resnext50_32x4d": { + "url": "https://drive.google.com/uc?id=1kRKW8YjGaEwYdQUyhoCIDg1H9ZAoJ-jI", + "filename": "se_resnext50_32x4d-a260b3a4.pth", + }, + "se_resnext101_32x4d": { + "url": 
"https://drive.google.com/uc?id=1Tg6Zim1lXgmYgH7FyTXAgihbkq5Jegni", + "filename": "se_resnext101_32x4d-3b2fe3d8.pth", + }, + } + for item in testing_data_urls: + testing_data_urls[item]["filename"] = os.path.join(testing_dir, testing_data_urls[item]["filename"]) + se_mod.SE_NET_MODELS = testing_data_urls + + def tearDown(self): + se_mod.SE_NET_MODELS = self.original_urls.copy() + @parameterized.expand([TEST_CASE_PRETRAINED_1]) def test_senet_shape(self, model, input_param): net = test_pretrained_networks(model, input_param, device) diff --git a/tests/test_shift_intensityd.py b/tests/test_shift_intensityd.py index 0396857781..66aad23b1e 100644 --- a/tests/test_shift_intensityd.py +++ b/tests/test_shift_intensityd.py @@ -24,7 +24,7 @@ def test_value(self): shifter = ShiftIntensityd(keys=[key], offset=1.0) result = shifter({key: p(self.imt)}) expected = self.imt + 1.0 - assert_allclose(result[key], expected) + assert_allclose(result[key], p(expected)) def test_factor(self): key = "img" diff --git a/tests/test_skip_connection.py b/tests/test_skip_connection.py index 2118842ed0..462acd9242 100644 --- a/tests/test_skip_connection.py +++ b/tests/test_skip_connection.py @@ -24,11 +24,7 @@ result_shape = (input_shape[0] * 2, *input_shape[1:]) else: result_shape = input_shape - test_case = [ - {"dim": 0, "mode": type_1}, - input_shape, - result_shape, - ] + test_case = [{"dim": 0, "mode": type_1}, input_shape, result_shape] TEST_CASES_3D.append(test_case) diff --git a/tests/test_sliding_window_inference.py b/tests/test_sliding_window_inference.py index a22e5990bf..c5b941bf3d 100644 --- a/tests/test_sliding_window_inference.py +++ b/tests/test_sliding_window_inference.py @@ -33,14 +33,7 @@ [(1, 3, 16, 7), (80, 50), 7, 0.5, "gaussian", torch.device("cpu:0")], # 2D large overlap, gaussian [(1, 3, 16, 15, 7), (4, 10, 7), 3, 0.25, "gaussian", torch.device("cpu:0")], # 3D small roi, gaussian [(3, 3, 16, 15, 7), (4, 10, 7), 3, 0.25, "gaussian", torch.device("cpu:0")], # 3D small roi, gaussian - [ - (1, 3, 16, 15, 7), - (4, 10, 7), - 3, - 0.25, - "gaussian", - torch.device("cuda:0"), - ], # test inference on gpu if availabe + [(1, 3, 16, 15, 7), (4, 10, 7), 3, 0.25, "gaussian", torch.device("cuda:0")], # test inference on gpu if availabe [(1, 3, 16, 15, 7), (4, 1, 7), 3, 0.25, "constant", torch.device("cpu:0")], # 3D small roi [(5, 3, 16, 15, 7), (4, 1, 7), 3, 0.25, "constant", torch.device("cpu:0")], # 3D small roi ] diff --git a/tests/test_smartcache_patch_wsi_dataset.py b/tests/test_smartcache_patch_wsi_dataset.py index c484e5fc69..317b6cba63 100644 --- a/tests/test_smartcache_patch_wsi_dataset.py +++ b/tests/test_smartcache_patch_wsi_dataset.py @@ -21,9 +21,10 @@ from monai.apps.utils import download_url from monai.utils import optional_import -_, has_cim = optional_import("cucim") +_cucim, has_cim = optional_import("cucim") +has_cim = has_cim and hasattr(_cucim, "CuImage") -FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff" +FILE_URL = "https://drive.google.com/uc?id=1sGTKZlJBIz53pfqTxoTqiIQzIoEzHLAe" FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + os.path.basename(FILE_URL)) TEST_CASE_0 = [ @@ -133,13 +134,7 @@ class TestSmartCachePatchWSIDataset(unittest.TestCase): def setUp(self): download_url(FILE_URL, FILE_PATH, "5a3cfd4fd725c50578ddb80b517b759f") - @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_1, - TEST_CASE_2, - ] - ) + @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2]) @skipUnless(has_cim, "Requires 
CuCIM") def test_read_patches(self, input_parameters, expected): dataset = SmartCachePatchWSIDataset(**input_parameters) diff --git a/tests/test_smartcachedataset.py b/tests/test_smartcachedataset.py index e2675f4d8c..f390a9127e 100644 --- a/tests/test_smartcachedataset.py +++ b/tests/test_smartcachedataset.py @@ -174,13 +174,7 @@ def test_datalist(self): data_list = [np.array([i]) for i in range(5)] data_list_backup = copy.copy(data_list) - SmartCacheDataset( - data=data_list, - transform=None, - cache_rate=0.5, - replace_rate=0.4, - shuffle=True, - ) + SmartCacheDataset(data=data_list, transform=None, cache_rate=0.5, replace_rate=0.4, shuffle=True) np.testing.assert_allclose(data_list, data_list_backup) diff --git a/tests/test_spacing.py b/tests/test_spacing.py index 6be6730c5a..cd362bccea 100644 --- a/tests/test_spacing.py +++ b/tests/test_spacing.py @@ -12,155 +12,204 @@ import unittest import numpy as np +import torch from parameterized import parameterized from monai.transforms import Spacing from monai.utils import ensure_tuple, fall_back_tuple +from tests.utils import TEST_NDARRAYS -TEST_CASES = [ - [ - {"pixdim": (1.0, 1.5), "padding_mode": "zeros", "dtype": float}, - np.arange(4).reshape((1, 2, 2)) + 1.0, # data - {"affine": np.eye(4)}, - np.array([[[1.0, 1.0], [3.0, 2.0]]]), - ], - [ - {"pixdim": 1.0, "padding_mode": "zeros", "dtype": float}, - np.ones((1, 2, 1, 2)), # data - {"affine": np.eye(4)}, - np.array([[[[1.0, 1.0]], [[1.0, 1.0]]]]), - ], - [ - {"pixdim": (1.0, 1.0, 1.0), "padding_mode": "zeros", "dtype": float}, - np.ones((1, 2, 1, 2)), # data - {"affine": np.eye(4)}, - np.array([[[[1.0, 1.0]], [[1.0, 1.0]]]]), - ], - [ - {"pixdim": (1.0, 0.2, 1.5), "diagonal": False, "padding_mode": "zeros", "align_corners": True}, - np.ones((1, 2, 1, 2)), # data - {"affine": np.array([[2, 1, 0, 4], [-1, -3, 0, 5], [0, 0, 2.0, 5], [0, 0, 0, 1]])}, - np.array([[[[0.95527864, 0.95527864]], [[1.0, 1.0]], [[1.0, 1.0]]]]), - ], - [ - {"pixdim": (3.0, 1.0), "padding_mode": "zeros"}, - np.arange(24).reshape((2, 3, 4)), # data - {"affine": np.diag([-3.0, 0.2, 1.5, 1])}, - np.array([[[0, 0], [4, 0], [8, 0]], [[12, 0], [16, 0], [20, 0]]]), - ], - [ - {"pixdim": (3.0, 1.0), "padding_mode": "zeros"}, - np.arange(24).reshape((2, 3, 4)), # data - {}, - np.array([[[0, 1, 2, 3], [0, 0, 0, 0]], [[12, 13, 14, 15], [0, 0, 0, 0]]]), - ], - [ - {"pixdim": (1.0, 1.0)}, - np.arange(24).reshape((2, 3, 4)), # data - {}, - np.array( - [[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]], [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]] - ), - ], - [ - {"pixdim": (4.0, 5.0, 6.0)}, - np.arange(24).reshape((1, 2, 3, 4)), # data - {"affine": np.array([[-4, 0, 0, 4], [0, 5, 0, -5], [0, 0, 6, -6], [0, 0, 0, 1]])}, - np.arange(24).reshape((1, 2, 3, 4)), # data - ], - [ - {"pixdim": (4.0, 5.0, 6.0), "diagonal": True}, - np.arange(24).reshape((1, 2, 3, 4)), # data - {"affine": np.array([[-4, 0, 0, 4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]])}, - np.array( - [[[[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]], [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]]] - ), - ], - [ - {"pixdim": (4.0, 5.0, 6.0), "padding_mode": "border", "diagonal": True}, - np.arange(24).reshape((1, 2, 3, 4)), # data - {"affine": np.array([[-4, 0, 0, -4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]])}, - np.array( - [[[[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]], [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]]] - ), - ], - [ - {"pixdim": (4.0, 5.0, 6.0), "padding_mode": "border", "diagonal": True}, - np.arange(24).reshape((1, 2, 3, 4)), 
# data - {"affine": np.array([[-4, 0, 0, -4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "nearest"}, - np.array( - [[[[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]], [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]]] - ), - ], - [ - {"pixdim": (1.9, 4.0), "padding_mode": "zeros", "diagonal": True}, - np.arange(24).reshape((1, 4, 6)), # data - {"affine": np.array([[-4, 0, 0, -4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "nearest"}, - np.array( - [ +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + p, + {"pixdim": (1.0, 1.5), "padding_mode": "zeros", "dtype": float}, + np.arange(4).reshape((1, 2, 2)) + 1.0, # data + {"affine": np.eye(4)}, + np.array([[[1.0, 1.0], [3.0, 2.0]]]), + ] + ) + TESTS.append( + [ + p, + {"pixdim": 1.0, "padding_mode": "zeros", "dtype": float}, + np.ones((1, 2, 1, 2)), # data + {"affine": np.eye(4)}, + np.array([[[[1.0, 1.0]], [[1.0, 1.0]]]]), + ] + ) + TESTS.append( + [ + p, + {"pixdim": (1.0, 1.0, 1.0), "padding_mode": "zeros", "dtype": float}, + np.ones((1, 2, 1, 2)), # data + {"affine": np.eye(4)}, + np.array([[[[1.0, 1.0]], [[1.0, 1.0]]]]), + ] + ) + TESTS.append( + [ + p, + {"pixdim": (1.0, 0.2, 1.5), "diagonal": False, "padding_mode": "zeros", "align_corners": True}, + np.ones((1, 2, 1, 2)), # data + {"affine": np.array([[2, 1, 0, 4], [-1, -3, 0, 5], [0, 0, 2.0, 5], [0, 0, 0, 1]])}, + np.array([[[[0.95527864, 0.95527864]], [[1.0, 1.0]], [[1.0, 1.0]]]]), + ] + ) + TESTS.append( + [ + p, + {"pixdim": (3.0, 1.0), "padding_mode": "zeros"}, + np.arange(24).reshape((2, 3, 4)), # data + {"affine": np.diag([-3.0, 0.2, 1.5, 1])}, + np.array([[[0, 0], [4, 0], [8, 0]], [[12, 0], [16, 0], [20, 0]]]), + ] + ) + TESTS.append( + [ + p, + {"pixdim": (3.0, 1.0), "padding_mode": "zeros"}, + np.arange(24).reshape((2, 3, 4)), # data + {}, + np.array([[[0, 1, 2, 3], [0, 0, 0, 0]], [[12, 13, 14, 15], [0, 0, 0, 0]]]), + ] + ) + TESTS.append( + [ + p, + {"pixdim": (1.0, 1.0)}, + np.arange(24).reshape((2, 3, 4)), # data + {}, + np.array( + [[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]], [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]] + ), + ] + ) + TESTS.append( + [ + p, + {"pixdim": (4.0, 5.0, 6.0)}, + np.arange(24).reshape((1, 2, 3, 4)), # data + {"affine": np.array([[-4, 0, 0, 4], [0, 5, 0, -5], [0, 0, 6, -6], [0, 0, 0, 1]])}, + np.arange(24).reshape((1, 2, 3, 4)), # data + ] + ) + TESTS.append( + [ + p, + {"pixdim": (4.0, 5.0, 6.0), "diagonal": True}, + np.arange(24).reshape((1, 2, 3, 4)), # data + {"affine": np.array([[-4, 0, 0, 4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]])}, + np.array( + [[[[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]], [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]]] + ), + ] + ) + TESTS.append( + [ + p, + {"pixdim": (4.0, 5.0, 6.0), "padding_mode": "border", "diagonal": True}, + np.arange(24).reshape((1, 2, 3, 4)), # data + {"affine": np.array([[-4, 0, 0, -4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]])}, + np.array( + [[[[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]], [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]]] + ), + ] + ) + TESTS.append( + [ + p, + {"pixdim": (4.0, 5.0, 6.0), "padding_mode": "border", "diagonal": True}, + np.arange(24).reshape((1, 2, 3, 4)), # data + {"affine": np.array([[-4, 0, 0, -4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "nearest"}, + np.array( + [[[[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]], [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]]] + ), + ] + ) + TESTS.append( + [ + p, + {"pixdim": (1.9, 4.0), "padding_mode": "zeros", "diagonal": True}, + 
np.arange(24).reshape((1, 4, 6)), # data + {"affine": np.array([[-4, 0, 0, -4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "nearest"}, + np.array( [ - [18.0, 19.0, 20.0, 20.0, 21.0, 22.0, 23.0], - [18.0, 19.0, 20.0, 20.0, 21.0, 22.0, 23.0], - [12.0, 13.0, 14.0, 14.0, 15.0, 16.0, 17.0], - [12.0, 13.0, 14.0, 14.0, 15.0, 16.0, 17.0], - [6.0, 7.0, 8.0, 8.0, 9.0, 10.0, 11.0], - [6.0, 7.0, 8.0, 8.0, 9.0, 10.0, 11.0], - [0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0], + [ + [18.0, 19.0, 20.0, 20.0, 21.0, 22.0, 23.0], + [18.0, 19.0, 20.0, 20.0, 21.0, 22.0, 23.0], + [12.0, 13.0, 14.0, 14.0, 15.0, 16.0, 17.0], + [12.0, 13.0, 14.0, 14.0, 15.0, 16.0, 17.0], + [6.0, 7.0, 8.0, 8.0, 9.0, 10.0, 11.0], + [6.0, 7.0, 8.0, 8.0, 9.0, 10.0, 11.0], + [0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0], + ] ] - ] - ), - ], - [ - {"pixdim": (5.0, 3.0), "padding_mode": "border", "diagonal": True, "dtype": np.float32}, - np.arange(24).reshape((1, 4, 6)), # data - {"affine": np.array([[-4, 0, 0, 0], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "bilinear"}, - np.array( - [ + ), + ] + ) + TESTS.append( + [ + p, + {"pixdim": (5.0, 3.0), "padding_mode": "border", "diagonal": True, "dtype": np.float32}, + np.arange(24).reshape((1, 4, 6)), # data + {"affine": np.array([[-4, 0, 0, 0], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "bilinear"}, + np.array( [ - [18.0, 18.6, 19.2, 19.8, 20.400002, 21.0, 21.6, 22.2, 22.8], - [10.5, 11.1, 11.700001, 12.299999, 12.900001, 13.5, 14.1, 14.700001, 15.3], - [3.0, 3.6000001, 4.2000003, 4.8, 5.4000006, 6.0, 6.6000004, 7.200001, 7.8], + [ + [18.0, 18.6, 19.2, 19.8, 20.400002, 21.0, 21.6, 22.2, 22.8], + [10.5, 11.1, 11.700001, 12.299999, 12.900001, 13.5, 14.1, 14.700001, 15.3], + [3.0, 3.6000001, 4.2000003, 4.8, 5.4000006, 6.0, 6.6000004, 7.200001, 7.8], + ] ] - ] - ), - ], - [ - {"pixdim": (5.0, 3.0), "padding_mode": "zeros", "diagonal": True, "dtype": np.float32}, - np.arange(24).reshape((1, 4, 6)), # data - {"affine": np.array([[-4, 0, 0, 0], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "bilinear"}, - np.array( - [ + ), + ] + ) + TESTS.append( + [ + p, + {"pixdim": (5.0, 3.0), "padding_mode": "zeros", "diagonal": True, "dtype": np.float32}, + np.arange(24).reshape((1, 4, 6)), # data + {"affine": np.array([[-4, 0, 0, 0], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "bilinear"}, + np.array( [ - [18.0000, 18.6000, 19.2000, 19.8000, 20.4000, 21.0000, 21.6000, 22.2000, 22.8000], - [10.5000, 11.1000, 11.7000, 12.3000, 12.9000, 13.5000, 14.1000, 14.7000, 15.3000], - [3.0000, 3.6000, 4.2000, 4.8000, 5.4000, 6.0000, 6.6000, 7.2000, 7.8000], + [ + [18.0000, 18.6000, 19.2000, 19.8000, 20.4000, 21.0000, 21.6000, 22.2000, 22.8000], + [10.5000, 11.1000, 11.7000, 12.3000, 12.9000, 13.5000, 14.1000, 14.7000, 15.3000], + [3.0000, 3.6000, 4.2000, 4.8000, 5.4000, 6.0000, 6.6000, 7.2000, 7.8000], + ] ] - ] - ), - ], - [ - {"pixdim": [-1, -1, 0.5], "padding_mode": "zeros", "dtype": float}, - np.ones((1, 2, 1, 2)), # data - {"affine": np.eye(4)}, - np.array([[[[1.0, 1.0, 1.0]], [[1.0, 1.0, 1.0]]]]), - ], -] + ), + ] + ) + TESTS.append( + [ + p, + {"pixdim": [-1, -1, 0.5], "padding_mode": "zeros", "dtype": float}, + np.ones((1, 2, 1, 2)), # data + {"affine": np.eye(4)}, + np.array([[[[1.0, 1.0, 1.0]], [[1.0, 1.0, 1.0]]]]), + ] + ) class TestSpacingCase(unittest.TestCase): - @parameterized.expand(TEST_CASES) - def test_spacing(self, init_param, img, data_param, expected_output): - res = Spacing(**init_param)(img, **data_param) - if not isinstance(res, tuple): - np.testing.assert_allclose(res, 
expected_output, atol=1e-6) - return - np.testing.assert_allclose(res[0], expected_output, atol=1e-6) - sr = len(res[0].shape) - 1 + @parameterized.expand(TESTS) + def test_spacing(self, in_type, init_param, img, data_param, expected_output): + _img = in_type(img) + output_data, _, new_affine = Spacing(**init_param)(_img, **data_param) + if isinstance(_img, torch.Tensor): + self.assertEqual(_img.device, output_data.device) + output_data = output_data.cpu() + + np.testing.assert_allclose(output_data, expected_output, atol=1e-3, rtol=1e-3) + sr = len(output_data.shape) - 1 if isinstance(init_param["pixdim"], float): init_param["pixdim"] = [init_param["pixdim"]] * sr init_pixdim = ensure_tuple(init_param["pixdim"]) init_pixdim = init_param["pixdim"][:sr] - norm = np.sqrt(np.sum(np.square(res[2]), axis=0))[:sr] + norm = np.sqrt(np.sum(np.square(new_affine), axis=0))[:sr] np.testing.assert_allclose(fall_back_tuple(init_pixdim, norm), norm) diff --git a/tests/test_spacingd.py b/tests/test_spacingd.py index 61a4a4c38b..355706f65a 100644 --- a/tests/test_spacingd.py +++ b/tests/test_spacingd.py @@ -10,82 +10,88 @@ # limitations under the License. import unittest +from typing import List, Tuple import numpy as np +import torch +from parameterized import parameterized from monai.transforms import Spacingd +from tests.utils import TEST_NDARRAYS, assert_allclose - -class TestSpacingDCase(unittest.TestCase): - def test_spacingd_3d(self): - data = {"image": np.ones((2, 10, 15, 20)), "image_meta_dict": {"affine": np.eye(4)}} - spacing = Spacingd(keys="image", pixdim=(1, 2, 1.4)) - res = spacing(data) - self.assertEqual(("image", "image_meta_dict", "image_transforms"), tuple(sorted(res))) - np.testing.assert_allclose(res["image"].shape, (2, 10, 8, 15)) - np.testing.assert_allclose(res["image_meta_dict"]["affine"], np.diag([1, 2, 1.4, 1.0])) - - def test_spacingd_2d(self): - data = {"image": np.ones((2, 10, 20)), "image_meta_dict": {"affine": np.eye(3)}} - spacing = Spacingd(keys="image", pixdim=(1, 2)) - res = spacing(data) - self.assertEqual(("image", "image_meta_dict", "image_transforms"), tuple(sorted(res))) - np.testing.assert_allclose(res["image"].shape, (2, 10, 10)) - np.testing.assert_allclose(res["image_meta_dict"]["affine"], np.diag((1, 2, 1))) - - def test_spacingd_2d_no_metadata(self): - data = {"image": np.ones((2, 10, 20))} - spacing = Spacingd(keys="image", pixdim=(1, 2)) - res = spacing(data) - self.assertEqual(("image", "image_meta_dict", "image_transforms"), tuple(sorted(res))) - np.testing.assert_allclose(res["image"].shape, (2, 10, 10)) - np.testing.assert_allclose(res["image_meta_dict"]["affine"], np.diag((1, 2, 1))) - - def test_interp_all(self): - data = { - "image": np.arange(20).reshape((2, 1, 10)), - "seg": np.ones((2, 1, 10)), - "image_meta_dict": {"affine": np.eye(4)}, - "seg_meta_dict": {"affine": np.eye(4)}, - } - spacing = Spacingd( - keys=("image", "seg"), - mode="nearest", - pixdim=( - 1, - 0.2, - ), +TESTS: List[Tuple] = [] +for p in TEST_NDARRAYS: + TESTS.append( + ( + "spacing 3d", + {"image": p(np.ones((2, 10, 15, 20))), "image_meta_dict": {"affine": p(np.eye(4))}}, + dict(keys="image", pixdim=(1, 2, 1.4)), + ("image", "image_meta_dict", "image_transforms"), + (2, 10, 8, 15), + p(np.diag([1, 2, 1.4, 1.0])), ) - res = spacing(data) - self.assertEqual( - ("image", "image_meta_dict", "image_transforms", "seg", "seg_meta_dict", "seg_transforms"), - tuple(sorted(res)), + ) + TESTS.append( + ( + "spacing 2d", + {"image": np.ones((2, 10, 20)), "image_meta_dict": {"affine": 
np.eye(3)}}, + dict(keys="image", pixdim=(1, 2)), + ("image", "image_meta_dict", "image_transforms"), + (2, 10, 10), + np.diag((1, 2, 1)), ) - np.testing.assert_allclose(res["image"].shape, (2, 1, 46)) - np.testing.assert_allclose(res["image_meta_dict"]["affine"], np.diag((1, 0.2, 1, 1))) - - def test_interp_sep(self): - data = { - "image": np.ones((2, 1, 10)), - "seg": np.ones((2, 1, 10)), - "image_meta_dict": {"affine": np.eye(4)}, - "seg_meta_dict": {"affine": np.eye(4)}, - } - spacing = Spacingd( - keys=("image", "seg"), - mode=("bilinear", "nearest"), - pixdim=( - 1, - 0.2, - ), + ) + TESTS.append( + ( + "spacing 2d no metadata", + {"image": np.ones((2, 10, 20))}, + dict(keys="image", pixdim=(1, 2)), + ("image", "image_meta_dict", "image_transforms"), + (2, 10, 10), + np.diag((1, 2, 1)), + ) + ) + TESTS.append( + ( + "interp all", + { + "image": np.arange(20).reshape((2, 1, 10)), + "seg": np.ones((2, 1, 10)), + "image_meta_dict": {"affine": np.eye(4)}, + "seg_meta_dict": {"affine": np.eye(4)}, + }, + dict(keys=("image", "seg"), mode="nearest", pixdim=(1, 0.2)), + ("image", "image_meta_dict", "image_transforms", "seg", "seg_meta_dict", "seg_transforms"), + (2, 1, 46), + np.diag((1, 0.2, 1, 1)), ) - res = spacing(data) - self.assertEqual( + ) + TESTS.append( + ( + "interp sep", + { + "image": np.ones((2, 1, 10)), + "seg": np.ones((2, 1, 10)), + "image_meta_dict": {"affine": np.eye(4)}, + "seg_meta_dict": {"affine": np.eye(4)}, + }, + dict(keys=("image", "seg"), mode=("bilinear", "nearest"), pixdim=(1, 0.2)), ("image", "image_meta_dict", "image_transforms", "seg", "seg_meta_dict", "seg_transforms"), - tuple(sorted(res)), + (2, 1, 46), + np.diag((1, 0.2, 1, 1)), ) - np.testing.assert_allclose(res["image"].shape, (2, 1, 46)) - np.testing.assert_allclose(res["image_meta_dict"]["affine"], np.diag((1, 0.2, 1, 1))) + ) + + +class TestSpacingDCase(unittest.TestCase): + @parameterized.expand(TESTS) + def test_spacingd(self, _, data, kw_args, expected_keys, expected_shape, expected_affine): + res = Spacingd(**kw_args)(data) + if isinstance(data["image"], torch.Tensor): + self.assertEqual(data["image"].device, res["image"].device) + self.assertEqual(expected_keys, tuple(sorted(res))) + np.testing.assert_allclose(res["image"].shape, expected_shape) + assert_allclose(res["image_meta_dict"]["affine"], expected_affine) if __name__ == "__main__": diff --git a/tests/test_spatial_crop.py b/tests/test_spatial_crop.py index c76915f0a3..652c420a4d 100644 --- a/tests/test_spatial_crop.py +++ b/tests/test_spatial_crop.py @@ -16,54 +16,40 @@ from parameterized import parameterized from monai.transforms import SpatialCrop +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASES = [ - [ - {"roi_center": [1, 1, 1], "roi_size": [2, 2, 2]}, - (3, 3, 3, 3), - (3, 2, 2, 2), - ], +TESTS = [ + [{"roi_center": [1, 1, 1], "roi_size": [2, 2, 2]}, (3, 3, 3, 3), (3, 2, 2, 2)], [{"roi_start": [0, 0, 0], "roi_end": [2, 2, 2]}, (3, 3, 3, 3), (3, 2, 2, 2)], [{"roi_start": [0, 0], "roi_end": [2, 2]}, (3, 3, 3, 3), (3, 2, 2, 3)], - [ - {"roi_start": [0, 0, 0, 0, 0], "roi_end": [2, 2, 2, 2, 2]}, - (3, 3, 3, 3), - (3, 2, 2, 2), - ], - [ - {"roi_start": [0, 0, 0, 0, 0], "roi_end": [8, 8, 8, 2, 2]}, - (3, 3, 3, 3), - (3, 3, 3, 3), - ], - [ - {"roi_start": [1, 0, 0], "roi_end": [1, 8, 8]}, - (3, 3, 3, 3), - (3, 0, 3, 3), - ], - [ - {"roi_slices": [slice(s, e) for s, e in zip([-1, -2, 0], [None, None, 2])]}, - (3, 3, 3, 3), - (3, 1, 2, 2), - ], + [{"roi_start": [0, 0, 0, 0, 0], "roi_end": [2, 2, 2, 2, 2]}, (3, 3, 3, 3), (3, 
2, 2, 2)], + [{"roi_start": [0, 0, 0, 0, 0], "roi_end": [8, 8, 8, 2, 2]}, (3, 3, 3, 3), (3, 3, 3, 3)], + [{"roi_start": [1, 0, 0], "roi_end": [1, 8, 8]}, (3, 3, 3, 3), (3, 0, 3, 3)], + [{"roi_slices": [slice(s, e) for s, e in zip([-1, -2, 0], [None, None, 2])]}, (3, 3, 3, 3), (3, 1, 2, 2)], ] -TEST_ERRORS = [ - [{"roi_slices": [slice(s, e, 2) for s, e in zip([-1, -2, 0], [None, None, 2])]}], -] +TEST_ERRORS = [[{"roi_slices": [slice(s, e, 2) for s, e in zip([-1, -2, 0], [None, None, 2])]}]] class TestSpatialCrop(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_shape(self, input_param, input_shape, expected_shape): input_data = np.random.randint(0, 2, size=input_shape) - result = SpatialCrop(**input_param)(input_data) - self.assertTupleEqual(result.shape, expected_shape) - - @parameterized.expand(TEST_CASES) - def test_tensor_shape(self, input_param, input_shape, expected_shape): - input_data = torch.randint(0, 2, size=input_shape, device="cuda" if torch.cuda.is_available() else "cpu") - result = SpatialCrop(**input_param)(input_data) - self.assertTupleEqual(result.shape, expected_shape) + results = [] + for p in TEST_NDARRAYS: + for q in TEST_NDARRAYS + (None,): + input_param_mod = { + k: q(v) if k != "roi_slices" and q is not None else v for k, v in input_param.items() + } + im = p(input_data) + result = SpatialCrop(**input_param_mod)(im) + self.assertEqual(type(im), type(result)) + if isinstance(result, torch.Tensor): + self.assertEqual(result.device, im.device) + self.assertTupleEqual(result.shape, expected_shape) + results.append(result) + if len(results) > 1: + assert_allclose(results[0], results[-1], type_test=False) @parameterized.expand(TEST_ERRORS) def test_error(self, input_param): diff --git a/tests/test_spatial_cropd.py b/tests/test_spatial_cropd.py index 797c25d34b..17743124e0 100644 --- a/tests/test_spatial_cropd.py +++ b/tests/test_spatial_cropd.py @@ -15,38 +15,49 @@ from parameterized import parameterized from monai.transforms import SpatialCropd +from tests.utils import TEST_NDARRAYS -TEST_CASES = [ - [ - {"keys": ["img"], "roi_center": [1, 1, 1], "roi_size": [2, 2, 2]}, - {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, - (3, 2, 2, 2), - ], - [ - {"keys": ["img"], "roi_start": [0, 0, 0], "roi_end": [2, 2, 2]}, - {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, - (3, 2, 2, 2), - ], - [ - {"keys": ["img"], "roi_start": [0, 0], "roi_end": [2, 2]}, - {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, - (3, 2, 2, 3), - ], - [ - {"keys": ["img"], "roi_start": [0, 0, 0, 0, 0], "roi_end": [2, 2, 2, 2, 2]}, - {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, - (3, 2, 2, 2), - ], - [ - {"keys": ["img"], "roi_slices": [slice(s, e) for s, e in zip([-1, -2, 0], [None, None, 2])]}, - {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])}, - (3, 1, 2, 2), - ], -] +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + {"keys": ["img"], "roi_center": [1, 1, 1], "roi_size": [2, 2, 2]}, + {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))}, + (3, 2, 2, 2), + ] + ) + TESTS.append( + [ + {"keys": ["img"], "roi_start": [0, 0, 0], "roi_end": [2, 2, 2]}, + {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))}, + (3, 2, 2, 2), + ] + ) + TESTS.append( + [ + {"keys": ["img"], "roi_start": [0, 0], "roi_end": [2, 2]}, + {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))}, + (3, 2, 2, 3), + ] + ) + TESTS.append( + [ + {"keys": ["img"], "roi_start": [0, 0, 0, 0, 0], "roi_end": [2, 2, 2, 2, 2]}, + {"img": p(np.random.randint(0, 2, 
size=[3, 3, 3, 3]))}, + (3, 2, 2, 2), + ] + ) + TESTS.append( + [ + {"keys": ["img"], "roi_slices": [slice(s, e) for s, e in zip([-1, -2, 0], [None, None, 2])]}, + {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))}, + (3, 1, 2, 2), + ] + ) class TestSpatialCropd(unittest.TestCase): - @parameterized.expand(TEST_CASES) + @parameterized.expand(TESTS) def test_shape(self, input_param, input_data, expected_shape): result = SpatialCropd(**input_param)(input_data) self.assertTupleEqual(result["img"].shape, expected_shape) diff --git a/tests/test_spatial_pad.py b/tests/test_spatial_pad.py index 86d010bbad..83a261138e 100644 --- a/tests/test_spatial_pad.py +++ b/tests/test_spatial_pad.py @@ -17,44 +17,39 @@ from parameterized import parameterized from monai.transforms import SpatialPad -from monai.utils.enums import NumpyPadMode +from monai.utils.enums import NumpyPadMode, PytorchPadMode from monai.utils.misc import set_determinism from tests.utils import TEST_NDARRAYS TESTS = [] -# Numpy modes -MODES: List = [ +MODES = [] + +# Test modes +NP_MODES: List = [ "constant", "edge", - "linear_ramp", - "maximum", - "mean", - "median", - "minimum", - "reflect", - "symmetric", + # `reflect` mode is not supported in some PyTorch versions, skip the test + # "reflect", "wrap", - "empty", ] -MODES += [NumpyPadMode(i) for i in MODES] +MODES += NP_MODES +MODES += [NumpyPadMode(i) for i in NP_MODES] + +PT_MODES: list = [ + "constant", + "replicate", + "circular", + # `reflect` mode is not supported in some PyTorch versions, skip the test + # "reflect", +] +MODES += PT_MODES +MODES += [PytorchPadMode(i) for i in PT_MODES] for mode in MODES: - TESTS.append( - [ - {"spatial_size": [50, 50], "method": "end", "mode": mode}, - (1, 2, 2), - (1, 50, 50), - ] - ) - - TESTS.append( - [ - {"spatial_size": [15, 4, -1], "method": "symmetric", "mode": mode}, - (3, 8, 8, 4), - (3, 15, 8, 4), - ] - ) + TESTS.append([{"spatial_size": [3, 4], "method": "end", "mode": mode}, (1, 2, 3), (1, 3, 4)]) + + TESTS.append([{"spatial_size": [15, 4, -1], "method": "symmetric", "mode": mode}, (3, 8, 8, 4), (3, 15, 8, 4)]) class TestSpatialPad(unittest.TestCase): @@ -86,14 +81,19 @@ def test_pad_shape(self, input_param, input_shape, expected_shape): torch.testing.assert_allclose(results[0], results[-1], atol=0, rtol=1e-5) def test_pad_kwargs(self): - padder = SpatialPad( - spatial_size=[15, 8], method="end", mode="constant", constant_values=((0, 0), (1, 1), (2, 2)) - ) for p in TEST_NDARRAYS: - result = padder(p(np.zeros((3, 8, 4)))) - if isinstance(result, torch.Tensor): - result = result.cpu().numpy() - torch.testing.assert_allclose(result[:, 8:, :4], np.ones((3, 7, 4)), rtol=1e-7, atol=0) + input_data = p(np.zeros((3, 8, 4))) + if isinstance(input_data, torch.Tensor): + result = ( + SpatialPad(spatial_size=[15, 8], method="end", mode="constant", value=2)(img=input_data) + .cpu() + .numpy() + ) + else: + result = SpatialPad( + spatial_size=[15, 8], method="end", mode="constant", constant_values=((0, 0), (1, 1), (2, 2)) + )(img=input_data) + torch.testing.assert_allclose(result[:, 8:, :4], np.ones((3, 7, 4)), rtol=1e-7, atol=0) torch.testing.assert_allclose(result[:, :, 4:], np.ones((3, 15, 4)) + 1, rtol=1e-7, atol=0) diff --git a/tests/test_split_channeld.py b/tests/test_split_channeld.py index f1df24364d..344f206c86 100644 --- a/tests/test_split_channeld.py +++ b/tests/test_split_channeld.py @@ -51,13 +51,7 @@ ] ) - TESTS.append( - [ - {"keys": "pred", "channel_dim": 1}, - {"pred": p(np.random.randint(2, size=(3, 2, 4)))}, - (3, 1, 4), 
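The branching in test_pad_kwargs above reflects a real API split: np.pad accepts per-axis (before, after) fill values via constant_values, while torch.nn.functional.pad takes a single scalar value and lists pad widths last-dimension-first. A minimal illustration of the two call styles (not part of the diff):

    import numpy as np
    import torch
    import torch.nn.functional as F

    x_np = np.zeros((3, 8, 4))
    x_pt = torch.zeros(3, 8, 4)

    # numpy: a distinct constant per axis, widths ordered first-dim-first
    y_np = np.pad(x_np, ((0, 0), (0, 7), (0, 4)), mode="constant", constant_values=((0, 0), (1, 1), (2, 2)))
    # torch: one scalar fill value, widths ordered last-dim-first
    y_pt = F.pad(x_pt, (0, 4, 0, 7), mode="constant", value=2)

    assert y_np.shape == tuple(y_pt.shape) == (3, 15, 8)

Hence the tensor path and the ndarray path in the test have to pass different keyword arguments to SpatialPad.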
- ] - ) + TESTS.append([{"keys": "pred", "channel_dim": 1}, {"pred": p(np.random.randint(2, size=(3, 2, 4)))}, (3, 1, 4)]) class TestSplitChanneld(unittest.TestCase): diff --git a/tests/test_split_on_grid.py b/tests/test_split_on_grid.py index a187835e7b..4893c4c78a 100644 --- a/tests/test_split_on_grid.py +++ b/tests/test_split_on_grid.py @@ -26,100 +26,41 @@ A2 = torch.cat([A21, A22], 2) A = torch.cat([A1, A2], 1) -TEST_CASE_0 = [ - {"grid_size": (2, 2)}, - A, - torch.stack([A11, A12, A21, A22]), -] - -TEST_CASE_1 = [ - {"grid_size": (2, 1)}, - A, - torch.stack([A1, A2]), -] - -TEST_CASE_2 = [ - {"grid_size": (1, 2)}, - A1, - torch.stack([A11, A12]), -] - -TEST_CASE_3 = [ - {"grid_size": (1, 2)}, - A2, - torch.stack([A21, A22]), -] - -TEST_CASE_4 = [ - {"grid_size": (1, 1), "patch_size": (2, 2)}, - A, - torch.stack([A11]), -] - -TEST_CASE_5 = [ - {"grid_size": 1, "patch_size": 4}, - A, - torch.stack([A]), -] - -TEST_CASE_6 = [ - {"grid_size": 2, "patch_size": 2}, - A, - torch.stack([A11, A12, A21, A22]), -] - -TEST_CASE_7 = [ - {"grid_size": 1}, - A, - torch.stack([A]), -] - -TEST_CASE_MC_0 = [ - {"grid_size": (2, 2)}, - [A, A], - [torch.stack([A11, A12, A21, A22]), torch.stack([A11, A12, A21, A22])], -] - - -TEST_CASE_MC_1 = [ - {"grid_size": (2, 1)}, - [A] * 5, - [torch.stack([A1, A2])] * 5, -] - - -TEST_CASE_MC_2 = [ - {"grid_size": (1, 2)}, - [A1, A2], - [torch.stack([A11, A12]), torch.stack([A21, A22])], -] +TEST_CASE_0 = [{"grid_size": (2, 2)}, A, torch.stack([A11, A12, A21, A22])] + +TEST_CASE_1 = [{"grid_size": (2, 1)}, A, torch.stack([A1, A2])] + +TEST_CASE_2 = [{"grid_size": (1, 2)}, A1, torch.stack([A11, A12])] + +TEST_CASE_3 = [{"grid_size": (1, 2)}, A2, torch.stack([A21, A22])] + +TEST_CASE_4 = [{"grid_size": (1, 1), "patch_size": (2, 2)}, A, torch.stack([A11])] + +TEST_CASE_5 = [{"grid_size": 1, "patch_size": 4}, A, torch.stack([A])] + +TEST_CASE_6 = [{"grid_size": 2, "patch_size": 2}, A, torch.stack([A11, A12, A21, A22])] + +TEST_CASE_7 = [{"grid_size": 1}, A, torch.stack([A])] + +TEST_CASE_MC_0 = [{"grid_size": (2, 2)}, [A, A], [torch.stack([A11, A12, A21, A22]), torch.stack([A11, A12, A21, A22])]] + + +TEST_CASE_MC_1 = [{"grid_size": (2, 1)}, [A] * 5, [torch.stack([A1, A2])] * 5] + + +TEST_CASE_MC_2 = [{"grid_size": (1, 2)}, [A1, A2], [torch.stack([A11, A12]), torch.stack([A21, A22])]] class TestSplitOnGrid(unittest.TestCase): @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - TEST_CASE_4, - TEST_CASE_5, - TEST_CASE_6, - TEST_CASE_7, - ] + [TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7] ) def test_split_pathce_single_call(self, input_parameters, img, expected): splitter = SplitOnGrid(**input_parameters) output = splitter(img) np.testing.assert_equal(output.numpy(), expected.numpy()) - @parameterized.expand( - [ - TEST_CASE_MC_0, - TEST_CASE_MC_1, - TEST_CASE_MC_2, - ] - ) + @parameterized.expand([TEST_CASE_MC_0, TEST_CASE_MC_1, TEST_CASE_MC_2]) def test_split_pathce_multiple_call(self, input_parameters, img_list, expected_list): splitter = SplitOnGrid(**input_parameters) for img, expected in zip(img_list, expected_list): diff --git a/tests/test_split_on_grid_dict.py b/tests/test_split_on_grid_dict.py index 96ec095423..f22e58515f 100644 --- a/tests/test_split_on_grid_dict.py +++ b/tests/test_split_on_grid_dict.py @@ -26,53 +26,21 @@ A2 = torch.cat([A21, A22], 2) A = torch.cat([A1, A2], 1) -TEST_CASE_0 = [ - {"keys": "image", "grid_size": (2, 2)}, - {"image": A}, - 
torch.stack([A11, A12, A21, A22]), -] +TEST_CASE_0 = [{"keys": "image", "grid_size": (2, 2)}, {"image": A}, torch.stack([A11, A12, A21, A22])] -TEST_CASE_1 = [ - {"keys": "image", "grid_size": (2, 1)}, - {"image": A}, - torch.stack([A1, A2]), -] +TEST_CASE_1 = [{"keys": "image", "grid_size": (2, 1)}, {"image": A}, torch.stack([A1, A2])] -TEST_CASE_2 = [ - {"keys": "image", "grid_size": (1, 2)}, - {"image": A1}, - torch.stack([A11, A12]), -] +TEST_CASE_2 = [{"keys": "image", "grid_size": (1, 2)}, {"image": A1}, torch.stack([A11, A12])] -TEST_CASE_3 = [ - {"keys": "image", "grid_size": (1, 2)}, - {"image": A2}, - torch.stack([A21, A22]), -] +TEST_CASE_3 = [{"keys": "image", "grid_size": (1, 2)}, {"image": A2}, torch.stack([A21, A22])] -TEST_CASE_4 = [ - {"keys": "image", "grid_size": (1, 1), "patch_size": (2, 2)}, - {"image": A}, - torch.stack([A11]), -] +TEST_CASE_4 = [{"keys": "image", "grid_size": (1, 1), "patch_size": (2, 2)}, {"image": A}, torch.stack([A11])] -TEST_CASE_5 = [ - {"keys": "image", "grid_size": 1, "patch_size": 4}, - {"image": A}, - torch.stack([A]), -] +TEST_CASE_5 = [{"keys": "image", "grid_size": 1, "patch_size": 4}, {"image": A}, torch.stack([A])] -TEST_CASE_6 = [ - {"keys": "image", "grid_size": 2, "patch_size": 2}, - {"image": A}, - torch.stack([A11, A12, A21, A22]), -] +TEST_CASE_6 = [{"keys": "image", "grid_size": 2, "patch_size": 2}, {"image": A}, torch.stack([A11, A12, A21, A22])] -TEST_CASE_7 = [ - {"keys": "image", "grid_size": 1}, - {"image": A}, - torch.stack([A]), -] +TEST_CASE_7 = [{"keys": "image", "grid_size": 1}, {"image": A}, torch.stack([A])] TEST_CASE_MC_0 = [ {"keys": "image", "grid_size": (2, 2)}, @@ -81,11 +49,7 @@ ] -TEST_CASE_MC_1 = [ - {"keys": "image", "grid_size": (2, 1)}, - [{"image": A}] * 5, - [torch.stack([A1, A2])] * 5, -] +TEST_CASE_MC_1 = [{"keys": "image", "grid_size": (2, 1)}, [{"image": A}] * 5, [torch.stack([A1, A2])] * 5] TEST_CASE_MC_2 = [ @@ -97,29 +61,14 @@ class TestSplitOnGridDict(unittest.TestCase): @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - TEST_CASE_4, - TEST_CASE_5, - TEST_CASE_6, - TEST_CASE_7, - ] + [TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7] ) def test_split_pathce_single_call(self, input_parameters, img_dict, expected): splitter = SplitOnGridDict(**input_parameters) output = splitter(img_dict)[input_parameters["keys"]] np.testing.assert_equal(output.numpy(), expected.numpy()) - @parameterized.expand( - [ - TEST_CASE_MC_0, - TEST_CASE_MC_1, - TEST_CASE_MC_2, - ] - ) + @parameterized.expand([TEST_CASE_MC_0, TEST_CASE_MC_1, TEST_CASE_MC_2]) def test_split_pathce_multiple_call(self, input_parameters, img_list, expected_list): splitter = SplitOnGridDict(**input_parameters) for img_dict, expected in zip(img_list, expected_list): diff --git a/tests/test_state_cacher.py b/tests/test_state_cacher.py index 139e7b8374..5835bfdb5c 100644 --- a/tests/test_state_cacher.py +++ b/tests/test_state_cacher.py @@ -20,18 +20,9 @@ DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu" -TEST_CASE_0 = [ - torch.Tensor([1]).to(DEVICE), - {"in_memory": True}, -] -TEST_CASE_1 = [ - torch.Tensor([1]).to(DEVICE), - {"in_memory": False, "cache_dir": gettempdir()}, -] -TEST_CASE_2 = [ - torch.Tensor([1]).to(DEVICE), - {"in_memory": False, "allow_overwrite": False}, -] +TEST_CASE_0 = [torch.Tensor([1]).to(DEVICE), {"in_memory": True}] +TEST_CASE_1 = [torch.Tensor([1]).to(DEVICE), {"in_memory": False, "cache_dir": gettempdir()}] +TEST_CASE_2 = 
[torch.Tensor([1]).to(DEVICE), {"in_memory": False, "allow_overwrite": False}] TEST_CASES = [TEST_CASE_0, TEST_CASE_1, TEST_CASE_2] diff --git a/tests/test_subpixel_upsample.py b/tests/test_subpixel_upsample.py index 07e110d7a7..05390d231a 100644 --- a/tests/test_subpixel_upsample.py +++ b/tests/test_subpixel_upsample.py @@ -24,37 +24,30 @@ for dim in range(1, 4): for factor in range(1, 3): test_case = [ - {"dimensions": dim, "in_channels": inch, "scale_factor": factor}, + {"spatial_dims": dim, "in_channels": inch, "scale_factor": factor}, (2, inch, *([8] * dim)), (2, inch, *([8 * factor] * dim)), ] TEST_CASE_SUBPIXEL.append(test_case) TEST_CASE_SUBPIXEL_2D_EXTRA = [ - {"dimensions": 2, "in_channels": 2, "scale_factor": 3}, + {"spatial_dims": 2, "in_channels": 2, "scale_factor": 3}, (2, 2, 8, 4), # different size for H and W (2, 2, 24, 12), ] TEST_CASE_SUBPIXEL_3D_EXTRA = [ - {"dimensions": 3, "in_channels": 1, "scale_factor": 2}, + {"spatial_dims": 3, "in_channels": 1, "scale_factor": 2}, (2, 1, 16, 8, 4), # different size for H, W and D (2, 1, 32, 16, 8), ] conv_block = nn.Sequential( - Conv[Conv.CONV, 3](1, 4, kernel_size=1), - Conv[Conv.CONV, 3]( - 4, - 8, - kernel_size=3, - stride=1, - padding=1, - ), + Conv[Conv.CONV, 3](1, 4, kernel_size=1), Conv[Conv.CONV, 3](4, 8, kernel_size=3, stride=1, padding=1) ) TEST_CASE_SUBPIXEL_CONV_BLOCK_EXTRA = [ - {"dimensions": 3, "in_channels": 1, "scale_factor": 2, "conv_block": conv_block}, + {"spatial_dims": 3, "in_channels": 1, "scale_factor": 2, "conv_block": conv_block}, (2, 1, 16, 8, 4), # different size for H, W and D (2, 1, 32, 16, 8), ] diff --git a/tests/test_surface_distance.py b/tests/test_surface_distance.py index e5d2145a1f..8f09218f57 100644 --- a/tests/test_surface_distance.py +++ b/tests/test_surface_distance.py @@ -20,9 +20,7 @@ def create_spherical_seg_3d( - radius: float = 20.0, - centre: Tuple[int, int, int] = (49, 49, 49), - im_shape: Tuple[int, int, int] = (99, 99, 99), + radius: float = 20.0, centre: Tuple[int, int, int] = (49, 49, 49), im_shape: Tuple[int, int, int] = (99, 99, 99) ) -> np.ndarray: """ Return a 3D image with a sphere inside. 
Voxel values will be @@ -49,10 +47,7 @@ def create_spherical_seg_3d( TEST_CASES = [ - [ - [create_spherical_seg_3d(), create_spherical_seg_3d()], - [0, 0], - ], + [[create_spherical_seg_3d(), create_spherical_seg_3d()], [0, 0]], [ [ create_spherical_seg_3d(radius=20, centre=(20, 20, 20)), @@ -91,21 +86,8 @@ def create_spherical_seg_3d( ], [17.32691760951026, 12.432687531048186], ], - [ - [ - np.zeros([99, 99, 99]), - create_spherical_seg_3d(radius=40, centre=(20, 33, 22)), - ], - [np.inf, np.inf], - ], - [ - [ - create_spherical_seg_3d(), - np.zeros([99, 99, 99]), - "taxicab", - ], - [np.inf, np.inf], - ], + [[np.zeros([99, 99, 99]), create_spherical_seg_3d(radius=40, centre=(20, 33, 22))], [np.inf, np.inf]], + [[create_spherical_seg_3d(), np.zeros([99, 99, 99]), "taxicab"], [np.inf, np.inf]], ] TEST_CASES_NANS = [ @@ -114,8 +96,8 @@ def create_spherical_seg_3d( # both pred and gt do not have foreground, metric and not_nans should be 0 np.zeros([99, 99, 99]), np.zeros([99, 99, 99]), - ], - ], + ] + ] ] diff --git a/tests/test_synthetic.py b/tests/test_synthetic.py index 97ab12a588..6b08df8b00 100644 --- a/tests/test_synthetic.py +++ b/tests/test_synthetic.py @@ -18,29 +18,10 @@ from monai.utils import set_determinism TEST_CASES = [ + [2, {"width": 64, "height": 64, "rad_max": 10, "rad_min": 4}, 0.1479004, 0.739502, (64, 64), 5], [ 2, - { - "width": 64, - "height": 64, - "rad_max": 10, - "rad_min": 4, - }, - 0.1479004, - 0.739502, - (64, 64), - 5, - ], - [ - 2, - { - "width": 32, - "height": 28, - "num_objs": 3, - "rad_max": 5, - "rad_min": 1, - "noise_max": 0.2, - }, + {"width": 32, "height": 28, "num_objs": 3, "rad_max": 5, "rad_min": 1, "noise_max": 0.2}, 0.1709315, 0.4040179, (32, 28), @@ -48,15 +29,7 @@ ], [ 3, - { - "width": 64, - "height": 64, - "depth": 45, - "num_seg_classes": 3, - "channel_dim": -1, - "rad_max": 10, - "rad_min": 4, - }, + {"width": 64, "height": 64, "depth": 45, "num_seg_classes": 3, "channel_dim": -1, "rad_max": 10, "rad_min": 4}, 0.025132, 0.0753961, (64, 64, 45, 1), diff --git a/tests/test_testtimeaugmentation.py b/tests/test_testtimeaugmentation.py index a07d59703d..a64e4258ba 100644 --- a/tests/test_testtimeaugmentation.py +++ b/tests/test_testtimeaugmentation.py @@ -113,12 +113,7 @@ def test_test_time_augmentation(self): epoch_loss /= len(train_loader) - post_trans = Compose( - [ - Activations(sigmoid=True), - AsDiscrete(threshold_values=True), - ] - ) + post_trans = Compose([Activations(sigmoid=True), AsDiscrete(threshold_values=True)]) def inferrer_fn(x): return post_trans(model(x)) @@ -155,7 +150,7 @@ def test_image_no_label(self): @unittest.skipUnless(has_nib, "Requires nibabel") def test_requires_meta_dict(self): - transforms = Compose([RandFlipd("image"), Spacingd("image", pixdim=1.0)]) + transforms = Compose([AddChanneld("image"), RandFlipd("image"), Spacingd("image", pixdim=1.1)]) tta = TestTimeAugmentation(transforms, batch_size=5, num_workers=0, inferrer_fn=lambda x: x, orig_key="image") tta(self.get_data(1, (20, 20), include_label=False)) diff --git a/tests/test_threadcontainer.py b/tests/test_threadcontainer.py index 543dab4d0c..c33261710a 100644 --- a/tests/test_threadcontainer.py +++ b/tests/test_threadcontainer.py @@ -79,7 +79,7 @@ def test_plot(self): # a third non-image key is added to test that this is correctly ignored when plotting data = {CommonKeys.IMAGE: img, CommonKeys.LABEL: img, "Not Image Data": ["This isn't an image"]} - loader = DataLoader([data] * 10) + loader = DataLoader([data] * 20, batch_size=2) trainer = SupervisedTrainer( 
device=torch.device("cpu"), @@ -102,7 +102,7 @@ def test_plot(self): with tempfile.TemporaryDirectory() as tempdir: tempimg = f"{tempdir}/threadcontainer_plot_test.png" fig.savefig(tempimg) - comp = compare_images(f"{testing_dir}/threadcontainer_plot_test.png", tempimg, 1e-2) + comp = compare_images(f"{testing_dir}/threadcontainer_plot_test.png", tempimg, 5e-2) self.assertIsNone(comp, comp) # None indicates test passed diff --git a/tests/test_threshold_intensity.py b/tests/test_threshold_intensity.py index a6d3895709..075a650ec0 100644 --- a/tests/test_threshold_intensity.py +++ b/tests/test_threshold_intensity.py @@ -15,20 +15,21 @@ from parameterized import parameterized from monai.transforms import ThresholdIntensity +from tests.utils import TEST_NDARRAYS, assert_allclose -TEST_CASE_1 = [{"threshold": 5, "above": True, "cval": 0}, (0, 0, 0, 0, 0, 0, 6, 7, 8, 9)] - -TEST_CASE_2 = [{"threshold": 5, "above": False, "cval": 0}, (0, 1, 2, 3, 4, 0, 0, 0, 0, 0)] - -TEST_CASE_3 = [{"threshold": 5, "above": True, "cval": 5}, (5, 5, 5, 5, 5, 5, 6, 7, 8, 9)] +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append([p, {"threshold": 5, "above": True, "cval": 0}, (0, 0, 0, 0, 0, 0, 6, 7, 8, 9)]) + TESTS.append([p, {"threshold": 5, "above": False, "cval": 0}, (0, 1, 2, 3, 4, 0, 0, 0, 0, 0)]) + TESTS.append([p, {"threshold": 5, "above": True, "cval": 5}, (5, 5, 5, 5, 5, 5, 6, 7, 8, 9)]) class TestThresholdIntensity(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) - def test_value(self, input_param, expected_value): - test_data = np.arange(10) + @parameterized.expand(TESTS) + def test_value(self, in_type, input_param, expected_value): + test_data = in_type(np.arange(10)) result = ThresholdIntensity(**input_param)(test_data) - np.testing.assert_allclose(result, expected_value) + assert_allclose(result, in_type(expected_value)) if __name__ == "__main__": diff --git a/tests/test_threshold_intensityd.py b/tests/test_threshold_intensityd.py index efcfcfe604..a2a9fdcf2b 100644 --- a/tests/test_threshold_intensityd.py +++ b/tests/test_threshold_intensityd.py @@ -15,31 +15,41 @@ from parameterized import parameterized from monai.transforms import ThresholdIntensityd - -TEST_CASE_1 = [ - {"keys": ["image", "label", "extra"], "threshold": 5, "above": True, "cval": 0}, - (0, 0, 0, 0, 0, 0, 6, 7, 8, 9), -] - -TEST_CASE_2 = [ - {"keys": ["image", "label", "extra"], "threshold": 5, "above": False, "cval": 0}, - (0, 1, 2, 3, 4, 0, 0, 0, 0, 0), -] - -TEST_CASE_3 = [ - {"keys": ["image", "label", "extra"], "threshold": 5, "above": True, "cval": 5}, - (5, 5, 5, 5, 5, 5, 6, 7, 8, 9), -] +from tests.utils import TEST_NDARRAYS, assert_allclose + +TESTS = [] +for p in TEST_NDARRAYS: + TESTS.append( + [ + p, + {"keys": ["image", "label", "extra"], "threshold": 5, "above": True, "cval": 0}, + (0, 0, 0, 0, 0, 0, 6, 7, 8, 9), + ] + ) + TESTS.append( + [ + p, + {"keys": ["image", "label", "extra"], "threshold": 5, "above": False, "cval": 0}, + (0, 1, 2, 3, 4, 0, 0, 0, 0, 0), + ] + ) + TESTS.append( + [ + p, + {"keys": ["image", "label", "extra"], "threshold": 5, "above": True, "cval": 5}, + (5, 5, 5, 5, 5, 5, 6, 7, 8, 9), + ] + ) class TestThresholdIntensityd(unittest.TestCase): - @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) - def test_value(self, input_param, expected_value): - test_data = {"image": np.arange(10), "label": np.arange(10), "extra": np.arange(10)} + @parameterized.expand(TESTS) + def test_value(self, in_type, input_param, expected_value): + test_data = {"image": 
in_type(np.arange(10)), "label": in_type(np.arange(10)), "extra": in_type(np.arange(10))} result = ThresholdIntensityd(**input_param)(test_data) - np.testing.assert_allclose(result["image"], expected_value) - np.testing.assert_allclose(result["label"], expected_value) - np.testing.assert_allclose(result["extra"], expected_value) + assert_allclose(result["image"], in_type(expected_value)) + assert_allclose(result["label"], in_type(expected_value)) + assert_allclose(result["extra"], in_type(expected_value)) if __name__ == "__main__": diff --git a/tests/test_to_cupy.py b/tests/test_to_cupy.py index 8b00e12539..0fd9607339 100644 --- a/tests/test_to_cupy.py +++ b/tests/test_to_cupy.py @@ -22,49 +22,81 @@ cp, has_cp = optional_import("cupy") +@skipUnless(has_cp, "CuPy is required.") class TestToCupy(unittest.TestCase): - @skipUnless(has_cp, "CuPy is required.") def test_cupy_input(self): - test_data = cp.array([[1, 2], [3, 4]]) + test_data = cp.array([[1, 2], [3, 4]], dtype=cp.float32) test_data = cp.rot90(test_data) self.assertFalse(test_data.flags["C_CONTIGUOUS"]) result = ToCupy()(test_data) + self.assertTrue(result.dtype == cp.float32) + self.assertTrue(isinstance(result, cp.ndarray)) + self.assertTrue(result.flags["C_CONTIGUOUS"]) + cp.testing.assert_allclose(result, test_data) + + def test_cupy_input_dtype(self): + test_data = cp.array([[1, 2], [3, 4]], dtype=cp.float32) + test_data = cp.rot90(test_data) + self.assertFalse(test_data.flags["C_CONTIGUOUS"]) + result = ToCupy(cp.uint8)(test_data) + self.assertTrue(result.dtype == cp.uint8) self.assertTrue(isinstance(result, cp.ndarray)) self.assertTrue(result.flags["C_CONTIGUOUS"]) cp.testing.assert_allclose(result, test_data) - @skipUnless(has_cp, "CuPy is required.") def test_numpy_input(self): - test_data = np.array([[1, 2], [3, 4]]) + test_data = np.array([[1, 2], [3, 4]], dtype=np.float32) test_data = np.rot90(test_data) self.assertFalse(test_data.flags["C_CONTIGUOUS"]) result = ToCupy()(test_data) + self.assertTrue(result.dtype == cp.float32) + self.assertTrue(isinstance(result, cp.ndarray)) + self.assertTrue(result.flags["C_CONTIGUOUS"]) + cp.testing.assert_allclose(result, test_data) + + def test_numpy_input_dtype(self): + test_data = np.array([[1, 2], [3, 4]], dtype=np.float32) + test_data = np.rot90(test_data) + self.assertFalse(test_data.flags["C_CONTIGUOUS"]) + result = ToCupy(np.uint8)(test_data) + self.assertTrue(result.dtype == cp.uint8) self.assertTrue(isinstance(result, cp.ndarray)) self.assertTrue(result.flags["C_CONTIGUOUS"]) cp.testing.assert_allclose(result, test_data) - @skipUnless(has_cp, "CuPy is required.") def test_tensor_input(self): - test_data = torch.tensor([[1, 2], [3, 4]]) + test_data = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32) test_data = test_data.rot90() self.assertFalse(test_data.is_contiguous()) result = ToCupy()(test_data) + self.assertTrue(result.dtype == cp.float32) self.assertTrue(isinstance(result, cp.ndarray)) self.assertTrue(result.flags["C_CONTIGUOUS"]) - cp.testing.assert_allclose(result, test_data.numpy()) + cp.testing.assert_allclose(result, test_data) - @skipUnless(has_cp, "CuPy is required.") @skip_if_no_cuda def test_tensor_cuda_input(self): - test_data = torch.tensor([[1, 2], [3, 4]]).cuda() + test_data = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32).cuda() test_data = test_data.rot90() self.assertFalse(test_data.is_contiguous()) result = ToCupy()(test_data) + self.assertTrue(result.dtype == cp.float32) self.assertTrue(isinstance(result, cp.ndarray)) 
self.assertTrue(result.flags["C_CONTIGUOUS"]) - cp.testing.assert_allclose(result, test_data.cpu().numpy()) + cp.testing.assert_allclose(result, test_data) + + @skip_if_no_cuda + def test_tensor_cuda_input_dtype(self): + test_data = torch.tensor([[1, 2], [3, 4]], dtype=torch.uint8).cuda() + test_data = test_data.rot90() + self.assertFalse(test_data.is_contiguous()) + + result = ToCupy(dtype="float32")(test_data) + self.assertTrue(result.dtype == cp.float32) + self.assertTrue(isinstance(result, cp.ndarray)) + self.assertTrue(result.flags["C_CONTIGUOUS"]) + cp.testing.assert_allclose(result, test_data) - @skipUnless(has_cp, "CuPy is required.") def test_list_tuple(self): test_data = [[1, 2], [3, 4]] result = ToCupy()(test_data) diff --git a/tests/test_to_deviced.py b/tests/test_to_deviced.py index 0d5d1d1cdc..3b3a7a2e8f 100644 --- a/tests/test_to_deviced.py +++ b/tests/test_to_deviced.py @@ -24,9 +24,7 @@ def test_value(self): device = "cuda:0" data = [{"img": torch.tensor(i)} for i in range(4)] dataset = CacheDataset( - data=data, - transform=ToDeviced(keys="img", device=device, non_blocking=True), - cache_rate=1.0, + data=data, transform=ToDeviced(keys="img", device=device, non_blocking=True), cache_rate=1.0 ) dataloader = ThreadDataLoader(dataset=dataset, num_workers=0, batch_size=1) for i, d in enumerate(dataloader): diff --git a/tests/test_to_numpy.py b/tests/test_to_numpy.py index b48727c01d..c7631540b8 100644 --- a/tests/test_to_numpy.py +++ b/tests/test_to_numpy.py @@ -31,16 +31,17 @@ def test_cupy_input(self): result = ToNumpy()(test_data) self.assertTrue(isinstance(result, np.ndarray)) self.assertTrue(result.flags["C_CONTIGUOUS"]) - assert_allclose(result, test_data.get()) + assert_allclose(result, test_data.get(), type_test=False) def test_numpy_input(self): test_data = np.array([[1, 2], [3, 4]]) test_data = np.rot90(test_data) self.assertFalse(test_data.flags["C_CONTIGUOUS"]) - result = ToNumpy()(test_data) + result = ToNumpy(dtype="float32")(test_data) self.assertTrue(isinstance(result, np.ndarray)) + self.assertTrue(result.dtype == np.float32) self.assertTrue(result.flags["C_CONTIGUOUS"]) - assert_allclose(result, test_data) + assert_allclose(result, test_data, type_test=False) def test_tensor_input(self): test_data = torch.tensor([[1, 2], [3, 4]]) @@ -49,7 +50,7 @@ def test_tensor_input(self): result = ToNumpy()(test_data) self.assertTrue(isinstance(result, np.ndarray)) self.assertTrue(result.flags["C_CONTIGUOUS"]) - assert_allclose(result, test_data) + assert_allclose(result, test_data, type_test=False) @skip_if_no_cuda def test_tensor_cuda_input(self): @@ -59,21 +60,21 @@ def test_tensor_cuda_input(self): result = ToNumpy()(test_data) self.assertTrue(isinstance(result, np.ndarray)) self.assertTrue(result.flags["C_CONTIGUOUS"]) - assert_allclose(result, test_data) + assert_allclose(result, test_data, type_test=False) def test_list_tuple(self): test_data = [[1, 2], [3, 4]] result = ToNumpy()(test_data) - assert_allclose(result, np.asarray(test_data)) + assert_allclose(result, np.asarray(test_data), type_test=False) test_data = ((1, 2), (3, 4)) result = ToNumpy()(test_data) - assert_allclose(result, np.asarray(test_data)) + assert_allclose(result, np.asarray(test_data), type_test=False) def test_single_value(self): for test_data in [5, np.array(5), torch.tensor(5)]: result = ToNumpy()(test_data) self.assertTrue(isinstance(result, np.ndarray)) - assert_allclose(result, np.asarray(test_data)) + assert_allclose(result, np.asarray(test_data), type_test=False) 
self.assertEqual(result.ndim, 0) diff --git a/tests/test_to_numpyd.py b/tests/test_to_numpyd.py index 5acaef39c7..0b0b032ef2 100644 --- a/tests/test_to_numpyd.py +++ b/tests/test_to_numpyd.py @@ -31,7 +31,7 @@ def test_cupy_input(self): result = ToNumpyd(keys="img")({"img": test_data})["img"] self.assertTrue(isinstance(result, np.ndarray)) self.assertTrue(result.flags["C_CONTIGUOUS"]) - assert_allclose(result, test_data.get()) + assert_allclose(result, test_data.get(), type_test=False) def test_numpy_input(self): test_data = np.array([[1, 2], [3, 4]]) @@ -40,7 +40,7 @@ def test_numpy_input(self): result = ToNumpyd(keys="img")({"img": test_data})["img"] self.assertTrue(isinstance(result, np.ndarray)) self.assertTrue(result.flags["C_CONTIGUOUS"]) - assert_allclose(result, test_data) + assert_allclose(result, test_data, type_test=False) def test_tensor_input(self): test_data = torch.tensor([[1, 2], [3, 4]]) @@ -49,7 +49,7 @@ def test_tensor_input(self): result = ToNumpyd(keys="img")({"img": test_data})["img"] self.assertTrue(isinstance(result, np.ndarray)) self.assertTrue(result.flags["C_CONTIGUOUS"]) - assert_allclose(result, test_data) + assert_allclose(result, test_data, type_test=False) @skip_if_no_cuda def test_tensor_cuda_input(self): @@ -59,7 +59,7 @@ def test_tensor_cuda_input(self): result = ToNumpyd(keys="img")({"img": test_data})["img"] self.assertTrue(isinstance(result, np.ndarray)) self.assertTrue(result.flags["C_CONTIGUOUS"]) - assert_allclose(result, test_data) + assert_allclose(result, test_data, type_test=False) if __name__ == "__main__": diff --git a/tests/test_to_pil.py b/tests/test_to_pil.py index 5690645dd8..b4581053c0 100644 --- a/tests/test_to_pil.py +++ b/tests/test_to_pil.py @@ -43,7 +43,7 @@ class TestToPIL(unittest.TestCase): def test_value(self, test_data): result = ToPIL()(test_data) self.assertTrue(isinstance(result, PILImageImage)) - assert_allclose(np.array(result), test_data) + assert_allclose(np.array(result), test_data, type_test=False) if __name__ == "__main__": diff --git a/tests/test_to_pild.py b/tests/test_to_pild.py index 3a15b1e507..3b83fa5258 100644 --- a/tests/test_to_pild.py +++ b/tests/test_to_pild.py @@ -30,9 +30,7 @@ PILImageImage, _ = optional_import("PIL.Image", name="Image") im = [[1.0, 2.0], [3.0, 4.0]] -TESTS = [] -for p in TEST_NDARRAYS: - TESTS.append([{"keys": "image"}, {"image": p(im)}]) +TESTS = [[{"keys": "image"}, {"image": p(im)}] for p in TEST_NDARRAYS] if has_pil: TESTS.append([{"keys": "image"}, {"image": pil_image_fromarray(np.array(im))}]) @@ -43,7 +41,7 @@ class TestToPIL(unittest.TestCase): def test_values(self, input_param, test_data): result = ToPILd(**input_param)(test_data)[input_param["keys"]] self.assertTrue(isinstance(result, PILImageImage)) - assert_allclose(np.array(result), test_data[input_param["keys"]]) + assert_allclose(np.array(result), test_data[input_param["keys"]], type_test=False) if __name__ == "__main__": diff --git a/tests/test_to_tensor.py b/tests/test_to_tensor.py index 6ac06983f6..b065595e89 100644 --- a/tests/test_to_tensor.py +++ b/tests/test_to_tensor.py @@ -11,10 +11,13 @@ import unittest +import torch from parameterized import parameterized from monai.transforms import ToTensor -from tests.utils import TEST_NDARRAYS, assert_allclose +from tests.utils import TEST_NDARRAYS, assert_allclose, optional_import + +cp, has_cp = optional_import("cupy") im = [[1, 2], [3, 4]] @@ -32,16 +35,26 @@ class TestToTensor(unittest.TestCase): @parameterized.expand(TESTS) def test_array_input(self, test_data, 
expected_shape): - result = ToTensor()(test_data) - assert_allclose(result, test_data) + result = ToTensor(dtype=torch.float32, device="cpu")(test_data) + self.assertTrue(isinstance(result, torch.Tensor)) + assert_allclose(result, test_data, type_test=False) self.assertTupleEqual(result.shape, expected_shape) @parameterized.expand(TESTS_SINGLE) def test_single_input(self, test_data): result = ToTensor()(test_data) - assert_allclose(result, test_data) + self.assertTrue(isinstance(result, torch.Tensor)) + assert_allclose(result, test_data, type_test=False) self.assertEqual(result.ndim, 0) + @unittest.skipUnless(has_cp, "CuPy is required.") + def test_cupy(self): + test_data = [[1, 2], [3, 4]] + cupy_array = cp.ascontiguousarray(cp.asarray(test_data)) + result = ToTensor()(cupy_array) + self.assertTrue(isinstance(result, torch.Tensor)) + assert_allclose(result, test_data, type_test=False) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_torchvision.py b/tests/test_torchvision.py index 0846b7f6b6..58e7d9295f 100644 --- a/tests/test_torchvision.py +++ b/tests/test_torchvision.py @@ -29,19 +29,10 @@ torch.tensor([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]]), torch.tensor( [ - [ - [0.1090, 0.6193], - [0.6193, 0.9164], - ], - [ - [0.1090, 0.6193], - [0.6193, 0.9164], - ], - [ - [0.1090, 0.6193], - [0.6193, 0.9164], - ], - ], + [[0.1090, 0.6193], [0.6193, 0.9164]], + [[0.1090, 0.6193], [0.6193, 0.9164]], + [[0.1090, 0.6193], [0.6193, 0.9164]], + ] ), ] @@ -50,24 +41,9 @@ torch.tensor([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]]), torch.tensor( [ - [ - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.0], - [0.0, 1.0, 2.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - ], - [ - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.0], - [0.0, 1.0, 2.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - ], - [ - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.0], - [0.0, 1.0, 2.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - ], + [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 1.0, 2.0, 0.0], [0.0, 0.0, 0.0, 0.0]], + [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 1.0, 2.0, 0.0], [0.0, 0.0, 0.0, 0.0]], + [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 1.0, 2.0, 0.0], [0.0, 0.0, 0.0, 0.0]], ] ), ] diff --git a/tests/test_torchvision_fc_model.py b/tests/test_torchvision_fc_model.py index d6d3ea69c9..cc603e2585 100644 --- a/tests/test_torchvision_fc_model.py +++ b/tests/test_torchvision_fc_model.py @@ -115,17 +115,7 @@ class TestTorchVisionFCModel(unittest.TestCase): - @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - TEST_CASE_4, - TEST_CASE_5, - TEST_CASE_6, - ] - ) + @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6]) @skipUnless(has_tv, "Requires TorchVision.") def test_without_pretrained(self, input_param, input_shape, expected_shape): net = TorchVisionFCModel(**input_param).to(device) diff --git a/tests/test_torchvision_fully_conv_model.py b/tests/test_torchvision_fully_conv_model.py index af2c1458d3..444e871c45 100644 --- a/tests/test_torchvision_fully_conv_model.py +++ b/tests/test_torchvision_fully_conv_model.py @@ -23,23 +23,11 @@ device = "cuda" if torch.cuda.is_available() else "cpu" -TEST_CASE_0 = [ - {"model_name": "resnet18", "num_classes": 1, "pretrained": False}, - (2, 3, 224, 224), - (2, 1, 1, 1), -] +TEST_CASE_0 = [{"model_name": "resnet18", "num_classes": 1, "pretrained": False}, (2, 3, 224, 224), (2, 1, 1, 1)] -TEST_CASE_1 = [ - {"model_name": "resnet18", 
"num_classes": 1, "pretrained": False}, - (2, 3, 256, 256), - (2, 1, 2, 2), -] +TEST_CASE_1 = [{"model_name": "resnet18", "num_classes": 1, "pretrained": False}, (2, 3, 256, 256), (2, 1, 2, 2)] -TEST_CASE_2 = [ - {"model_name": "resnet101", "num_classes": 5, "pretrained": False}, - (2, 3, 256, 256), - (2, 5, 2, 2), -] +TEST_CASE_2 = [{"model_name": "resnet101", "num_classes": 5, "pretrained": False}, (2, 3, 256, 256), (2, 5, 2, 2)] TEST_CASE_3 = [ {"model_name": "resnet101", "num_classes": 5, "pool_size": 6, "pretrained": False}, @@ -70,14 +58,7 @@ class TestTorchVisionFullyConvModel(unittest.TestCase): - @parameterized.expand( - [ - TEST_CASE_0, - TEST_CASE_1, - TEST_CASE_2, - TEST_CASE_3, - ] - ) + @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]) @skipUnless(has_tv, "Requires TorchVision.") def test_without_pretrained(self, input_param, input_shape, expected_shape): net = TorchVisionFullyConvModel(**input_param).to(device) @@ -85,13 +66,7 @@ def test_without_pretrained(self, input_param, input_shape, expected_shape): result = net.forward(torch.randn(input_shape).to(device)) self.assertEqual(result.shape, expected_shape) - @parameterized.expand( - [ - TEST_CASE_PRETRAINED_0, - TEST_CASE_PRETRAINED_1, - TEST_CASE_PRETRAINED_2, - ] - ) + @parameterized.expand([TEST_CASE_PRETRAINED_0, TEST_CASE_PRETRAINED_1, TEST_CASE_PRETRAINED_2]) @skipUnless(has_tv, "Requires TorchVision.") def test_with_pretrained(self, input_param, input_shape, expected_shape, expected_value): net = TorchVisionFullyConvModel(**input_param).to(device) diff --git a/tests/test_torchvisiond.py b/tests/test_torchvisiond.py index 4f42bc95f7..1530691824 100644 --- a/tests/test_torchvisiond.py +++ b/tests/test_torchvisiond.py @@ -29,19 +29,10 @@ {"img": torch.tensor([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]])}, torch.tensor( [ - [ - [0.1090, 0.6193], - [0.6193, 0.9164], - ], - [ - [0.1090, 0.6193], - [0.6193, 0.9164], - ], - [ - [0.1090, 0.6193], - [0.6193, 0.9164], - ], - ], + [[0.1090, 0.6193], [0.6193, 0.9164]], + [[0.1090, 0.6193], [0.6193, 0.9164]], + [[0.1090, 0.6193], [0.6193, 0.9164]], + ] ), ] @@ -50,24 +41,9 @@ {"img": torch.tensor([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]])}, torch.tensor( [ - [ - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.0], - [0.0, 1.0, 2.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - ], - [ - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.0], - [0.0, 1.0, 2.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - ], - [ - [0.0, 0.0, 0.0, 0.0], - [0.0, 0.0, 1.0, 0.0], - [0.0, 1.0, 2.0, 0.0], - [0.0, 0.0, 0.0, 0.0], - ], + [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 1.0, 2.0, 0.0], [0.0, 0.0, 0.0, 0.0]], + [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 1.0, 2.0, 0.0], [0.0, 0.0, 0.0, 0.0]], + [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 1.0, 2.0, 0.0], [0.0, 0.0, 0.0, 0.0]], ] ), ] diff --git a/tests/test_transchex.py b/tests/test_transchex.py new file mode 100644 index 0000000000..e178cb5184 --- /dev/null +++ b/tests/test_transchex.py @@ -0,0 +1,82 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import torch +from parameterized import parameterized + +from monai.networks import eval_mode +from monai.networks.nets.transchex import Transchex +from tests.utils import skip_if_quick + +TEST_CASE_TRANSCHEX = [] +for drop_out in [0.4]: + for in_channels in [3]: + for img_size in [224]: + for patch_size in [16, 32]: + for num_language_layers in [2]: + for num_vision_layers in [4]: + for num_mixed_layers in [3]: + for num_classes in [8]: + test_case = [ + { + "in_channels": in_channels, + "img_size": (img_size,) * 2, + "patch_size": (patch_size,) * 2, + "num_vision_layers": num_vision_layers, + "num_mixed_layers": num_mixed_layers, + "num_language_layers": num_language_layers, + "num_classes": num_classes, + "drop_out": drop_out, + }, + (2, num_classes), # type: ignore + ] + TEST_CASE_TRANSCHEX.append(test_case) + + +@skip_if_quick +class TestTranschex(unittest.TestCase): + @parameterized.expand(TEST_CASE_TRANSCHEX) + def test_shape(self, input_param, expected_shape): + net = Transchex(**input_param) + with eval_mode(net): + result = net(torch.randint(2, (2, 512)), torch.randint(2, (2, 512)), torch.randn((2, 3, 224, 224))) + self.assertEqual(result.shape, expected_shape) + + def test_ill_arg(self): + with self.assertRaises(ValueError): + Transchex( + in_channels=3, + img_size=(128, 128), + patch_size=(16, 16), + num_language_layers=2, + num_mixed_layers=4, + num_vision_layers=2, + num_classes=2, + drop_out=5.0, + ) + + with self.assertRaises(ValueError): + Transchex( + in_channels=1, + img_size=(97, 97), + patch_size=(16, 16), + num_language_layers=6, + num_mixed_layers=6, + num_vision_layers=8, + num_classes=8, + drop_out=0.4, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_transpose.py b/tests/test_transpose.py index 10882c9dd8..176fa6f10e 100644 --- a/tests/test_transpose.py +++ b/tests/test_transpose.py @@ -20,18 +20,8 @@ TESTS = [] for p in TEST_NDARRAYS: - TESTS.append( - [ - p(np.arange(5 * 4).reshape(5, 4)), - None, - ] - ) - TESTS.append( - [ - p(np.arange(5 * 4 * 3).reshape(5, 4, 3)), - [2, 0, 1], - ] - ) + TESTS.append([p(np.arange(5 * 4).reshape(5, 4)), None]) + TESTS.append([p(np.arange(5 * 4 * 3).reshape(5, 4, 3)), [2, 0, 1]]) class TestTranspose(unittest.TestCase): @@ -42,7 +32,7 @@ def test_transpose(self, im, indices): if isinstance(im, torch.Tensor): im = im.cpu().numpy() out2 = np.transpose(im, indices) - assert_allclose(out1, out2) + assert_allclose(out1, out2, type_test=False) if __name__ == "__main__": diff --git a/tests/test_transposed.py b/tests/test_transposed.py index 88ecd0c872..719efea4c3 100644 --- a/tests/test_transposed.py +++ b/tests/test_transposed.py @@ -21,30 +21,10 @@ TESTS = [] for p in TEST_NDARRAYS: - TESTS.append( - [ - p(np.arange(5 * 4).reshape(5, 4)), - [1, 0], - ] - ) - TESTS.append( - [ - p(np.arange(5 * 4).reshape(5, 4)), - None, - ] - ) - TESTS.append( - [ - p(np.arange(5 * 4 * 3).reshape(5, 4, 3)), - [2, 0, 1], - ] - ) - TESTS.append( - [ - p(np.arange(5 * 4 * 3).reshape(5, 4, 3)), - None, - ] - ) + TESTS.append([p(np.arange(5 * 4).reshape(5, 4)), [1, 0]]) + TESTS.append([p(np.arange(5 * 
4).reshape(5, 4)), None]) + TESTS.append([p(np.arange(5 * 4 * 3).reshape(5, 4, 3)), [2, 0, 1]]) + TESTS.append([p(np.arange(5 * 4 * 3).reshape(5, 4, 3)), None]) class TestTranspose(unittest.TestCase): @@ -57,13 +37,13 @@ def test_transpose(self, im, indices): if isinstance(im, torch.Tensor): im = im.cpu().numpy() out_gt = np.transpose(im, indices) - assert_allclose(out_im1, out_gt) - assert_allclose(out_im2, out_gt) + assert_allclose(out_im1, out_gt, type_test=False) + assert_allclose(out_im2, out_gt, type_test=False) # test inverse fwd_inv_data = tr.inverse(out_data) for i, j in zip(data.values(), fwd_inv_data.values()): - assert_allclose(i, j) + assert_allclose(i, j, type_test=False) if __name__ == "__main__": diff --git a/tests/test_tversky_loss.py b/tests/test_tversky_loss.py index 0bc2ca2e70..57e887d3f0 100644 --- a/tests/test_tversky_loss.py +++ b/tests/test_tversky_loss.py @@ -21,10 +21,7 @@ TEST_CASES = [ [ # shape: (1, 1, 2, 2), (1, 1, 2, 2) {"include_background": True, "sigmoid": True, "smooth_nr": 1e-6, "smooth_dr": 1e-6}, - { - "input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), - "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]]), - }, + {"input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]])}, 0.307576, ], [ # shape: (2, 1, 2, 2), (2, 1, 2, 2) @@ -107,18 +104,12 @@ ], [ # shape: (1, 1, 2, 2), (1, 1, 2, 2) {"include_background": True, "sigmoid": True, "alpha": 0.3, "beta": 0.7, "smooth_nr": 1e-6, "smooth_dr": 1e-6}, - { - "input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), - "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]]), - }, + {"input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]])}, 0.3589, ], [ # shape: (1, 1, 2, 2), (1, 1, 2, 2) {"include_background": True, "sigmoid": True, "alpha": 0.7, "beta": 0.3, "smooth_nr": 1e-6, "smooth_dr": 1e-6}, - { - "input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), - "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]]), - }, + {"input": torch.tensor([[[[1.0, -1.0], [-1.0, 1.0]]]]), "target": torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]]])}, 0.247366, ], [ # shape: (2, 1, 2, 2), (2, 1, 2, 2) diff --git a/tests/test_unet.py b/tests/test_unet.py index 4091c4e9d7..e1dabc4ed0 100644 --- a/tests/test_unet.py +++ b/tests/test_unet.py @@ -23,7 +23,7 @@ TEST_CASE_0 = [ # single channel 2D, batch 16, no residual { - "dimensions": 2, + "spatial_dims": 2, "in_channels": 1, "out_channels": 3, "channels": (16, 32, 64), @@ -36,7 +36,7 @@ TEST_CASE_1 = [ # single channel 2D, batch 16 { - "dimensions": 2, + "spatial_dims": 2, "in_channels": 1, "out_channels": 3, "channels": (16, 32, 64), @@ -49,7 +49,7 @@ TEST_CASE_2 = [ # single channel 3D, batch 16 { - "dimensions": 3, + "spatial_dims": 3, "in_channels": 1, "out_channels": 3, "channels": (16, 32, 64), @@ -62,7 +62,7 @@ TEST_CASE_3 = [ # 4-channel 3D, batch 16 { - "dimensions": 3, + "spatial_dims": 3, "in_channels": 4, "out_channels": 3, "channels": (16, 32, 64), @@ -75,7 +75,7 @@ TEST_CASE_4 = [ # 4-channel 3D, batch 16, batch normalization { - "dimensions": 3, + "spatial_dims": 3, "in_channels": 4, "out_channels": 3, "channels": (16, 32, 64), @@ -89,7 +89,7 @@ TEST_CASE_5 = [ # 4-channel 3D, batch 16, LeakyReLU activation { - "dimensions": 3, + "spatial_dims": 3, "in_channels": 4, "out_channels": 3, "channels": (16, 32, 64), @@ -103,7 +103,7 @@ TEST_CASE_6 = [ # 4-channel 3D, batch 16, LeakyReLU activation explicit { - "dimensions": 3, + "spatial_dims": 3, "in_channels": 4, "out_channels": 
3, "channels": (16, 32, 64), @@ -120,7 +120,7 @@ ILL_CASES = [ [ { # len(channels) < 2 - "dimensions": 2, + "spatial_dims": 2, "in_channels": 1, "out_channels": 3, "channels": (16,), @@ -130,7 +130,7 @@ ], [ { # len(strides) < len(channels) - 1 - "dimensions": 2, + "spatial_dims": 2, "in_channels": 1, "out_channels": 3, "channels": (8, 8, 8), @@ -139,8 +139,8 @@ } ], [ - { # len(kernel_size) = 3, dimensions = 2 - "dimensions": 2, + { # len(kernel_size) = 3, spatial_dims = 2 + "spatial_dims": 2, "in_channels": 1, "out_channels": 3, "channels": (8, 8, 8), @@ -149,8 +149,8 @@ } ], [ - { # len(up_kernel_size) = 2, dimensions = 3 - "dimensions": 3, + { # len(up_kernel_size) = 2, spatial_dims = 3 + "spatial_dims": 3, "in_channels": 1, "out_channels": 3, "channels": (8, 8, 8), @@ -170,13 +170,15 @@ def test_shape(self, input_param, input_shape, expected_shape): self.assertEqual(result.shape, expected_shape) def test_script(self): - net = UNet(dimensions=2, in_channels=1, out_channels=3, channels=(16, 32, 64), strides=(2, 2), num_res_units=0) + net = UNet( + spatial_dims=2, in_channels=1, out_channels=3, channels=(16, 32, 64), strides=(2, 2), num_res_units=0 + ) test_data = torch.randn(16, 1, 32, 32) test_script_save(net, test_data) def test_script_without_running_stats(self): net = UNet( - dimensions=2, + spatial_dims=2, in_channels=1, out_channels=3, channels=(16, 32, 64), @@ -188,13 +190,7 @@ def test_script_without_running_stats(self): test_script_save(net, test_data) def test_ill_input_shape(self): - net = UNet( - dimensions=2, - in_channels=1, - out_channels=3, - channels=(16, 32, 64), - strides=(2, 2), - ) + net = UNet(spatial_dims=2, in_channels=1, out_channels=3, channels=(16, 32, 64), strides=(2, 2)) with eval_mode(net): with self.assertRaisesRegex(RuntimeError, "Sizes of tensors must match"): net.forward(torch.randn(2, 1, 16, 5)) diff --git a/tests/test_upsample_block.py b/tests/test_upsample_block.py index 7b8ada399c..02dbaacdc6 100644 --- a/tests/test_upsample_block.py +++ b/tests/test_upsample_block.py @@ -20,11 +20,7 @@ TEST_CASES = [ [{"dimensions": 2, "in_channels": 4}, (7, 4, 32, 48), (7, 4, 64, 96)], # 4-channel 2D, batch 7 - [ - {"dimensions": 1, "in_channels": 4, "out_channels": 3}, - (16, 4, 63), - (16, 3, 126), - ], # 4-channel 1D, batch 16 + [{"dimensions": 1, "in_channels": 4, "out_channels": 3}, (16, 4, 63), (16, 3, 126)], # 4-channel 1D, batch 16 [ {"dimensions": 1, "in_channels": 4, "out_channels": 8, "mode": "deconv", "align_corners": False}, (16, 4, 20), @@ -78,14 +74,7 @@ expected_shape = (16, 5, 4 * s, 5 * s, 6 * s) for t in UpsampleMode: test_case = [ - { - "dimensions": 3, - "in_channels": 3, - "out_channels": 5, - "mode": t, - "scale_factor": s, - "align_corners": True, - }, + {"dimensions": 3, "in_channels": 3, "out_channels": 5, "mode": t, "scale_factor": s, "align_corners": True}, (16, 3, 4, 5, 6), ] test_case.append(expected_shape) diff --git a/tests/test_utils_pytorch_numpy_unification.py b/tests/test_utils_pytorch_numpy_unification.py new file mode 100644 index 0000000000..c8e0a35c92 --- /dev/null +++ b/tests/test_utils_pytorch_numpy_unification.py @@ -0,0 +1,46 @@ +# Copyright 2020 - 2021 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import torch + +from monai.transforms.utils_pytorch_numpy_unification import percentile +from tests.utils import TEST_NDARRAYS, assert_allclose, set_determinism + + +class TestPytorchNumpyUnification(unittest.TestCase): + def setUp(self) -> None: + set_determinism(0) + + def test_percentile(self): + for size in (1, 100): + q = np.random.randint(0, 100, size=size) + results = [] + for p in TEST_NDARRAYS: + arr = p(np.arange(100 * 101).reshape(1, 100, 101).astype(np.float32)) + results.append(percentile(arr, q)) + # pre torch 1.7, no `quantile`. Our own method doesn't interpolate, + # so we can only be accurate to 0.5 + atol = 0.5 if not hasattr(torch, "quantile") else 1e-4 + assert_allclose(results[0], results[-1], type_test=False, atol=atol) + + def test_fails(self): + for p in TEST_NDARRAYS: + for q in (-1, 101): + arr = p(np.arange(100 * 101).reshape(1, 100, 101).astype(np.float32)) + with self.assertRaises(ValueError): + percentile(arr, q) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_varautoencoder.py b/tests/test_varautoencoder.py index 7a4a546d87..e22a017fc6 100644 --- a/tests/test_varautoencoder.py +++ b/tests/test_varautoencoder.py @@ -23,7 +23,7 @@ TEST_CASE_0 = [ # single channel 2D, batch 4, no residual { - "dimensions": 2, + "spatial_dims": 2, "in_shape": (1, 128, 128), "out_channels": 1, "latent_size": 2, @@ -37,7 +37,7 @@ TEST_CASE_1 = [ # single channel 2D, batch 4 { - "dimensions": 2, + "spatial_dims": 2, "in_shape": (1, 128, 128), "out_channels": 1, "latent_size": 2, @@ -50,7 +50,7 @@ TEST_CASE_2 = [ # 3-channel 2D, batch 4, LeakyReLU activation { - "dimensions": 2, + "spatial_dims": 2, "in_shape": (3, 128, 128), "out_channels": 3, "latent_size": 2, @@ -64,7 +64,7 @@ TEST_CASE_3 = [ # 4-channel 3D, batch 4 { - "dimensions": 3, + "spatial_dims": 3, "in_shape": (4, 128, 128, 128), "out_channels": 3, "latent_size": 2, @@ -88,7 +88,7 @@ def test_shape(self, input_param, input_shape, expected_shape): def test_script(self): net = VarAutoEncoder( - dimensions=2, in_shape=(1, 32, 32), out_channels=1, latent_size=2, channels=(4, 8), strides=(2, 2) + spatial_dims=2, in_shape=(1, 32, 32), out_channels=1, latent_size=2, channels=(4, 8), strides=(2, 2) ) test_data = torch.randn(2, 1, 32, 32) test_script_save(net, test_data) diff --git a/tests/test_version_leq.py b/tests/test_version_leq.py index a1913069d3..042a561a90 100644 --- a/tests/test_version_leq.py +++ b/tests/test_version_leq.py @@ -67,6 +67,9 @@ def _pairwise(iterable): ("0post1", "0.4post1"), ("2.1.0-rc1", "2.1.0"), ("2.1dev", "2.1a0"), + (1.6, "1.6.0"), + ("1.6.0", 1.6), + (1.6, 1.7), ) + tuple(_pairwise(reversed(torture.split()))) diff --git a/tests/test_vis_gradcam.py b/tests/test_vis_gradcam.py index eebf32d70b..72385a37a7 100644 --- a/tests/test_vis_gradcam.py +++ b/tests/test_vis_gradcam.py @@ -40,23 +40,13 @@ ] # 2D TEST_CASE_2 = [ - { - "model": "senet2d", - "shape": (2, 3, 64, 64), - "feature_shape": (2, 1, 2, 2), - "target_layers": "layer4", - }, + {"model": "senet2d", "shape": (2, 3, 64, 64), "feature_shape": (2, 1, 2, 2), "target_layers": 
"layer4"}, (2, 1, 64, 64), ] # 3D TEST_CASE_3 = [ - { - "model": "senet3d", - "shape": (2, 3, 8, 8, 48), - "feature_shape": (2, 1, 1, 1, 2), - "target_layers": "layer4", - }, + {"model": "senet3d", "shape": (2, 3, 8, 8, 48), "feature_shape": (2, 1, 1, 1, 2), "target_layers": "layer4"}, (2, 1, 8, 8, 48), ] @@ -88,6 +78,16 @@ def test_shape(self, input_data, expected_shape): result2 = cam(x=image, layer_idx=-1, class_idx=model(image).max(1)[-1].cpu()) torch.testing.assert_allclose(result, result2) + def test_ill(self): + model = DenseNet121(spatial_dims=2, in_channels=1, out_channels=3) + for name, x in model.named_parameters(): + if "features" in name: + x.requires_grad = False + cam = GradCAM(nn_module=model, target_layers="class_layers.relu") + image = torch.rand((2, 1, 48, 64)) + with self.assertRaises(RuntimeError): + cam(x=image) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_vis_gradcampp.py b/tests/test_vis_gradcampp.py index 92a4b2ac7b..5f801dce45 100644 --- a/tests/test_vis_gradcampp.py +++ b/tests/test_vis_gradcampp.py @@ -39,23 +39,13 @@ ] # 2D TEST_CASE_2 = [ - { - "model": "senet2d", - "shape": (2, 3, 64, 64), - "feature_shape": (2, 1, 2, 2), - "target_layers": "layer4", - }, + {"model": "senet2d", "shape": (2, 3, 64, 64), "feature_shape": (2, 1, 2, 2), "target_layers": "layer4"}, (2, 1, 64, 64), ] # 3D TEST_CASE_3 = [ - { - "model": "senet3d", - "shape": (2, 3, 8, 8, 48), - "feature_shape": (2, 1, 1, 1, 2), - "target_layers": "layer4", - }, + {"model": "senet3d", "shape": (2, 3, 8, 8, 48), "feature_shape": (2, 1, 1, 1, 2), "target_layers": "layer4"}, (2, 1, 8, 8, 48), ] diff --git a/tests/test_vote_ensemble.py b/tests/test_vote_ensemble.py index 74c19d5f48..434f0079dd 100644 --- a/tests/test_vote_ensemble.py +++ b/tests/test_vote_ensemble.py @@ -45,18 +45,10 @@ ] # shape: [1] -TEST_CASE_5 = [ - {"num_classes": 3}, - [torch.tensor([2]), torch.tensor([2]), torch.tensor([1])], - torch.tensor([2]), -] +TEST_CASE_5 = [{"num_classes": 3}, [torch.tensor([2]), torch.tensor([2]), torch.tensor([1])], torch.tensor([2])] # shape: 1 -TEST_CASE_6 = [ - {"num_classes": 3}, - [torch.tensor(2), torch.tensor(2), torch.tensor(1)], - torch.tensor(2), -] +TEST_CASE_6 = [{"num_classes": 3}, [torch.tensor(2), torch.tensor(2), torch.tensor(1)], torch.tensor(2)] class TestVoteEnsemble(unittest.TestCase): diff --git a/tests/test_vote_ensembled.py b/tests/test_vote_ensembled.py index e94213733f..1c2ac8a339 100644 --- a/tests/test_vote_ensembled.py +++ b/tests/test_vote_ensembled.py @@ -41,22 +41,14 @@ # shape: [1, 2, 1] TEST_CASE_3 = [ {"keys": ["pred0", "pred1", "pred2"], "output_key": "output", "num_classes": 3}, - { - "pred0": torch.tensor([[[0], [2]]]), - "pred1": torch.tensor([[[0], [2]]]), - "pred2": torch.tensor([[[1], [1]]]), - }, + {"pred0": torch.tensor([[[0], [2]]]), "pred1": torch.tensor([[[0], [2]]]), "pred2": torch.tensor([[[1], [1]]])}, torch.tensor([[[0], [2]]]), ] # shape: [1, 2, 1] TEST_CASE_4 = [ {"keys": ["pred0", "pred1", "pred2"], "output_key": "output", "num_classes": 5}, - { - "pred0": torch.tensor([[[0], [2]]]), - "pred1": torch.tensor([[[0], [2]]]), - "pred2": torch.tensor([[[1], [1]]]), - }, + {"pred0": torch.tensor([[[0], [2]]]), "pred1": torch.tensor([[[0], [2]]]), "pred2": torch.tensor([[[1], [1]]])}, torch.tensor([[[0], [2]]]), ] diff --git a/tests/test_zoom.py b/tests/test_zoom.py index e6710ede29..9411988a7e 100644 --- a/tests/test_zoom.py +++ b/tests/test_zoom.py @@ -12,11 +12,12 @@ import unittest import numpy as np +import torch from 
parameterized import parameterized from scipy.ndimage import zoom as zoom_scipy from monai.transforms import Zoom -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose VALID_CASES = [(1.5, "nearest"), (1.5, "nearest"), (0.8, "bilinear"), (0.8, "area")] @@ -26,38 +27,42 @@ class TestZoom(NumpyImageTestCase2D): @parameterized.expand(VALID_CASES) def test_correct_results(self, zoom, mode): - zoom_fn = Zoom(zoom=zoom, mode=mode, keep_size=False) - zoomed = zoom_fn(self.imt[0]) - _order = 0 - if mode.endswith("linear"): - _order = 1 - expected = [] - for channel in self.imt[0]: - expected.append(zoom_scipy(channel, zoom=zoom, mode="nearest", order=_order, prefilter=False)) - expected = np.stack(expected).astype(np.float32) - np.testing.assert_allclose(zoomed, expected, atol=1.0) + for p in TEST_NDARRAYS: + zoom_fn = Zoom(zoom=zoom, mode=mode, keep_size=False) + zoomed = zoom_fn(p(self.imt[0])) + _order = 0 + if mode.endswith("linear"): + _order = 1 + expected = [] + for channel in self.imt[0]: + expected.append(zoom_scipy(channel, zoom=zoom, mode="nearest", order=_order, prefilter=False)) + expected = np.stack(expected).astype(np.float32) + assert_allclose(zoomed, p(expected), atol=1.0) def test_keep_size(self): - zoom_fn = Zoom(zoom=[0.6, 0.6], keep_size=True, align_corners=True, padding_mode="constant", constant_values=2) - zoomed = zoom_fn(self.imt[0], mode="bilinear") - np.testing.assert_allclose(zoomed.shape, self.imt.shape[1:]) + for p in TEST_NDARRAYS: + zoom_fn = Zoom(zoom=[0.6, 0.6], keep_size=True, align_corners=True) + zoomed = zoom_fn(p(self.imt[0]), mode="bilinear") + assert_allclose(zoomed.shape, self.imt.shape[1:]) - zoom_fn = Zoom(zoom=[1.3, 1.3], keep_size=True) - zoomed = zoom_fn(self.imt[0]) - np.testing.assert_allclose(zoomed.shape, self.imt.shape[1:]) + zoom_fn = Zoom(zoom=[1.3, 1.3], keep_size=True) + zoomed = zoom_fn(p(self.imt[0])) + assert_allclose(zoomed.shape, self.imt.shape[1:]) @parameterized.expand(INVALID_CASES) def test_invalid_inputs(self, zoom, mode, raises): - with self.assertRaises(raises): - zoom_fn = Zoom(zoom=zoom, mode=mode) - zoom_fn(self.imt[0]) + for p in TEST_NDARRAYS: + with self.assertRaises(raises): + zoom_fn = Zoom(zoom=zoom, mode=mode) + zoom_fn(p(self.imt[0])) def test_padding_mode(self): - zoom_fn = Zoom(zoom=0.5, mode="nearest", padding_mode="constant", keep_size=True) - test_data = np.array([[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]]]) - zoomed = zoom_fn(test_data) - expected = np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 1.0, 0.0], [0.0, 1.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]) - np.testing.assert_allclose(zoomed, expected) + for p in TEST_NDARRAYS: + zoom_fn = Zoom(zoom=0.5, mode="nearest", padding_mode="constant", keep_size=True) + test_data = p([[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]]]) + zoomed = zoom_fn(test_data) + expected = p([[[0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 1.0, 0.0], [0.0, 1.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]) + torch.testing.assert_allclose(zoomed, expected) if __name__ == "__main__": diff --git a/tests/test_zoomd.py b/tests/test_zoomd.py index 1a1a905d80..6231978ca7 100644 --- a/tests/test_zoomd.py +++ b/tests/test_zoomd.py @@ -16,7 +16,7 @@ from scipy.ndimage import zoom as zoom_scipy from monai.transforms import Zoomd -from tests.utils import NumpyImageTestCase2D +from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose VALID_CASES = [(1.5, 
"nearest", False), (0.3, "bilinear", False), (0.8, "bilinear", False)] @@ -27,38 +27,37 @@ class TestZoomd(NumpyImageTestCase2D): @parameterized.expand(VALID_CASES) def test_correct_results(self, zoom, mode, keep_size): key = "img" - zoom_fn = Zoomd( - key, - zoom=zoom, - mode=mode, - keep_size=keep_size, - ) - zoomed = zoom_fn({key: self.imt[0]}) - _order = 0 - if mode.endswith("linear"): - _order = 1 - expected = [] - for channel in self.imt[0]: - expected.append(zoom_scipy(channel, zoom=zoom, mode="nearest", order=_order, prefilter=False)) - expected = np.stack(expected).astype(np.float32) - np.testing.assert_allclose(expected, zoomed[key], atol=1.0) + zoom_fn = Zoomd(key, zoom=zoom, mode=mode, keep_size=keep_size) + for p in TEST_NDARRAYS: + zoomed = zoom_fn({key: p(self.imt[0])}) + _order = 0 + if mode.endswith("linear"): + _order = 1 + expected = [ + zoom_scipy(channel, zoom=zoom, mode="nearest", order=_order, prefilter=False) for channel in self.imt[0] + ] + + expected = np.stack(expected).astype(np.float32) + assert_allclose(zoomed[key], p(expected), atol=1.0) def test_keep_size(self): key = "img" zoom_fn = Zoomd(key, zoom=0.6, keep_size=True, padding_mode="constant", constant_values=2) - zoomed = zoom_fn({key: self.imt[0]}) - self.assertTrue(np.array_equal(zoomed[key].shape, self.imt.shape[1:])) + for p in TEST_NDARRAYS: + zoomed = zoom_fn({key: p(self.imt[0])}) + np.testing.assert_array_equal(zoomed[key].shape, self.imt.shape[1:]) - zoom_fn = Zoomd(key, zoom=1.3, keep_size=True) - zoomed = zoom_fn({key: self.imt[0]}) - self.assertTrue(np.array_equal(zoomed[key].shape, self.imt.shape[1:])) + zoom_fn = Zoomd(key, zoom=1.3, keep_size=True) + zoomed = zoom_fn({key: self.imt[0]}) + self.assertTrue(np.array_equal(zoomed[key].shape, self.imt.shape[1:])) @parameterized.expand(INVALID_CASES) def test_invalid_inputs(self, _, zoom, mode, raises): key = "img" - with self.assertRaises(raises): - zoom_fn = Zoomd(key, zoom=zoom, mode=mode) - zoom_fn({key: self.imt[0]}) + for p in TEST_NDARRAYS: + with self.assertRaises(raises): + zoom_fn = Zoomd(key, zoom=zoom, mode=mode) + zoom_fn({key: p(self.imt[0])}) if __name__ == "__main__": diff --git a/tests/testing_data/cpp_resample_answers.py b/tests/testing_data/cpp_resample_answers.py index 51ac6ccda9..67af152059 100644 --- a/tests/testing_data/cpp_resample_answers.py +++ b/tests/testing_data/cpp_resample_answers.py @@ -23,7 +23,7 @@ def _read_testing_data_answers(fname: Optional[str] = None, delimiter=",") -> Li pwd = os.path.dirname(os.path.abspath(__file__)) filename = os.path.join(pwd, fname) if not os.path.isfile(filename): - warnings.warn("test data {} not found.".format(filename)) + warnings.warn(f"test data {filename} not found.") return answers with open(filename) as f: res_reader = csv.reader(f, delimiter=delimiter) diff --git a/tests/testing_data/integration_answers.py b/tests/testing_data/integration_answers.py index ccb4293a40..6932ee6e71 100644 --- a/tests/testing_data/integration_answers.py +++ b/tests/testing_data/integration_answers.py @@ -418,7 +418,7 @@ 0.17794132232666016, 0.18584394454956055, 0.03577899932861328, - ], + ] }, "integration_segmentation_3d": { # for the mixed readers "losses": [ diff --git a/tests/utils.py b/tests/utils.py index 1375cd2d72..c73a87d141 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -25,7 +25,7 @@ from io import BytesIO from subprocess import PIPE, Popen from typing import Callable, Optional, Tuple -from urllib.error import ContentTooShortError, HTTPError, URLError +from urllib.error import 
HTTPError, URLError import numpy as np import torch @@ -38,10 +38,12 @@ from monai.utils import ensure_tuple, optional_import, set_determinism from monai.utils.misc import is_module_ver_at_least from monai.utils.module import version_leq +from monai.utils.type_conversion import convert_data_type nib, _ = optional_import("nibabel") quick_test_var = "QUICKTEST" +_tf32_enabled = None def clone(data: NdarrayTensor) -> NdarrayTensor: @@ -57,31 +59,79 @@ def clone(data: NdarrayTensor) -> NdarrayTensor: return copy.deepcopy(data) -def assert_allclose(a: NdarrayOrTensor, b: NdarrayOrTensor, *args, **kwargs): +def assert_allclose( + actual: NdarrayOrTensor, + desired: NdarrayOrTensor, + type_test: bool = True, + device_test: bool = False, + *args, + **kwargs, +): """ - Assert that all values of two data objects are close. + Assert that types and all values of two data objects are close. Args: - a (NdarrayOrTensor): Pytorch Tensor or numpy array for comparison - b (NdarrayOrTensor): Pytorch Tensor or numpy array to compare against + actual: Pytorch Tensor or numpy array for comparison. + desired: Pytorch Tensor or numpy array to compare against. + type_test: whether to test that `actual` and `desired` are both numpy arrays or torch tensors. + device_test: whether to test the device property. + args: extra arguments to pass on to `np.testing.assert_allclose`. + kwargs: extra arguments to pass on to `np.testing.assert_allclose`. + + """ - a = a.cpu() if isinstance(a, torch.Tensor) else a - b = b.cpu() if isinstance(b, torch.Tensor) else b - np.testing.assert_allclose(a, b, *args, **kwargs) + if type_test: + # check both actual and desired are of the same type + np.testing.assert_equal(isinstance(actual, np.ndarray), isinstance(desired, np.ndarray), "numpy type") + np.testing.assert_equal(isinstance(actual, torch.Tensor), isinstance(desired, torch.Tensor), "torch type") + + if isinstance(desired, torch.Tensor) or isinstance(actual, torch.Tensor): + if device_test: + np.testing.assert_equal(str(actual.device), str(desired.device), "torch device check") # type: ignore + actual = actual.cpu().numpy() if isinstance(actual, torch.Tensor) else actual + desired = desired.cpu().numpy() if isinstance(desired, torch.Tensor) else desired + np.testing.assert_allclose(actual, desired, *args, **kwargs) def test_pretrained_networks(network, input_param, device): try: - net = network(**input_param).to(device) - except (URLError, HTTPError, ContentTooShortError) as e: - raise unittest.SkipTest(e) - return net + return network(**input_param).to(device) + except (URLError, HTTPError) as e: + raise unittest.SkipTest(e) from e def test_is_quick(): return os.environ.get(quick_test_var, "").lower() == "true" +def is_tf32_env(): + """ + The environment variable NVIDIA_TF32_OVERRIDE=0 will override any defaults + or programmatic configuration of NVIDIA libraries, and consequently, + cuBLAS will not accelerate FP32 computations with TF32 tensor cores. 
+ """ + global _tf32_enabled + if _tf32_enabled is None: + _tf32_enabled = False + if ( + torch.cuda.is_available() + and not version_leq(f"{torch.version.cuda}", "10.100") + and os.environ.get("NVIDIA_TF32_OVERRIDE", "1") != "0" + and torch.cuda.device_count() > 0 # at least 11.0 + ): + try: + # with TF32 enabled, the speed is ~8x faster, but the precision has ~2 digits less in the result + g_gpu = torch.Generator(device="cuda") + g_gpu.manual_seed(2147483647) + a_full = torch.randn(1024, 1024, dtype=torch.double, device="cuda", generator=g_gpu) + b_full = torch.randn(1024, 1024, dtype=torch.double, device="cuda", generator=g_gpu) + _tf32_enabled = (a_full.float() @ b_full.float() - a_full @ b_full).abs().max().item() > 0.001 # 0.1713 + except BaseException: + pass + print(f"tf32 enabled: {_tf32_enabled}") + return _tf32_enabled + + def skip_if_quick(obj): """ Skip the unit tests if environment variable `quick_test_var=true`. @@ -166,11 +216,15 @@ def __call__(self, obj): )(obj) -def make_nifti_image(array, affine=None): +def make_nifti_image(array: NdarrayOrTensor, affine=None): """ Create a temporary nifti image on the disk and return the image name. User is responsible for deleting the temporary file when done with it. """ + if isinstance(array, torch.Tensor): + array, *_ = convert_data_type(array, np.ndarray) + if isinstance(affine, torch.Tensor): + affine, *_ = convert_data_type(affine, np.ndarray) if affine is None: affine = np.eye(4) test_image = nib.Nifti1Image(array, affine) @@ -587,7 +641,7 @@ def query_memory(n=2): free_memory = np.asarray(free_memory, dtype=float).T free_memory[1] += free_memory[0] # combine 0/1 column measures ids = np.lexsort(free_memory)[:n] - except (FileNotFoundError, TypeError, IndexError): + except (TypeError, IndexError, OSError): ids = range(n) if isinstance(n, int) else [] return ",".join(f"{int(x)}" for x in ids)