diff --git a/.circleci/config.yml b/.circleci/config.yml index 081337bdcf9..f3fc23b7c92 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -107,6 +107,8 @@ jobs: - checkout - run: command: | + sudo apt-get update -y + sudo apt install -y libturbojpeg-dev pip install --user --progress-bar off numpy mypy pip install --user --progress-bar off --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html pip install --user --progress-bar off --editable . @@ -155,106 +157,7 @@ jobs: - store_test_results: path: build_results/ - binary_linux_conda_cuda: - <<: *binary_common - machine: - image: ubuntu-1604:201903-01 - resource_class: gpu.medium - steps: - - checkout_merge - - run: - name: Setup environment - command: | - set -ex - - curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add - - curl -L https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add - - - sudo apt-get update - - sudo apt-get install \ - apt-transport-https \ - ca-certificates \ - curl \ - gnupg-agent \ - software-properties-common - - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - - - sudo add-apt-repository \ - "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) \ - stable" - - sudo apt-get update - export DOCKER_VERSION="5:19.03.2~3-0~ubuntu-xenial" - sudo apt-get install docker-ce=${DOCKER_VERSION} docker-ce-cli=${DOCKER_VERSION} containerd.io=1.2.6-3 - - # Add the package repositories - distribution=$(. /etc/os-release;echo $ID$VERSION_ID) - curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - - curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list - - export NVIDIA_CONTAINER_VERSION="1.0.3-1" - sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit=${NVIDIA_CONTAINER_VERSION} - sudo systemctl restart docker - - DRIVER_FN="NVIDIA-Linux-x86_64-440.59.run" - wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN" - sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false) - nvidia-smi - - - run: - name: Pull docker image - command: | - set -ex - export DOCKER_IMAGE=pytorch/conda-cuda - echo Pulling docker image $DOCKER_IMAGE - docker pull $DOCKER_IMAGE >/dev/null - - - run: - name: Build and run tests - command: | - set -ex - - cd ${HOME}/project/ - - export DOCKER_IMAGE=pytorch/conda-cuda - export VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e UNICODE_ABI -e CU_VERSION" - - docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh - binary_win_conda: - <<: *binary_common - executor: windows-cpu - steps: - - checkout_merge - - run: - command: | - set -ex - source packaging/windows/internal/vc_install_helper.sh - eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" - conda activate base - conda install -yq conda-build "conda-package-handling!=1.5.0" - packaging/build_conda.sh - - store_test_results: - path: build_results/ - - binary_win_conda_cuda: - <<: *binary_common - executor: windows-gpu - steps: - - checkout_merge - - run: - command: | - set -ex - source packaging/windows/internal/vc_install_helper.sh - eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" - conda activate base - conda install -yq conda-build "conda-package-handling!=1.5.0" - packaging/build_conda.sh - - binary_win_conda_release: <<: 
*binary_common executor: windows-cpu steps: @@ -279,7 +182,7 @@ jobs: - store_test_results: path: build_results/ - binary_win_wheel_release: + binary_win_wheel: <<: *binary_common executor: windows-cpu steps: @@ -385,6 +288,159 @@ jobs: aws s3 cp "$pkg" "s3://pytorch/whl/${UPLOAD_CHANNEL}/<< parameters.subfolder >>" --acl public-read done + unittest_linux_cpu: + <<: *binary_common + docker: + - image: "pytorch/manylinux-cuda102" + resource_class: 2xlarge+ + steps: + - checkout + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + + key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_linux_gpu: + <<: *binary_common + machine: + image: ubuntu-1604-cuda-10.1:201909-23 + resource_class: gpu.small + environment: + image_name: "pytorch/manylinux-cuda101" + steps: + - checkout + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + + key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchvision + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post Process + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_windows_cpu: + <<: *binary_common + executor: + name: windows-cpu + steps: + - checkout + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. 
+ command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: .circleci/unittest/windows/scripts/setup_env.sh + - save_cache: + + key: env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/windows/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/windows/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/windows/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_windows_gpu: + <<: *binary_common + executor: + name: windows-gpu + environment: + CUDA_VERSION: "10.1" + steps: + - checkout + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: .circleci/unittest/windows/scripts/setup_env.sh + - save_cache: + + key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/windows/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/windows/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/windows/scripts/post_process.sh + - store_test_results: + path: test-results workflows: build: @@ -465,81 +521,101 @@ workflows: name: binary_macos_wheel_py3.8_cpu python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cpu filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.6_cpu python_version: '3.6' - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu92 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.6_cu92 python_version: '3.6' - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu101 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.6_cu101 python_version: '3.6' - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu102 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.6_cu102 python_version: '3.6' - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cpu filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.7_cpu python_version: '3.7' - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu92 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.7_cu92 python_version: '3.7' - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu101 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.7_cu101 
python_version: '3.7' - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu102 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.7_cu102 python_version: '3.7' - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cpu name: binary_win_wheel_py3.8_cpu python_version: '3.8' - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu92 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.8_cu92 python_version: '3.8' - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu101 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_wheel_py3.8_cu101 python_version: '3.8' - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu102 name: binary_win_wheel_py3.8_cu102 python_version: '3.8' @@ -618,100 +694,178 @@ workflows: name: binary_macos_conda_py3.8_cpu python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 - - binary_win_conda_release: + - binary_win_conda: cu_version: cpu filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.6_cpu python_version: '3.6' - - binary_win_conda_release: + - binary_win_conda: cu_version: cu92 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.6_cu92 python_version: '3.6' - - binary_win_conda_release: + - binary_win_conda: cu_version: cu101 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.6_cu101 python_version: '3.6' - - binary_win_conda_release: + - binary_win_conda: cu_version: cu102 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.6_cu102 python_version: '3.6' - - binary_win_conda_release: + - binary_win_conda: cu_version: cpu filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.7_cpu python_version: '3.7' - - binary_win_conda_release: + - binary_win_conda: cu_version: cu92 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.7_cu92 python_version: '3.7' - - binary_win_conda_release: + - binary_win_conda: cu_version: cu101 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.7_cu101 python_version: '3.7' - - binary_win_conda_release: + - binary_win_conda: cu_version: cu102 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.7_cu102 python_version: '3.7' - - binary_win_conda_release: + - binary_win_conda: cu_version: cpu name: binary_win_conda_py3.8_cpu python_version: '3.8' - - binary_win_conda_release: + - binary_win_conda: cu_version: cu92 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.8_cu92 python_version: '3.8' - - binary_win_conda_release: + - binary_win_conda: cu_version: cu101 filters: branches: only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: binary_win_conda_py3.8_cu101 python_version: '3.8' - - binary_win_conda_release: + - binary_win_conda: cu_version: cu102 name: binary_win_conda_py3.8_cu102 python_version: '3.8' - - binary_linux_conda_cuda: - name: torchvision_linux_py3.8_cu102_cuda - python_version: "3.8" - cu_version: "cu102" - - binary_win_conda: - name: torchvision_win_py3.6_cpu - python_version: "3.6" - cu_version: "cpu" - - binary_win_conda_cuda: 
- name: torchvision_win_py3.6_cu101 - python_version: "3.6" - cu_version: "cu101" - python_lint - python_type_check - clang_format + unittest: + jobs: + - unittest_linux_cpu: + cu_version: cpu + name: unittest_linux_cpu_py3.6 + python_version: '3.6' + - unittest_linux_cpu: + cu_version: cpu + name: unittest_linux_cpu_py3.7 + python_version: '3.7' + - unittest_linux_cpu: + cu_version: cpu + name: unittest_linux_cpu_py3.8 + python_version: '3.8' + - unittest_linux_gpu: + cu_version: cu101 + filters: + branches: + only: + - master + - nightly + name: unittest_linux_gpu_py3.6 + python_version: '3.6' + - unittest_linux_gpu: + cu_version: cu101 + filters: + branches: + only: + - master + - nightly + name: unittest_linux_gpu_py3.7 + python_version: '3.7' + - unittest_linux_gpu: + cu_version: cu101 + name: unittest_linux_gpu_py3.8 + python_version: '3.8' + - unittest_windows_cpu: + cu_version: cpu + name: unittest_windows_cpu_py3.6 + python_version: '3.6' + - unittest_windows_cpu: + cu_version: cpu + name: unittest_windows_cpu_py3.7 + python_version: '3.7' + - unittest_windows_cpu: + cu_version: cpu + name: unittest_windows_cpu_py3.8 + python_version: '3.8' + - unittest_windows_gpu: + cu_version: cu101 + filters: + branches: + only: + - master + - nightly + name: unittest_windows_gpu_py3.6 + python_version: '3.6' + - unittest_windows_gpu: + cu_version: cu101 + filters: + branches: + only: + - master + - nightly + name: unittest_windows_gpu_py3.7 + python_version: '3.7' + - unittest_windows_gpu: + cu_version: cu101 + name: unittest_windows_gpu_py3.8 + python_version: '3.8' nightly: jobs: - circleci_consistency @@ -723,6 +877,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_wheel_py3.6_cpu python_version: '3.6' wheel_docker_image: pytorch/manylinux-cuda102 @@ -742,6 +898,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_wheel_py3.6_cu92 python_version: '3.6' wheel_docker_image: pytorch/manylinux-cuda92 @@ -761,6 +919,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_wheel_py3.6_cu101 python_version: '3.6' wheel_docker_image: pytorch/manylinux-cuda101 @@ -780,6 +940,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_wheel_py3.6_cu102 python_version: '3.6' wheel_docker_image: pytorch/manylinux-cuda102 @@ -799,6 +961,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_wheel_py3.7_cpu python_version: '3.7' wheel_docker_image: pytorch/manylinux-cuda102 @@ -818,6 +982,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_wheel_py3.7_cu92 python_version: '3.7' wheel_docker_image: pytorch/manylinux-cuda92 @@ -837,6 +1003,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_wheel_py3.7_cu101 python_version: '3.7' wheel_docker_image: pytorch/manylinux-cuda101 @@ -856,6 +1024,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_wheel_py3.7_cu102 python_version: '3.7' wheel_docker_image: pytorch/manylinux-cuda102 @@ -875,6 +1045,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: 
nightly_binary_linux_wheel_py3.8_cpu python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 @@ -894,6 +1066,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_wheel_py3.8_cu92 python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda92 @@ -913,6 +1087,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_wheel_py3.8_cu101 python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda101 @@ -932,6 +1108,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_wheel_py3.8_cu102 python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 @@ -951,6 +1129,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_macos_wheel_py3.6_cpu python_version: '3.6' wheel_docker_image: pytorch/manylinux-cuda102 @@ -970,6 +1150,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_macos_wheel_py3.7_cpu python_version: '3.7' wheel_docker_image: pytorch/manylinux-cuda102 @@ -989,6 +1171,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_macos_wheel_py3.8_cpu python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 @@ -1003,11 +1187,13 @@ workflows: requires: - nightly_binary_macos_wheel_py3.8_cpu subfolder: '' - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cpu filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_wheel_py3.6_cpu python_version: '3.6' - binary_wheel_upload: @@ -1021,11 +1207,13 @@ workflows: requires: - nightly_binary_win_wheel_py3.6_cpu subfolder: cpu/ - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu92 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_wheel_py3.6_cu92 python_version: '3.6' - binary_wheel_upload: @@ -1039,11 +1227,13 @@ workflows: requires: - nightly_binary_win_wheel_py3.6_cu92 subfolder: cu92/ - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu101 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_wheel_py3.6_cu101 python_version: '3.6' - binary_wheel_upload: @@ -1057,11 +1247,13 @@ workflows: requires: - nightly_binary_win_wheel_py3.6_cu101 subfolder: cu101/ - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu102 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_wheel_py3.6_cu102 python_version: '3.6' - binary_wheel_upload: @@ -1075,11 +1267,13 @@ workflows: requires: - nightly_binary_win_wheel_py3.6_cu102 subfolder: cu102/ - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cpu filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_wheel_py3.7_cpu python_version: '3.7' - binary_wheel_upload: @@ -1093,11 +1287,13 @@ workflows: requires: - nightly_binary_win_wheel_py3.7_cpu subfolder: cpu/ - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu92 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_wheel_py3.7_cu92 python_version: '3.7' - binary_wheel_upload: @@ -1111,11 +1307,13 @@ workflows: requires: - nightly_binary_win_wheel_py3.7_cu92 subfolder: cu92/ - - 
binary_win_wheel_release: + - binary_win_wheel: cu_version: cu101 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_wheel_py3.7_cu101 python_version: '3.7' - binary_wheel_upload: @@ -1129,11 +1327,13 @@ workflows: requires: - nightly_binary_win_wheel_py3.7_cu101 subfolder: cu101/ - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu102 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_wheel_py3.7_cu102 python_version: '3.7' - binary_wheel_upload: @@ -1147,11 +1347,13 @@ workflows: requires: - nightly_binary_win_wheel_py3.7_cu102 subfolder: cu102/ - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cpu filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_wheel_py3.8_cpu python_version: '3.8' - binary_wheel_upload: @@ -1165,11 +1367,13 @@ workflows: requires: - nightly_binary_win_wheel_py3.8_cpu subfolder: cpu/ - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu92 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_wheel_py3.8_cu92 python_version: '3.8' - binary_wheel_upload: @@ -1183,11 +1387,13 @@ workflows: requires: - nightly_binary_win_wheel_py3.8_cu92 subfolder: cu92/ - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu101 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_wheel_py3.8_cu101 python_version: '3.8' - binary_wheel_upload: @@ -1201,11 +1407,13 @@ workflows: requires: - nightly_binary_win_wheel_py3.8_cu101 subfolder: cu101/ - - binary_win_wheel_release: + - binary_win_wheel: cu_version: cu102 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_wheel_py3.8_cu102 python_version: '3.8' - binary_wheel_upload: @@ -1224,6 +1432,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_conda_py3.6_cpu python_version: '3.6' wheel_docker_image: pytorch/manylinux-cuda102 @@ -1242,6 +1452,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_conda_py3.6_cu92 python_version: '3.6' wheel_docker_image: pytorch/manylinux-cuda92 @@ -1260,6 +1472,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_conda_py3.6_cu101 python_version: '3.6' wheel_docker_image: pytorch/manylinux-cuda101 @@ -1278,6 +1492,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_conda_py3.6_cu102 python_version: '3.6' wheel_docker_image: pytorch/manylinux-cuda102 @@ -1296,6 +1512,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_conda_py3.7_cpu python_version: '3.7' wheel_docker_image: pytorch/manylinux-cuda102 @@ -1314,6 +1532,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_conda_py3.7_cu92 python_version: '3.7' wheel_docker_image: pytorch/manylinux-cuda92 @@ -1332,6 +1552,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_conda_py3.7_cu101 python_version: '3.7' wheel_docker_image: pytorch/manylinux-cuda101 @@ -1350,6 +1572,8 @@ workflows: filters: branches: only: nightly + 
tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_conda_py3.7_cu102 python_version: '3.7' wheel_docker_image: pytorch/manylinux-cuda102 @@ -1368,6 +1592,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_conda_py3.8_cpu python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 @@ -1386,6 +1612,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_conda_py3.8_cu92 python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda92 @@ -1404,6 +1632,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_conda_py3.8_cu101 python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda101 @@ -1422,6 +1652,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_linux_conda_py3.8_cu102 python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 @@ -1440,6 +1672,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_macos_conda_py3.6_cpu python_version: '3.6' wheel_docker_image: pytorch/manylinux-cuda102 @@ -1458,6 +1692,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_macos_conda_py3.7_cpu python_version: '3.7' wheel_docker_image: pytorch/manylinux-cuda102 @@ -1476,6 +1712,8 @@ workflows: filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_macos_conda_py3.8_cpu python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 @@ -1489,11 +1727,13 @@ workflows: name: nightly_binary_macos_conda_py3.8_cpu_upload requires: - nightly_binary_macos_conda_py3.8_cpu - - binary_win_conda_release: + - binary_win_conda: cu_version: cpu filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_conda_py3.6_cpu python_version: '3.6' - binary_conda_upload: @@ -1506,11 +1746,13 @@ workflows: name: nightly_binary_win_conda_py3.6_cpu_upload requires: - nightly_binary_win_conda_py3.6_cpu - - binary_win_conda_release: + - binary_win_conda: cu_version: cu92 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_conda_py3.6_cu92 python_version: '3.6' - binary_conda_upload: @@ -1523,11 +1765,13 @@ workflows: name: nightly_binary_win_conda_py3.6_cu92_upload requires: - nightly_binary_win_conda_py3.6_cu92 - - binary_win_conda_release: + - binary_win_conda: cu_version: cu101 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_conda_py3.6_cu101 python_version: '3.6' - binary_conda_upload: @@ -1540,11 +1784,13 @@ workflows: name: nightly_binary_win_conda_py3.6_cu101_upload requires: - nightly_binary_win_conda_py3.6_cu101 - - binary_win_conda_release: + - binary_win_conda: cu_version: cu102 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_conda_py3.6_cu102 python_version: '3.6' - binary_conda_upload: @@ -1557,11 +1803,13 @@ workflows: name: nightly_binary_win_conda_py3.6_cu102_upload requires: - nightly_binary_win_conda_py3.6_cu102 - - binary_win_conda_release: + - binary_win_conda: cu_version: cpu filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_conda_py3.7_cpu python_version: '3.7' - 
binary_conda_upload: @@ -1574,11 +1822,13 @@ workflows: name: nightly_binary_win_conda_py3.7_cpu_upload requires: - nightly_binary_win_conda_py3.7_cpu - - binary_win_conda_release: + - binary_win_conda: cu_version: cu92 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_conda_py3.7_cu92 python_version: '3.7' - binary_conda_upload: @@ -1591,11 +1841,13 @@ workflows: name: nightly_binary_win_conda_py3.7_cu92_upload requires: - nightly_binary_win_conda_py3.7_cu92 - - binary_win_conda_release: + - binary_win_conda: cu_version: cu101 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_conda_py3.7_cu101 python_version: '3.7' - binary_conda_upload: @@ -1608,11 +1860,13 @@ workflows: name: nightly_binary_win_conda_py3.7_cu101_upload requires: - nightly_binary_win_conda_py3.7_cu101 - - binary_win_conda_release: + - binary_win_conda: cu_version: cu102 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_conda_py3.7_cu102 python_version: '3.7' - binary_conda_upload: @@ -1625,11 +1879,13 @@ workflows: name: nightly_binary_win_conda_py3.7_cu102_upload requires: - nightly_binary_win_conda_py3.7_cu102 - - binary_win_conda_release: + - binary_win_conda: cu_version: cpu filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_conda_py3.8_cpu python_version: '3.8' - binary_conda_upload: @@ -1642,11 +1898,13 @@ workflows: name: nightly_binary_win_conda_py3.8_cpu_upload requires: - nightly_binary_win_conda_py3.8_cpu - - binary_win_conda_release: + - binary_win_conda: cu_version: cu92 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_conda_py3.8_cu92 python_version: '3.8' - binary_conda_upload: @@ -1659,11 +1917,13 @@ workflows: name: nightly_binary_win_conda_py3.8_cu92_upload requires: - nightly_binary_win_conda_py3.8_cu92 - - binary_win_conda_release: + - binary_win_conda: cu_version: cu101 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_conda_py3.8_cu101 python_version: '3.8' - binary_conda_upload: @@ -1676,11 +1936,13 @@ workflows: name: nightly_binary_win_conda_py3.8_cu101_upload requires: - nightly_binary_win_conda_py3.8_cu101 - - binary_win_conda_release: + - binary_win_conda: cu_version: cu102 filters: branches: only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ name: nightly_binary_win_conda_py3.8_cu102 python_version: '3.8' - binary_conda_upload: diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index 620e00807d4..f63c3f408ba 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -107,6 +107,8 @@ jobs: - checkout - run: command: | + sudo apt-get update -y + sudo apt install -y libturbojpeg-dev pip install --user --progress-bar off numpy mypy pip install --user --progress-bar off --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html pip install --user --progress-bar off --editable . 
@@ -155,106 +157,7 @@ jobs: - store_test_results: path: build_results/ - binary_linux_conda_cuda: - <<: *binary_common - machine: - image: ubuntu-1604:201903-01 - resource_class: gpu.medium - steps: - - checkout_merge - - run: - name: Setup environment - command: | - set -ex - - curl -L https://packagecloud.io/circleci/trusty/gpgkey | sudo apt-key add - - curl -L https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add - - - sudo apt-get update - - sudo apt-get install \ - apt-transport-https \ - ca-certificates \ - curl \ - gnupg-agent \ - software-properties-common - - curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - - - sudo add-apt-repository \ - "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ - $(lsb_release -cs) \ - stable" - - sudo apt-get update - export DOCKER_VERSION="5:19.03.2~3-0~ubuntu-xenial" - sudo apt-get install docker-ce=${DOCKER_VERSION} docker-ce-cli=${DOCKER_VERSION} containerd.io=1.2.6-3 - - # Add the package repositories - distribution=$(. /etc/os-release;echo $ID$VERSION_ID) - curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - - curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list - - export NVIDIA_CONTAINER_VERSION="1.0.3-1" - sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit=${NVIDIA_CONTAINER_VERSION} - sudo systemctl restart docker - - DRIVER_FN="NVIDIA-Linux-x86_64-440.59.run" - wget "https://s3.amazonaws.com/ossci-linux/nvidia_driver/$DRIVER_FN" - sudo /bin/bash "$DRIVER_FN" -s --no-drm || (sudo cat /var/log/nvidia-installer.log && false) - nvidia-smi - - - run: - name: Pull docker image - command: | - set -ex - export DOCKER_IMAGE=pytorch/conda-cuda - echo Pulling docker image $DOCKER_IMAGE - docker pull $DOCKER_IMAGE >/dev/null - - - run: - name: Build and run tests - command: | - set -ex - - cd ${HOME}/project/ - - export DOCKER_IMAGE=pytorch/conda-cuda - export VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e UNICODE_ABI -e CU_VERSION" - - docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh - binary_win_conda: - <<: *binary_common - executor: windows-cpu - steps: - - checkout_merge - - run: - command: | - set -ex - source packaging/windows/internal/vc_install_helper.sh - eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" - conda activate base - conda install -yq conda-build "conda-package-handling!=1.5.0" - packaging/build_conda.sh - - store_test_results: - path: build_results/ - - binary_win_conda_cuda: - <<: *binary_common - executor: windows-gpu - steps: - - checkout_merge - - run: - command: | - set -ex - source packaging/windows/internal/vc_install_helper.sh - eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" - conda activate base - conda install -yq conda-build "conda-package-handling!=1.5.0" - packaging/build_conda.sh - - binary_win_conda_release: <<: *binary_common executor: windows-cpu steps: @@ -279,7 +182,7 @@ jobs: - store_test_results: path: build_results/ - binary_win_wheel_release: + binary_win_wheel: <<: *binary_common executor: windows-cpu steps: @@ -385,29 +288,173 @@ jobs: aws s3 cp "$pkg" "s3://pytorch/whl/${UPLOAD_CHANNEL}/<< parameters.subfolder >>" --acl public-read done + unittest_linux_cpu: + <<: *binary_common + docker: + - image: "pytorch/manylinux-cuda102" + resource_class: 2xlarge+ + steps: + - checkout + - run: + 
name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + {% raw %} + keys: + - env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + - run: + name: Setup + command: .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + {% raw %} + key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_linux_gpu: + <<: *binary_common + machine: + image: ubuntu-1604-cuda-10.1:201909-23 + resource_class: gpu.small + environment: + image_name: "pytorch/manylinux-cuda101" + steps: + - checkout + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + {% raw %} + keys: + - env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + - run: + name: Setup + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + {% raw %} + key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + paths: + - conda + - env + - run: + name: Install torchvision + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post Process + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_windows_cpu: + <<: *binary_common + executor: + name: windows-cpu + steps: + - checkout + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. 
+ command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + {% raw %} + keys: + - env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + - run: + name: Setup + command: .circleci/unittest/windows/scripts/setup_env.sh + - save_cache: + {% raw %} + key: env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/windows/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/windows/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/windows/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_windows_gpu: + <<: *binary_common + executor: + name: windows-gpu + environment: + CUDA_VERSION: "10.1" + steps: + - checkout + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + {% raw %} + keys: + - env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + - run: + name: Setup + command: .circleci/unittest/windows/scripts/setup_env.sh + - save_cache: + {% raw %} + key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + {% endraw %} + paths: + - conda + - env + - run: + name: Install torchvision + command: .circleci/unittest/windows/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/windows/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/windows/scripts/post_process.sh + - store_test_results: + path: test-results workflows: build: {%- if True %} jobs: - circleci_consistency - {{ workflows(windows_latest_only=True) }} - - binary_linux_conda_cuda: - name: torchvision_linux_py3.8_cu102_cuda - python_version: "3.8" - cu_version: "cu102" - - binary_win_conda: - name: torchvision_win_py3.6_cpu - python_version: "3.6" - cu_version: "cpu" - - binary_win_conda_cuda: - name: torchvision_win_py3.6_cu101 - python_version: "3.6" - cu_version: "cu101" + {{ build_workflows(windows_latest_only=True) }} - python_lint - python_type_check - clang_format + unittest: + jobs: + {{ unittest_workflows() }} nightly: {%- endif %} jobs: @@ -415,4 +462,4 @@ workflows: - python_lint - python_type_check - clang_format - {{ workflows(prefix="nightly_", filter_branch="nightly", upload=True) }} + {{ build_workflows(prefix="nightly_", filter_branch="nightly", upload=True) }} diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py index 1e929242974..cf940f42f04 100755 --- a/.circleci/regenerate.py +++ b/.circleci/regenerate.py @@ -19,11 +19,14 @@ import os.path -def workflows(prefix='', filter_branch=None, upload=False, indentation=6, windows_latest_only=False): +PYTHON_VERSIONS = ["3.6", "3.7", "3.8"] + + +def build_workflows(prefix='', filter_branch=None, upload=False, indentation=6, windows_latest_only=False): w = [] for btype in ["wheel", "conda"]: for os_type in ["linux", "macos", "win"]: - python_versions = ["3.6", "3.7", "3.8"] + python_versions = PYTHON_VERSIONS cu_versions = 
(["cpu", "cu92", "cu101", "cu102"] if os_type == "linux" or os_type == "win" else ["cpu"]) for python_version in python_versions: for cu_version in cu_versions: @@ -86,12 +89,25 @@ def generate_base_workflow(base_workflow_name, python_version, cu_version, d["wheel_docker_image"] = get_manylinux_image(cu_version) if filter_branch is not None: - d["filters"] = {"branches": {"only": filter_branch}} + d["filters"] = { + "branches": { + "only": filter_branch + }, + "tags": { + # Using a raw string here to avoid having to escape + # anything + "only": r"/v[0-9]+(\.[0-9]+)*-rc[0-9]+/" + } + } - w = f"binary_{os_type}_{btype}_release" if os_type == "win" else f"binary_{os_type}_{btype}" + w = f"binary_{os_type}_{btype}" return {w: d} +def gen_filter_branch_tree(*branches): + return {"branches": {"only": [b for b in branches]}} + + def generate_upload_workflow(base_workflow_name, os_type, btype, cu_version, *, filter_branch=None): d = { "name": f"{base_workflow_name}_upload", @@ -122,6 +138,28 @@ def indent(indentation, data_list): yaml.dump(data_list, default_flow_style=False).splitlines()) +def unittest_workflows(indentation=6): + jobs = [] + for os_type in ["linux", "windows"]: + for device_type in ["cpu", "gpu"]: + for i, python_version in enumerate(PYTHON_VERSIONS): + job = { + "name": f"unittest_{os_type}_{device_type}_py{python_version}", + "python_version": python_version, + } + + if device_type == 'gpu': + if python_version != "3.8": + job['filters'] = gen_filter_branch_tree('master', 'nightly') + job['cu_version'] = 'cu101' + else: + job['cu_version'] = 'cpu' + + jobs.append({f"unittest_{os_type}_{device_type}": job}) + + return indent(indentation, jobs) + + if __name__ == "__main__": d = os.path.dirname(__file__) env = jinja2.Environment( @@ -131,4 +169,7 @@ def indent(indentation, data_list): ) with open(os.path.join(d, 'config.yml'), 'w') as f: - f.write(env.get_template('config.yml.in').render(workflows=workflows)) + f.write(env.get_template('config.yml.in').render( + build_workflows=build_workflows, + unittest_workflows=unittest_workflows, + )) diff --git a/.circleci/unittest/linux/scripts/environment.yml b/.circleci/unittest/linux/scripts/environment.yml new file mode 100644 index 00000000000..96b66319ed6 --- /dev/null +++ b/.circleci/unittest/linux/scripts/environment.yml @@ -0,0 +1,16 @@ +channels: + - defaults +dependencies: + - numpy + - pytest + - pytest-cov + - codecov + - pip + - libpng + - jpeg + - ca-certificates + - pip: + - future + - pillow>=4.1.1 + - scipy + - av \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/install.sh b/.circleci/unittest/linux/scripts/install.sh new file mode 100755 index 00000000000..65273c75152 --- /dev/null +++ b/.circleci/unittest/linux/scripts/install.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +unset PYTORCH_VERSION +# For unittest, nightly PyTorch is used as the following section, +# so no need to set PYTORCH_VERSION. +# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. 
+ +set -e + +eval "$(./conda/bin/conda shell.bash hook)" +conda activate ./env + +if [ "${CU_VERSION:-}" == cpu ] ; then + cudatoolkit="cpuonly" +else + if [[ ${#CU_VERSION} -eq 4 ]]; then + CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}" + elif [[ ${#CU_VERSION} -eq 5 ]]; then + CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}" + fi + echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION" + version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")" + cudatoolkit="cudatoolkit=${version}" +fi +printf "Installing PyTorch with %s\n" "${cudatoolkit}" +conda install -y -c pytorch-nightly pytorch "${cudatoolkit}" + +printf "* Installing torchvision\n" +python setup.py develop \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/post_process.sh b/.circleci/unittest/linux/scripts/post_process.sh new file mode 100755 index 00000000000..b05be6da37e --- /dev/null +++ b/.circleci/unittest/linux/scripts/post_process.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -e + +eval "$(./conda/bin/conda shell.bash hook)" +conda activate ./env + +codecov \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/run_test.sh b/.circleci/unittest/linux/scripts/run_test.sh new file mode 100755 index 00000000000..c572cfea2c5 --- /dev/null +++ b/.circleci/unittest/linux/scripts/run_test.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -e + +eval "$(./conda/bin/conda shell.bash hook)" +conda activate ./env + +python -m torch.utils.collect_env +pytest --cov=torchvision --junitxml=test-results/junit.xml -v --durations 20 test \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/setup_env.sh b/.circleci/unittest/linux/scripts/setup_env.sh new file mode 100755 index 00000000000..356c806b240 --- /dev/null +++ b/.circleci/unittest/linux/scripts/setup_env.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# This script is for setting up environment in which unit test is ran. +# To speed up the CI time, the resulting environment is cached. +# +# Do not install PyTorch and torchvision here, otherwise they also get cached. + +set -e + +this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +root_dir="$(git rev-parse --show-toplevel)" +conda_dir="${root_dir}/conda" +env_dir="${root_dir}/env" + +cd "${root_dir}" + +# 1. Install conda at ./conda +if [ ! -d "${conda_dir}" ]; then + printf "* Installing conda\n" + wget -O miniconda.sh http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh + bash ./miniconda.sh -b -f -p "${conda_dir}" +fi +eval "$(${conda_dir}/bin/conda shell.bash hook)" + +# 2. Create test environment at ./env +if [ ! -d "${env_dir}" ]; then + printf "* Creating a test environment\n" + conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION" +fi +conda activate "${env_dir}" + +# 3. 
Install Conda dependencies +printf "* Installing dependencies (except PyTorch)\n" +conda env update --file "${this_dir}/environment.yml" --prune diff --git a/.circleci/unittest/windows/scripts/environment.yml b/.circleci/unittest/windows/scripts/environment.yml new file mode 100644 index 00000000000..49795f73bc3 --- /dev/null +++ b/.circleci/unittest/windows/scripts/environment.yml @@ -0,0 +1,16 @@ +channels: + - defaults +dependencies: + - numpy + - pytest + - pytest-cov + - codecov + - pip + - libpng + - jpeg + - ca-certificates + - pip: + - future + - pillow>=4.1.1 + - scipy==1.4.1 + - av diff --git a/.circleci/unittest/windows/scripts/install.sh b/.circleci/unittest/windows/scripts/install.sh new file mode 100644 index 00000000000..b0f585a5483 --- /dev/null +++ b/.circleci/unittest/windows/scripts/install.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +unset PYTORCH_VERSION +# For unittest, nightly PyTorch is used as the following section, +# so no need to set PYTORCH_VERSION. +# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. + +set -e + +this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" +conda activate ./env + +if [ "${CU_VERSION:-}" == cpu ] ; then + cudatoolkit="cpuonly" +else + if [[ ${#CU_VERSION} -eq 4 ]]; then + CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}" + elif [[ ${#CU_VERSION} -eq 5 ]]; then + CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}" + fi + echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION" + version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")" + cudatoolkit="cudatoolkit=${version}" +fi +printf "Installing PyTorch with %s\n" "${cudatoolkit}" +conda install -y -c pytorch-nightly pytorch "${cudatoolkit}" + +printf "* Installing torchvision\n" +"$this_dir/vc_env_helper.bat" python setup.py develop \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/install_conda.bat b/.circleci/unittest/windows/scripts/install_conda.bat new file mode 100644 index 00000000000..6612fba56f6 --- /dev/null +++ b/.circleci/unittest/windows/scripts/install_conda.bat @@ -0,0 +1 @@ +start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda% \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/post_process.sh b/.circleci/unittest/windows/scripts/post_process.sh new file mode 100644 index 00000000000..b132113194b --- /dev/null +++ b/.circleci/unittest/windows/scripts/post_process.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -e + +eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" +conda activate ./env + +codecov diff --git a/.circleci/unittest/windows/scripts/run_test.sh b/.circleci/unittest/windows/scripts/run_test.sh new file mode 100644 index 00000000000..34de9339429 --- /dev/null +++ b/.circleci/unittest/windows/scripts/run_test.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -e + +eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" +conda activate ./env + +python -m torch.utils.collect_env +pytest --cov=torchvision --junitxml=test-results/junit.xml -v --durations 20 test \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/setup_env.sh b/.circleci/unittest/windows/scripts/setup_env.sh new file mode 100644 index 00000000000..9b8d26a3e94 --- /dev/null +++ b/.circleci/unittest/windows/scripts/setup_env.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# This script is for setting up environment in which unit test is ran. 
+# To speed up the CI time, the resulting environment is cached. +# +# Do not install PyTorch and torchvision here, otherwise they also get cached. + +set -e + +this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +root_dir="$(git rev-parse --show-toplevel)" +conda_dir="${root_dir}/conda" +env_dir="${root_dir}/env" + +cd "${root_dir}" + +# 1. Install conda at ./conda +if [ ! -d "${conda_dir}" ]; then + printf "* Installing conda\n" + export tmp_conda="$(echo $conda_dir | tr '/' '\\')" + export miniconda_exe="$(echo $root_dir | tr '/' '\\')\\miniconda.exe" + curl --output miniconda.exe https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -O + "$this_dir/install_conda.bat" + unset tmp_conda + unset miniconda_exe +fi + +eval "$(${conda_dir}/Scripts/conda.exe 'shell.bash' 'hook')" + +# 2. Create test environment at ./env +if [ ! -d "${env_dir}" ]; then + printf "* Creating a test environment\n" + conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION" +fi +conda activate "${env_dir}" + +# 3. Install Conda dependencies +printf "* Installing dependencies (except PyTorch)\n" +conda env update --file "${this_dir}/environment.yml" --prune \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/vc_env_helper.bat b/.circleci/unittest/windows/scripts/vc_env_helper.bat new file mode 100644 index 00000000000..9410135677a --- /dev/null +++ b/.circleci/unittest/windows/scripts/vc_env_helper.bat @@ -0,0 +1,39 @@ +@echo on + +set VC_VERSION_LOWER=16 +set VC_VERSION_UPPER=17 + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VS15INSTALLDIR=%%i" + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto vswhere + ) +) + +:vswhere +if "%VSDEVCMD_ARGS%" == "" ( + call "%VS15VCVARSALL%" x64 || exit /b 1 +) else ( + call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 +) + +@echo on + +set DISTUTILS_USE_SDK=1 + +set args=%1 +shift +:start +if [%1] == [] goto done +set args=%args% %1 +shift +goto start + +:done +if "%args%" == "" ( + echo Usage: vc_env_helper.bat [command] [args] + echo e.g. 
vc_env_helper.bat cl /c test.cpp +) + +%args% || exit /b 1 diff --git a/.github/pytorch-probot.yml b/.github/pytorch-probot.yml new file mode 100644 index 00000000000..27d0f2a1f0b --- /dev/null +++ b/.github/pytorch-probot.yml @@ -0,0 +1 @@ +tracking_issue: 2447 diff --git a/.gitignore b/.gitignore index 6bea8609b93..6d649a7c019 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,5 @@ htmlcov *.swo gen.yml .mypy_cache +.vscode/ +*.orig diff --git a/.travis.yml b/.travis.yml index 53f66794c48..f5656f926f1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,11 @@ language: python -dist: xenial -matrix: +os: + - linux + +dist: bionic + +jobs: include: - python: "3.6" env: IMAGE_BACKEND=Pillow-SIMD @@ -9,6 +13,7 @@ matrix: before_install: - sudo apt-get update + - sudo apt-get install -y libpng-dev libjpeg-turbo8-dev - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; - bash miniconda.sh -b -p $HOME/miniconda - export PATH="$HOME/miniconda/bin:$PATH" diff --git a/CMakeLists.txt b/CMakeLists.txt index fa50f155ce4..5d2e86291f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,22 +11,30 @@ if(WITH_CUDA) endif() find_package(Python3 COMPONENTS Development) + find_package(Torch REQUIRED) +find_package(PNG REQUIRED) +find_package(JPEG REQUIRED) + file(GLOB HEADERS torchvision/csrc/*.h) -file(GLOB OPERATOR_SOURCES torchvision/csrc/cpu/*.h torchvision/csrc/cpu/*.cpp torchvision/csrc/*.cpp) +# Image extension +file(GLOB IMAGE_HEADERS torchvision/csrc/cpu/image/*.h) +file(GLOB IMAGE_SOURCES torchvision/csrc/cpu/image/*.cpp) +file(GLOB OPERATOR_SOURCES torchvision/csrc/cpu/*.h torchvision/csrc/cpu/*.cpp ${IMAGE_HEADERS} ${IMAGE_SOURCES} ${HEADERS} torchvision/csrc/*.cpp) if(WITH_CUDA) file(GLOB OPERATOR_SOURCES ${OPERATOR_SOURCES} torchvision/csrc/cuda/*.h torchvision/csrc/cuda/*.cu) endif() file(GLOB MODELS_HEADERS torchvision/csrc/models/*.h) file(GLOB MODELS_SOURCES torchvision/csrc/models/*.h torchvision/csrc/models/*.cpp) -add_library(${PROJECT_NAME} SHARED ${MODELS_SOURCES} ${OPERATOR_SOURCES}) -target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES} Python3::Python) +add_library(${PROJECT_NAME} SHARED ${MODELS_SOURCES} ${OPERATOR_SOURCES} ${IMAGE_SOURCES}) +target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES} ${PNG_LIBRARY} ${JPEG_LIBRARIES} Python3::Python) +# target_link_libraries(${PROJECT_NAME} PRIVATE ${PNG_LIBRARY} Python3::Python) set_target_properties(${PROJECT_NAME} PROPERTIES EXPORT_NAME TorchVision) target_include_directories(${PROJECT_NAME} INTERFACE - $ + $ $) include(GNUInstallDirs) @@ -61,7 +69,7 @@ install(FILES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cpu) if(WITH_CUDA) install(FILES - torchvision/csrc/cuda/vision_cuda.h + torchvision/csrc/cuda/vision_cuda.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cuda) endif() install(FILES ${MODELS_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/models) diff --git a/README.rst b/README.rst index 3150a3023ad..1d55e9387c3 100644 --- a/README.rst +++ b/README.rst @@ -78,13 +78,23 @@ Torchvision currently supports the following image backends: * `accimage`_ - if installed can be activated by calling :code:`torchvision.set_image_backend('accimage')` +* `libpng`_ - can be installed via conda :code:`conda install libpng` or any of the package managers for debian-based and RHEL-based Linux distributions. 
+ +* `libjpeg`_ - can be installed via conda :code:`conda install jpeg` or any of the package managers for debian-based and RHEL-based Linux distributions. `libjpeg-turbo`_ can be used as well. + +**Notes:** ``libpng`` and ``libjpeg`` must be available at compilation time in order for the corresponding image backends to be available at runtime. Make sure they are available in the standard library locations; +otherwise, add the include and library paths to the environment variables ``TORCHVISION_INCLUDE`` and ``TORCHVISION_LIBRARY``, respectively. + +.. _libpng : http://www.libpng.org/pub/png/libpng.html .. _Pillow : https://python-pillow.org/ .. _Pillow-SIMD : https://github.com/uploadcare/pillow-simd .. _accimage: https://github.com/pytorch/accimage +.. _libjpeg: http://ijg.org/ +.. _libjpeg-turbo: https://libjpeg-turbo.org/ C++ API ======= -TorchVision also offers a C++ API that contains C++ equivalent of python models. +TorchVision also offers a C++ API that contains C++ equivalent of python models. Installation From source: @@ -94,7 +104,7 @@ Installation From source: cd build # Add -DWITH_CUDA=on support for the CUDA if needed cmake .. - make + make make install Once installed, the library can be accessed in cmake (after properly configuring ``CMAKE_PREFIX_PATH``) via the :code:`TorchVision::TorchVision` target: diff --git a/docs/source/ops.rst b/docs/source/ops.rst index ec87d02556e..8c619334582 100644 --- a/docs/source/ops.rst +++ b/docs/source/ops.rst @@ -6,12 +6,20 @@ torchvision.ops :mod:`torchvision.ops` implements operators that are specific for Computer Vision. .. note:: - Those operators currently do not support TorchScript. + All operators have native support for TorchScript. .. autofunction:: nms .. autofunction:: roi_align +.. autofunction:: ps_roi_align .. autofunction:: roi_pool +.. autofunction:: ps_roi_pool +.. autofunction:: deform_conv2d .. autoclass:: RoIAlign +.. autoclass:: PSRoIAlign .. autoclass:: RoIPool +.. autoclass:: PSRoIPool +.. autoclass:: DeformConv2d +.. autoclass:: MultiScaleRoIAlign +.. autoclass:: FeaturePyramidNetwork diff --git a/packaging/build_conda.sh b/packaging/build_conda.sh index ba2f25d2ff1..c4f5cc860a2 100755 --- a/packaging/build_conda.sh +++ b/packaging/build_conda.sh @@ -5,7 +5,7 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" . "$script_dir/pkg_helpers.bash" export BUILD_TYPE=conda -setup_env 0.7.0 +setup_env 0.8.0 export SOURCE_ROOT_DIR="$PWD" setup_conda_pytorch_constraint setup_conda_cudatoolkit_constraint diff --git a/packaging/build_wheel.sh b/packaging/build_wheel.sh index 3b0e0f46cec..043d2ed7ea9 100755 --- a/packaging/build_wheel.sh +++ b/packaging/build_wheel.sh @@ -5,11 +5,33 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" . 
"$script_dir/pkg_helpers.bash" export BUILD_TYPE=wheel -setup_env 0.7.0 +setup_env 0.8.0 setup_wheel_python -pip_install numpy pyyaml future "ninja==1.9.0.post1" +pip_install numpy pyyaml future ninja setup_pip_pytorch_version python setup.py clean + +# Copy binaries to be included in the wheel distribution +if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then + python_exec="$(which python)" + bin_path=$(dirname $python_exec) + env_path=$(dirname $bin_path) + if [[ "$(uname)" == Darwin ]]; then + # Include LibPNG + cp "$env_path/lib/libpng16.dylib" torchvision + # Include LibJPEG + cp "$env_path/lib/libjpeg.dylib" torchvision + else + cp "$bin_path/Library/bin/libpng16.dll" torchvision + cp "$bin_path/Library/bin/libjpeg.dll" torchvision + fi +else + # Include LibPNG + cp "/usr/lib64/libpng.so" torchvision + # Include LibJPEG + cp "/usr/lib64/libjpeg.so" torchvision +fi + if [[ "$OSTYPE" == "msys" ]]; then IS_WHEEL=1 "$script_dir/windows/internal/vc_env_helper.bat" python setup.py bdist_wheel else diff --git a/packaging/pkg_helpers.bash b/packaging/pkg_helpers.bash index d8bdfc61e5d..ff7abdab7fb 100644 --- a/packaging/pkg_helpers.bash +++ b/packaging/pkg_helpers.bash @@ -170,7 +170,11 @@ setup_wheel_python() { conda env remove -n "env$PYTHON_VERSION" || true conda create -yn "env$PYTHON_VERSION" python="$PYTHON_VERSION" conda activate "env$PYTHON_VERSION" + # Install libpng from Anaconda (defaults) + conda install libpng jpeg -y else + # Install native CentOS libPNG + yum install -y libpng-devel libjpeg-turbo-devel case "$PYTHON_VERSION" in 2.7) if [[ -n "$UNICODE_ABI" ]]; then @@ -214,7 +218,7 @@ setup_pip_pytorch_version() { else pip_install "torch==$PYTORCH_VERSION$PYTORCH_VERSION_SUFFIX" \ -f https://download.pytorch.org/whl/torch_stable.html \ - -f https://download.pytorch.org/whl/test/torch_test.html + -f https://download.pytorch.org/whl/test/torch_test.html \ -f https://download.pytorch.org/whl/nightly/torch_nightly.html fi } diff --git a/packaging/torchvision/meta.yaml b/packaging/torchvision/meta.yaml index 8be8eabdd09..1b61464f01e 100644 --- a/packaging/torchvision/meta.yaml +++ b/packaging/torchvision/meta.yaml @@ -8,6 +8,8 @@ source: requirements: build: - {{ compiler('c') }} # [win] + - libpng + - jpeg host: - python @@ -18,6 +20,8 @@ requirements: run: - python + - libpng + - jpeg - pillow >=4.1.1 - numpy >=1.11 {{ environ.get('CONDA_PYTORCH_CONSTRAINT') }} @@ -47,8 +51,6 @@ test: - av - ca-certificates {{ environ.get('CONDA_TYPING_CONSTRAINT') }} - commands: - pytest . 
--verbose --junitxml={{ environ.get("CONDA_PYTORCH_BUILD_RESULTS_DIRECTORY", "build/test_results.xml" )}} about: diff --git a/packaging/windows/internal/build_conda.bat b/packaging/windows/internal/build_conda.bat index e66d5596298..18f0bf13467 100644 --- a/packaging/windows/internal/build_conda.bat +++ b/packaging/windows/internal/build_conda.bat @@ -1,4 +1,4 @@ -if "%VC_YEAR%" == "2017" set VSDEVCMD_ARGS=-vcvars_ver=14.11 +if "%VC_YEAR%" == "2017" set VSDEVCMD_ARGS=-vcvars_ver=14.13 if "%VC_YEAR%" == "2017" powershell packaging/windows/internal/vs2017_install.ps1 if errorlevel 1 exit /b 1 diff --git a/packaging/windows/internal/build_wheels.bat b/packaging/windows/internal/build_wheels.bat index 869e594b395..a321c3ce6e7 100644 --- a/packaging/windows/internal/build_wheels.bat +++ b/packaging/windows/internal/build_wheels.bat @@ -1,4 +1,4 @@ -if "%VC_YEAR%" == "2017" set VSDEVCMD_ARGS=-vcvars_ver=14.11 +if "%VC_YEAR%" == "2017" set VSDEVCMD_ARGS=-vcvars_ver=14.13 if "%VC_YEAR%" == "2017" powershell packaging/windows/internal/vs2017_install.ps1 if errorlevel 1 exit /b 1 diff --git a/packaging/windows/internal/nightly_defaults.bat b/packaging/windows/internal/nightly_defaults.bat index 49a79e2b60e..8bdac633adf 100644 --- a/packaging/windows/internal/nightly_defaults.bat +++ b/packaging/windows/internal/nightly_defaults.bat @@ -144,7 +144,7 @@ if "%CUDA_VERSION%" == "cpu" ( :: pytorch-nightly==1.0.0.dev20180908 :: or in manylinux like :: torch_nightly-1.0.0.dev20180908-cp27-cp27m-linux_x86_64.whl -if "%TORCHVISION_BUILD_VERSION%" == "" set TORCHVISION_BUILD_VERSION=0.7.0.dev%NIGHTLIES_DATE_COMPACT% +if "%TORCHVISION_BUILD_VERSION%" == "" set TORCHVISION_BUILD_VERSION=0.8.0.dev%NIGHTLIES_DATE_COMPACT% if "%~1" == "Wheels" ( if not "%CUDA_VERSION%" == "102" ( diff --git a/packaging/windows/internal/vc_install_helper.sh b/packaging/windows/internal/vc_install_helper.sh index 9910677acac..cdae18065b9 100644 --- a/packaging/windows/internal/vc_install_helper.sh +++ b/packaging/windows/internal/vc_install_helper.sh @@ -4,7 +4,7 @@ set -ex if [[ "$CU_VERSION" == "cu92" ]]; then export VC_YEAR=2017 - export VSDEVCMD_ARGS="-vcvars_ver=14.11" + export VSDEVCMD_ARGS="-vcvars_ver=14.13" powershell packaging/windows/internal/vs2017_install.ps1 elif [[ "$CU_VERSION" == "cu100" ]]; then export VC_YEAR=2017 diff --git a/packaging/windows/internal/vs2017_install.ps1 b/packaging/windows/internal/vs2017_install.ps1 index 6bbb1deb310..3e953de1ab7 100644 --- a/packaging/windows/internal/vs2017_install.ps1 +++ b/packaging/windows/internal/vs2017_install.ps1 @@ -1,6 +1,6 @@ $VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe" $VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools", - "--add Microsoft.VisualStudio.Component.VC.Tools.14.11", + "--add Microsoft.VisualStudio.Component.VC.Tools.14.13", "--add Microsoft.Component.MSBuild", "--add Microsoft.VisualStudio.Component.Roslyn.Compiler", "--add Microsoft.VisualStudio.Component.TextTemplating", diff --git a/references/segmentation/utils.py b/references/segmentation/utils.py index d9251b72b9f..b67c18052fb 100644 --- a/references/segmentation/utils.py +++ b/references/segmentation/utils.py @@ -1,6 +1,5 @@ from collections import defaultdict, deque import datetime -import math import time import torch import torch.distributed as dist diff --git a/setup.py b/setup.py index 85a692120b3..13c7a98ec74 100644 --- a/setup.py +++ b/setup.py @@ -2,8 +2,9 @@ import io import re import sys +import csv from 
setuptools import setup, find_packages -from pkg_resources import get_distribution, DistributionNotFound +from pkg_resources import parse_version, get_distribution, DistributionNotFound import subprocess import distutils.command.clean import distutils.spawn @@ -30,7 +31,7 @@ def get_dist(pkgname): return None -version = '0.7.0a0' +version = '0.8.0a0' sha = 'Unknown' package_name = 'torchvision' @@ -76,6 +77,66 @@ def write_version_file(): requirements.append(pillow_req + pillow_ver) +def find_library(name, vision_include): + this_dir = os.path.dirname(os.path.abspath(__file__)) + build_prefix = os.environ.get('BUILD_PREFIX', None) + is_conda_build = build_prefix is not None + + library_found = False + conda_installed = False + lib_folder = None + include_folder = None + library_header = '{0}.h'.format(name) + + # Lookup in TORCHVISION_INCLUDE or in the package file + package_path = [os.path.join(this_dir, 'torchvision')] + for folder in vision_include + package_path: + candidate_path = os.path.join(folder, library_header) + library_found = os.path.exists(candidate_path) + if library_found: + break + + if not library_found: + print('Running build on conda-build: {0}'.format(is_conda_build)) + if is_conda_build: + # Add conda headers/libraries + if os.name == 'nt': + build_prefix = os.path.join(build_prefix, 'Library') + include_folder = os.path.join(build_prefix, 'include') + lib_folder = os.path.join(build_prefix, 'lib') + library_header_path = os.path.join( + include_folder, library_header) + library_found = os.path.isfile(library_header_path) + conda_installed = library_found + else: + # Check if using Anaconda to produce wheels + conda = distutils.spawn.find_executable('conda') + is_conda = conda is not None + print('Running build on conda: {0}'.format(is_conda)) + if is_conda: + python_executable = sys.executable + py_folder = os.path.dirname(python_executable) + if os.name == 'nt': + env_path = os.path.join(py_folder, 'Library') + else: + env_path = os.path.dirname(py_folder) + lib_folder = os.path.join(env_path, 'lib') + include_folder = os.path.join(env_path, 'include') + library_header_path = os.path.join( + include_folder, library_header) + library_found = os.path.isfile(library_header_path) + conda_installed = library_found + + if not library_found: + if sys.platform == 'linux': + library_found = os.path.exists('/usr/include/{0}'.format( + library_header)) + library_found = library_found or os.path.exists( + '/usr/local/include/{0}'.format(library_header)) + + return library_found, conda_installed, include_folder, lib_folder + + def get_extensions(): this_dir = os.path.dirname(os.path.abspath(__file__)) extensions_dir = os.path.join(this_dir, 'torchvision', 'csrc') @@ -171,6 +232,91 @@ def get_extensions(): ) ) + # ------------------- Torchvision extra extensions ------------------------ + vision_include = os.environ.get('TORCHVISION_INCLUDE', None) + vision_library = os.environ.get('TORCHVISION_LIBRARY', None) + vision_include = (vision_include.split(os.pathsep) + if vision_include is not None else []) + vision_library = (vision_library.split(os.pathsep) + if vision_library is not None else []) + include_dirs += vision_include + library_dirs = vision_library + + # Image reading extension + image_macros = [] + image_include = [extensions_dir] + image_library = [] + image_link_flags = [] + + # Locating libPNG + libpng = distutils.spawn.find_executable('libpng-config') + pngfix = distutils.spawn.find_executable('pngfix') + png_found = libpng is not None or pngfix is not None + 
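    # Illustrative note (not part of the patch): PNG is detected through the
    # libpng-config executable (Linux/macOS) or pngfix (Windows), while JPEG is
    # detected through the find_library() helper defined above.  Extra search
    # roots can be supplied as os.pathsep-separated lists, e.g. (hypothetical
    # paths):
    #   TORCHVISION_INCLUDE=/opt/libjpeg-turbo/include \
    #   TORCHVISION_LIBRARY=/opt/libjpeg-turbo/lib \
    #   python setup.py install
    # which the splits above turn into the vision_include / vision_library
    # lists used for the JPEG lookup and the final include/library dirs.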
image_macros += [('PNG_FOUND', str(int(png_found)))] + print('PNG found: {0}'.format(png_found)) + if png_found: + if libpng is not None: + # Linux / Mac + png_version = subprocess.run([libpng, '--version'], + stdout=subprocess.PIPE) + png_version = png_version.stdout.strip().decode('utf-8') + print('libpng version: {0}'.format(png_version)) + png_version = parse_version(png_version) + if png_version >= parse_version("1.6.0"): + print('Building torchvision with PNG image support') + png_lib = subprocess.run([libpng, '--libdir'], + stdout=subprocess.PIPE) + png_lib = png_lib.stdout.strip().decode('utf-8') + if 'disabled' not in png_lib: + image_library += [png_lib] + png_include = subprocess.run([libpng, '--I_opts'], + stdout=subprocess.PIPE) + png_include = png_include.stdout.strip().decode('utf-8') + _, png_include = png_include.split('-I') + print('libpng include path: {0}'.format(png_include)) + image_include += [png_include] + image_link_flags.append('png') + else: + print('libpng installed version is less than 1.6.0, ' + 'disabling PNG support') + png_found = False + else: + # Windows + png_lib = os.path.join( + os.path.dirname(os.path.dirname(pngfix)), 'lib') + png_include = os.path.join(os.path.dirname( + os.path.dirname(pngfix)), 'include', 'libpng16') + image_library += [png_lib] + image_include += [png_include] + image_link_flags.append('libpng') + + # Locating libjpeg + (jpeg_found, jpeg_conda, + jpeg_include, jpeg_lib) = find_library('jpeglib', vision_include) + + print('JPEG found: {0}'.format(jpeg_found)) + image_macros += [('JPEG_FOUND', str(int(jpeg_found)))] + if jpeg_found: + print('Building torchvision with JPEG image support') + image_link_flags.append('jpeg') + if jpeg_conda: + image_library += [jpeg_lib] + image_include += [jpeg_include] + + image_path = os.path.join(extensions_dir, 'cpu', 'image') + image_src = glob.glob(os.path.join(image_path, '*.cpp')) + + if png_found or jpeg_found: + ext_modules.append(extension( + 'torchvision.image', + image_src, + include_dirs=image_include + include_dirs + [image_path], + library_dirs=image_library + library_dirs, + define_macros=image_macros, + libraries=image_link_flags, + extra_compile_args=extra_compile_args + )) + ffmpeg_exe = distutils.spawn.find_executable('ffmpeg') has_ffmpeg = ffmpeg_exe is not None @@ -243,7 +389,9 @@ def run(self): # Package info packages=find_packages(exclude=('test',)), - + package_data={ + package_name: ['*.dll', '*.dylib', '*.so'] + }, zip_safe=False, install_requires=requirements, extras_require={ diff --git a/test/assets/damaged_jpeg/TensorFlow-LICENSE b/test/assets/damaged_jpeg/TensorFlow-LICENSE new file mode 100644 index 00000000000..c7563fe4e5b --- /dev/null +++ b/test/assets/damaged_jpeg/TensorFlow-LICENSE @@ -0,0 +1,13 @@ + Copyright 2019 The TensorFlow Authors. All rights reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
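When the optional image extension above does get built (PNG or JPEG found), the new
readers can be exercised roughly as below; a minimal sketch, assuming a local JPEG file
and using the same import path as the test file added later in this diff:

    import os
    import torch
    from torchvision.io.image import read_jpeg, decode_jpeg

    path = "dog.jpg"  # hypothetical local file, used only for illustration
    img = read_jpeg(path)  # uint8 tensor; the tests below check it against PIL/stored references

    # decode_jpeg takes the raw file bytes as a 1-D uint8 tensor instead of a path
    data = torch.from_file(path, dtype=torch.uint8, size=os.path.getsize(path))
    assert img.equal(decode_jpeg(data))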
diff --git a/test/assets/damaged_jpeg/bad_huffman.jpg b/test/assets/damaged_jpeg/bad_huffman.jpg new file mode 100644 index 00000000000..ef5b6f12c55 Binary files /dev/null and b/test/assets/damaged_jpeg/bad_huffman.jpg differ diff --git a/test/assets/damaged_jpeg/corrupt.jpg b/test/assets/damaged_jpeg/corrupt.jpg new file mode 100644 index 00000000000..5e2fe6c56f5 Binary files /dev/null and b/test/assets/damaged_jpeg/corrupt.jpg differ diff --git a/test/assets/damaged_jpeg/corrupt34_2.jpg b/test/assets/damaged_jpeg/corrupt34_2.jpg new file mode 100644 index 00000000000..4211155c455 Binary files /dev/null and b/test/assets/damaged_jpeg/corrupt34_2.jpg differ diff --git a/test/assets/damaged_jpeg/corrupt34_3.jpg b/test/assets/damaged_jpeg/corrupt34_3.jpg new file mode 100644 index 00000000000..c1c2a9d1e1e Binary files /dev/null and b/test/assets/damaged_jpeg/corrupt34_3.jpg differ diff --git a/test/assets/damaged_jpeg/corrupt34_4.jpg b/test/assets/damaged_jpeg/corrupt34_4.jpg new file mode 100644 index 00000000000..b8e7308ba00 Binary files /dev/null and b/test/assets/damaged_jpeg/corrupt34_4.jpg differ diff --git a/test/assets/grace_hopper_517x606.pth b/test/assets/grace_hopper_517x606.pth new file mode 100644 index 00000000000..54b39dc0cd7 Binary files /dev/null and b/test/assets/grace_hopper_517x606.pth differ diff --git a/test/common_utils.py b/test/common_utils.py index 9dbd04f4217..d3b6e97a6dc 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -85,7 +85,7 @@ def is_iterable(obj): class TestCase(unittest.TestCase): precision = 1e-5 - def assertExpected(self, output, subname=None, prec=None): + def assertExpected(self, output, subname=None, prec=None, strip_suffix=None): r""" Test that a python value matches the recorded contents of a file derived from the name of this test and subname. The value must be @@ -96,16 +96,24 @@ def assertExpected(self, output, subname=None, prec=None): If you call this multiple times in a single function, you must give a unique subname each time. + + strip_suffix allows different tests that expect similar numerics, e.g. + "test_xyz_cuda" and "test_xyz_cpu", to use the same pickled data. + test_xyz_cuda would pass strip_suffix="_cuda", test_xyz_cpu would pass + strip_suffix="_cpu", and they would both use a data file name based on + "test_xyz". """ - def remove_prefix(text, prefix): + def remove_prefix_suffix(text, prefix, suffix): if text.startswith(prefix): - return text[len(prefix):] + text = text[len(prefix):] + if suffix is not None and text.endswith(suffix): + text = text[:len(text) - len(suffix)] return text # NB: we take __file__ from the module that defined the test # class, so we place the expect directory where the test script # lives, NOT where test/common_utils.py lives. 
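        # Illustrative example (not part of the patch): with
        # self.id() == "test_models.ModelTester.test_resnet18_cuda",
        # prefix == "test_models." and strip_suffix == "_cuda", the munged id
        # becomes "ModelTester.test_resnet18", so the CUDA and CPU variants of
        # a test resolve to the same expect file.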
module_id = self.__class__.__module__ - munged_id = remove_prefix(self.id(), module_id + ".") + munged_id = remove_prefix_suffix(self.id(), module_id + ".", strip_suffix) test_file = os.path.realpath(sys.modules[module_id].__file__) expected_file = os.path.join(os.path.dirname(test_file), "expect", diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py index 1a8c77c827f..2e3477ad12b 100644 --- a/test/test_functional_tensor.py +++ b/test/test_functional_tensor.py @@ -1,17 +1,41 @@ +import unittest +import random +import colorsys +import math + +from PIL import Image +from PIL.Image import NEAREST, BILINEAR, BICUBIC + +import numpy as np + import torch -from torch import Tensor import torchvision.transforms as transforms import torchvision.transforms.functional_tensor as F_t +import torchvision.transforms.functional_pil as F_pil import torchvision.transforms.functional as F -import numpy as np -import unittest -import random -import colorsys -from torch.jit.annotations import Optional, List, BroadcastingList2, Tuple class Tester(unittest.TestCase): + def _create_data(self, height=3, width=3, channels=3): + tensor = torch.randint(0, 255, (channels, height, width), dtype=torch.uint8) + pil_img = Image.fromarray(tensor.permute(1, 2, 0).contiguous().numpy()) + return tensor, pil_img + + def compareTensorToPIL(self, tensor, pil_image, msg=None): + pil_tensor = torch.as_tensor(np.array(pil_image).transpose((2, 0, 1))) + if msg is None: + msg = "tensor:\n{} \ndid not equal PIL tensor:\n{}".format(tensor, pil_tensor) + self.assertTrue(tensor.equal(pil_tensor), msg) + + def approxEqualTensorToPIL(self, tensor, pil_image, tol=1e-5, msg=None): + pil_tensor = torch.as_tensor(np.array(pil_image).transpose((2, 0, 1))).to(tensor) + mae = torch.abs(tensor - pil_tensor).mean().item() + self.assertTrue( + mae < tol, + msg="{}: mae={}, tol={}: \n{}\nvs\n{}".format(msg, mae, tol, tensor[0, :10, :10], pil_tensor[0, :10, :10]) + ) + def test_vflip(self): script_vflip = torch.jit.script(F_t.vflip) img_tensor = torch.randn(3, 16, 16) @@ -93,7 +117,13 @@ def test_rgb2hsv(self): colorsys_img = torch.tensor(hsv, dtype=torch.float32) - max_diff = (colorsys_img - ft_hsv_img).abs().max() + ft_hsv_img_h, ft_hsv_img_sv = torch.split(ft_hsv_img, [1, 2], dim=1) + colorsys_img_h, colorsys_img_sv = torch.split(colorsys_img, [1, 2], dim=1) + + max_diff_h = ((colorsys_img_h * 2 * math.pi).sin() - (ft_hsv_img_h * 2 * math.pi).sin()).abs().max() + max_diff_sv = (colorsys_img_sv - ft_hsv_img_sv).abs().max() + max_diff = max(max_diff_h, max_diff_sv) + self.assertLess(max_diff, 1e-5) def test_adjustments(self): @@ -234,6 +264,215 @@ def test_ten_crop(self): for cropped_script_img, cropped_tensor_img in zip(cropped_script, cropped_tensor): self.assertTrue(torch.equal(cropped_script_img, cropped_tensor_img)) + def test_pad(self): + script_fn = torch.jit.script(F_t.pad) + tensor, pil_img = self._create_data(7, 8) + + for dt in [None, torch.float32, torch.float64]: + if dt is not None: + # This is a trivial cast to float of uint8 data to test all cases + tensor = tensor.to(dt) + for pad in [2, [3, ], [0, 3], (3, 3), [4, 2, 4, 3]]: + configs = [ + {"padding_mode": "constant", "fill": 0}, + {"padding_mode": "constant", "fill": 10}, + {"padding_mode": "constant", "fill": 20}, + {"padding_mode": "edge"}, + {"padding_mode": "reflect"}, + {"padding_mode": "symmetric"}, + ] + for kwargs in configs: + pad_tensor = F_t.pad(tensor, pad, **kwargs) + pad_pil_img = F_pil.pad(pil_img, pad, **kwargs) + + pad_tensor_8b = pad_tensor 
+ # we need to cast to uint8 to compare with PIL image + if pad_tensor_8b.dtype != torch.uint8: + pad_tensor_8b = pad_tensor_8b.to(torch.uint8) + + self.compareTensorToPIL(pad_tensor_8b, pad_pil_img, msg="{}, {}".format(pad, kwargs)) + + if isinstance(pad, int): + script_pad = [pad, ] + else: + script_pad = pad + pad_tensor_script = script_fn(tensor, script_pad, **kwargs) + self.assertTrue(pad_tensor.equal(pad_tensor_script), msg="{}, {}".format(pad, kwargs)) + + with self.assertRaises(ValueError, msg="Padding can not be negative for symmetric padding_mode"): + F_t.pad(tensor, (-2, -3), padding_mode="symmetric") + + def test_adjust_gamma(self): + script_fn = torch.jit.script(F_t.adjust_gamma) + tensor, pil_img = self._create_data(26, 36) + + for dt in [torch.float64, torch.float32, None]: + + if dt is not None: + tensor = F.convert_image_dtype(tensor, dt) + + gammas = [0.8, 1.0, 1.2] + gains = [0.7, 1.0, 1.3] + for gamma, gain in zip(gammas, gains): + + adjusted_tensor = F_t.adjust_gamma(tensor, gamma, gain) + adjusted_pil = F_pil.adjust_gamma(pil_img, gamma, gain) + scripted_result = script_fn(tensor, gamma, gain) + self.assertEqual(adjusted_tensor.dtype, scripted_result.dtype) + self.assertEqual(adjusted_tensor.size()[1:], adjusted_pil.size[::-1]) + + rbg_tensor = adjusted_tensor + if adjusted_tensor.dtype != torch.uint8: + rbg_tensor = F.convert_image_dtype(adjusted_tensor, torch.uint8) + + self.compareTensorToPIL(rbg_tensor, adjusted_pil) + + self.assertTrue(adjusted_tensor.equal(scripted_result)) + + def test_resize(self): + script_fn = torch.jit.script(F_t.resize) + tensor, pil_img = self._create_data(26, 36) + + for dt in [None, torch.float32, torch.float64]: + if dt is not None: + # This is a trivial cast to float of uint8 data to test all cases + tensor = tensor.to(dt) + for size in [32, [32, ], [32, 32], (32, 32), ]: + for interpolation in [BILINEAR, BICUBIC, NEAREST]: + resized_tensor = F_t.resize(tensor, size=size, interpolation=interpolation) + resized_pil_img = F_pil.resize(pil_img, size=size, interpolation=interpolation) + + self.assertEqual( + resized_tensor.size()[1:], resized_pil_img.size[::-1], msg="{}, {}".format(size, interpolation) + ) + + if interpolation != NEAREST: + # We can not check values if mode = NEAREST, as results are different + # E.g. resized_tensor = [[a, a, b, c, d, d, e, ...]] + # E.g. 
resized_pil_img = [[a, b, c, c, d, e, f, ...]] + resized_tensor_f = resized_tensor + # we need to cast to uint8 to compare with PIL image + if resized_tensor_f.dtype == torch.uint8: + resized_tensor_f = resized_tensor_f.to(torch.float) + + # Pay attention to high tolerance for MAE + self.approxEqualTensorToPIL( + resized_tensor_f, resized_pil_img, tol=8.0, msg="{}, {}".format(size, interpolation) + ) + + if isinstance(size, int): + script_size = [size, ] + else: + script_size = size + resize_result = script_fn(tensor, size=script_size, interpolation=interpolation) + self.assertTrue(resized_tensor.equal(resize_result), msg="{}, {}".format(size, interpolation)) + + def test_resized_crop(self): + # test values of F.resized_crop in several cases: + # 1) resize to the same size, crop to the same size => should be identity + tensor, _ = self._create_data(26, 36) + for i in [0, 2, 3]: + out_tensor = F.resized_crop(tensor, top=0, left=0, height=26, width=36, size=[26, 36], interpolation=i) + self.assertTrue(tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5])) + + # 2) resize by half and crop a TL corner + tensor, _ = self._create_data(26, 36) + out_tensor = F.resized_crop(tensor, top=0, left=0, height=20, width=30, size=[10, 15], interpolation=0) + expected_out_tensor = tensor[:, :20:2, :30:2] + self.assertTrue( + expected_out_tensor.equal(out_tensor), + msg="{} vs {}".format(expected_out_tensor[0, :10, :10], out_tensor[0, :10, :10]) + ) + + def test_affine(self): + # Tests on square image + tensor, pil_img = self._create_data(26, 26) + + scripted_affine = torch.jit.script(F.affine) + # 1) identity map + out_tensor = F.affine(tensor, angle=0, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0) + self.assertTrue( + tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5]) + ) + out_tensor = scripted_affine(tensor, angle=0, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0) + self.assertTrue( + tensor.equal(out_tensor), msg="{} vs {}".format(out_tensor[0, :5, :5], tensor[0, :5, :5]) + ) + + # 2) Test rotation + test_configs = [ + (90, torch.rot90(tensor, k=1, dims=(-1, -2))), + (45, None), + (30, None), + (-30, None), + (-45, None), + (-90, torch.rot90(tensor, k=-1, dims=(-1, -2))), + (180, torch.rot90(tensor, k=2, dims=(-1, -2))), + ] + for a, true_tensor in test_configs: + for fn in [F.affine, scripted_affine]: + out_tensor = fn(tensor, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0) + if true_tensor is not None: + self.assertTrue( + true_tensor.equal(out_tensor), + msg="{}\n{} vs \n{}".format(a, out_tensor[0, :5, :5], true_tensor[0, :5, :5]) + ) + else: + true_tensor = out_tensor + + out_pil_img = F.affine(pil_img, angle=a, translate=[0, 0], scale=1.0, shear=[0.0, 0.0], resample=0) + out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))) + + num_diff_pixels = (true_tensor != out_pil_tensor).sum().item() / 3.0 + ratio_diff_pixels = num_diff_pixels / true_tensor.shape[-1] / true_tensor.shape[-2] + # Tolerance : less than 6% of different pixels + self.assertLess( + ratio_diff_pixels, + 0.06, + msg="{}\n{} vs \n{}".format( + ratio_diff_pixels, true_tensor[0, :7, :7], out_pil_tensor[0, :7, :7] + ) + ) + # 3) Test translation + test_configs = [ + [10, 12], (12, 13) + ] + for t in test_configs: + for fn in [F.affine, scripted_affine]: + out_tensor = fn(tensor, angle=0, translate=t, scale=1.0, shear=[0.0, 0.0], resample=0) + out_pil_img = F.affine(pil_img, angle=0, translate=t, 
scale=1.0, shear=[0.0, 0.0], resample=0) + self.compareTensorToPIL(out_tensor, out_pil_img) + + # 3) Test rotation + translation + scale + share + test_configs = [ + (45, [5, 6], 1.0, [0.0, 0.0]), + (33, (5, -4), 1.0, [0.0, 0.0]), + (45, [5, 4], 1.2, [0.0, 0.0]), + (33, (4, 8), 2.0, [0.0, 0.0]), + (85, (10, -10), 0.7, [0.0, 0.0]), + (0, [0, 0], 1.0, [35.0, ]), + (25, [0, 0], 1.2, [0.0, 15.0]), + (45, [10, 0], 0.7, [2.0, 5.0]), + (45, [10, -10], 1.2, [4.0, 5.0]), + ] + for r in [0, ]: + for a, t, s, sh in test_configs: + for fn in [F.affine, scripted_affine]: + out_tensor = fn(tensor, angle=a, translate=t, scale=s, shear=sh, resample=r) + out_pil_img = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh, resample=r) + out_pil_tensor = torch.from_numpy(np.array(out_pil_img).transpose((2, 0, 1))) + + num_diff_pixels = (out_tensor != out_pil_tensor).sum().item() / 3.0 + ratio_diff_pixels = num_diff_pixels / out_tensor.shape[-1] / out_tensor.shape[-2] + # Tolerance : less than 5% of different pixels + self.assertLess( + ratio_diff_pixels, + 0.05, + msg="{}: {}\n{} vs \n{}".format( + (r, a, t, s, sh), ratio_diff_pixels, out_tensor[0, :7, :7], out_pil_tensor[0, :7, :7] + ) + ) + if __name__ == '__main__': unittest.main() diff --git a/test/test_hub.py b/test/test_hub.py index 4ae9e51021b..29ae90014d1 100644 --- a/test/test_hub.py +++ b/test/test_hub.py @@ -13,7 +13,7 @@ def sum_of_model_parameters(model): return s -SUM_OF_PRETRAINED_RESNET18_PARAMS = -12703.99609375 +SUM_OF_PRETRAINED_RESNET18_PARAMS = -12703.9931640625 @unittest.skipIf('torchvision' in sys.modules, @@ -31,8 +31,9 @@ def test_load_from_github(self): 'resnet18', pretrained=True, progress=False) - self.assertEqual(sum_of_model_parameters(hub_model).item(), - SUM_OF_PRETRAINED_RESNET18_PARAMS) + self.assertAlmostEqual(sum_of_model_parameters(hub_model).item(), + SUM_OF_PRETRAINED_RESNET18_PARAMS, + places=2) def test_set_dir(self): temp_dir = tempfile.gettempdir() @@ -42,8 +43,9 @@ def test_set_dir(self): 'resnet18', pretrained=True, progress=False) - self.assertEqual(sum_of_model_parameters(hub_model).item(), - SUM_OF_PRETRAINED_RESNET18_PARAMS) + self.assertAlmostEqual(sum_of_model_parameters(hub_model).item(), + SUM_OF_PRETRAINED_RESNET18_PARAMS, + places=2) self.assertTrue(os.path.exists(temp_dir + '/pytorch_vision_master')) shutil.rmtree(temp_dir + '/pytorch_vision_master') diff --git a/test/test_image.py b/test/test_image.py new file mode 100644 index 00000000000..0bf3daf5528 --- /dev/null +++ b/test/test_image.py @@ -0,0 +1,88 @@ +import os +import glob +import unittest +import sys + +import torch +import torchvision +from PIL import Image +from torchvision.io.image import read_png, decode_png, read_jpeg, decode_jpeg +import numpy as np + +IMAGE_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") +IMAGE_DIR = os.path.join(IMAGE_ROOT, "fakedata", "imagefolder") +DAMAGED_JPEG = os.path.join(IMAGE_ROOT, 'damaged_jpeg') + + +def get_images(directory, img_ext): + assert os.path.isdir(directory) + for root, _, files in os.walk(directory): + if os.path.basename(root) == 'damaged_jpeg': + continue + + for fl in files: + _, ext = os.path.splitext(fl) + if ext == img_ext: + yield os.path.join(root, fl) + + +class ImageTester(unittest.TestCase): + def test_read_jpeg(self): + for img_path in get_images(IMAGE_ROOT, ".jpg"): + img_pil = torch.load(img_path.replace('jpg', 'pth')) + img_ljpeg = read_jpeg(img_path) + self.assertTrue(img_ljpeg.equal(img_pil)) + + def test_decode_jpeg(self): + for img_path in 
get_images(IMAGE_ROOT, ".jpg"): + img_pil = torch.load(img_path.replace('jpg', 'pth')) + size = os.path.getsize(img_path) + img_ljpeg = decode_jpeg(torch.from_file(img_path, dtype=torch.uint8, size=size)) + self.assertTrue(img_ljpeg.equal(img_pil)) + + with self.assertRaisesRegex(ValueError, "Expected a non empty 1-dimensional tensor."): + decode_jpeg(torch.empty((100, 1), dtype=torch.uint8)) + + with self.assertRaisesRegex(ValueError, "Expected a torch.uint8 tensor."): + decode_jpeg(torch.empty((100, ), dtype=torch.float16)) + + with self.assertRaises(RuntimeError): + decode_jpeg(torch.empty((100), dtype=torch.uint8)) + + def test_damaged_images(self): + # Test image with bad Huffman encoding (should not raise) + bad_huff = os.path.join(DAMAGED_JPEG, 'bad_huffman.jpg') + try: + _ = read_jpeg(bad_huff) + except RuntimeError: + self.assertTrue(False) + + # Truncated images should raise an exception + truncated_images = glob.glob( + os.path.join(DAMAGED_JPEG, 'corrupt*.jpg')) + for image_path in truncated_images: + with self.assertRaises(RuntimeError): + read_jpeg(image_path) + + def test_read_png(self): + # Check across .png + for img_path in get_images(IMAGE_DIR, ".png"): + img_pil = torch.from_numpy(np.array(Image.open(img_path))) + img_lpng = read_png(img_path) + self.assertTrue(img_lpng.equal(img_pil)) + + def test_decode_png(self): + for img_path in get_images(IMAGE_DIR, ".png"): + img_pil = torch.from_numpy(np.array(Image.open(img_path))) + size = os.path.getsize(img_path) + img_lpng = decode_png(torch.from_file(img_path, dtype=torch.uint8, size=size)) + self.assertTrue(img_lpng.equal(img_pil)) + + with self.assertRaises(ValueError): + decode_png(torch.empty((), dtype=torch.uint8)) + with self.assertRaises(RuntimeError): + decode_png(torch.randint(3, 5, (300,), dtype=torch.uint8)) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_models.py b/test/test_models.py index 1cee7a90003..faa14f8250e 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -74,6 +74,26 @@ def get_available_video_models(): } +# The following models exhibit flaky numerics under autocast in _test_*_model harnesses. +# This may be caused by the harness environment (e.g. num classes, input initialization +# via torch.rand), and does not prove autocast is unsuitable when training with real data +# (autocast has been used successfully with real data for some of these models). +# TODO: investigate why autocast numerics are flaky in the harnesses. +# +# For the following models, _test_*_model harnesses skip numerical checks on outputs when +# trying autocast. However, they still try an autocasted forward pass, so they still ensure +# autocast coverage suffices to prevent dtype errors in each model. 
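# Illustrative shape of that coverage (sketch only; the real logic lives in the
# _test_*_model harnesses below):
#
#   if dev == "cuda":
#       with torch.cuda.amp.autocast():
#           out = model(x)          # must run without dtype errors
#       if name not in autocast_flaky_numerics:
#           ...numerical check of `out` against the recorded expect file...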
+autocast_flaky_numerics = ( + "fasterrcnn_resnet50_fpn", + "inception_v3", + "keypointrcnn_resnet50_fpn", + "maskrcnn_resnet50_fpn", + "resnet101", + "resnet152", + "wide_resnet101_2", +) + + class ModelTester(TestCase): def checkModule(self, model, name, args): if name not in script_test_models: @@ -81,65 +101,87 @@ def checkModule(self, model, name, args): unwrapper = script_test_models[name].get('unwrapper', None) return super(ModelTester, self).checkModule(model, args, unwrapper=unwrapper, skip=False) - def _test_classification_model(self, name, input_shape): + def _test_classification_model(self, name, input_shape, dev): set_rng_seed(0) # passing num_class equal to a number other than 1000 helps in making the test # more enforcing in nature model = models.__dict__[name](num_classes=50) - model.eval() - x = torch.rand(input_shape) + model.eval().to(device=dev) + # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests + x = torch.rand(input_shape).to(device=dev) out = model(x) - self.assertExpected(out, prec=0.1) + self.assertExpected(out.cpu(), prec=0.1, strip_suffix="_" + dev) self.assertEqual(out.shape[-1], 50) self.checkModule(model, name, (x,)) - def _test_segmentation_model(self, name): + if dev == "cuda": + with torch.cuda.amp.autocast(): + out = model(x) + # See autocast_flaky_numerics comment at top of file. + if name not in autocast_flaky_numerics: + self.assertExpected(out.cpu(), prec=0.1, strip_suffix="_" + dev) + self.assertEqual(out.shape[-1], 50) + + def _test_segmentation_model(self, name, dev): # passing num_class equal to a number other than 1000 helps in making the test # more enforcing in nature model = models.segmentation.__dict__[name](num_classes=50, pretrained_backbone=False) - model.eval() + model.eval().to(device=dev) input_shape = (1, 3, 300, 300) - x = torch.rand(input_shape) + # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests + x = torch.rand(input_shape).to(device=dev) out = model(x) self.assertEqual(tuple(out["out"].shape), (1, 50, 300, 300)) self.checkModule(model, name, (x,)) - def _test_detection_model(self, name): + if dev == "cuda": + with torch.cuda.amp.autocast(): + out = model(x) + self.assertEqual(tuple(out["out"].shape), (1, 50, 300, 300)) + + def _test_detection_model(self, name, dev): set_rng_seed(0) model = models.detection.__dict__[name](num_classes=50, pretrained_backbone=False) - model.eval() + model.eval().to(device=dev) input_shape = (3, 300, 300) - x = torch.rand(input_shape) + # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests + x = torch.rand(input_shape).to(device=dev) model_input = [x] out = model(model_input) self.assertIs(model_input[0], x) - self.assertEqual(len(out), 1) - def subsample_tensor(tensor): - num_elems = tensor.numel() - num_samples = 20 - if num_elems <= num_samples: - return tensor - - flat_tensor = tensor.flatten() - ith_index = num_elems // num_samples - return flat_tensor[ith_index - 1::ith_index] - - def compute_mean_std(tensor): - # can't compute mean of integral tensor - tensor = tensor.to(torch.double) - mean = torch.mean(tensor) - std = torch.std(tensor) - return {"mean": mean, "std": std} - - # maskrcnn_resnet_50_fpn numerically unstable across platforms, so for now - # compare results with mean and std - if name == "maskrcnn_resnet50_fpn": - test_value = map_nested_tensor_object(out, tensor_map_fn=compute_mean_std) - # mean values are small, use large prec - self.assertExpected(test_value, prec=.01) - else: 
- self.assertExpected(map_nested_tensor_object(out, tensor_map_fn=subsample_tensor), prec=0.01) + def check_out(out): + self.assertEqual(len(out), 1) + + def subsample_tensor(tensor): + num_elems = tensor.numel() + num_samples = 20 + if num_elems <= num_samples: + return tensor + + flat_tensor = tensor.flatten() + ith_index = num_elems // num_samples + return flat_tensor[ith_index - 1::ith_index] + + def compute_mean_std(tensor): + # can't compute mean of integral tensor + tensor = tensor.to(torch.double) + mean = torch.mean(tensor) + std = torch.std(tensor) + return {"mean": mean, "std": std} + + # maskrcnn_resnet_50_fpn numerically unstable across platforms, so for now + # compare results with mean and std + if name == "maskrcnn_resnet50_fpn": + test_value = map_nested_tensor_object(out, tensor_map_fn=compute_mean_std) + # mean values are small, use large prec + self.assertExpected(test_value, prec=.01, strip_suffix="_" + dev) + else: + self.assertExpected(map_nested_tensor_object(out, tensor_map_fn=subsample_tensor), + prec=0.01, + strip_suffix="_" + dev) + + check_out(out) scripted_model = torch.jit.script(model) scripted_model.eval() @@ -156,6 +198,13 @@ def compute_mean_std(tensor): # self.check_script(model, name) self.checkModule(model, name, ([x],)) + if dev == "cuda": + with torch.cuda.amp.autocast(): + out = model(model_input) + # See autocast_flaky_numerics comment at top of file. + if name not in autocast_flaky_numerics: + check_out(out) + def _test_detection_model_validation(self, name): set_rng_seed(0) model = models.detection.__dict__[name](num_classes=50, pretrained_backbone=False) @@ -179,18 +228,24 @@ def _test_detection_model_validation(self, name): targets = [{'boxes': boxes}] self.assertRaises(ValueError, model, x, targets=targets) - def _test_video_model(self, name): + def _test_video_model(self, name, dev): # the default input shape is # bs * num_channels * clip_len * h *w input_shape = (1, 3, 4, 112, 112) # test both basicblock and Bottleneck model = models.video.__dict__[name](num_classes=50) - model.eval() - x = torch.rand(input_shape) + model.eval().to(device=dev) + # RNG always on CPU, to ensure x in cuda tests is bitwise identical to x in cpu tests + x = torch.rand(input_shape).to(device=dev) out = model(x) self.checkModule(model, name, (x,)) self.assertEqual(out.shape[-1], 50) + if dev == "cuda": + with torch.cuda.amp.autocast(): + out = model(x) + self.assertEqual(out.shape[-1], 50) + def _make_sliced_model(self, model, stop_layer): layers = OrderedDict() for name, layer in model.named_children(): @@ -272,6 +327,12 @@ def test_googlenet_eval(self): @unittest.skipIf(not torch.cuda.is_available(), 'needs GPU') def test_fasterrcnn_switch_devices(self): + def checkOut(out): + self.assertEqual(len(out), 1) + self.assertTrue("boxes" in out[0]) + self.assertTrue("scores" in out[0]) + self.assertTrue("labels" in out[0]) + model = models.detection.fasterrcnn_resnet50_fpn(num_classes=50, pretrained_backbone=False) model.cuda() model.eval() @@ -280,17 +341,20 @@ def test_fasterrcnn_switch_devices(self): model_input = [x] out = model(model_input) self.assertIs(model_input[0], x) - self.assertEqual(len(out), 1) - self.assertTrue("boxes" in out[0]) - self.assertTrue("scores" in out[0]) - self.assertTrue("labels" in out[0]) + + checkOut(out) + + with torch.cuda.amp.autocast(): + out = model(model_input) + + checkOut(out) + # now switch to cpu and make sure it works model.cpu() x = x.cpu() out_cpu = model([x]) - self.assertTrue("boxes" in out_cpu[0]) - 
self.assertTrue("scores" in out_cpu[0]) - self.assertTrue("labels" in out_cpu[0]) + + checkOut(out_cpu) def test_generalizedrcnn_transform_repr(self): @@ -312,34 +376,40 @@ def test_generalizedrcnn_transform_repr(self): self.assertEqual(t.__repr__(), expected_string) +_devs = ["cpu", "cuda"] if torch.cuda.is_available() else ["cpu"] + + for model_name in get_available_classification_models(): - # for-loop bodies don't define scopes, so we have to save the variables - # we want to close over in some way - def do_test(self, model_name=model_name): - input_shape = (1, 3, 224, 224) - if model_name in ['inception_v3']: - input_shape = (1, 3, 299, 299) - self._test_classification_model(model_name, input_shape) + for dev in _devs: + # for-loop bodies don't define scopes, so we have to save the variables + # we want to close over in some way + def do_test(self, model_name=model_name, dev=dev): + input_shape = (1, 3, 224, 224) + if model_name in ['inception_v3']: + input_shape = (1, 3, 299, 299) + self._test_classification_model(model_name, input_shape, dev) - setattr(ModelTester, "test_" + model_name, do_test) + setattr(ModelTester, "test_" + model_name + "_" + dev, do_test) for model_name in get_available_segmentation_models(): - # for-loop bodies don't define scopes, so we have to save the variables - # we want to close over in some way - def do_test(self, model_name=model_name): - self._test_segmentation_model(model_name) + for dev in _devs: + # for-loop bodies don't define scopes, so we have to save the variables + # we want to close over in some way + def do_test(self, model_name=model_name, dev=dev): + self._test_segmentation_model(model_name, dev) - setattr(ModelTester, "test_" + model_name, do_test) + setattr(ModelTester, "test_" + model_name + "_" + dev, do_test) for model_name in get_available_detection_models(): - # for-loop bodies don't define scopes, so we have to save the variables - # we want to close over in some way - def do_test(self, model_name=model_name): - self._test_detection_model(model_name) + for dev in _devs: + # for-loop bodies don't define scopes, so we have to save the variables + # we want to close over in some way + def do_test(self, model_name=model_name, dev=dev): + self._test_detection_model(model_name, dev) - setattr(ModelTester, "test_" + model_name, do_test) + setattr(ModelTester, "test_" + model_name + "_" + dev, do_test) def do_validation_test(self, model_name=model_name): self._test_detection_model_validation(model_name) @@ -348,11 +418,11 @@ def do_validation_test(self, model_name=model_name): for model_name in get_available_video_models(): + for dev in _devs: + def do_test(self, model_name=model_name, dev=dev): + self._test_video_model(model_name, dev) - def do_test(self, model_name=model_name): - self._test_video_model(model_name) - - setattr(ModelTester, "test_" + model_name, do_test) + setattr(ModelTester, "test_" + model_name + "_" + dev, do_test) if __name__ == '__main__': unittest.main() diff --git a/test/test_ops.py b/test/test_ops.py index 2e3107f8d7e..564d5d54559 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -52,25 +52,30 @@ def _test_backward(self, device, contiguous): class RoIOpTester(OpTester): - def _test_forward(self, device, contiguous): + def _test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None): + x_dtype = self.dtype if x_dtype is None else x_dtype + rois_dtype = self.dtype if rois_dtype is None else rois_dtype pool_size = 5 # n_channels % (pool_size ** 2) == 0 required for PS opeartions. 
n_channels = 2 * (pool_size ** 2) - x = torch.rand(2, n_channels, 10, 10, dtype=self.dtype, device=device) + x = torch.rand(2, n_channels, 10, 10, dtype=x_dtype, device=device) if not contiguous: x = x.permute(0, 1, 3, 2) rois = torch.tensor([[0, 0, 0, 9, 9], # format is (xyxy) [0, 0, 5, 4, 9], [0, 5, 5, 9, 9], [1, 0, 0, 9, 9]], - dtype=self.dtype, device=device) + dtype=rois_dtype, device=device) pool_h, pool_w = pool_size, pool_size y = self.fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1) + # the following should be true whether we're running an autocast test or not. + self.assertTrue(y.dtype == x.dtype) gt_y = self.expected_fn(x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, device=device, dtype=self.dtype) - self.assertTrue(torch.allclose(gt_y, y)) + tol = 1e-3 if (x_dtype is torch.half or rois_dtype is torch.half) else 1e-5 + self.assertTrue(torch.allclose(gt_y.to(y.dtype), y, rtol=tol, atol=tol)) def _test_backward(self, device, contiguous): pool_size = 2 @@ -290,6 +295,13 @@ def expected_fn(self, in_data, rois, pool_h, pool_w, spatial_scale=1, sampling_r def _test_boxes_shape(self): self._helper_boxes_shape(ops.roi_align) + @unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable") + def test_roi_align_autocast(self): + for x_dtype in (torch.float, torch.half): + for rois_dtype in (torch.float, torch.half): + with torch.cuda.amp.autocast(): + self._test_forward(torch.device("cuda"), contiguous=False, x_dtype=x_dtype, rois_dtype=rois_dtype) + class PSRoIAlignTester(RoIOpTester, unittest.TestCase): def fn(self, x, rois, pool_h, pool_w, spatial_scale=1, sampling_ratio=-1, **kwargs): diff --git a/test/test_transforms.py b/test/test_transforms.py index 8423bf99ee3..61ec525961d 100644 --- a/test/test_transforms.py +++ b/test/test_transforms.py @@ -1179,14 +1179,14 @@ def test_adjust_gamma(self): # test 1 y_pil = F.adjust_gamma(x_pil, 0.5) y_np = np.array(y_pil) - y_ans = [0, 35, 57, 117, 185, 240, 97, 45, 244, 151, 255, 15] + y_ans = [0, 35, 57, 117, 186, 241, 97, 45, 245, 152, 255, 16] y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) self.assertTrue(np.allclose(y_np, y_ans)) # test 2 y_pil = F.adjust_gamma(x_pil, 2) y_np = np.array(y_pil) - y_ans = [0, 0, 0, 11, 71, 200, 5, 0, 214, 31, 255, 0] + y_ans = [0, 0, 0, 11, 71, 201, 5, 0, 215, 31, 255, 0] y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape) self.assertTrue(np.allclose(y_np, y_ans)) @@ -1311,17 +1311,14 @@ def test_rotate_fill(self): def test_affine(self): input_img = np.zeros((40, 40, 3), dtype=np.uint8) - pts = [] cnt = [20, 20] for pt in [(16, 16), (20, 16), (20, 20)]: for i in range(-5, 5): for j in range(-5, 5): input_img[pt[0] + i, pt[1] + j, :] = [255, 155, 55] - pts.append((pt[0] + i, pt[1] + j)) - pts = list(set(pts)) - with self.assertRaises(TypeError): - F.affine(input_img, 10) + with self.assertRaises(TypeError, msg="Argument translate should be a sequence"): + F.affine(input_img, 10, translate=0, scale=1, shear=1) pil_img = F.to_pil_image(input_img) @@ -1373,9 +1370,12 @@ def _test_transformation(a, t, s, sh): inv_true_matrix = np.linalg.inv(true_matrix) for y in range(true_result.shape[0]): for x in range(true_result.shape[1]): - res = np.dot(inv_true_matrix, [x, y, 1]) - _x = int(res[0] + 0.5) - _y = int(res[1] + 0.5) + # Same as for PIL: + # https://github.com/python-pillow/Pillow/blob/71f8ec6a0cfc1008076a023c0756542539d057ab/ + # src/libImaging/Geometry.c#L1060 + input_pt = np.array([x + 0.5, y + 0.5, 1.0]) + res = np.floor(np.dot(inv_true_matrix, 
input_pt)).astype(np.int) + _x, _y = res[:2] if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]: true_result[y, x, :] = input_img[_y, _x, :] @@ -1408,7 +1408,7 @@ def _test_transformation(a, t, s, sh): # Test rotation, scale, translation, shear for a in range(-90, 90, 25): for t1 in range(-10, 10, 5): - for s in [0.75, 0.98, 1.0, 1.1, 1.2]: + for s in [0.75, 0.98, 1.0, 1.2, 1.4]: for sh in range(-15, 15, 5): _test_transformation(a=a, t=(t1, t1), s=s, sh=(sh, sh)) @@ -1618,38 +1618,64 @@ def test_random_grayscale(self): def test_random_erasing(self): """Unit tests for random erasing transform""" - - img = torch.rand([3, 60, 60]) - - # Test Set 1: Erasing with int value - img_re = transforms.RandomErasing(value=0.2) - i, j, h, w, v = img_re.get_params(img, scale=img_re.scale, ratio=img_re.ratio, value=img_re.value) - img_output = F.erase(img, i, j, h, w, v) - self.assertEqual(img_output.size(0), 3) - - # Test Set 2: Check if the unerased region is preserved - orig_unerased = img.clone() - orig_unerased[:, i:i + h, j:j + w] = 0 - output_unerased = img_output.clone() - output_unerased[:, i:i + h, j:j + w] = 0 - self.assertTrue(torch.equal(orig_unerased, output_unerased)) - - # Test Set 3: Erasing with random value - img_re = transforms.RandomErasing(value='random')(img) - self.assertEqual(img_re.size(0), 3) - - # Test Set 4: Erasing with tuple value - img_re = transforms.RandomErasing(value=(0.2, 0.2, 0.2))(img) - self.assertEqual(img_re.size(0), 3) - - # Test Set 5: Testing the inplace behaviour - img_re = transforms.RandomErasing(value=(0.2), inplace=True)(img) - self.assertTrue(torch.equal(img_re, img)) - - # Test Set 6: Checking when no erased region is selected - img = torch.rand([3, 300, 1]) - img_re = transforms.RandomErasing(ratio=(0.1, 0.2), value='random')(img) - self.assertTrue(torch.equal(img_re, img)) + for is_scripted in [False, True]: + torch.manual_seed(12) + img = torch.rand(3, 60, 60) + + # Test Set 0: invalid value + random_erasing = transforms.RandomErasing(value=(0.1, 0.2, 0.3, 0.4), p=1.0) + with self.assertRaises(ValueError, msg="If value is a sequence, it should have either a single value or 3"): + img_re = random_erasing(img) + + # Test Set 1: Erasing with int value + random_erasing = transforms.RandomErasing(value=0.2) + if is_scripted: + random_erasing = torch.jit.script(random_erasing) + + i, j, h, w, v = transforms.RandomErasing.get_params( + img, scale=random_erasing.scale, ratio=random_erasing.ratio, value=[random_erasing.value, ] + ) + img_output = F.erase(img, i, j, h, w, v) + self.assertEqual(img_output.size(0), 3) + + # Test Set 2: Check if the unerased region is preserved + true_output = img.clone() + true_output[:, i:i + h, j:j + w] = random_erasing.value + self.assertTrue(torch.equal(true_output, img_output)) + + # Test Set 3: Erasing with random value + random_erasing = transforms.RandomErasing(value="random") + if is_scripted: + random_erasing = torch.jit.script(random_erasing) + img_re = random_erasing(img) + + self.assertEqual(img_re.size(0), 3) + + # Test Set 4: Erasing with tuple value + random_erasing = transforms.RandomErasing(value=(0.2, 0.2, 0.2)) + if is_scripted: + random_erasing = torch.jit.script(random_erasing) + img_re = random_erasing(img) + self.assertEqual(img_re.size(0), 3) + true_output = img.clone() + true_output[:, i:i + h, j:j + w] = torch.tensor(random_erasing.value)[:, None, None] + self.assertTrue(torch.equal(true_output, img_output)) + + # Test Set 5: Testing the inplace behaviour + random_erasing = 
transforms.RandomErasing(value=(0.2,), inplace=True) + if is_scripted: + random_erasing = torch.jit.script(random_erasing) + + img_re = random_erasing(img) + self.assertTrue(torch.equal(img_re, img)) + + # Test Set 6: Checking when no erased region is selected + img = torch.rand([3, 300, 1]) + random_erasing = transforms.RandomErasing(ratio=(0.1, 0.2), value="random") + if is_scripted: + random_erasing = torch.jit.script(random_erasing) + img_re = random_erasing(img) + self.assertTrue(torch.equal(img_re, img)) if __name__ == '__main__': diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py index 7791dd8b4f9..fbd3331a490 100644 --- a/test/test_transforms_tensor.py +++ b/test/test_transforms_tensor.py @@ -2,6 +2,7 @@ from torchvision import transforms as T from torchvision.transforms import functional as F from PIL import Image +from PIL.Image import NEAREST, BILINEAR, BICUBIC import numpy as np @@ -18,26 +19,43 @@ def compareTensorToPIL(self, tensor, pil_image): pil_tensor = torch.as_tensor(np.array(pil_image).transpose((2, 0, 1))) self.assertTrue(tensor.equal(pil_tensor)) - def _test_flip(self, func, method): - tensor, pil_img = self._create_data() - flip_tensor = getattr(F, func)(tensor) - flip_pil_img = getattr(F, func)(pil_img) - self.compareTensorToPIL(flip_tensor, flip_pil_img) + def _test_functional_geom_op(self, func, fn_kwargs): + if fn_kwargs is None: + fn_kwargs = {} + tensor, pil_img = self._create_data(height=10, width=10) + transformed_tensor = getattr(F, func)(tensor, **fn_kwargs) + transformed_pil_img = getattr(F, func)(pil_img, **fn_kwargs) + self.compareTensorToPIL(transformed_tensor, transformed_pil_img) - scripted_fn = torch.jit.script(getattr(F, func)) - flip_tensor_script = scripted_fn(tensor) - self.assertTrue(flip_tensor.equal(flip_tensor_script)) + def _test_class_geom_op(self, method, meth_kwargs=None): + if meth_kwargs is None: + meth_kwargs = {} + tensor, pil_img = self._create_data(height=10, width=10) # test for class interface - f = getattr(T, method)() + f = getattr(T, method)(**meth_kwargs) scripted_fn = torch.jit.script(f) - scripted_fn(tensor) + + # set seed to reproduce the same transformation for tensor and PIL image + torch.manual_seed(12) + transformed_tensor = f(tensor) + torch.manual_seed(12) + transformed_pil_img = f(pil_img) + self.compareTensorToPIL(transformed_tensor, transformed_pil_img) + + torch.manual_seed(12) + transformed_tensor_script = scripted_fn(tensor) + self.assertTrue(transformed_tensor.equal(transformed_tensor_script)) + + def _test_geom_op(self, func, method, fn_kwargs=None, meth_kwargs=None): + self._test_functional_geom_op(func, fn_kwargs) + self._test_class_geom_op(method, meth_kwargs) def test_random_horizontal_flip(self): - self._test_flip('hflip', 'RandomHorizontalFlip') + self._test_geom_op('hflip', 'RandomHorizontalFlip') def test_random_vertical_flip(self): - self._test_flip('vflip', 'RandomVerticalFlip') + self._test_geom_op('vflip', 'RandomVerticalFlip') def test_adjustments(self): fns = ['adjust_brightness', 'adjust_contrast', 'adjust_saturation'] @@ -65,6 +83,187 @@ def test_adjustments(self): self.assertLess(max_diff, 5 / 255 + 1e-5) self.assertLess(max_diff_scripted, 5 / 255 + 1e-5) + def test_pad(self): + + # Test functional.pad (PIL and Tensor) with padding as single int + self._test_functional_geom_op( + "pad", fn_kwargs={"padding": 2, "fill": 0, "padding_mode": "constant"} + ) + # Test functional.pad and transforms.Pad with padding as [int, ] + fn_kwargs = meth_kwargs = {"padding": [2, ], 
"fill": 0, "padding_mode": "constant"} + self._test_geom_op( + "pad", "Pad", fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + # Test functional.pad and transforms.Pad with padding as list + fn_kwargs = meth_kwargs = {"padding": [4, 4], "fill": 0, "padding_mode": "constant"} + self._test_geom_op( + "pad", "Pad", fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + # Test functional.pad and transforms.Pad with padding as tuple + fn_kwargs = meth_kwargs = {"padding": (2, 2, 2, 2), "fill": 127, "padding_mode": "constant"} + self._test_geom_op( + "pad", "Pad", fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + + def test_crop(self): + fn_kwargs = {"top": 2, "left": 3, "height": 4, "width": 5} + # Test transforms.RandomCrop with size and padding as tuple + meth_kwargs = {"size": (4, 5), "padding": (4, 4), "pad_if_needed": True, } + self._test_geom_op( + 'crop', 'RandomCrop', fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + + sizes = [5, [5, ], [6, 6]] + padding_configs = [ + {"padding_mode": "constant", "fill": 0}, + {"padding_mode": "constant", "fill": 10}, + {"padding_mode": "constant", "fill": 20}, + {"padding_mode": "edge"}, + {"padding_mode": "reflect"}, + ] + + for size in sizes: + for padding_config in padding_configs: + config = dict(padding_config) + config["size"] = size + self._test_class_geom_op("RandomCrop", config) + + def test_center_crop(self): + fn_kwargs = {"output_size": (4, 5)} + meth_kwargs = {"size": (4, 5), } + self._test_geom_op( + "center_crop", "CenterCrop", fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = {"output_size": (5,)} + meth_kwargs = {"size": (5, )} + self._test_geom_op( + "center_crop", "CenterCrop", fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + tensor = torch.randint(0, 255, (3, 10, 10), dtype=torch.uint8) + # Test torchscript of transforms.CenterCrop with size as int + f = T.CenterCrop(size=5) + scripted_fn = torch.jit.script(f) + scripted_fn(tensor) + + # Test torchscript of transforms.CenterCrop with size as [int, ] + f = T.CenterCrop(size=[5, ]) + scripted_fn = torch.jit.script(f) + scripted_fn(tensor) + + # Test torchscript of transforms.CenterCrop with size as tuple + f = T.CenterCrop(size=(6, 6)) + scripted_fn = torch.jit.script(f) + scripted_fn(tensor) + + def _test_geom_op_list_output(self, func, method, out_length, fn_kwargs=None, meth_kwargs=None): + if fn_kwargs is None: + fn_kwargs = {} + if meth_kwargs is None: + meth_kwargs = {} + tensor, pil_img = self._create_data(height=20, width=20) + transformed_t_list = getattr(F, func)(tensor, **fn_kwargs) + transformed_p_list = getattr(F, func)(pil_img, **fn_kwargs) + self.assertEqual(len(transformed_t_list), len(transformed_p_list)) + self.assertEqual(len(transformed_t_list), out_length) + for transformed_tensor, transformed_pil_img in zip(transformed_t_list, transformed_p_list): + self.compareTensorToPIL(transformed_tensor, transformed_pil_img) + + scripted_fn = torch.jit.script(getattr(F, func)) + transformed_t_list_script = scripted_fn(tensor.detach().clone(), **fn_kwargs) + self.assertEqual(len(transformed_t_list), len(transformed_t_list_script)) + self.assertEqual(len(transformed_t_list_script), out_length) + for transformed_tensor, transformed_tensor_script in zip(transformed_t_list, transformed_t_list_script): + self.assertTrue(transformed_tensor.equal(transformed_tensor_script), + msg="{} vs {}".format(transformed_tensor, transformed_tensor_script)) + + # test for class interface + f = getattr(T, method)(**meth_kwargs) + scripted_fn = torch.jit.script(f) + output = 
scripted_fn(tensor) + self.assertEqual(len(output), len(transformed_t_list_script)) + + def test_five_crop(self): + fn_kwargs = meth_kwargs = {"size": (5,)} + self._test_geom_op_list_output( + "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = meth_kwargs = {"size": [5, ]} + self._test_geom_op_list_output( + "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = meth_kwargs = {"size": (4, 5)} + self._test_geom_op_list_output( + "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = meth_kwargs = {"size": [4, 5]} + self._test_geom_op_list_output( + "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + + def test_ten_crop(self): + fn_kwargs = meth_kwargs = {"size": (5,)} + self._test_geom_op_list_output( + "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = meth_kwargs = {"size": [5, ]} + self._test_geom_op_list_output( + "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = meth_kwargs = {"size": (4, 5)} + self._test_geom_op_list_output( + "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + fn_kwargs = meth_kwargs = {"size": [4, 5]} + self._test_geom_op_list_output( + "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs + ) + + def test_resize(self): + tensor, _ = self._create_data(height=34, width=36) + script_fn = torch.jit.script(F.resize) + + for dt in [None, torch.float32, torch.float64]: + if dt is not None: + # This is a trivial cast to float of uint8 data to test all cases + tensor = tensor.to(dt) + for size in [32, [32, ], [32, 32], (32, 32), ]: + for interpolation in [BILINEAR, BICUBIC, NEAREST]: + + resized_tensor = F.resize(tensor, size=size, interpolation=interpolation) + + if isinstance(size, int): + script_size = [size, ] + else: + script_size = size + + s_resized_tensor = script_fn(tensor, size=script_size, interpolation=interpolation) + self.assertTrue(s_resized_tensor.equal(resized_tensor)) + + transform = T.Resize(size=script_size, interpolation=interpolation) + resized_tensor = transform(tensor) + script_transform = torch.jit.script(transform) + s_resized_tensor = script_transform(tensor) + self.assertTrue(s_resized_tensor.equal(resized_tensor)) + + def test_resized_crop(self): + tensor = torch.randint(0, 255, size=(3, 44, 56), dtype=torch.uint8) + + scale = (0.7, 1.2) + ratio = (0.75, 1.333) + + for size in [(32, ), [32, ], [32, 32], (32, 32)]: + for interpolation in [NEAREST, BILINEAR, BICUBIC]: + transform = T.RandomResizedCrop( + size=size, scale=scale, ratio=ratio, interpolation=interpolation + ) + s_transform = torch.jit.script(transform) + + torch.manual_seed(12) + out1 = transform(tensor) + torch.manual_seed(12) + out2 = s_transform(tensor) + self.assertTrue(out1.equal(out2)) + if __name__ == '__main__': unittest.main() diff --git a/torchvision/csrc/ROIAlign.h b/torchvision/csrc/ROIAlign.h index 78dcb101dce..a7cbe954a4d 100644 --- a/torchvision/csrc/ROIAlign.h +++ b/torchvision/csrc/ROIAlign.h @@ -3,14 +3,17 @@ #include "cpu/vision_cpu.h" #ifdef WITH_CUDA +#include "autocast.h" #include "cuda/vision_cuda.h" #endif #ifdef WITH_HIP #include "hip/vision_cuda.h" #endif -// Interface for Python -at::Tensor ROIAlign_forward( +// TODO: put this stuff in torchvision namespace + +// roi_align dispatch nexus +at::Tensor roi_align( 
const at::Tensor& input, // Input feature map. const at::Tensor& rois, // List of ROIs to pool over. const double spatial_scale, // The scale of the image features. ROIs will be @@ -21,21 +24,10 @@ at::Tensor ROIAlign_forward( const bool aligned) // The flag for pixel shift // along each axis. { - if (input.is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - return ROIAlign_forward_cuda( - input, - rois, - spatial_scale, - pooled_height, - pooled_width, - sampling_ratio, - aligned); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - return ROIAlign_forward_cpu( + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("torchvision::roi_align", "") + .typed(); + return op.call( input, rois, spatial_scale, @@ -45,37 +37,45 @@ at::Tensor ROIAlign_forward( aligned); } -at::Tensor ROIAlign_backward( - const at::Tensor& grad, +#ifdef WITH_CUDA +at::Tensor ROIAlign_autocast( + const at::Tensor& input, const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t sampling_ratio, const bool aligned) { - if (grad.is_cuda()) { -#if defined(WITH_CUDA) || defined(WITH_HIP) - return ROIAlign_backward_cuda( - grad, - rois, - spatial_scale, - pooled_height, - pooled_width, - batch_size, - channels, - height, - width, - sampling_ratio, - aligned); -#else - AT_ERROR("Not compiled with GPU support"); + c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast); + return roi_align( + autocast::_cast(at::kFloat, input), + autocast::_cast(at::kFloat, rois), + spatial_scale, + pooled_height, + pooled_width, + sampling_ratio, + aligned) + .to(input.scalar_type()); +} #endif - } - return ROIAlign_backward_cpu( + +at::Tensor _roi_align_backward( + const at::Tensor& grad, + const at::Tensor& rois, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t batch_size, + const int64_t channels, + const int64_t height, + const int64_t width, + const int64_t sampling_ratio, + const bool aligned) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("torchvision::_roi_align_backward", "") + .typed(); + return op.call( grad, rois, spatial_scale, @@ -107,7 +107,8 @@ class ROIAlignFunction : public torch::autograd::Function { ctx->saved_data["aligned"] = aligned; ctx->saved_data["input_shape"] = input.sizes(); ctx->save_for_backward({rois}); - auto result = ROIAlign_forward( + at::AutoNonVariableTypeMode g; + auto result = roi_align( input, rois, spatial_scale, @@ -125,7 +126,7 @@ class ROIAlignFunction : public torch::autograd::Function { auto saved = ctx->get_saved_variables(); auto rois = saved[0]; auto input_shape = ctx->saved_data["input_shape"].toIntList(); - auto grad_in = ROIAlign_backward( + auto grad_in = _roi_align_backward( grad_output[0], rois, ctx->saved_data["spatial_scale"].toDouble(), @@ -147,7 +148,47 @@ class ROIAlignFunction : public torch::autograd::Function { } }; -at::Tensor roi_align( +// TODO: There should be an easier way to do this +class ROIAlignBackwardFunction + : public torch::autograd::Function { + public: + static torch::autograd::variable_list forward( + torch::autograd::AutogradContext* ctx, + torch::autograd::Variable grad, + torch::autograd::Variable rois, + const double spatial_scale, + const int64_t 
pooled_height, + const int64_t pooled_width, + const int64_t batch_size, + const int64_t channels, + const int64_t height, + const int64_t width, + const int64_t sampling_ratio, + const bool aligned) { + at::AutoNonVariableTypeMode g; + auto result = _roi_align_backward( + grad, + rois, + spatial_scale, + pooled_height, + pooled_width, + batch_size, + channels, + height, + width, + sampling_ratio, + aligned); + return {result}; + } + + static torch::autograd::variable_list backward( + torch::autograd::AutogradContext* ctx, + torch::autograd::variable_list grad_output) { + TORCH_CHECK(0, "double backwards on roi_align not supported"); + } +}; + +at::Tensor ROIAlign_autograd( const at::Tensor& input, const at::Tensor& rois, const double spatial_scale, @@ -164,3 +205,29 @@ at::Tensor roi_align( sampling_ratio, aligned)[0]; } + +at::Tensor ROIAlign_backward_autograd( + const at::Tensor& grad, + const at::Tensor& rois, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t batch_size, + const int64_t channels, + const int64_t height, + const int64_t width, + const int64_t sampling_ratio, + const bool aligned) { + return ROIAlignBackwardFunction::apply( + grad, + rois, + spatial_scale, + pooled_height, + pooled_width, + batch_size, + channels, + height, + width, + sampling_ratio, + aligned)[0]; +} diff --git a/torchvision/csrc/autocast.h b/torchvision/csrc/autocast.h new file mode 100644 index 00000000000..93c079fb1c5 --- /dev/null +++ b/torchvision/csrc/autocast.h @@ -0,0 +1,28 @@ +#pragma once + +#ifdef WITH_CUDA +namespace autocast { + +inline bool is_eligible(const at::Tensor& arg) { + return ( + arg.is_cuda() && arg.is_floating_point() && + (arg.scalar_type() != at::kDouble)); +} + +// Overload to catch Tensor args +inline at::Tensor _cast(at::ScalarType to_type, const at::Tensor& arg) { + if (is_eligible(arg) && (arg.scalar_type() != to_type)) { + return arg.to(to_type); + } else { + return arg; + } +} + +// Template to catch non-Tensor args +template +inline T _cast(at::ScalarType to_type, T arg) { + return arg; +} + +} // namespace autocast +#endif diff --git a/torchvision/csrc/cpu/ROIAlign_cpu.cpp b/torchvision/csrc/cpu/ROIAlign_cpu.cpp index 325221df65b..03545883a69 100644 --- a/torchvision/csrc/cpu/ROIAlign_cpu.cpp +++ b/torchvision/csrc/cpu/ROIAlign_cpu.cpp @@ -141,9 +141,13 @@ void ROIAlignForward( T roi_end_w = offset_rois[3] * spatial_scale - offset; T roi_end_h = offset_rois[4] * spatial_scale - offset; - // Force malformed ROIs to be 1x1 - T roi_width = std::max(roi_end_w - roi_start_w, (T)1.); - T roi_height = std::max(roi_end_h - roi_start_h, (T)1.); + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + if (!aligned) { + // Force malformed ROIs to be 1x1 + roi_width = std::max(roi_width, (T)1.); + roi_height = std::max(roi_height, (T)1.); + } T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); @@ -309,9 +313,13 @@ void ROIAlignBackward( T roi_end_w = offset_rois[3] * spatial_scale - offset; T roi_end_h = offset_rois[4] * spatial_scale - offset; - // Force malformed ROIs to be 1x1 - T roi_width = std::max(roi_end_w - roi_start_w, (T)1.); - T roi_height = std::max(roi_end_h - roi_start_h, (T)1.); + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + if (!aligned) { + // Force malformed ROIs to be 1x1 + roi_width = std::max(roi_width, (T)1.); + roi_height = std::max(roi_height, (T)1.); 
+ } T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); @@ -381,10 +389,10 @@ void ROIAlignBackward( at::Tensor ROIAlign_forward_cpu( const at::Tensor& input, const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t sampling_ratio, const bool aligned) { AT_ASSERTM(input.device().is_cpu(), "input must be a CPU tensor"); AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); @@ -430,14 +438,14 @@ at::Tensor ROIAlign_forward_cpu( at::Tensor ROIAlign_backward_cpu( const at::Tensor& grad, const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t batch_size, + const int64_t channels, + const int64_t height, + const int64_t width, + const int64_t sampling_ratio, const bool aligned) { AT_ASSERTM(grad.device().is_cpu(), "grad must be a CPU tensor"); AT_ASSERTM(rois.device().is_cpu(), "rois must be a CPU tensor"); diff --git a/torchvision/csrc/cpu/decoder/seekable_buffer.cpp b/torchvision/csrc/cpu/decoder/seekable_buffer.cpp index 0d7ec7236a2..41e3e689c7b 100644 --- a/torchvision/csrc/cpu/decoder/seekable_buffer.cpp +++ b/torchvision/csrc/cpu/decoder/seekable_buffer.cpp @@ -55,7 +55,7 @@ bool SeekableBuffer::readBytes( size_t maxBytes, uint64_t timeoutMs) { // Resize to th minimum 4K page or less - buffer_.resize(std::min(maxBytes, 4 * 1024UL)); + buffer_.resize(std::min(maxBytes, size_t(4 * 1024UL))); end_ = 0; eof_ = false; @@ -72,7 +72,7 @@ bool SeekableBuffer::readBytes( if (res > 0) { end_ += res; if (end_ == buffer_.size()) { - buffer_.resize(std::min(end_ * 4UL, maxBytes)); + buffer_.resize(std::min(size_t(end_ * 4UL), maxBytes)); } } else if (res == 0) { eof_ = true; diff --git a/torchvision/csrc/cpu/decoder/util.cpp b/torchvision/csrc/cpu/decoder/util.cpp index 0dbcf885cf5..774612d3927 100644 --- a/torchvision/csrc/cpu/decoder/util.cpp +++ b/torchvision/csrc/cpu/decoder/util.cpp @@ -395,8 +395,8 @@ void setFormatDimensions( } } // prevent zeros - destW = std::max(destW, 1UL); - destH = std::max(destH, 1UL); + destW = std::max(destW, size_t(1UL)); + destH = std::max(destH, size_t(1UL)); } } // namespace Util } // namespace ffmpeg diff --git a/torchvision/csrc/cpu/image/image.cpp b/torchvision/csrc/cpu/image/image.cpp new file mode 100644 index 00000000000..5efe53b02b7 --- /dev/null +++ b/torchvision/csrc/cpu/image/image.cpp @@ -0,0 +1,17 @@ + +#include "image.h" +#include +#include + +// If we are in a Windows environment, we need to define +// initialization functions for the _custom_ops extension +#ifdef _WIN32 +PyMODINIT_FUNC PyInit_image(void) { + // No need to do anything. 
+ return NULL; +} +#endif + +static auto registry = torch::RegisterOperators() + .op("image::decode_png", &decodePNG) + .op("image::decode_jpeg", &decodeJPEG); diff --git a/torchvision/csrc/cpu/image/image.h b/torchvision/csrc/cpu/image/image.h new file mode 100644 index 00000000000..324ecea8a28 --- /dev/null +++ b/torchvision/csrc/cpu/image/image.h @@ -0,0 +1,8 @@ + +#pragma once + +// Comment +#include +#include +#include "readjpeg_cpu.h" +#include "readpng_cpu.h" diff --git a/torchvision/csrc/cpu/image/readjpeg_cpu.cpp b/torchvision/csrc/cpu/image/readjpeg_cpu.cpp new file mode 100644 index 00000000000..b3e3d2ffa5a --- /dev/null +++ b/torchvision/csrc/cpu/image/readjpeg_cpu.cpp @@ -0,0 +1,143 @@ +#include "readjpeg_cpu.h" + +#include +#include +#include + +#if !JPEG_FOUND + +torch::Tensor decodeJPEG(const torch::Tensor& data) { + AT_ERROR("decodeJPEG: torchvision not compiled with libjpeg support"); +} + +#else +#include + +const static JOCTET EOI_BUFFER[1] = {JPEG_EOI}; +char jpegLastErrorMsg[JMSG_LENGTH_MAX]; + +struct torch_jpeg_error_mgr { + struct jpeg_error_mgr pub; /* "public" fields */ + jmp_buf setjmp_buffer; /* for return to caller */ +}; + +typedef struct torch_jpeg_error_mgr* torch_jpeg_error_ptr; + +void torch_jpeg_error_exit(j_common_ptr cinfo) { + /* cinfo->err really points to a torch_jpeg_error_mgr struct, so coerce + * pointer */ + torch_jpeg_error_ptr myerr = (torch_jpeg_error_ptr)cinfo->err; + + /* Always display the message. */ + /* We could postpone this until after returning, if we chose. */ + // (*cinfo->err->output_message)(cinfo); + /* Create the message */ + (*(cinfo->err->format_message))(cinfo, jpegLastErrorMsg); + + /* Return control to the setjmp point */ + longjmp(myerr->setjmp_buffer, 1); +} + +struct torch_jpeg_mgr { + struct jpeg_source_mgr pub; + const JOCTET* data; + size_t len; +}; + +static void torch_jpeg_init_source(j_decompress_ptr cinfo) {} + +static boolean torch_jpeg_fill_input_buffer(j_decompress_ptr cinfo) { + torch_jpeg_mgr* src = (torch_jpeg_mgr*)cinfo->src; + // No more data. Probably an incomplete image; Raise exception. + torch_jpeg_error_ptr myerr = (torch_jpeg_error_ptr)cinfo->err; + strcpy(jpegLastErrorMsg, "Image is incomplete or truncated"); + longjmp(myerr->setjmp_buffer, 1); + src->pub.next_input_byte = EOI_BUFFER; + src->pub.bytes_in_buffer = 1; + return TRUE; +} + +static void torch_jpeg_skip_input_data(j_decompress_ptr cinfo, long num_bytes) { + torch_jpeg_mgr* src = (torch_jpeg_mgr*)cinfo->src; + if (src->pub.bytes_in_buffer < num_bytes) { + // Skipping over all of remaining data; output EOI. + src->pub.next_input_byte = EOI_BUFFER; + src->pub.bytes_in_buffer = 1; + } else { + // Skipping over only some of the remaining data. 
+ src->pub.next_input_byte += num_bytes; + src->pub.bytes_in_buffer -= num_bytes; + } +} + +static void torch_jpeg_term_source(j_decompress_ptr cinfo) {} + +static void torch_jpeg_set_source_mgr( + j_decompress_ptr cinfo, + const unsigned char* data, + size_t len) { + torch_jpeg_mgr* src; + if (cinfo->src == 0) { // if this is first time; allocate memory + cinfo->src = (struct jpeg_source_mgr*)(*cinfo->mem->alloc_small)( + (j_common_ptr)cinfo, JPOOL_PERMANENT, sizeof(torch_jpeg_mgr)); + } + src = (torch_jpeg_mgr*)cinfo->src; + src->pub.init_source = torch_jpeg_init_source; + src->pub.fill_input_buffer = torch_jpeg_fill_input_buffer; + src->pub.skip_input_data = torch_jpeg_skip_input_data; + src->pub.resync_to_restart = jpeg_resync_to_restart; // default + src->pub.term_source = torch_jpeg_term_source; + // fill the buffers + src->data = (const JOCTET*)data; + src->len = len; + src->pub.bytes_in_buffer = len; + src->pub.next_input_byte = src->data; +} + +torch::Tensor decodeJPEG(const torch::Tensor& data) { + struct jpeg_decompress_struct cinfo; + struct torch_jpeg_error_mgr jerr; + + auto datap = data.data_ptr(); + // Setup decompression structure + cinfo.err = jpeg_std_error(&jerr.pub); + jerr.pub.error_exit = torch_jpeg_error_exit; + /* Establish the setjmp return context for my_error_exit to use. */ + if (setjmp(jerr.setjmp_buffer)) { + /* If we get here, the JPEG code has signaled an error. + * We need to clean up the JPEG object. + */ + jpeg_destroy_decompress(&cinfo); + AT_ERROR(jpegLastErrorMsg); + } + + jpeg_create_decompress(&cinfo); + torch_jpeg_set_source_mgr(&cinfo, datap, data.numel()); + + // read info from header. + jpeg_read_header(&cinfo, TRUE); + jpeg_start_decompress(&cinfo); + + int height = cinfo.output_height; + int width = cinfo.output_width; + int components = cinfo.output_components; + + auto stride = width * components; + auto tensor = torch::empty( + {int64_t(height), int64_t(width), int64_t(components)}, torch::kU8); + auto ptr = tensor.data_ptr(); + while (cinfo.output_scanline < cinfo.output_height) { + /* jpeg_read_scanlines expects an array of pointers to scanlines. + * Here the array is only one element long, but you could ask for + * more than one scanline at a time if that's more convenient. 
+ */ + jpeg_read_scanlines(&cinfo, &ptr, 1); + ptr += stride; + } + + jpeg_finish_decompress(&cinfo); + jpeg_destroy_decompress(&cinfo); + return tensor; +} + +#endif // JPEG_FOUND diff --git a/torchvision/csrc/cpu/image/readjpeg_cpu.h b/torchvision/csrc/cpu/image/readjpeg_cpu.h new file mode 100644 index 00000000000..40404df29b5 --- /dev/null +++ b/torchvision/csrc/cpu/image/readjpeg_cpu.h @@ -0,0 +1,5 @@ +#pragma once + +#include + +torch::Tensor decodeJPEG(const torch::Tensor& data); diff --git a/torchvision/csrc/cpu/image/readpng_cpu.cpp b/torchvision/csrc/cpu/image/readpng_cpu.cpp new file mode 100644 index 00000000000..b284067b1ff --- /dev/null +++ b/torchvision/csrc/cpu/image/readpng_cpu.cpp @@ -0,0 +1,84 @@ +#include "readpng_cpu.h" + +// Comment +#include +#include +#include + +#if !PNG_FOUND +torch::Tensor decodePNG(const torch::Tensor& data) { + AT_ERROR("decodePNG: torchvision not compiled with libPNG support"); +} +#else +#include + +torch::Tensor decodePNG(const torch::Tensor& data) { + auto png_ptr = + png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); + TORCH_CHECK(png_ptr, "libpng read structure allocation failed!") + auto info_ptr = png_create_info_struct(png_ptr); + if (!info_ptr) { + png_destroy_read_struct(&png_ptr, nullptr, nullptr); + // Seems redundant with the if statement. done here to avoid leaking memory. + TORCH_CHECK(info_ptr, "libpng info structure allocation failed!") + } + + auto datap = data.accessor().data(); + + if (setjmp(png_jmpbuf(png_ptr)) != 0) { + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + TORCH_CHECK(false, "Internal error."); + } + auto is_png = !png_sig_cmp(datap, 0, 8); + TORCH_CHECK(is_png, "Content is not png!") + + struct Reader { + png_const_bytep ptr; + } reader; + reader.ptr = png_const_bytep(datap) + 8; + + auto read_callback = + [](png_structp png_ptr, png_bytep output, png_size_t bytes) { + auto reader = static_cast(png_get_io_ptr(png_ptr)); + std::copy(reader->ptr, reader->ptr + bytes, output); + reader->ptr += bytes; + }; + png_set_sig_bytes(png_ptr, 8); + png_set_read_fn(png_ptr, &reader, read_callback); + png_read_info(png_ptr, info_ptr); + + png_uint_32 width, height; + int bit_depth, color_type; + auto retval = png_get_IHDR( + png_ptr, + info_ptr, + &width, + &height, + &bit_depth, + &color_type, + nullptr, + nullptr, + nullptr); + + if (retval != 1) { + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + TORCH_CHECK(retval == 1, "Could read image metadata from content.") + } + if (color_type != PNG_COLOR_TYPE_RGB) { + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + TORCH_CHECK( + color_type == PNG_COLOR_TYPE_RGB, "Non RGB images are not supported.") + } + + auto tensor = + torch::empty({int64_t(height), int64_t(width), int64_t(3)}, torch::kU8); + auto ptr = tensor.accessor().data(); + auto bytes = png_get_rowbytes(png_ptr, info_ptr); + for (decltype(height) i = 0; i < height; ++i) { + png_read_row(png_ptr, ptr, nullptr); + ptr += bytes; + } + png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); + return tensor; +} +#endif // PNG_FOUND diff --git a/torchvision/csrc/cpu/image/readpng_cpu.h b/torchvision/csrc/cpu/image/readpng_cpu.h new file mode 100644 index 00000000000..38fab84dc7c --- /dev/null +++ b/torchvision/csrc/cpu/image/readpng_cpu.h @@ -0,0 +1,7 @@ +#pragma once + +// Comment +#include +#include + +torch::Tensor decodePNG(const torch::Tensor& data); diff --git a/torchvision/csrc/cpu/nms_cpu.cpp b/torchvision/csrc/cpu/nms_cpu.cpp index 14c3b8b4f16..753a9c9e362 100644 
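Illustrative usage sketch, not part of the patch itself: assuming the image extension defined above has been built and loaded, the registered ops become reachable through torch.ops, mirroring the helpers added in torchvision/io/image.py further down in this diff. The file path below is a placeholder.

import os
import torch

path = "dog.jpg"  # placeholder path to a JPEG file
data = torch.from_file(path, dtype=torch.uint8, size=os.path.getsize(path))
img = torch.ops.image.decode_jpeg(data)  # uint8 tensor laid out as (height, width, channels)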
--- a/torchvision/csrc/cpu/nms_cpu.cpp +++ b/torchvision/csrc/cpu/nms_cpu.cpp @@ -4,7 +4,7 @@ template at::Tensor nms_cpu_kernel( const at::Tensor& dets, const at::Tensor& scores, - const float iou_threshold) { + const double iou_threshold) { AT_ASSERTM(!dets.is_cuda(), "dets must be a CPU tensor"); AT_ASSERTM(!scores.is_cuda(), "scores must be a CPU tensor"); AT_ASSERTM( @@ -72,7 +72,26 @@ at::Tensor nms_cpu_kernel( at::Tensor nms_cpu( const at::Tensor& dets, const at::Tensor& scores, - const float iou_threshold) { + const double iou_threshold) { + TORCH_CHECK( + dets.dim() == 2, "boxes should be a 2d tensor, got ", dets.dim(), "D"); + TORCH_CHECK( + dets.size(1) == 4, + "boxes should have 4 elements in dimension 1, got ", + dets.size(1)); + TORCH_CHECK( + scores.dim() == 1, + "scores should be a 1d tensor, got ", + scores.dim(), + "D"); + TORCH_CHECK( + dets.size(0) == scores.size(0), + "boxes and scores should have same number of elements in ", + "dimension 0, got ", + dets.size(0), + " and ", + scores.size(0)); + auto result = at::empty({0}, dets.options()); AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { diff --git a/torchvision/csrc/cpu/vision_cpu.h b/torchvision/csrc/cpu/vision_cpu.h index d81a51a59c4..6b68b356225 100644 --- a/torchvision/csrc/cpu/vision_cpu.h +++ b/torchvision/csrc/cpu/vision_cpu.h @@ -23,23 +23,23 @@ at::Tensor ROIPool_backward_cpu( at::Tensor ROIAlign_forward_cpu( const at::Tensor& input, const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t sampling_ratio, const bool aligned); at::Tensor ROIAlign_backward_cpu( const at::Tensor& grad, const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t batch_size, + const int64_t channels, + const int64_t height, + const int64_t width, + const int64_t sampling_ratio, const bool aligned); std::tuple PSROIPool_forward_cpu( @@ -85,7 +85,7 @@ at::Tensor PSROIAlign_backward_cpu( at::Tensor nms_cpu( const at::Tensor& dets, const at::Tensor& scores, - const float iou_threshold); + const double iou_threshold); at::Tensor DeformConv2d_forward_cpu( const at::Tensor& input, diff --git a/torchvision/csrc/cuda/ROIAlign_cuda.cu b/torchvision/csrc/cuda/ROIAlign_cuda.cu index 298af06c708..84a8ba4e3bd 100644 --- a/torchvision/csrc/cuda/ROIAlign_cuda.cu +++ b/torchvision/csrc/cuda/ROIAlign_cuda.cu @@ -91,9 +91,13 @@ __global__ void RoIAlignForward( T roi_end_w = offset_rois[3] * spatial_scale - offset; T roi_end_h = offset_rois[4] * spatial_scale - offset; - // Force malformed ROIs to be 1x1 - T roi_width = max(roi_end_w - roi_start_w, (T)1.); - T roi_height = max(roi_end_h - roi_start_h, (T)1.); + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + if (!aligned) { + // Force malformed ROIs to be 1x1 + roi_width = max(roi_width, (T)1.); + roi_height = max(roi_height, (T)1.); + } T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); @@ -229,9 +233,13 @@ __global__ void RoIAlignBackward( T roi_end_w = offset_rois[3] * spatial_scale - offset; T roi_end_h = offset_rois[4] * 
spatial_scale - offset; - // Force malformed ROIs to be 1x1 - T roi_width = max(roi_end_w - roi_start_w, (T)1.); - T roi_height = max(roi_end_h - roi_start_h, (T)1.); + T roi_width = roi_end_w - roi_start_w; + T roi_height = roi_end_h - roi_start_h; + if (!aligned) { + // Force malformed ROIs to be 1x1 + roi_width = max(roi_width, (T)1.); + roi_height = max(roi_height, (T)1.); + } T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); @@ -307,10 +315,10 @@ __global__ void RoIAlignBackward( at::Tensor ROIAlign_forward_cuda( const at::Tensor& input, const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t sampling_ratio, const bool aligned) { AT_ASSERTM(input.is_cuda(), "input must be a CUDA tensor"); AT_ASSERTM(rois.is_cuda(), "rois must be a CUDA tensor"); @@ -368,14 +376,14 @@ at::Tensor ROIAlign_forward_cuda( at::Tensor ROIAlign_backward_cuda( const at::Tensor& grad, const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t batch_size, + const int64_t channels, + const int64_t height, + const int64_t width, + const int64_t sampling_ratio, const bool aligned) { AT_ASSERTM(grad.is_cuda(), "grad must be a CUDA tensor"); AT_ASSERTM(rois.is_cuda(), "rois must be a CUDA tensor"); diff --git a/torchvision/csrc/cuda/nms_cuda.cu b/torchvision/csrc/cuda/nms_cuda.cu index 2c519c4499d..f9c39541174 100644 --- a/torchvision/csrc/cuda/nms_cuda.cu +++ b/torchvision/csrc/cuda/nms_cuda.cu @@ -1,6 +1,11 @@ #include #include + +#if defined(WITH_CUDA) #include +#elif defined(WITH_HIP) +#include +#endif #include "cuda_helpers.h" @@ -70,10 +75,40 @@ __global__ void nms_kernel( at::Tensor nms_cuda(const at::Tensor& dets, const at::Tensor& scores, - float iou_threshold) { + const double iou_threshold) { AT_ASSERTM(dets.is_cuda(), "dets must be a CUDA tensor"); AT_ASSERTM(scores.is_cuda(), "scores must be a CUDA tensor"); + + TORCH_CHECK( + dets.dim() == 2, "boxes should be a 2d tensor, got ", dets.dim(), "D"); + TORCH_CHECK( + dets.size(1) == 4, + "boxes should have 4 elements in dimension 1, got ", + dets.size(1)); + TORCH_CHECK( + scores.dim() == 1, + "scores should be a 1d tensor, got ", + scores.dim(), + "D"); + TORCH_CHECK( + dets.size(0) == scores.size(0), + "boxes and scores should have same number of elements in ", + "dimension 0, got ", + dets.size(0), + " and ", + scores.size(0)) + +#if defined(WITH_CUDA) at::cuda::CUDAGuard device_guard(dets.device()); +#elif defined(WITH_HIP) + at::cuda::HIPGuard device_guard(dets.device()); +#else + AT_ERROR("Not compiled with GPU support"); +#endif + + if (dets.numel() == 0) { + return at::empty({0}, dets.options().dtype(at::kLong)); + } auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); auto dets_sorted = dets.index_select(0, order_t).contiguous(); diff --git a/torchvision/csrc/cuda/vision_cuda.h b/torchvision/csrc/cuda/vision_cuda.h index 5f0ff05246b..ef53d0c08b4 100644 --- a/torchvision/csrc/cuda/vision_cuda.h +++ b/torchvision/csrc/cuda/vision_cuda.h @@ -1,31 +1,26 @@ #pragma once -#if defined(WITH_CUDA) -#include -#elif 
defined(WITH_HIP) -#include -#endif #include at::Tensor ROIAlign_forward_cuda( const at::Tensor& input, const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int sampling_ratio, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t sampling_ratio, const bool aligned); at::Tensor ROIAlign_backward_cuda( const at::Tensor& grad, const at::Tensor& rois, - const float spatial_scale, - const int pooled_height, - const int pooled_width, - const int batch_size, - const int channels, - const int height, - const int width, - const int sampling_ratio, + const double spatial_scale, + const int64_t pooled_height, + const int64_t pooled_width, + const int64_t batch_size, + const int64_t channels, + const int64_t height, + const int64_t width, + const int64_t sampling_ratio, const bool aligned); std::tuple ROIPool_forward_cuda( @@ -90,7 +85,7 @@ at::Tensor PSROIAlign_backward_cuda( at::Tensor nms_cuda( const at::Tensor& dets, const at::Tensor& scores, - const float iou_threshold); + const double iou_threshold); at::Tensor DeformConv2d_forward_cuda( const at::Tensor& input, diff --git a/torchvision/csrc/models/modelsimpl.h b/torchvision/csrc/models/modelsimpl.h index ddde2071db5..1dc8d06b15e 100644 --- a/torchvision/csrc/models/modelsimpl.h +++ b/torchvision/csrc/models/modelsimpl.h @@ -14,22 +14,22 @@ namespace modelsimpl { // TODO here torch::relu_ and torch::adaptive_avg_pool2d wrapped in // torch::nn::Fuctional don't work. so keeping these for now -inline torch::Tensor& relu_(torch::Tensor x) { - return torch::relu_(x); +inline torch::Tensor& relu_(const torch::Tensor& x) { + return x.relu_(); } -inline torch::Tensor relu6_(torch::Tensor x) { +inline torch::Tensor& relu6_(const torch::Tensor& x) { return x.clamp_(0, 6); } inline torch::Tensor adaptive_avg_pool2d( - torch::Tensor x, + const torch::Tensor& x, torch::ExpandingArray<2> output_size) { return torch::adaptive_avg_pool2d(x, output_size); } inline torch::Tensor max_pool2d( - torch::Tensor x, + const torch::Tensor& x, torch::ExpandingArray<2> kernel_size, torch::ExpandingArray<2> stride) { return torch::max_pool2d(x, kernel_size, stride); diff --git a/torchvision/csrc/nms.h b/torchvision/csrc/nms.h index 3c2faba8353..6bbd3e0bc65 100644 --- a/torchvision/csrc/nms.h +++ b/torchvision/csrc/nms.h @@ -2,52 +2,33 @@ #include "cpu/vision_cpu.h" #ifdef WITH_CUDA +#include "autocast.h" #include "cuda/vision_cuda.h" #endif #ifdef WITH_HIP #include "hip/vision_cuda.h" #endif +// nms dispatch nexus at::Tensor nms( const at::Tensor& dets, const at::Tensor& scores, const double iou_threshold) { - TORCH_CHECK( - dets.dim() == 2, "boxes should be a 2d tensor, got ", dets.dim(), "D"); - TORCH_CHECK( - dets.size(1) == 4, - "boxes should have 4 elements in dimension 1, got ", - dets.size(1)); - TORCH_CHECK( - scores.dim() == 1, - "scores should be a 1d tensor, got ", - scores.dim(), - "D"); - TORCH_CHECK( - dets.size(0) == scores.size(0), - "boxes and scores should have same number of elements in ", - "dimension 0, got ", - dets.size(0), - " and ", - scores.size(0)); - if (dets.is_cuda()) { -#if defined(WITH_CUDA) - if (dets.numel() == 0) { - at::cuda::CUDAGuard device_guard(dets.device()); - return at::empty({0}, dets.options().dtype(at::kLong)); - } - return nms_cuda(dets, scores, iou_threshold); -#elif defined(WITH_HIP) - if (dets.numel() == 0) { - at::cuda::HIPGuard device_guard(dets.device()); - return at::empty({0}, 
dets.options().dtype(at::kLong)); - } - return nms_cuda(dets, scores, iou_threshold); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("torchvision::nms", "") + .typed(); + return op.call(dets, scores, iou_threshold); +} - at::Tensor result = nms_cpu(dets, scores, iou_threshold); - return result; +#ifdef WITH_CUDA +at::Tensor nms_autocast( + const at::Tensor& dets, + const at::Tensor& scores, + const double iou_threshold) { + c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast); + return nms( + autocast::_cast(at::kFloat, dets), + autocast::_cast(at::kFloat, scores), + iou_threshold); } +#endif diff --git a/torchvision/csrc/vision.cpp b/torchvision/csrc/vision.cpp index 9debc3da9b6..aa2ec26bfef 100644 --- a/torchvision/csrc/vision.cpp +++ b/torchvision/csrc/vision.cpp @@ -42,14 +42,44 @@ int64_t _cuda_version() { #endif } -static auto registry = - torch::RegisterOperators() - .op("torchvision::nms", &nms) - .op("torchvision::roi_align(Tensor input, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int sampling_ratio, bool aligned) -> Tensor", - &roi_align) - .op("torchvision::roi_pool", &roi_pool) - .op("torchvision::_new_empty_tensor_op", &new_empty_tensor) - .op("torchvision::ps_roi_align", &ps_roi_align) - .op("torchvision::ps_roi_pool", &ps_roi_pool) - .op("torchvision::deform_conv2d", &deform_conv2d) - .op("torchvision::_cuda_version", &_cuda_version); +TORCH_LIBRARY(torchvision, m) { + m.def("nms(Tensor dets, Tensor scores, float iou_threshold) -> Tensor"); + m.def( + "roi_align(Tensor input, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int sampling_ratio, bool aligned) -> Tensor"); + m.def( + "_roi_align_backward(Tensor grad, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int batch_size, int channels, int height, int width, int sampling_ratio, bool aligned) -> Tensor"); + m.def("roi_pool", &roi_pool); + m.def("_new_empty_tensor_op", &new_empty_tensor); + m.def("ps_roi_align", &ps_roi_align); + m.def("ps_roi_pool", &ps_roi_pool); + m.def("deform_conv2d", &deform_conv2d); + m.def("_cuda_version", &_cuda_version); +} + +TORCH_LIBRARY_IMPL(torchvision, CPU, m) { + m.impl("roi_align", ROIAlign_forward_cpu); + m.impl("_roi_align_backward", ROIAlign_backward_cpu); + m.impl("nms", nms_cpu); +} + +// TODO: Place this in a hypothetical separate torchvision_cuda library +#if defined(WITH_CUDA) || defined(WITH_HIP) +TORCH_LIBRARY_IMPL(torchvision, CUDA, m) { + m.impl("roi_align", ROIAlign_forward_cuda); + m.impl("_roi_align_backward", ROIAlign_backward_cuda); + m.impl("nms", nms_cuda); +} +#endif + +// Autocast only needs to wrap forward pass ops. 
+#if defined(WITH_CUDA) +TORCH_LIBRARY_IMPL(torchvision, Autocast, m) { + m.impl("roi_align", ROIAlign_autocast); + m.impl("nms", nms_autocast); +} +#endif + +TORCH_LIBRARY_IMPL(torchvision, Autograd, m) { + m.impl("roi_align", ROIAlign_autograd); + m.impl("_roi_align_backward", ROIAlign_backward_autograd); +} diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py index 6689eef649b..442add7e81d 100644 --- a/torchvision/datasets/utils.py +++ b/torchvision/datasets/utils.py @@ -95,16 +95,9 @@ def list_dir(root, prefix=False): only returns the name of the directories found """ root = os.path.expanduser(root) - directories = list( - filter( - lambda p: os.path.isdir(os.path.join(root, p)), - os.listdir(root) - ) - ) - + directories = [p for p in os.listdir(root) if os.path.isdir(os.path.join(root, p))] if prefix is True: directories = [os.path.join(root, d) for d in directories] - return directories @@ -119,19 +112,16 @@ def list_files(root, suffix, prefix=False): only returns the name of the files found """ root = os.path.expanduser(root) - files = list( - filter( - lambda p: os.path.isfile(os.path.join(root, p)) and p.endswith(suffix), - os.listdir(root) - ) - ) - + files = [p for p in os.listdir(root) if os.path.isfile(os.path.join(root, p)) and p.endswith(suffix)] if prefix is True: files = [os.path.join(root, d) for d in files] - return files +def _quota_exceeded(response: "requests.models.Response") -> bool: + return "Google Drive - Quota exceeded" in response.text + + def download_file_from_google_drive(file_id, root, filename=None, md5=None): """Download a Google Drive file from and place it in root. @@ -164,6 +154,14 @@ def download_file_from_google_drive(file_id, root, filename=None, md5=None): params = {'id': file_id, 'confirm': token} response = session.get(url, params=params, stream=True) + if _quota_exceeded(response): + msg = ( + f"The daily quota of the file {filename} is exceeded and it " + f"can't be downloaded. This is a limitation of Google Drive " + f"and can only be overcome by trying again later." + ) + raise RuntimeError(msg) + _save_response_content(response, fpath) diff --git a/torchvision/datasets/video_utils.py b/torchvision/datasets/video_utils.py index 5c9244e5450..91b858d6b91 100644 --- a/torchvision/datasets/video_utils.py +++ b/torchvision/datasets/video_utils.py @@ -1,6 +1,7 @@ import bisect import math from fractions import Fraction +from typing import List import torch from torchvision.io import ( @@ -45,20 +46,23 @@ def unfold(tensor, size, step, dilation=1): return torch.as_strided(tensor, new_size, new_stride) -class _DummyDataset(object): +class _VideoTimestampsDataset(object): """ - Dummy dataset used for DataLoader in VideoClips. - Defined at top level so it can be pickled when forking. + Dataset used to parallelize the reading of the timestamps + of a list of videos, given their paths in the filesystem. + + Used in VideoClips and defined at top level so it can be + pickled when forking. 
""" - def __init__(self, x): - self.x = x + def __init__(self, video_paths: List[str]): + self.video_paths = video_paths def __len__(self): - return len(self.x) + return len(self.video_paths) def __getitem__(self, idx): - return read_video_timestamps(self.x[idx]) + return read_video_timestamps(self.video_paths[idx]) class VideoClips(object): @@ -132,7 +136,7 @@ def _compute_frame_pts(self): import torch.utils.data dl = torch.utils.data.DataLoader( - _DummyDataset(self.video_paths), + _VideoTimestampsDataset(self.video_paths), batch_size=16, num_workers=self.num_workers, collate_fn=self._collate_fn, diff --git a/torchvision/io/__init__.py b/torchvision/io/__init__.py index cbbf560412e..4c47d8a51d5 100644 --- a/torchvision/io/__init__.py +++ b/torchvision/io/__init__.py @@ -30,5 +30,5 @@ "_read_video_clip_from_memory", "_read_video_meta_data", "VideoMetaData", - "Timebase", + "Timebase" ] diff --git a/torchvision/io/image.py b/torchvision/io/image.py new file mode 100644 index 00000000000..8d5da4899ca --- /dev/null +++ b/torchvision/io/image.py @@ -0,0 +1,109 @@ +import torch +from torch import nn, Tensor + +import os +import os.path as osp +import importlib + +_HAS_IMAGE_OPT = False + +try: + lib_dir = osp.join(osp.dirname(__file__), "..") + + loader_details = ( + importlib.machinery.ExtensionFileLoader, + importlib.machinery.EXTENSION_SUFFIXES + ) + + extfinder = importlib.machinery.FileFinder(lib_dir, loader_details) + ext_specs = extfinder.find_spec("image") + if ext_specs is not None: + torch.ops.load_library(ext_specs.origin) + _HAS_IMAGE_OPT = True +except (ImportError, OSError): + pass + + +def decode_png(input): + # type: (Tensor) -> Tensor + """ + Decodes a PNG image into a 3 dimensional RGB Tensor. + The values of the output tensor are uint8 between 0 and 255. + + Arguments: + input (Tensor[1]): a one dimensional int8 tensor containing + the raw bytes of the PNG image. + + Returns: + output (Tensor[image_width, image_height, 3]) + """ + if not isinstance(input, torch.Tensor) or input.numel() == 0 or input.ndim != 1: + raise ValueError("Expected a non empty 1-dimensional tensor.") + + if not input.dtype == torch.uint8: + raise ValueError("Expected a torch.uint8 tensor.") + output = torch.ops.image.decode_png(input) + return output + + +def read_png(path): + # type: (str) -> Tensor + """ + Reads a PNG image into a 3 dimensional RGB Tensor. + The values of the output tensor are uint8 between 0 and 255. + + Arguments: + path (str): path of the PNG image. + + Returns: + output (Tensor[image_width, image_height, 3]) + """ + if not os.path.isfile(path): + raise ValueError("Expected a valid file path.") + + size = os.path.getsize(path) + if size == 0: + raise ValueError("Expected a non empty file.") + data = torch.from_file(path, dtype=torch.uint8, size=size) + return decode_png(data) + + +def decode_jpeg(input): + # type: (Tensor) -> Tensor + """ + Decodes a JPEG image into a 3 dimensional RGB Tensor. + The values of the output tensor are uint8 between 0 and 255. + Arguments: + input (Tensor[1]): a one dimensional int8 tensor containing + the raw bytes of the JPEG image. 
+ Returns: + output (Tensor[image_width, image_height, 3]) + """ + if not isinstance(input, torch.Tensor) or len(input) == 0 or input.ndim != 1: + raise ValueError("Expected a non empty 1-dimensional tensor.") + + if not input.dtype == torch.uint8: + raise ValueError("Expected a torch.uint8 tensor.") + + output = torch.ops.image.decode_jpeg(input) + return output + + +def read_jpeg(path): + # type: (str) -> Tensor + """ + Reads a JPEG image into a 3 dimensional RGB Tensor. + The values of the output tensor are uint8 between 0 and 255. + Arguments: + path (str): path of the JPEG image. + Returns: + output (Tensor[image_width, image_height, 3]) + """ + if not os.path.isfile(path): + raise ValueError("Expected a valid file path.") + + size = os.path.getsize(path) + if size == 0: + raise ValueError("Expected a non empty file.") + data = torch.from_file(path, dtype=torch.uint8, size=size) + return decode_jpeg(data) diff --git a/torchvision/io/video.py b/torchvision/io/video.py index f822cbd3343..5c8529a7b5d 100644 --- a/torchvision/io/video.py +++ b/torchvision/io/video.py @@ -2,7 +2,7 @@ import math import re import warnings -from typing import Tuple, List +from typing import List, Tuple, Union import numpy as np import torch @@ -49,7 +49,7 @@ def _av_available(): _GC_COLLECTION_INTERVAL = 10 -def write_video(filename, video_array, fps, video_codec="libx264", options=None): +def write_video(filename, video_array, fps: Union[int, float], video_codec="libx264", options=None): """ Writes a 4d tensor in [T, H, W, C] format in a video file @@ -65,27 +65,28 @@ def write_video(filename, video_array, fps, video_codec="libx264", options=None) _check_av_available() video_array = torch.as_tensor(video_array, dtype=torch.uint8).numpy() - container = av.open(filename, mode="w") - - stream = container.add_stream(video_codec, rate=fps) - stream.width = video_array.shape[2] - stream.height = video_array.shape[1] - stream.pix_fmt = "yuv420p" if video_codec != "libx264rgb" else "rgb24" - stream.options = options or {} - - for img in video_array: - frame = av.VideoFrame.from_ndarray(img, format="rgb24") - frame.pict_type = "NONE" - for packet in stream.encode(frame): + # PyAV does not support floating point numbers with decimal point + # and will throw OverflowException in case this is not the case + if isinstance(fps, float): + fps = np.round(fps) + + with av.open(filename, mode="w") as container: + stream = container.add_stream(video_codec, rate=fps) + stream.width = video_array.shape[2] + stream.height = video_array.shape[1] + stream.pix_fmt = "yuv420p" if video_codec != "libx264rgb" else "rgb24" + stream.options = options or {} + + for img in video_array: + frame = av.VideoFrame.from_ndarray(img, format="rgb24") + frame.pict_type = "NONE" + for packet in stream.encode(frame): + container.mux(packet) + + # Flush stream + for packet in stream.encode(): container.mux(packet) - # Flush stream - for packet in stream.encode(): - container.mux(packet) - - # Close the file - container.close() - def _read_from_stream( container, start_offset, end_offset, pts_unit, stream, stream_name @@ -229,37 +230,35 @@ def read_video(filename, start_pts=0, end_pts=None, pts_unit="pts"): audio_frames = [] try: - container = av.open(filename, metadata_errors="ignore") + with av.open(filename, metadata_errors="ignore") as container: + if container.streams.video: + video_frames = _read_from_stream( + container, + start_pts, + end_pts, + pts_unit, + container.streams.video[0], + {"video": 0}, + ) + video_fps = 
container.streams.video[0].average_rate + # guard against potentially corrupted files + if video_fps is not None: + info["video_fps"] = float(video_fps) + + if container.streams.audio: + audio_frames = _read_from_stream( + container, + start_pts, + end_pts, + pts_unit, + container.streams.audio[0], + {"audio": 0}, + ) + info["audio_fps"] = container.streams.audio[0].rate + except av.AVError: # TODO raise a warning? pass - else: - if container.streams.video: - video_frames = _read_from_stream( - container, - start_pts, - end_pts, - pts_unit, - container.streams.video[0], - {"video": 0}, - ) - video_fps = container.streams.video[0].average_rate - # guard against potentially corrupted files - if video_fps is not None: - info["video_fps"] = float(video_fps) - - if container.streams.audio: - audio_frames = _read_from_stream( - container, - start_pts, - end_pts, - pts_unit, - container.streams.audio[0], - {"audio": 0}, - ) - info["audio_fps"] = container.streams.audio[0].rate - - container.close() vframes = [frame.to_rgb().to_ndarray() for frame in video_frames] aframes = [frame.to_ndarray() for frame in audio_frames] @@ -288,6 +287,14 @@ def _can_read_timestamps_from_packets(container): return False +def _decode_video_timestamps(container): + if _can_read_timestamps_from_packets(container): + # fast path + return [x.pts for x in container.demux(video=0) if x.pts is not None] + else: + return [x.pts for x in container.decode(video=0) if x.pts is not None] + + def read_video_timestamps(filename, pts_unit="pts"): """ List the video frames timestamps. @@ -321,26 +328,18 @@ def read_video_timestamps(filename, pts_unit="pts"): pts = [] try: - container = av.open(filename, metadata_errors="ignore") + with av.open(filename, metadata_errors="ignore") as container: + if container.streams.video: + video_stream = container.streams.video[0] + video_time_base = video_stream.time_base + try: + pts = _decode_video_timestamps(container) + except av.AVError: + warnings.warn(f"Failed decoding frames for file {filename}") + video_fps = float(video_stream.average_rate) except av.AVError: # TODO add a warning pass - else: - if container.streams.video: - video_stream = container.streams.video[0] - video_time_base = video_stream.time_base - try: - if _can_read_timestamps_from_packets(container): - # fast path - pts = [x.pts for x in container.demux(video=0) if x.pts is not None] - else: - pts = [ - x.pts for x in container.decode(video=0) if x.pts is not None - ] - except av.AVError: - warnings.warn(f"Failed decoding frames for file {filename}") - video_fps = float(video_stream.average_rate) - container.close() pts.sort() diff --git a/torchvision/models/detection/_utils.py b/torchvision/models/detection/_utils.py index 4b65ffa4a4e..3595114f24d 100644 --- a/torchvision/models/detection/_utils.py +++ b/torchvision/models/detection/_utils.py @@ -75,7 +75,7 @@ def __call__(self, matched_idxs): return pos_idx, neg_idx -@torch.jit.script +@torch.jit._script_if_tracing def encode_boxes(reference_boxes, proposals, weights): # type: (torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor """ diff --git a/torchvision/models/detection/roi_heads.py b/torchvision/models/detection/roi_heads.py index 19cc15a8cc0..82ba6e8b5c0 100644 --- a/torchvision/models/detection/roi_heads.py +++ b/torchvision/models/detection/roi_heads.py @@ -205,7 +205,7 @@ def _onnx_heatmaps_to_keypoints(maps, maps_i, roi_map_width, roi_map_height, return xy_preds_i, end_scores_i -@torch.jit.script +@torch.jit._script_if_tracing def 
_onnx_heatmaps_to_keypoints_loop(maps, rois, widths_ceil, heights_ceil, widths, heights, offset_x, offset_y, num_keypoints): xy_preds = torch.zeros((0, 3, int(num_keypoints)), dtype=torch.float32, device=maps.device) @@ -451,7 +451,7 @@ def _onnx_paste_mask_in_image(mask, box, im_h, im_w): return im_mask -@torch.jit.script +@torch.jit._script_if_tracing def _onnx_paste_masks_in_image_loop(masks, boxes, im_h, im_w): res_append = torch.zeros(0, im_h, im_w) for i in range(masks.size(0)): diff --git a/torchvision/models/detection/transform.py b/torchvision/models/detection/transform.py index 5564866c571..9059c184949 100644 --- a/torchvision/models/detection/transform.py +++ b/torchvision/models/detection/transform.py @@ -20,7 +20,7 @@ def _resize_image_and_masks_onnx(image, self_min_size, self_max_size, target): scale_factor = torch.min(self_min_size / min_size, self_max_size / max_size) image = torch.nn.functional.interpolate( - image[None], scale_factor=scale_factor, mode='bilinear', + image[None], scale_factor=scale_factor, mode='bilinear', recompute_scale_factor=True, align_corners=False)[0] if target is None: @@ -42,7 +42,7 @@ def _resize_image_and_masks(image, self_min_size, self_max_size, target): if max_size * scale_factor > self_max_size: scale_factor = self_max_size / max_size image = torch.nn.functional.interpolate( - image[None], scale_factor=scale_factor, mode='bilinear', + image[None], scale_factor=scale_factor, mode='bilinear', recompute_scale_factor=True, align_corners=False)[0] if target is None: diff --git a/torchvision/ops/_utils.py b/torchvision/ops/_utils.py index f514664042b..6c9a040ecd0 100644 --- a/torchvision/ops/_utils.py +++ b/torchvision/ops/_utils.py @@ -1,10 +1,9 @@ import torch from torch import Tensor -from torch.jit.annotations import List +from torch.jit.annotations import List, Tuple -def _cat(tensors, dim=0): - # type: (List[Tensor], int) -> Tensor +def _cat(tensors: List[Tensor], dim: int = 0) -> Tensor: """ Efficient version of torch.cat that avoids a copy if there is only a single element in a list """ @@ -15,8 +14,7 @@ def _cat(tensors, dim=0): return torch.cat(tensors, dim) -def convert_boxes_to_roi_format(boxes): - # type: (List[Tensor]) -> Tensor +def convert_boxes_to_roi_format(boxes: List[Tensor]) -> Tensor: concat_boxes = _cat([b for b in boxes], dim=0) temp = [] for i, b in enumerate(boxes): @@ -26,7 +24,7 @@ def convert_boxes_to_roi_format(boxes): return rois -def check_roi_boxes_shape(boxes): +def check_roi_boxes_shape(boxes: Tensor): if isinstance(boxes, (list, tuple)): for _tensor in boxes: assert _tensor.size(1) == 4, \ diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py index e7442f57352..6183d259212 100644 --- a/torchvision/ops/boxes.py +++ b/torchvision/ops/boxes.py @@ -4,9 +4,7 @@ import torchvision -@torch.jit.script -def nms(boxes, scores, iou_threshold): - # type: (Tensor, Tensor, float) -> Tensor +def nms(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor: """ Performs non-maximum suppression (NMS) on the boxes according to their intersection-over-union (IoU). @@ -41,9 +39,13 @@ def nms(boxes, scores, iou_threshold): return torch.ops.torchvision.nms(boxes, scores, iou_threshold) -@torch.jit.script -def batched_nms(boxes, scores, idxs, iou_threshold): - # type: (Tensor, Tensor, Tensor, float) -> Tensor +@torch.jit._script_if_tracing +def batched_nms( + boxes: Tensor, + scores: Tensor, + idxs: Tensor, + iou_threshold: float, +) -> Tensor: """ Performs non-maximum suppression in a batched fashion. 
@@ -84,8 +86,7 @@ def batched_nms(boxes, scores, idxs, iou_threshold): return keep -def remove_small_boxes(boxes, min_size): - # type: (Tensor, float) -> Tensor +def remove_small_boxes(boxes: Tensor, min_size: float) -> Tensor: """ Remove boxes which contains at least one side smaller than min_size. @@ -103,8 +104,7 @@ def remove_small_boxes(boxes, min_size): return keep -def clip_boxes_to_image(boxes, size): - # type: (Tensor, Tuple[int, int]) -> Tensor +def clip_boxes_to_image(boxes: Tensor, size: Tuple[int, int]) -> Tensor: """ Clip boxes so that they lie inside an image of size `size`. @@ -133,7 +133,7 @@ def clip_boxes_to_image(boxes, size): return clipped_boxes.reshape(boxes.shape) -def box_area(boxes): +def box_area(boxes: Tensor) -> Tensor: """ Computes the area of a set of bounding boxes, which are specified by its (x1, y1, x2, y2) coordinates. @@ -150,7 +150,7 @@ def box_area(boxes): # implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py # with slight modifications -def box_iou(boxes1, boxes2): +def box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor: """ Return intersection-over-union (Jaccard index) of boxes. diff --git a/torchvision/ops/deform_conv.py b/torchvision/ops/deform_conv.py index c948b164196..aa5e42c4a6e 100644 --- a/torchvision/ops/deform_conv.py +++ b/torchvision/ops/deform_conv.py @@ -8,8 +8,15 @@ from torch.jit.annotations import Optional, Tuple -def deform_conv2d(input, offset, weight, bias=None, stride=(1, 1), padding=(0, 0), dilation=(1, 1)): - # type: (Tensor, Tensor, Tensor, Optional[Tensor], Tuple[int, int], Tuple[int, int], Tuple[int, int]) -> Tensor +def deform_conv2d( + input: Tensor, + offset: Tensor, + weight: Tensor, + bias: Optional[Tensor] = None, + stride: Tuple[int, int] = (1, 1), + padding: Tuple[int, int] = (0, 0), + dilation: Tuple[int, int] = (1, 1), +) -> Tensor: """ Performs Deformable Convolution, described in Deformable Convolutional Networks @@ -80,8 +87,17 @@ class DeformConv2d(nn.Module): """ See deform_conv2d """ - def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, - dilation=1, groups=1, bias=True): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + padding: int = 0, + dilation: int = 1, + groups: int = 1, + bias: bool = True, + ): super(DeformConv2d, self).__init__() if in_channels % groups != 0: @@ -107,14 +123,14 @@ def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, self.reset_parameters() - def reset_parameters(self): + def reset_parameters(self) -> None: init.kaiming_uniform_(self.weight, a=math.sqrt(5)) if self.bias is not None: fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight) bound = 1 / math.sqrt(fan_in) init.uniform_(self.bias, -bound, bound) - def forward(self, input, offset): + def forward(self, input: Tensor, offset: Tensor) -> Tensor: """ Arguments: input (Tensor[batch_size, in_channels, in_height, in_width]): input tensor @@ -125,7 +141,7 @@ def forward(self, input, offset): return deform_conv2d(input, offset, self.weight, self.bias, stride=self.stride, padding=self.padding, dilation=self.dilation) - def __repr__(self): + def __repr__(self) -> str: s = self.__class__.__name__ + '(' s += '{in_channels}' s += ', {out_channels}' diff --git a/torchvision/ops/feature_pyramid_network.py b/torchvision/ops/feature_pyramid_network.py index a2d8c409490..979bbfb1c10 100644 --- a/torchvision/ops/feature_pyramid_network.py +++ b/torchvision/ops/feature_pyramid_network.py @@ -4,7 
+4,31 @@ import torch.nn.functional as F from torch import nn, Tensor -from torch.jit.annotations import Tuple, List, Dict +from torch.jit.annotations import Tuple, List, Dict, Optional + + +class ExtraFPNBlock(nn.Module): + """ + Base class for the extra block in the FPN. + + Arguments: + results (List[Tensor]): the result of the FPN + x (List[Tensor]): the original feature maps + names (List[str]): the names for each one of the + original feature maps + + Returns: + results (List[Tensor]): the extended set of results + of the FPN + names (List[str]): the extended set of names for the results + """ + def forward( + self, + results: List[Tensor], + x: List[Tensor], + names: List[str], + ) -> Tuple[List[Tensor], List[str]]: + pass class FeaturePyramidNetwork(nn.Module): @@ -44,7 +68,12 @@ class FeaturePyramidNetwork(nn.Module): >>> ('feat3', torch.Size([1, 5, 8, 8]))] """ - def __init__(self, in_channels_list, out_channels, extra_blocks=None): + def __init__( + self, + in_channels_list: List[int], + out_channels: int, + extra_blocks: Optional[ExtraFPNBlock] = None, + ): super(FeaturePyramidNetwork, self).__init__() self.inner_blocks = nn.ModuleList() self.layer_blocks = nn.ModuleList() @@ -66,8 +95,7 @@ def __init__(self, in_channels_list, out_channels, extra_blocks=None): assert isinstance(extra_blocks, ExtraFPNBlock) self.extra_blocks = extra_blocks - def get_result_from_inner_blocks(self, x, idx): - # type: (Tensor, int) -> Tensor + def get_result_from_inner_blocks(self, x: Tensor, idx: int) -> Tensor: """ This is equivalent to self.inner_blocks[idx](x), but torchscript doesn't support this yet @@ -85,8 +113,7 @@ def get_result_from_inner_blocks(self, x, idx): i += 1 return out - def get_result_from_layer_blocks(self, x, idx): - # type: (Tensor, int) -> Tensor + def get_result_from_layer_blocks(self, x: Tensor, idx: int) -> Tensor: """ This is equivalent to self.layer_blocks[idx](x), but torchscript doesn't support this yet @@ -104,8 +131,7 @@ def get_result_from_layer_blocks(self, x, idx): i += 1 return out - def forward(self, x): - # type: (Dict[str, Tensor]) -> Dict[str, Tensor] + def forward(self, x: Dict[str, Tensor]) -> Dict[str, Tensor]: """ Computes the FPN for a set of feature maps. @@ -140,31 +166,16 @@ def forward(self, x): return out -class ExtraFPNBlock(nn.Module): - """ - Base class for the extra block in the FPN. - - Arguments: - results (List[Tensor]): the result of the FPN - x (List[Tensor]): the original feature maps - names (List[str]): the names for each one of the - original feature maps - - Returns: - results (List[Tensor]): the extended set of results - of the FPN - names (List[str]): the extended set of names for the results - """ - def forward(self, results, x, names): - pass - - class LastLevelMaxPool(ExtraFPNBlock): """ Applies a max_pool2d on top of the last feature map """ - def forward(self, x, y, names): - # type: (List[Tensor], List[Tensor], List[str]) -> Tuple[List[Tensor], List[str]] + def forward( + self, + x: List[Tensor], + y: List[Tensor], + names: List[str], + ) -> Tuple[List[Tensor], List[str]]: names.append("pool") x.append(F.max_pool2d(x[-1], 1, 2, 0)) return x, names @@ -174,7 +185,7 @@ class LastLevelP6P7(ExtraFPNBlock): """ This module is used in RetinaNet to generate extra layers, P6 and P7. 
""" - def __init__(self, in_channels, out_channels): + def __init__(self, in_channels: int, out_channels: int): super(LastLevelP6P7, self).__init__() self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) @@ -183,7 +194,12 @@ def __init__(self, in_channels, out_channels): nn.init.constant_(module.bias, 0) self.use_P5 = in_channels == out_channels - def forward(self, p, c, names): + def forward( + self, + p: List[Tensor], + c: List[Tensor], + names: List[str], + ) -> Tuple[List[Tensor], List[str]]: p5, c5 = p[-1], c[-1] x = p5 if self.use_P5 else c5 p6 = self.p6(x) diff --git a/torchvision/ops/misc.py b/torchvision/ops/misc.py index 61fab3edd7a..17e69c506d8 100644 --- a/torchvision/ops/misc.py +++ b/torchvision/ops/misc.py @@ -10,6 +10,8 @@ import warnings import torch +from torch import Tensor, Size +from torch.jit.annotations import List, Optional, Tuple class Conv2d(torch.nn.Conv2d): @@ -46,7 +48,12 @@ class FrozenBatchNorm2d(torch.nn.Module): are fixed """ - def __init__(self, num_features, eps=0., n=None): + def __init__( + self, + num_features: int, + eps: float = 0., + n: Optional[int] = None, + ): # n=None for backward-compatibility if n is not None: warnings.warn("`n` argument is deprecated and has been renamed `num_features`", @@ -59,8 +66,16 @@ def __init__(self, num_features, eps=0., n=None): self.register_buffer("running_mean", torch.zeros(num_features)) self.register_buffer("running_var", torch.ones(num_features)) - def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, - missing_keys, unexpected_keys, error_msgs): + def _load_from_state_dict( + self, + state_dict: dict, + prefix: str, + local_metadata: dict, + strict: bool, + missing_keys: List[str], + unexpected_keys: List[str], + error_msgs: List[str], + ): num_batches_tracked_key = prefix + 'num_batches_tracked' if num_batches_tracked_key in state_dict: del state_dict[num_batches_tracked_key] @@ -69,7 +84,7 @@ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: # move reshapes to the beginning # to make it fuser-friendly w = self.weight.reshape(1, -1, 1, 1) @@ -80,5 +95,5 @@ def forward(self, x): bias = b - rm * scale return x * scale + bias - def __repr__(self): + def __repr__(self) -> str: return f"{self.__class__.__name__}({self.weight.shape[0]})" diff --git a/torchvision/ops/new_empty_tensor.py b/torchvision/ops/new_empty_tensor.py index 74455a98c4f..e964e7a7e15 100644 --- a/torchvision/ops/new_empty_tensor.py +++ b/torchvision/ops/new_empty_tensor.py @@ -3,8 +3,7 @@ from torch import Tensor -def _new_empty_tensor(x, shape): - # type: (Tensor, List[int]) -> Tensor +def _new_empty_tensor(x: Tensor, shape: List[int]) -> Tensor: """ Arguments: input (Tensor): input tensor diff --git a/torchvision/ops/poolers.py b/torchvision/ops/poolers.py index 06bbc86a93c..32734cff86a 100644 --- a/torchvision/ops/poolers.py +++ b/torchvision/ops/poolers.py @@ -15,8 +15,7 @@ # _onnx_merge_levels() is an implementation supported by ONNX # that merges the levels to the right indices @torch.jit.unused -def _onnx_merge_levels(levels, unmerged_results): - # type: (Tensor, List[Tensor]) -> Tensor +def _onnx_merge_levels(levels: Tensor, unmerged_results: List[Tensor]) -> Tensor: first_result = unmerged_results[0] dtype, device = first_result.dtype, first_result.device res = 
torch.zeros((levels.size(0), first_result.size(1), @@ -33,8 +32,13 @@ def _onnx_merge_levels(levels, unmerged_results): # TODO: (eellison) T54974082 https://github.com/pytorch/pytorch/issues/26744/pytorch/issues/26744 -def initLevelMapper(k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6): - # type: (int, int, int, int, float) -> LevelMapper +def initLevelMapper( + k_min: int, + k_max: int, + canonical_scale: int = 224, + canonical_level: int = 4, + eps: float = 1e-6, +): return LevelMapper(k_min, k_max, canonical_scale, canonical_level, eps) @@ -50,16 +54,21 @@ class LevelMapper(object): eps (float) """ - def __init__(self, k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6): - # type: (int, int, int, int, float) -> None + def __init__( + self, + k_min: int, + k_max: int, + canonical_scale: int = 224, + canonical_level: int = 4, + eps: float = 1e-6, + ): self.k_min = k_min self.k_max = k_max self.s0 = canonical_scale self.lvl0 = canonical_level self.eps = eps - def __call__(self, boxlists): - # type: (List[Tensor]) -> Tensor + def __call__(self, boxlists: List[Tensor]) -> Tensor: """ Arguments: boxlists (list[BoxList]) @@ -107,7 +116,12 @@ class MultiScaleRoIAlign(nn.Module): 'map_levels': Optional[LevelMapper] } - def __init__(self, featmap_names, output_size, sampling_ratio): + def __init__( + self, + featmap_names: List[str], + output_size: List[int], + sampling_ratio: int, + ): super(MultiScaleRoIAlign, self).__init__() if isinstance(output_size, int): output_size = (output_size, output_size) @@ -117,8 +131,7 @@ def __init__(self, featmap_names, output_size, sampling_ratio): self.scales = None self.map_levels = None - def convert_to_roi_format(self, boxes): - # type: (List[Tensor]) -> Tensor + def convert_to_roi_format(self, boxes: List[Tensor]) -> Tensor: concat_boxes = torch.cat(boxes, dim=0) device, dtype = concat_boxes.device, concat_boxes.dtype ids = torch.cat( @@ -131,8 +144,7 @@ def convert_to_roi_format(self, boxes): rois = torch.cat([ids, concat_boxes], dim=1) return rois - def infer_scale(self, feature, original_size): - # type: (Tensor, List[int]) -> float + def infer_scale(self, feature: Tensor, original_size: List[int]) -> float: # assumption: the scale is of the form 2 ** (-k), with k integer size = feature.shape[-2:] possible_scales = torch.jit.annotate(List[float], []) @@ -143,8 +155,11 @@ def infer_scale(self, feature, original_size): assert possible_scales[0] == possible_scales[1] return possible_scales[0] - def setup_scales(self, features, image_shapes): - # type: (List[Tensor], List[Tuple[int, int]]) -> None + def setup_scales( + self, + features: List[Tensor], + image_shapes: List[Tuple[int, int]], + ) -> None: assert len(image_shapes) != 0 max_x = 0 max_y = 0 @@ -161,8 +176,12 @@ def setup_scales(self, features, image_shapes): self.scales = scales self.map_levels = initLevelMapper(int(lvl_min), int(lvl_max)) - def forward(self, x, boxes, image_shapes): - # type: (Dict[str, Tensor], List[Tensor], List[Tuple[int, int]]) -> Tensor + def forward( + self, + x: Dict[str, Tensor], + boxes: List[Tensor], + image_shapes: List[Tuple[int, int]], + ) -> Tensor: """ Arguments: x (OrderedDict[Tensor]): feature maps for each level. 
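# A minimal sketch of how the typed MultiScaleRoIAlign above is called; the
# feature names, shapes, and boxes are arbitrary example values. The scales are
# inferred from the ratio between each feature map and the original image size.
import torch
from torchvision.ops import MultiScaleRoIAlign

pooler = MultiScaleRoIAlign(featmap_names=["feat1", "feat2"], output_size=7, sampling_ratio=2)
features = {
    "feat1": torch.rand(1, 5, 64, 64),   # stride 4 relative to a 256x256 image
    "feat2": torch.rand(1, 5, 32, 32),   # stride 8
}
boxes = [torch.tensor([[10.0, 10.0, 120.0, 150.0], [30.0, 40.0, 60.0, 70.0]])]
image_shapes = [(256, 256)]
pooled = pooler(features, boxes, image_shapes)   # Tensor of shape [2, 5, 7, 7]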
They are assumed to have @@ -224,7 +243,14 @@ def forward(self, x, boxes, image_shapes): if torchvision._is_tracing(): tracing_results.append(result_idx_in_level.to(dtype)) else: - result[idx_in_level] = result_idx_in_level + # result and result_idx_in_level's dtypes are based on dtypes of different + # elements in x_filtered. x_filtered contains tensors output by different + # layers. When autocast is active, it may choose different dtypes for + # different layers' outputs. Therefore, we defensively match result's dtype + # before copying elements from result_idx_in_level in the following op. + # We need to cast manually (can't rely on autocast to cast for us) because + # the op acts on result in-place, and autocast only affects out-of-place ops. + result[idx_in_level] = result_idx_in_level.to(result.dtype) if torchvision._is_tracing(): result = _onnx_merge_levels(levels, tracing_results) diff --git a/torchvision/ops/ps_roi_align.py b/torchvision/ops/ps_roi_align.py index c0c761b72cc..49ee0c21fac 100644 --- a/torchvision/ops/ps_roi_align.py +++ b/torchvision/ops/ps_roi_align.py @@ -2,13 +2,18 @@ from torch import nn, Tensor from torch.nn.modules.utils import _pair -from torch.jit.annotations import List +from torch.jit.annotations import List, Tuple from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape -def ps_roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1): - # type: (Tensor, Tensor, int, float, int) -> Tensor +def ps_roi_align( + input: Tensor, + boxes: Tensor, + output_size: int, + spatial_scale: float = 1.0, + sampling_ratio: int = -1, +) -> Tensor: """ Performs Position-Sensitive Region of Interest (RoI) Align operator mentioned in Light-Head R-CNN. @@ -49,17 +54,22 @@ class PSRoIAlign(nn.Module): """ See ps_roi_align """ - def __init__(self, output_size, spatial_scale, sampling_ratio): + def __init__( + self, + output_size: int, + spatial_scale: float, + sampling_ratio: int, + ): super(PSRoIAlign, self).__init__() self.output_size = output_size self.spatial_scale = spatial_scale self.sampling_ratio = sampling_ratio - def forward(self, input, rois): + def forward(self, input: Tensor, rois: Tensor) -> Tensor: return ps_roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio) - def __repr__(self): + def __repr__(self) -> str: tmpstr = self.__class__.__name__ + '(' tmpstr += 'output_size=' + str(self.output_size) tmpstr += ', spatial_scale=' + str(self.spatial_scale) diff --git a/torchvision/ops/ps_roi_pool.py b/torchvision/ops/ps_roi_pool.py index 710f2cb0195..58c8aa2742a 100644 --- a/torchvision/ops/ps_roi_pool.py +++ b/torchvision/ops/ps_roi_pool.py @@ -2,13 +2,17 @@ from torch import nn, Tensor from torch.nn.modules.utils import _pair -from torch.jit.annotations import List +from torch.jit.annotations import List, Tuple from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape -def ps_roi_pool(input, boxes, output_size, spatial_scale=1.0): - # type: (Tensor, Tensor, int, float) -> Tensor +def ps_roi_pool( + input: Tensor, + boxes: Tensor, + output_size: int, + spatial_scale: float = 1.0, +) -> Tensor: """ Performs Position-Sensitive Region of Interest (RoI) Pool operator described in R-FCN @@ -43,15 +47,15 @@ class PSRoIPool(nn.Module): """ See ps_roi_pool """ - def __init__(self, output_size, spatial_scale): + def __init__(self, output_size: int, spatial_scale: float): super(PSRoIPool, self).__init__() self.output_size = output_size self.spatial_scale = spatial_scale - def forward(self, input, rois): 
+ def forward(self, input: Tensor, rois: Tensor) -> Tensor: return ps_roi_pool(input, rois, self.output_size, self.spatial_scale) - def __repr__(self): + def __repr__(self) -> str: tmpstr = self.__class__.__name__ + '(' tmpstr += 'output_size=' + str(self.output_size) tmpstr += ', spatial_scale=' + str(self.spatial_scale) diff --git a/torchvision/ops/roi_align.py b/torchvision/ops/roi_align.py index 14224d8a83e..444f0d7addb 100644 --- a/torchvision/ops/roi_align.py +++ b/torchvision/ops/roi_align.py @@ -7,8 +7,14 @@ from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape -def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1, aligned=False): - # type: (Tensor, Tensor, BroadcastingList2[int], float, int, bool) -> Tensor +def roi_align( + input: Tensor, + boxes: Tensor, + output_size: BroadcastingList2[int], + spatial_scale: float = 1.0, + sampling_ratio: int = -1, + aligned: bool = False, +) -> Tensor: """ Performs Region of Interest (RoI) Align operator described in Mask R-CNN @@ -49,17 +55,23 @@ class RoIAlign(nn.Module): """ See roi_align """ - def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=False): + def __init__( + self, + output_size: BroadcastingList2[int], + spatial_scale: float, + sampling_ratio: int, + aligned: bool = False, + ): super(RoIAlign, self).__init__() self.output_size = output_size self.spatial_scale = spatial_scale self.sampling_ratio = sampling_ratio self.aligned = aligned - def forward(self, input, rois): + def forward(self, input: Tensor, rois: Tensor) -> Tensor: return roi_align(input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned) - def __repr__(self): + def __repr__(self) -> str: tmpstr = self.__class__.__name__ + '(' tmpstr += 'output_size=' + str(self.output_size) tmpstr += ', spatial_scale=' + str(self.spatial_scale) diff --git a/torchvision/ops/roi_pool.py b/torchvision/ops/roi_pool.py index 10232f16b4a..5a71e90d7e7 100644 --- a/torchvision/ops/roi_pool.py +++ b/torchvision/ops/roi_pool.py @@ -7,8 +7,12 @@ from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape -def roi_pool(input, boxes, output_size, spatial_scale=1.0): - # type: (Tensor, Tensor, BroadcastingList2[int], float) -> Tensor +def roi_pool( + input: Tensor, + boxes: Tensor, + output_size: BroadcastingList2[int], + spatial_scale: float = 1.0, +) -> Tensor: """ Performs Region of Interest (RoI) Pool operator described in Fast R-CNN @@ -41,15 +45,15 @@ class RoIPool(nn.Module): """ See roi_pool """ - def __init__(self, output_size, spatial_scale): + def __init__(self, output_size: BroadcastingList2[int], spatial_scale: float): super(RoIPool, self).__init__() self.output_size = output_size self.spatial_scale = spatial_scale - def forward(self, input, rois): + def forward(self, input: Tensor, rois: Tensor) -> Tensor: return roi_pool(input, rois, self.output_size, self.spatial_scale) - def __repr__(self): + def __repr__(self) -> str: tmpstr = self.__class__.__name__ + '(' tmpstr += 'output_size=' + str(self.output_size) tmpstr += ', spatial_scale=' + str(self.spatial_scale) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 5d8549ea883..f3d1f96089f 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -1,33 +1,44 @@ +import math +import numbers +import warnings +from typing import Any, Optional + +import numpy as np +from PIL import Image + import torch from torch import Tensor -import math -from PIL import 
Image, ImageOps, ImageEnhance, __version__ as PILLOW_VERSION +from torch.jit.annotations import List, Tuple + try: import accimage except ImportError: accimage = None -import numpy as np -from numpy import sin, cos, tan -import numbers -from collections.abc import Sequence, Iterable -import warnings from . import functional_pil as F_pil from . import functional_tensor as F_t -def _is_pil_image(img): - if accimage is not None: - return isinstance(img, (Image.Image, accimage.Image)) - else: - return isinstance(img, Image.Image) +_is_pil_image = F_pil._is_pil_image +_parse_fill = F_pil._parse_fill + + +def _get_image_size(img: Tensor) -> List[int]: + """Returns image sizea as (w, h) + """ + if isinstance(img, torch.Tensor): + return F_t._get_image_size(img) + + return F_pil._get_image_size(img) -def _is_numpy(img): +@torch.jit.unused +def _is_numpy(img: Any) -> bool: return isinstance(img, np.ndarray) -def _is_numpy_image(img): +@torch.jit.unused +def _is_numpy_image(img: Any) -> bool: return img.ndim in {2, 3} @@ -42,7 +53,7 @@ def to_tensor(pic): Returns: Tensor: Converted image. """ - if not(_is_pil_image(pic) or _is_numpy(pic)): + if not(F_pil._is_pil_image(pic) or _is_numpy(pic)): raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic))) if _is_numpy(pic) and not _is_numpy_image(pic): @@ -97,7 +108,7 @@ def pil_to_tensor(pic): Returns: Tensor: Converted image. """ - if not(_is_pil_image(pic)): + if not(F_pil._is_pil_image(pic)): raise TypeError('pic should be PIL Image. Got {}'.format(type(pic))) if accimage is not None and isinstance(pic, accimage.Image): @@ -149,8 +160,14 @@ def convert_image_dtype(image: torch.Tensor, dtype: torch.dtype = torch.float) - msg = f"The cast from {image.dtype} to {dtype} cannot be performed safely." raise RuntimeError(msg) + # https://github.com/pytorch/vision/pull/2078#issuecomment-612045321 + # For data in the range 0-1, (float * 255).to(uint) is only 255 + # when float is exactly 1.0. + # `max + 1 - epsilon` provides more evenly distributed mapping of + # ranges of floats to ints. eps = 1e-3 - return image.mul(torch.iinfo(dtype).max + 1 - eps).to(dtype) + result = image.mul(torch.iinfo(dtype).max + 1 - eps) + return result.to(dtype) else: # int to float if dtype.is_floating_point: @@ -299,41 +316,31 @@ def normalize(tensor, mean, std, inplace=False): return tensor -def resize(img, size, interpolation=Image.BILINEAR): - r"""Resize the input PIL Image to the given size. +def resize(img: Tensor, size: List[int], interpolation: int = Image.BILINEAR) -> Tensor: + r"""Resize the input image to the given size. + The image can be a PIL Image or a torch Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Args: - img (PIL Image): Image to be resized. + img (PIL Image or Tensor): Image to be resized. size (sequence or int): Desired output size. If size is a sequence like (h, w), the output size will be matched to this. If size is an int, the smaller edge of the image will be matched to this number maintaining the aspect ratio. i.e, if height > width, then image will be rescaled to - :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)` - interpolation (int, optional): Desired interpolation. Default is - ``PIL.Image.BILINEAR`` + :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`. + In torchscript mode padding as single int is not supported, use a tuple or + list of length 1: ``[size, ]``. 
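# A short sketch of the PIL/Tensor dispatch this docstring describes, assuming
# the tensor implementation in functional_tensor is available in this build:
# the same functional call accepts either input type.
import torch
from PIL import Image
import torchvision.transforms.functional as F

pil_img = Image.new("RGB", (64, 48))
tensor_img = torch.rand(3, 48, 64)

out_pil = F.resize(pil_img, [32, 32])        # returns a PIL Image of size (32, 32)
out_tensor = F.resize(tensor_img, [32, 32])  # returns a Tensor of shape [3, 32, 32]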
+ interpolation (int, optional): Desired interpolation enum defined by `filters`_. + Default is ``PIL.Image.BILINEAR``. If input is Tensor, only ``PIL.Image.NEAREST``, ``PIL.Image.BILINEAR`` + and ``PIL.Image.BICUBIC`` are supported. Returns: - PIL Image: Resized image. + PIL Image or Tensor: Resized image. """ - if not _is_pil_image(img): - raise TypeError('img should be PIL Image. Got {}'.format(type(img))) - if not (isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)): - raise TypeError('Got inappropriate size arg: {}'.format(size)) + if not isinstance(img, torch.Tensor): + return F_pil.resize(img, size=size, interpolation=interpolation) - if isinstance(size, int): - w, h = img.size - if (w <= h and w == size) or (h <= w and h == size): - return img - if w < h: - ow = size - oh = int(size * h / w) - return img.resize((ow, oh), interpolation) - else: - oh = size - ow = int(size * w / h) - return img.resize((ow, oh), interpolation) - else: - return img.resize(size[::-1], interpolation) + return F_t.resize(img, size=size, interpolation=interpolation) def scale(*args, **kwargs): @@ -342,20 +349,24 @@ def scale(*args, **kwargs): return resize(*args, **kwargs) -def pad(img, padding, fill=0, padding_mode='constant'): - r"""Pad the given PIL Image on all sides with specified padding mode and fill value. +def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor: + r"""Pad the given image on all sides with the given "pad" value. + The image can be a PIL Image or a torch Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Args: - img (PIL Image): Image to be padded. - padding (int or tuple): Padding on each border. If a single int is provided this + img (PIL Image or Tensor): Image to be padded. + padding (int or tuple or list): Padding on each border. If a single int is provided this is used to pad all borders. If tuple of length 2 is provided this is the padding on left/right and top/bottom respectively. If a tuple of length 4 is provided - this is the padding for the left, top, right and bottom borders - respectively. - fill: Pixel fill value for constant fill. Default is 0. If a tuple of + this is the padding for the left, top, right and bottom borders respectively. + In torchscript mode padding as single int is not supported, use a tuple or + list of length 1: ``[padding, ]``. + fill (int or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of length 3, it is used to fill R, G, B channels respectively. - This value is only used when the padding_mode is constant + This value is only used when the padding_mode is constant. Only int value is supported for Tensors. padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. + Mode symmetric is not yet supported for Tensor inputs. - constant: pads with a constant value, this value is specified with fill @@ -372,142 +383,107 @@ def pad(img, padding, fill=0, padding_mode='constant'): will result in [2, 1, 1, 2, 3, 4, 4, 3] Returns: - PIL Image: Padded image. + PIL Image or Tensor: Padded image. """ - if not _is_pil_image(img): - raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) - - if not isinstance(padding, (numbers.Number, tuple)): - raise TypeError('Got inappropriate padding arg') - if not isinstance(fill, (numbers.Number, str, tuple)): - raise TypeError('Got inappropriate fill arg') - if not isinstance(padding_mode, str): - raise TypeError('Got inappropriate padding_mode arg') - - if isinstance(padding, Sequence) and len(padding) not in [2, 4]: - raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " + - "{} element tuple".format(len(padding))) - - assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'], \ - 'Padding mode should be either constant, edge, reflect or symmetric' - - if padding_mode == 'constant': - if isinstance(fill, numbers.Number): - fill = (fill,) * len(img.getbands()) - if len(fill) != len(img.getbands()): - raise ValueError('fill should have the same number of elements ' - 'as the number of channels in the image ' - '({}), got {} instead'.format(len(img.getbands()), len(fill))) - if img.mode == 'P': - palette = img.getpalette() - image = ImageOps.expand(img, border=padding, fill=fill) - image.putpalette(palette) - return image - - return ImageOps.expand(img, border=padding, fill=fill) - else: - if isinstance(padding, int): - pad_left = pad_right = pad_top = pad_bottom = padding - if isinstance(padding, Sequence) and len(padding) == 2: - pad_left = pad_right = padding[0] - pad_top = pad_bottom = padding[1] - if isinstance(padding, Sequence) and len(padding) == 4: - pad_left = padding[0] - pad_top = padding[1] - pad_right = padding[2] - pad_bottom = padding[3] - - if img.mode == 'P': - palette = img.getpalette() - img = np.asarray(img) - img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode) - img = Image.fromarray(img) - img.putpalette(palette) - return img - - img = np.asarray(img) - # RGB image - if len(img.shape) == 3: - img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), padding_mode) - # Grayscale image - if len(img.shape) == 2: - img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode) + if not isinstance(img, torch.Tensor): + return F_pil.pad(img, padding=padding, fill=fill, padding_mode=padding_mode) - return Image.fromarray(img) + return F_t.pad(img, padding=padding, fill=fill, padding_mode=padding_mode) -def crop(img, top, left, height, width): - """Crop the given PIL Image. +def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: + """Crop the given image at specified location and output size. + The image can be a PIL Image or a Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading + dimensions Args: - img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image. + img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. top (int): Vertical component of the top left corner of the crop box. left (int): Horizontal component of the top left corner of the crop box. height (int): Height of the crop box. width (int): Width of the crop box. Returns: - PIL Image: Cropped image. + PIL Image or Tensor: Cropped image. """ - if not _is_pil_image(img): - raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) - return img.crop((left, top, left + width, top + height)) + if not isinstance(img, torch.Tensor): + return F_pil.crop(img, top, left, height, width) + + return F_t.crop(img, top, left, height, width) -def center_crop(img, output_size): - """Crop the given PIL Image and resize it to desired size. +def center_crop(img: Tensor, output_size: List[int]) -> Tensor: + """Crops the given image at the center. + The image can be a PIL Image or a Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Args: - img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image. - output_size (sequence or int): (height, width) of the crop box. If int, - it is used for both directions + img (PIL Image or Tensor): Image to be cropped. + output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int + it is used for both directions. + Returns: - PIL Image: Cropped image. + PIL Image or Tensor: Cropped image. """ if isinstance(output_size, numbers.Number): output_size = (int(output_size), int(output_size)) - image_width, image_height = img.size + elif isinstance(output_size, (tuple, list)) and len(output_size) == 1: + output_size = (output_size[0], output_size[0]) + + image_width, image_height = _get_image_size(img) crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) + + # crop_top = int(round((image_height - crop_height) / 2.)) + # Result can be different between python func and scripted func + # Temporary workaround: + crop_top = int((image_height - crop_height + 1) * 0.5) + # crop_left = int(round((image_width - crop_width) / 2.)) + # Result can be different between python func and scripted func + # Temporary workaround: + crop_left = int((image_width - crop_width + 1) * 0.5) return crop(img, crop_top, crop_left, crop_height, crop_width) -def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINEAR): - """Crop the given PIL Image and resize it to desired size. +def resized_crop( + img: Tensor, top: int, left: int, height: int, width: int, size: List[int], interpolation: int = Image.BILINEAR +) -> Tensor: + """Crop the given image and resize it to desired size. + The image can be a PIL Image or a Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Notably used in :class:`~torchvision.transforms.RandomResizedCrop`. Args: - img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image. + img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. top (int): Vertical component of the top left corner of the crop box. left (int): Horizontal component of the top left corner of the crop box. height (int): Height of the crop box. width (int): Width of the crop box. size (sequence or int): Desired output size. Same semantics as ``resize``. - interpolation (int, optional): Desired interpolation. Default is - ``PIL.Image.BILINEAR``. + interpolation (int, optional): Desired interpolation enum defined by `filters`_. + Default is ``PIL.Image.BILINEAR``. If input is Tensor, only ``PIL.Image.NEAREST``, ``PIL.Image.BILINEAR`` + and ``PIL.Image.BICUBIC`` are supported. Returns: - PIL Image: Cropped image. + PIL Image or Tensor: Cropped image. 
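# A small numeric check of the rounding workaround used in center_crop above,
# with made-up sizes. For odd differences Python's round() applies banker's
# rounding, which can disagree with the scripted version, so the patch switches
# to an explicit formula that behaves the same in eager and scripted code.
image_height, crop_height = 10, 5
old_top = int(round((image_height - crop_height) / 2.))   # round(2.5) == 2 in Python 3
new_top = int((image_height - crop_height + 1) * 0.5)     # == 3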
""" - assert _is_pil_image(img), 'img should be PIL Image' img = crop(img, top, left, height, width) img = resize(img, size, interpolation) return img def hflip(img: Tensor) -> Tensor: - """Horizontally flip the given PIL Image or torch Tensor. + """Horizontally flip the given PIL Image or Tensor. Args: - img (PIL Image or Torch Tensor): Image to be flipped. If img + img (PIL Image or Tensor): Image to be flipped. If img is a Tensor, it is expected to be in [..., H, W] format, where ... means it can have an arbitrary number of trailing dimensions. Returns: - PIL Image: Horizontally flipped image. + PIL Image or Tensor: Horizontally flipped image. """ if not isinstance(img, torch.Tensor): return F_pil.hflip(img) @@ -515,43 +491,6 @@ def hflip(img: Tensor) -> Tensor: return F_t.hflip(img) -def _parse_fill(fill, img, min_pil_version): - """Helper function to get the fill color for rotate and perspective transforms. - - Args: - fill (n-tuple or int or float): Pixel fill value for area outside the transformed - image. If int or float, the value is used for all bands respectively. - Defaults to 0 for all bands. - img (PIL Image): Image to be filled. - min_pil_version (str): The minimum PILLOW version for when the ``fillcolor`` option - was first introduced in the calling function. (e.g. rotate->5.2.0, perspective->5.0.0) - - Returns: - dict: kwarg for ``fillcolor`` - """ - major_found, minor_found = (int(v) for v in PILLOW_VERSION.split('.')[:2]) - major_required, minor_required = (int(v) for v in min_pil_version.split('.')[:2]) - if major_found < major_required or (major_found == major_required and minor_found < minor_required): - if fill is None: - return {} - else: - msg = ("The option to fill background area of the transformed image, " - "requires pillow>={}") - raise RuntimeError(msg.format(min_pil_version)) - - num_bands = len(img.getbands()) - if fill is None: - fill = 0 - if isinstance(fill, (int, float)) and num_bands > 1: - fill = tuple([fill] * num_bands) - if not isinstance(fill, (int, float)) and len(fill) != num_bands: - msg = ("The number of elements in 'fill' does not match the number of " - "bands of the image ({} != {})") - raise ValueError(msg.format(len(fill), num_bands)) - - return {"fillcolor": fill} - - def _get_perspective_coeffs(startpoints, endpoints): """Helper function to get the coefficients (a, b, c, d, e, f, g, h) for the perspective transforms. @@ -560,8 +499,7 @@ def _get_perspective_coeffs(startpoints, endpoints): Args: List containing [top-left, top-right, bottom-right, bottom-left] of the original image, - List containing [top-left, top-right, bottom-right, bottom-left] of the transformed - image + List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image Returns: octuple (a, b, c, d, e, f, g, h) for transforming each pixel. """ @@ -593,7 +531,7 @@ def perspective(img, startpoints, endpoints, interpolation=Image.BICUBIC, fill=N PIL Image: Perspectively transformed Image. """ - if not _is_pil_image(img): + if not F_pil._is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) opts = _parse_fill(fill, img, '5.0.0') @@ -606,7 +544,7 @@ def vflip(img: Tensor) -> Tensor: """Vertically flip the given PIL Image or torch Tensor. Args: - img (PIL Image or Torch Tensor): Image to be flipped. If img + img (PIL Image or Tensor): Image to be flipped. If img is a Tensor, it is expected to be in [..., H, W] format, where ... means it can have an arbitrary number of trailing dimensions. 
@@ -620,17 +558,20 @@ def vflip(img: Tensor) -> Tensor: return F_t.vflip(img) -def five_crop(img, size): - """Crop the given PIL Image into four corners and the central crop. +def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + """Crop the given image into four corners and the central crop. + The image can be a PIL Image or a Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions .. Note:: This transform returns a tuple of images and there may be a mismatch in the number of inputs and targets your ``Dataset`` returns. Args: - size (sequence or int): Desired output size of the crop. If size is an - int instead of sequence like (h, w), a square crop (size, size) is - made. + img (PIL Image or Tensor): Image to be cropped. + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). Returns: tuple: tuple (tl, tr, bl, br, center) @@ -638,37 +579,44 @@ def five_crop(img, size): """ if isinstance(size, numbers.Number): size = (int(size), int(size)) - else: - assert len(size) == 2, "Please provide only two dimensions (h, w) for size." + elif isinstance(size, (tuple, list)) and len(size) == 1: + size = (size[0], size[0]) + + if len(size) != 2: + raise ValueError("Please provide only two dimensions (h, w) for size.") - image_width, image_height = img.size + image_width, image_height = _get_image_size(img) crop_height, crop_width = size if crop_width > image_width or crop_height > image_height: msg = "Requested crop size {} is bigger than input size {}" raise ValueError(msg.format(size, (image_height, image_width))) - tl = img.crop((0, 0, crop_width, crop_height)) - tr = img.crop((image_width - crop_width, 0, image_width, crop_height)) - bl = img.crop((0, image_height - crop_height, crop_width, image_height)) - br = img.crop((image_width - crop_width, image_height - crop_height, - image_width, image_height)) - center = center_crop(img, (crop_height, crop_width)) - return (tl, tr, bl, br, center) + tl = crop(img, 0, 0, crop_height, crop_width) + tr = crop(img, 0, image_width - crop_width, crop_height, crop_width) + bl = crop(img, image_height - crop_height, 0, crop_height, crop_width) + br = crop(img, image_height - crop_height, image_width - crop_width, crop_height, crop_width) + center = center_crop(img, [crop_height, crop_width]) -def ten_crop(img, size, vertical_flip=False): - """Generate ten cropped images from the given PIL Image. - Crop the given PIL Image into four corners and the central crop plus the + return tl, tr, bl, br, center + + +def ten_crop(img: Tensor, size: List[int], vertical_flip: bool = False) -> List[Tensor]: + """Generate ten cropped images from the given image. + Crop the given image into four corners and the central crop plus the flipped version of these (horizontal flipping is used by default). + The image can be a PIL Image or a Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions .. Note:: This transform returns a tuple of images and there may be a mismatch in the number of inputs and targets your ``Dataset`` returns. Args: + img (PIL Image or Tensor): Image to be cropped. size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is - made. 
+ made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). vertical_flip (bool): Use vertical flipping instead of horizontal Returns: @@ -678,8 +626,11 @@ def ten_crop(img, size, vertical_flip=False): """ if isinstance(size, numbers.Number): size = (int(size), int(size)) - else: - assert len(size) == 2, "Please provide only two dimensions (h, w) for size." + elif isinstance(size, (tuple, list)) and len(size) == 1: + size = (size[0], size[0]) + + if len(size) != 2: + raise ValueError("Please provide only two dimensions (h, w) for size.") first_five = five_crop(img, size) @@ -696,13 +647,13 @@ def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: """Adjust brightness of an Image. Args: - img (PIL Image or Torch Tensor): Image to be adjusted. + img (PIL Image or Tensor): Image to be adjusted. brightness_factor (float): How much to adjust the brightness. Can be any non negative number. 0 gives a black image, 1 gives the original image while 2 increases the brightness by a factor of 2. Returns: - PIL Image or Torch Tensor: Brightness adjusted image. + PIL Image or Tensor: Brightness adjusted image. """ if not isinstance(img, torch.Tensor): return F_pil.adjust_brightness(img, brightness_factor) @@ -714,13 +665,13 @@ def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: """Adjust contrast of an Image. Args: - img (PIL Image or Torch Tensor): Image to be adjusted. + img (PIL Image or Tensor): Image to be adjusted. contrast_factor (float): How much to adjust the contrast. Can be any non negative number. 0 gives a solid gray image, 1 gives the original image while 2 increases the contrast by a factor of 2. Returns: - PIL Image or Torch Tensor: Contrast adjusted image. + PIL Image or Tensor: Contrast adjusted image. """ if not isinstance(img, torch.Tensor): return F_pil.adjust_contrast(img, contrast_factor) @@ -732,13 +683,13 @@ def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: """Adjust color saturation of an image. Args: - img (PIL Image or Torch Tensor): Image to be adjusted. + img (PIL Image or Tensor): Image to be adjusted. saturation_factor (float): How much to adjust the saturation. 0 will give a black and white image, 1 will give the original image while 2 will enhance the saturation by a factor of 2. Returns: - PIL Image or Torch Tensor: Saturation adjusted image. + PIL Image or Tensor: Saturation adjusted image. """ if not isinstance(img, torch.Tensor): return F_pil.adjust_saturation(img, saturation_factor) @@ -777,7 +728,7 @@ def adjust_hue(img: Tensor, hue_factor: float) -> Tensor: raise TypeError('img should be PIL Image. Got {}'.format(type(img))) -def adjust_gamma(img, gamma, gain=1): +def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor: r"""Perform gamma correction on an image. Also known as Power Law Transform. Intensities in RGB mode are adjusted @@ -791,26 +742,18 @@ def adjust_gamma(img, gamma, gain=1): .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction Args: - img (PIL Image): PIL Image to be adjusted. + img (PIL Image or Tensor): PIL Image to be adjusted. gamma (float): Non negative real number, same as :math:`\gamma` in the equation. gamma larger than 1 make the shadows darker, while gamma smaller than 1 make dark regions lighter. gain (float): The constant multiplier. + Returns: + PIL Image or Tensor: Gamma correction adjusted image. """ - if not _is_pil_image(img): - raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) - - if gamma < 0: - raise ValueError('Gamma should be a non-negative real number') - - input_mode = img.mode - img = img.convert('RGB') - - gamma_map = [255 * gain * pow(ele / 255., gamma) for ele in range(256)] * 3 - img = img.point(gamma_map) # use PIL's point-function to accelerate this part + if not isinstance(img, torch.Tensor): + return F_pil.adjust_gamma(img, gamma, gain) - img = img.convert(input_mode) - return img + return F_t.adjust_gamma(img, gamma, gain) def rotate(img, angle, resample=False, expand=False, center=None, fill=None): @@ -837,7 +780,7 @@ def rotate(img, angle, resample=False, expand=False, center=None, fill=None): .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters """ - if not _is_pil_image(img): + if not F_pil._is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) opts = _parse_fill(fill, img, '5.2.0') @@ -845,7 +788,9 @@ def rotate(img, angle, resample=False, expand=False, center=None, fill=None): return img.rotate(angle, resample, expand, center, **opts) -def _get_inverse_affine_matrix(center, angle, translate, scale, shear): +def _get_inverse_affine_matrix( + center: List[int], angle: float, translate: List[float], scale: float, shear: List[float] +) -> List[float]: # Helper method to compute inverse matrix for affine transformation # As it is explained in PIL.Image.rotate @@ -865,14 +810,6 @@ def _get_inverse_affine_matrix(center, angle, translate, scale, shear): # # Thus, the inverse is M^-1 = C * RSS^-1 * C^-1 * T^-1 - if isinstance(shear, numbers.Number): - shear = [shear, 0] - - if not isinstance(shear, (tuple, list)) and len(shear) == 2: - raise ValueError( - "Shear should be a single value or a tuple/list containing " + - "two values. Got {}".format(shear)) - rot = math.radians(angle) sx, sy = [math.radians(s) for s in shear] @@ -880,57 +817,100 @@ def _get_inverse_affine_matrix(center, angle, translate, scale, shear): tx, ty = translate # RSS without scaling - a = cos(rot - sy) / cos(sy) - b = -cos(rot - sy) * tan(sx) / cos(sy) - sin(rot) - c = sin(rot - sy) / cos(sy) - d = -sin(rot - sy) * tan(sx) / cos(sy) + cos(rot) + a = math.cos(rot - sy) / math.cos(sy) + b = -math.cos(rot - sy) * math.tan(sx) / math.cos(sy) - math.sin(rot) + c = math.sin(rot - sy) / math.cos(sy) + d = -math.sin(rot - sy) * math.tan(sx) / math.cos(sy) + math.cos(rot) # Inverted rotation matrix with scale and shear # det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1 - M = [d, -b, 0, - -c, a, 0] - M = [x / scale for x in M] + matrix = [d, -b, 0.0, -c, a, 0.0] + matrix = [x / scale for x in matrix] # Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1 - M[2] += M[0] * (-cx - tx) + M[1] * (-cy - ty) - M[5] += M[3] * (-cx - tx) + M[4] * (-cy - ty) + matrix[2] += matrix[0] * (-cx - tx) + matrix[1] * (-cy - ty) + matrix[5] += matrix[3] * (-cx - tx) + matrix[4] * (-cy - ty) # Apply center translation: C * RSS^-1 * C^-1 * T^-1 - M[2] += cx - M[5] += cy - return M + matrix[2] += cx + matrix[5] += cy + return matrix -def affine(img, angle, translate, scale, shear, resample=0, fillcolor=None): - """Apply affine transformation on the image keeping image center invariant + +def affine( + img: Tensor, angle: float, translate: List[int], scale: float, shear: List[float], + resample: int = 0, fillcolor: Optional[int] = None +) -> Tensor: + """Apply affine transformation on the image keeping image center invariant. 
+ The image can be a PIL Image or a Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions. Args: - img (PIL Image): PIL Image to be rotated. + img (PIL Image or Tensor): image to be rotated. angle (float or int): rotation angle in degrees between -180 and 180, clockwise direction. translate (list or tuple of integers): horizontal and vertical translations (post-rotation translation) scale (float): overall scale shear (float or tuple or list): shear angle value in degrees between -180 to 180, clockwise direction. - If a tuple of list is specified, the first value corresponds to a shear parallel to the x axis, while - the second value corresponds to a shear parallel to the y axis. + If a tuple of list is specified, the first value corresponds to a shear parallel to the x axis, while + the second value corresponds to a shear parallel to the y axis. resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional): - An optional resampling filter. - See `filters`_ for more information. - If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``. + An optional resampling filter. See `filters`_ for more information. + If omitted, or if the image is PIL Image and has mode "1" or "P", it is set to ``PIL.Image.NEAREST``. + If input is Tensor, only ``PIL.Image.NEAREST`` and ``PIL.Image.BILINEAR`` are supported. fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0) + + Returns: + PIL Image or Tensor: Transformed image. """ - if not _is_pil_image(img): - raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + if not isinstance(angle, (int, float)): + raise TypeError("Argument angle should be int or float") + + if not isinstance(translate, (list, tuple)): + raise TypeError("Argument translate should be a sequence") + + if len(translate) != 2: + raise ValueError("Argument translate should be a sequence of length 2") + + if scale <= 0.0: + raise ValueError("Argument scale should be positive") + + if not isinstance(shear, (numbers.Number, (list, tuple))): + raise TypeError("Shear should be either a single value or a sequence of two values") + + if isinstance(angle, int): + angle = float(angle) + + if isinstance(translate, tuple): + translate = list(translate) + + if isinstance(shear, numbers.Number): + shear = [shear, 0.0] + + if isinstance(shear, tuple): + shear = list(shear) + + if len(shear) == 1: + shear = [shear[0], shear[0]] + + if len(shear) != 2: + raise ValueError("Shear should be a sequence containing two values. 
Got {}".format(shear)) + + img_size = _get_image_size(img) + if not isinstance(img, torch.Tensor): + # center = (img_size[0] * 0.5 + 0.5, img_size[1] * 0.5 + 0.5) + # it is visually better to estimate the center without 0.5 offset + # otherwise image rotated by 90 degrees is shifted vs output image of torch.rot90 or F_t.affine + center = [img_size[0] * 0.5, img_size[1] * 0.5] + matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear) - assert isinstance(translate, (tuple, list)) and len(translate) == 2, \ - "Argument translate should be a list or tuple of length 2" + return F_pil.affine(img, matrix=matrix, resample=resample, fillcolor=fillcolor) - assert scale > 0.0, "Argument scale should be positive" + # we need to rescale translate by image size / 2 as its values can be between -1 and 1 + translate = [2.0 * t / s for s, t in zip(img_size, translate)] - output_size = img.size - center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5) - matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear) - kwargs = {"fillcolor": fillcolor} if int(PILLOW_VERSION.split('.')[0]) >= 5 else {} - return img.transform(output_size, Image.AFFINE, matrix, resample, **kwargs) + matrix = _get_inverse_affine_matrix([0, 0], angle, translate, scale, shear) + return F_t.affine(img, matrix=matrix, resample=resample, fillcolor=fillcolor) def to_grayscale(img, num_output_channels=1): @@ -945,7 +925,7 @@ def to_grayscale(img, num_output_channels=1): if num_output_channels = 3 : returned image is 3 channel with r = g = b """ - if not _is_pil_image(img): + if not F_pil._is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) if num_output_channels == 1: @@ -961,7 +941,7 @@ def to_grayscale(img, num_output_channels=1): return img -def erase(img, i, j, h, w, v, inplace=False): +def erase(img: Tensor, i: int, j: int, h: int, w: int, v: Tensor, inplace: bool = False) -> Tensor: """ Erase the input Tensor Image with given value. Args: diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py index 84e27e79040..f66c1a35bd7 100644 --- a/torchvision/transforms/functional_pil.py +++ b/torchvision/transforms/functional_pil.py @@ -1,20 +1,31 @@ +import numbers +from typing import Any, List, Sequence + +import numpy as np import torch +from PIL import Image, ImageOps, ImageEnhance, __version__ as PILLOW_VERSION + try: import accimage except ImportError: accimage = None -from PIL import Image, ImageOps, ImageEnhance, __version__ as PILLOW_VERSION -import numpy as np @torch.jit.unused -def _is_pil_image(img): +def _is_pil_image(img: Any) -> bool: if accimage is not None: return isinstance(img, (Image.Image, accimage.Image)) else: return isinstance(img, Image.Image) +@torch.jit.unused +def _get_image_size(img: Any) -> List[int]: + if _is_pil_image(img): + return img.size + raise TypeError("Unexpected type {}".format(type(img))) + + @torch.jit.unused def hflip(img): """Horizontally flip the given PIL Image. @@ -152,3 +163,266 @@ def adjust_hue(img, hue_factor): img = Image.merge('HSV', (h, s, v)).convert(input_mode) return img + + +@torch.jit.unused +def adjust_gamma(img, gamma, gain=1): + r"""Perform gamma correction on an image. + + Also known as Power Law Transform. Intensities in RGB mode are adjusted + based on the following equation: + + .. math:: + I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma} + + See `Gamma Correction`_ for more details. + + .. 
_Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction + + Args: + img (PIL Image): PIL Image to be adjusted. + gamma (float): Non negative real number, same as :math:`\gamma` in the equation. + gamma larger than 1 make the shadows darker, + while gamma smaller than 1 make dark regions lighter. + gain (float): The constant multiplier. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + if gamma < 0: + raise ValueError('Gamma should be a non-negative real number') + + input_mode = img.mode + img = img.convert('RGB') + gamma_map = [(255 + 1 - 1e-3) * gain * pow(ele / 255., gamma) for ele in range(256)] * 3 + img = img.point(gamma_map) # use PIL's point-function to accelerate this part + + img = img.convert(input_mode) + return img + + +@torch.jit.unused +def pad(img, padding, fill=0, padding_mode="constant"): + r"""Pad the given PIL.Image on all sides with the given "pad" value. + + Args: + img (PIL Image): Image to be padded. + padding (int or tuple or list): Padding on each border. If a single int is provided this + is used to pad all borders. If a tuple or list of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a tuple or list of length 4 is provided + this is the padding for the left, top, right and bottom borders respectively. For compatibility reasons + with ``functional_tensor.pad``, if a tuple or list of length 1 is provided, it is interpreted as + a single int. + fill (int or str or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of + length 3, it is used to fill R, G, B channels respectively. + This value is only used when the padding_mode is constant. + padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. + + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value on the edge of the image + + - reflect: pads with reflection of image (without repeating the last value on the edge) + + padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image (repeating the last value on the edge) + + padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + + Returns: + PIL Image: Padded image. + """ + + if not _is_pil_image(img): + raise TypeError("img should be PIL Image. 
Got {}".format(type(img))) + + if not isinstance(padding, (numbers.Number, tuple, list)): + raise TypeError("Got inappropriate padding arg") + if not isinstance(fill, (numbers.Number, str, tuple)): + raise TypeError("Got inappropriate fill arg") + if not isinstance(padding_mode, str): + raise TypeError("Got inappropriate padding_mode arg") + + if isinstance(padding, list): + padding = tuple(padding) + + if isinstance(padding, tuple) and len(padding) not in [1, 2, 4]: + raise ValueError("Padding must be an int or a 1, 2, or 4 element tuple, not a " + + "{} element tuple".format(len(padding))) + + if isinstance(padding, tuple) and len(padding) == 1: + # Compatibility with `functional_tensor.pad` + padding = padding[0] + + if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: + raise ValueError("Padding mode should be either constant, edge, reflect or symmetric") + + if padding_mode == "constant": + if isinstance(fill, numbers.Number): + fill = (fill,) * len(img.getbands()) + if len(fill) != len(img.getbands()): + raise ValueError("fill should have the same number of elements " + "as the number of channels in the image " + "({}), got {} instead".format(len(img.getbands()), len(fill))) + if img.mode == "P": + palette = img.getpalette() + image = ImageOps.expand(img, border=padding, fill=fill) + image.putpalette(palette) + return image + + return ImageOps.expand(img, border=padding, fill=fill) + else: + if isinstance(padding, int): + pad_left = pad_right = pad_top = pad_bottom = padding + if isinstance(padding, tuple) and len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + if isinstance(padding, tuple) and len(padding) == 4: + pad_left = padding[0] + pad_top = padding[1] + pad_right = padding[2] + pad_bottom = padding[3] + + if img.mode == 'P': + palette = img.getpalette() + img = np.asarray(img) + img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode) + img = Image.fromarray(img) + img.putpalette(palette) + return img + + img = np.asarray(img) + # RGB image + if len(img.shape) == 3: + img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)), padding_mode) + # Grayscale image + if len(img.shape) == 2: + img = np.pad(img, ((pad_top, pad_bottom), (pad_left, pad_right)), padding_mode) + + return Image.fromarray(img) + + +@torch.jit.unused +def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Image.Image: + """Crop the given PIL Image. + + Args: + img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image. + top (int): Vertical component of the top left corner of the crop box. + left (int): Horizontal component of the top left corner of the crop box. + height (int): Height of the crop box. + width (int): Width of the crop box. + + Returns: + PIL Image: Cropped image. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + + return img.crop((left, top, left + width, top + height)) + + +@torch.jit.unused +def resize(img, size, interpolation=Image.BILINEAR): + r"""Resize the input PIL Image to the given size. + + Args: + img (PIL Image): Image to be resized. + size (sequence or int): Desired output size. If size is a sequence like + (h, w), the output size will be matched to this. If size is an int, + the smaller edge of the image will be matched to this number maintaining + the aspect ratio. 
i.e, if height > width, then image will be rescaled to + :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`. + For compatibility reasons with ``functional_tensor.resize``, if a tuple or list of length 1 is provided, + it is interpreted as a single int. + interpolation (int, optional): Desired interpolation. Default is ``PIL.Image.BILINEAR``. + + Returns: + PIL Image: Resized image. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. Got {}'.format(type(img))) + if not (isinstance(size, int) or (isinstance(size, Sequence) and len(size) in (1, 2))): + raise TypeError('Got inappropriate size arg: {}'.format(size)) + + if isinstance(size, int) or len(size) == 1: + if isinstance(size, Sequence): + size = size[0] + w, h = img.size + if (w <= h and w == size) or (h <= w and h == size): + return img + if w < h: + ow = size + oh = int(size * h / w) + return img.resize((ow, oh), interpolation) + else: + oh = size + ow = int(size * w / h) + return img.resize((ow, oh), interpolation) + else: + return img.resize(size[::-1], interpolation) + + +@torch.jit.unused +def _parse_fill(fill, img, min_pil_version): + """Helper function to get the fill color for rotate and perspective transforms. + + Args: + fill (n-tuple or int or float): Pixel fill value for area outside the transformed + image. If int or float, the value is used for all bands respectively. + Defaults to 0 for all bands. + img (PIL Image): Image to be filled. + min_pil_version (str): The minimum PILLOW version for when the ``fillcolor`` option + was first introduced in the calling function. (e.g. rotate->5.2.0, perspective->5.0.0) + + Returns: + dict: kwarg for ``fillcolor`` + """ + major_found, minor_found = (int(v) for v in PILLOW_VERSION.split('.')[:2]) + major_required, minor_required = (int(v) for v in min_pil_version.split('.')[:2]) + if major_found < major_required or (major_found == major_required and minor_found < minor_required): + if fill is None: + return {} + else: + msg = ("The option to fill background area of the transformed image, " + "requires pillow>={}") + raise RuntimeError(msg.format(min_pil_version)) + + num_bands = len(img.getbands()) + if fill is None: + fill = 0 + if isinstance(fill, (int, float)) and num_bands > 1: + fill = tuple([fill] * num_bands) + if not isinstance(fill, (int, float)) and len(fill) != num_bands: + msg = ("The number of elements in 'fill' does not match the number of " + "bands of the image ({} != {})") + raise ValueError(msg.format(len(fill), num_bands)) + + return {"fillcolor": fill} + + +@torch.jit.unused +def affine(img, matrix, resample=0, fillcolor=None): + """Apply affine transformation on the PIL Image keeping image center invariant. + + Args: + img (PIL Image): image to be rotated. + matrix (list of floats): list of 6 float values representing inverse matrix for affine transformation. + resample (``PIL.Image.NEAREST`` or ``PIL.Image.BILINEAR`` or ``PIL.Image.BICUBIC``, optional): + An optional resampling filter. + See `filters`_ for more information. + If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``. + fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0) + + Returns: + PIL Image: Transformed image. + """ + if not _is_pil_image(img): + raise TypeError('img should be PIL Image. 
Got {}'.format(type(img))) + + output_size = img.size + opts = _parse_fill(fillcolor, img, '5.0.0') + return img.transform(output_size, Image.AFFINE, matrix, resample, **opts) diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 89440701d17..f2e47b056d3 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -1,14 +1,24 @@ +import warnings +from typing import Optional + import torch from torch import Tensor -from torch.jit.annotations import Optional, List, BroadcastingList2, Tuple +from torch.nn.functional import affine_grid, grid_sample +from torch.jit.annotations import List, BroadcastingList2 + +def _is_tensor_a_torch_image(x: Tensor) -> bool: + return x.ndim >= 2 -def _is_tensor_a_torch_image(input): - return input.ndim >= 2 +def _get_image_size(img: Tensor) -> List[int]: + """Returns (w, h) of tensor image""" + if _is_tensor_a_torch_image(img): + return [img.shape[-1], img.shape[-2]] + raise TypeError("Unexpected type {}".format(type(img))) -def vflip(img): - # type: (Tensor) -> Tensor + +def vflip(img: Tensor) -> Tensor: """Vertically flip the given the Image Tensor. Args: @@ -23,8 +33,7 @@ def vflip(img): return img.flip(-2) -def hflip(img): - # type: (Tensor) -> Tensor +def hflip(img: Tensor) -> Tensor: """Horizontally flip the given the Image Tensor. Args: @@ -39,12 +48,11 @@ def hflip(img): return img.flip(-1) -def crop(img, top, left, height, width): - # type: (Tensor, int, int, int, int) -> Tensor +def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor: """Crop the given Image Tensor. Args: - img (Tensor): Image to be cropped in the form [C, H, W]. (0,0) denotes the top left corner of the image. + img (Tensor): Image to be cropped in the form [..., H, W]. (0,0) denotes the top left corner of the image. top (int): Vertical component of the top left corner of the crop box. left (int): Horizontal component of the top left corner of the crop box. height (int): Height of the crop box. @@ -54,13 +62,12 @@ def crop(img, top, left, height, width): Tensor: Cropped image. """ if not _is_tensor_a_torch_image(img): - raise TypeError('tensor is not a torch image.') + raise TypeError("tensor is not a torch image.") return img[..., top:top + height, left:left + width] -def rgb_to_grayscale(img): - # type: (Tensor) -> Tensor +def rgb_to_grayscale(img: Tensor) -> Tensor: """Convert the given RGB Image Tensor to Grayscale. For RGB to Grayscale conversion, ITU-R 601-2 luma transform is performed which is L = R * 0.2989 + G * 0.5870 + B * 0.1140 @@ -78,8 +85,7 @@ def rgb_to_grayscale(img): return (0.2989 * img[0] + 0.5870 * img[1] + 0.1140 * img[2]).to(img.dtype) -def adjust_brightness(img, brightness_factor): - # type: (Tensor, float) -> Tensor +def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor: """Adjust brightness of an RGB image. Args: @@ -91,14 +97,16 @@ def adjust_brightness(img, brightness_factor): Returns: Tensor: Brightness adjusted image. """ + if brightness_factor < 0: + raise ValueError('brightness_factor ({}) is not non-negative.'.format(brightness_factor)) + if not _is_tensor_a_torch_image(img): raise TypeError('tensor is not a torch image.') return _blend(img, torch.zeros_like(img), brightness_factor) -def adjust_contrast(img, contrast_factor): - # type: (Tensor, float) -> Tensor +def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor: """Adjust contrast of an RGB image. 
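# A short numeric sketch of the blend that backs adjust_brightness (and, with a
# different second image, adjust_contrast / adjust_saturation): the result is
# ratio * img1 + (1 - ratio) * img2, clamped to the dtype's value range.
import torch

img = torch.full((3, 2, 2), 0.5)
factor = 2.0
out = (factor * img + (1 - factor) * torch.zeros_like(img)).clamp(0, 1)
# Same combination adjust_brightness applies with a zero image: every value is 1.0.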
Args: @@ -110,6 +118,9 @@ def adjust_contrast(img, contrast_factor): Returns: Tensor: Contrast adjusted image. """ + if contrast_factor < 0: + raise ValueError('contrast_factor ({}) is not non-negative.'.format(contrast_factor)) + if not _is_tensor_a_torch_image(img): raise TypeError('tensor is not a torch image.') @@ -143,7 +154,7 @@ def adjust_hue(img, hue_factor): Returns: Tensor: Hue adjusted image. """ - if not(-0.5 <= hue_factor <= 0.5): + if not (-0.5 <= hue_factor <= 0.5): raise ValueError('hue_factor ({}) is not in [-0.5, 0.5].'.format(hue_factor)) if not _is_tensor_a_torch_image(img): @@ -166,31 +177,73 @@ def adjust_hue(img, hue_factor): return img_hue_adj -def adjust_saturation(img, saturation_factor): - # type: (Tensor, float) -> Tensor +def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor: """Adjust color saturation of an RGB image. Args: img (Tensor): Image to be adjusted. - saturation_factor (float): How much to adjust the saturation. 0 will - give a black and white image, 1 will give the original image while - 2 will enhance the saturation by a factor of 2. + saturation_factor (float): How much to adjust the saturation. Can be any + non negative number. 0 gives a black and white image, 1 gives the + original image while 2 enhances the saturation by a factor of 2. Returns: Tensor: Saturation adjusted image. """ + if saturation_factor < 0: + raise ValueError('saturation_factor ({}) is not non-negative.'.format(saturation_factor)) + if not _is_tensor_a_torch_image(img): raise TypeError('tensor is not a torch image.') return _blend(img, rgb_to_grayscale(img), saturation_factor) -def center_crop(img, output_size): - # type: (Tensor, BroadcastingList2[int]) -> Tensor +def adjust_gamma(img: Tensor, gamma: float, gain: float = 1) -> Tensor: + r"""Adjust gamma of an RGB image. + + Also known as Power Law Transform. Intensities in RGB mode are adjusted + based on the following equation: + + .. math:: + `I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}` + + See `Gamma Correction`_ for more details. + + .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction + + Args: + img (Tensor): Tensor of RBG values to be adjusted. + gamma (float): Non negative real number, same as :math:`\gamma` in the equation. + gamma larger than 1 make the shadows darker, + while gamma smaller than 1 make dark regions lighter. + gain (float): The constant multiplier. + """ + + if not isinstance(img, torch.Tensor): + raise TypeError('img should be a Tensor. Got {}'.format(type(img))) + + if gamma < 0: + raise ValueError('Gamma should be a non-negative real number') + + result = img + dtype = img.dtype + if not torch.is_floating_point(img): + result = result / 255.0 + + result = (gain * result ** gamma).clamp(0, 1) + + if result.dtype != dtype: + eps = 1e-3 + result = (255 + 1.0 - eps) * result + result = result.to(dtype) + return result + + +def center_crop(img: Tensor, output_size: BroadcastingList2[int]) -> Tensor: """Crop the Image Tensor and resize it to desired size. Args: - img (Tensor): Image to be cropped. (0,0) denotes the top left corner of the image. + img (Tensor): Image to be cropped. output_size (sequence or int): (height, width) of the crop box. 
If int, it is used for both directions @@ -202,23 +255,29 @@ def center_crop(img, output_size): _, image_width, image_height = img.size() crop_height, crop_width = output_size - crop_top = int(round((image_height - crop_height) / 2.)) - crop_left = int(round((image_width - crop_width) / 2.)) + # crop_top = int(round((image_height - crop_height) / 2.)) + # Result can be different between python func and scripted func + # Temporary workaround: + crop_top = int((image_height - crop_height + 1) * 0.5) + # crop_left = int(round((image_width - crop_width) / 2.)) + # Result can be different between python func and scripted func + # Temporary workaround: + crop_left = int((image_width - crop_width + 1) * 0.5) return crop(img, crop_top, crop_left, crop_height, crop_width) -def five_crop(img, size): - # type: (Tensor, BroadcastingList2[int]) -> List[Tensor] +def five_crop(img: Tensor, size: BroadcastingList2[int]) -> List[Tensor]: """Crop the given Image Tensor into four corners and the central crop. .. Note:: This transform returns a List of Tensors and there may be a mismatch in the number of inputs and targets your ``Dataset`` returns. Args: - size (sequence or int): Desired output size of the crop. If size is an - int instead of sequence like (h, w), a square crop (size, size) is - made. + img (Tensor): Image to be cropped. + size (sequence or int): Desired output size of the crop. If size is an + int instead of sequence like (h, w), a square crop (size, size) is + made. Returns: List: List (tl, tr, bl, br, center) @@ -244,19 +303,20 @@ def five_crop(img, size): return [tl, tr, bl, br, center] -def ten_crop(img, size, vertical_flip=False): - # type: (Tensor, BroadcastingList2[int], bool) -> List[Tensor] +def ten_crop(img: Tensor, size: BroadcastingList2[int], vertical_flip: bool = False) -> List[Tensor]: """Crop the given Image Tensor into four corners and the central crop plus the flipped version of these (horizontal flipping is used by default). + .. Note:: This transform returns a List of images and there may be a mismatch in the number of inputs and targets your ``Dataset`` returns. Args: - size (sequence or int): Desired output size of the crop. If size is an + img (Tensor): Image to be cropped. + size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is made. - vertical_flip (bool): Use vertical flipping instead of horizontal + vertical_flip (bool): Use vertical flipping instead of horizontal Returns: List: List (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip) @@ -279,8 +339,7 @@ def ten_crop(img, size, vertical_flip=False): return first_five + second_five -def _blend(img1, img2, ratio): - # type: (Tensor, Tensor, float) -> Tensor +def _blend(img1: Tensor, img2: Tensor, ratio: float) -> Tensor: bound = 1 if img1.dtype in [torch.half, torch.float32, torch.float64] else 255 return (ratio * img1 + (1 - ratio) * img2).clamp(0, bound).to(img1.dtype) @@ -288,22 +347,35 @@ def _blend(img1, img2, ratio): def _rgb2hsv(img): r, g, b = img.unbind(0) - maxc, _ = torch.max(img, dim=0) - minc, _ = torch.min(img, dim=0) + maxc = torch.max(img, dim=0).values + minc = torch.min(img, dim=0).values + + # The algorithm erases S and H channel where `maxc = minc`. This avoids NaN + # from happening in the results, because + # + S channel has division by `maxc`, which is zero only if `maxc = minc` + # + H channel has division by `(maxc - minc)`. 
+ # + # Instead of overwriting NaN afterwards, we just prevent it from occuring so + # we don't need to deal with it in case we save the NaN in a buffer in + # backprop, if it is ever supported, but it doesn't hurt to do so. + eqc = maxc == minc cr = maxc - minc - s = cr / maxc - rc = (maxc - r) / cr - gc = (maxc - g) / cr - bc = (maxc - b) / cr + # Since `eqc => cr = 0`, replacing denominator with 1 when `eqc` is fine. + s = cr / torch.where(eqc, maxc.new_ones(()), maxc) + # Note that `eqc => maxc = minc = r = g = b`. So the following calculation + # of `h` would reduce to `bc - gc + 2 + rc - bc + 4 + rc - bc = 6` so it + # would not matter what values `rc`, `gc`, and `bc` have here, and thus + # replacing denominator with 1 when `eqc` is fine. + cr_divisor = torch.where(eqc, maxc.new_ones(()), cr) + rc = (maxc - r) / cr_divisor + gc = (maxc - g) / cr_divisor + bc = (maxc - b) / cr_divisor - t = (maxc != minc) - s = t * s hr = (maxc == r) * (bc - gc) hg = ((maxc == g) & (maxc != r)) * (2.0 + rc - bc) hb = ((maxc != g) & (maxc != r)) * (4.0 + gc - rc) h = (hr + hg + hb) - h = t * h h = torch.fmod((h / 6.0 + 1.0), 1.0) return torch.stack((h, s, maxc)) @@ -327,3 +399,281 @@ def _hsv2rgb(img): a4 = torch.stack((a1, a2, a3)) return torch.einsum("ijk, xijk -> xjk", mask.to(dtype=img.dtype), a4) + + +def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor: + # padding is left, right, top, bottom + in_sizes = img.size() + + x_indices = [i for i in range(in_sizes[-1])] # [0, 1, 2, 3, ...] + left_indices = [i for i in range(padding[0] - 1, -1, -1)] # e.g. [3, 2, 1, 0] + right_indices = [-(i + 1) for i in range(padding[1])] # e.g. [-1, -2, -3] + x_indices = torch.tensor(left_indices + x_indices + right_indices) + + y_indices = [i for i in range(in_sizes[-2])] + top_indices = [i for i in range(padding[2] - 1, -1, -1)] + bottom_indices = [-(i + 1) for i in range(padding[3])] + y_indices = torch.tensor(top_indices + y_indices + bottom_indices) + + ndim = img.ndim + if ndim == 3: + return img[:, y_indices[:, None], x_indices[None, :]] + elif ndim == 4: + return img[:, :, y_indices[:, None], x_indices[None, :]] + else: + raise RuntimeError("Symmetric padding of N-D tensors are not supported yet") + + +def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "constant") -> Tensor: + r"""Pad the given Tensor Image on all sides with specified padding mode and fill value. + + Args: + img (Tensor): Image to be padded. + padding (int or tuple or list): Padding on each border. If a single int is provided this + is used to pad all borders. If a tuple or list of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a tuple or list of length 4 is provided + this is the padding for the left, top, right and bottom borders + respectively. In torchscript mode padding as single int is not supported, use a tuple or + list of length 1: ``[padding, ]``. + fill (int): Pixel fill value for constant fill. Default is 0. + This value is only used when the padding_mode is constant + padding_mode (str): Type of padding. Should be: constant, edge or reflect. Default is constant. + Mode symmetric is not yet supported for Tensor inputs. 
+ + - constant: pads with a constant value, this value is specified with fill + + - edge: pads with the last value on the edge of the image + + - reflect: pads with reflection of image (without repeating the last value on the edge) + + padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode + will result in [3, 2, 1, 2, 3, 4, 3, 2] + + - symmetric: pads with reflection of image (repeating the last value on the edge) + + padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode + will result in [2, 1, 1, 2, 3, 4, 4, 3] + + Returns: + Tensor: Padded image. + """ + if not _is_tensor_a_torch_image(img): + raise TypeError("tensor is not a torch image.") + + if not isinstance(padding, (int, tuple, list)): + raise TypeError("Got inappropriate padding arg") + if not isinstance(fill, (int, float)): + raise TypeError("Got inappropriate fill arg") + if not isinstance(padding_mode, str): + raise TypeError("Got inappropriate padding_mode arg") + + if isinstance(padding, tuple): + padding = list(padding) + + if isinstance(padding, list) and len(padding) not in [1, 2, 4]: + raise ValueError("Padding must be an int or a 1, 2, or 4 element tuple, not a " + + "{} element tuple".format(len(padding))) + + if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: + raise ValueError("Padding mode should be either constant, edge, reflect or symmetric") + + if isinstance(padding, int): + if torch.jit.is_scripting(): + # This maybe unreachable + raise ValueError("padding can't be an int while torchscripting, set it as a list [value, ]") + pad_left = pad_right = pad_top = pad_bottom = padding + elif len(padding) == 1: + pad_left = pad_right = pad_top = pad_bottom = padding[0] + elif len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + else: + pad_left = padding[0] + pad_top = padding[1] + pad_right = padding[2] + pad_bottom = padding[3] + + p = [pad_left, pad_right, pad_top, pad_bottom] + + if padding_mode == "edge": + # remap padding_mode str + padding_mode = "replicate" + elif padding_mode == "symmetric": + # route to another implementation + if p[0] < 0 or p[1] < 0 or p[2] < 0 or p[3] < 0: # no any support for torch script + raise ValueError("Padding can not be negative for symmetric padding_mode") + return _pad_symmetric(img, p) + + need_squeeze = False + if img.ndim < 4: + img = img.unsqueeze(dim=0) + need_squeeze = True + + out_dtype = img.dtype + need_cast = False + if (padding_mode != "constant") and img.dtype not in (torch.float32, torch.float64): + # Here we temporary cast input tensor to float + # until pytorch issue is resolved : + # https://github.com/pytorch/pytorch/issues/40763 + need_cast = True + img = img.to(torch.float32) + + img = torch.nn.functional.pad(img, p, mode=padding_mode, value=float(fill)) + + if need_squeeze: + img = img.squeeze(dim=0) + + if need_cast: + img = img.to(out_dtype) + + return img + + +def resize(img: Tensor, size: List[int], interpolation: int = 2) -> Tensor: + r"""Resize the input Tensor to the given size. + + Args: + img (Tensor): Image to be resized. + size (int or tuple or list): Desired output size. If size is a sequence like + (h, w), the output size will be matched to this. If size is an int, + the smaller edge of the image will be matched to this number maintaining + the aspect ratio. i.e, if height > width, then image will be rescaled to + :math:`\left(\text{size} \times \frac{\text{height}}{\text{width}}, \text{size}\right)`. 
+        In torchscript mode, size as a single int is not supported, use a tuple or
+        list of length 1: ``[size, ]``.
+    interpolation (int, optional): Desired interpolation. Default is bilinear (=2). Other supported values:
+        nearest (=0) and bicubic (=3).
+
+    Returns:
+        Tensor: Resized image.
+    """
+    if not _is_tensor_a_torch_image(img):
+        raise TypeError("tensor is not a torch image.")
+
+    if not isinstance(size, (int, tuple, list)):
+        raise TypeError("Got inappropriate size arg")
+    if not isinstance(interpolation, int):
+        raise TypeError("Got inappropriate interpolation arg")
+
+    _interpolation_modes = {
+        0: "nearest",
+        2: "bilinear",
+        3: "bicubic",
+    }
+
+    if interpolation not in _interpolation_modes:
+        raise ValueError("This interpolation mode is unsupported with Tensor input")
+
+    if isinstance(size, tuple):
+        size = list(size)
+
+    if isinstance(size, list) and len(size) not in [1, 2]:
+        raise ValueError("Size must be an int or a 1 or 2 element tuple/list, not a "
+                         "{} element tuple/list".format(len(size)))
+
+    w, h = _get_image_size(img)
+
+    if isinstance(size, int):
+        size_w, size_h = size, size
+    elif len(size) < 2:
+        size_w, size_h = size[0], size[0]
+    else:
+        size_w, size_h = size[1], size[0]  # Convention (h, w)
+
+    if isinstance(size, int) or len(size) < 2:
+        if w < h:
+            size_h = int(size_w * h / w)
+        else:
+            size_w = int(size_h * w / h)
+
+        if (w <= h and w == size_w) or (h <= w and h == size_h):
+            return img
+
+    # make image NCHW
+    need_squeeze = False
+    if img.ndim < 4:
+        img = img.unsqueeze(dim=0)
+        need_squeeze = True
+
+    mode = _interpolation_modes[interpolation]
+
+    out_dtype = img.dtype
+    need_cast = False
+    if img.dtype not in (torch.float32, torch.float64):
+        need_cast = True
+        img = img.to(torch.float32)
+
+    # Define align_corners to avoid warnings
+    align_corners = False if mode in ["bilinear", "bicubic"] else None
+
+    img = torch.nn.functional.interpolate(img, size=(size_h, size_w), mode=mode, align_corners=align_corners)
+
+    if need_squeeze:
+        img = img.squeeze(dim=0)
+
+    if need_cast:
+        if mode == "bicubic":
+            img = img.clamp(min=0, max=255)
+        img = img.to(out_dtype)
+
+    return img
+
+
+def affine(
+        img: Tensor, matrix: List[float], resample: int = 0, fillcolor: Optional[int] = None
+) -> Tensor:
+    """Apply affine transformation on the Tensor image keeping image center invariant.
+
+    Args:
+        img (Tensor): image to be transformed.
+        matrix (list of floats): list of 6 float values representing inverse matrix for affine transformation.
+        resample (int, optional): An optional resampling filter. Default is nearest (=0). Other supported value:
+            bilinear (=2).
+        fillcolor (int, optional): this option is not supported for Tensor input. Fill value for the area outside the
+            transform in the output image is always 0.
+
+    Returns:
+        Tensor: Transformed image.
+    """
+    if not (isinstance(img, torch.Tensor) and _is_tensor_a_torch_image(img)):
+        raise TypeError('img should be Tensor Image. Got {}'.format(type(img)))
+
+    if fillcolor is not None:
+        warnings.warn("Argument fillcolor is not supported for Tensor input.
Fill value is zero") + + _interpolation_modes = { + 0: "nearest", + 2: "bilinear", + } + + if resample not in _interpolation_modes: + raise ValueError("This resampling mode is unsupported with Tensor input") + + theta = torch.tensor(matrix, dtype=torch.float).reshape(1, 2, 3) + shape = img.shape + grid = affine_grid(theta, size=(1, shape[-3], shape[-2], shape[-1]), align_corners=False) + + # make image NCHW + need_squeeze = False + if img.ndim < 4: + img = img.unsqueeze(dim=0) + need_squeeze = True + + mode = _interpolation_modes[resample] + + out_dtype = img.dtype + need_cast = False + if img.dtype not in (torch.float32, torch.float64): + need_cast = True + img = img.to(torch.float32) + + img = grid_sample(img, grid, mode=mode, padding_mode="zeros", align_corners=False) + + if need_squeeze: + img = img.squeeze(dim=0) + + if need_cast: + # it is better to round before cast + img = torch.round(img).to(out_dtype) + + return img diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index d54aa5099f2..f7d421d2b83 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -1,16 +1,19 @@ -import torch import math +import numbers import random +import warnings +from collections.abc import Sequence +from typing import Tuple, List, Optional + +import numpy as np +import torch from PIL import Image +from torch import Tensor + try: import accimage except ImportError: accimage = None -import numpy as np -import numbers -import types -from collections.abc import Sequence, Iterable -import warnings from . import functional as F @@ -31,15 +34,6 @@ } -def _get_image_size(img): - if F._is_pil_image(img): - return img.size - elif isinstance(img, torch.Tensor) and img.dim() > 2: - return img.shape[-2:][::-1] - else: - raise TypeError("Unexpected type {}".format(type(img))) - - class Compose(object): """Composes several transforms together. @@ -98,7 +92,7 @@ def __repr__(self): class PILToTensor(object): """Convert a ``PIL Image`` to a tensor of the same type. - Converts a PIL Image (H x W x C) to a torch.Tensor of shape (C x H x W). + Converts a PIL Image (H x W x C) to a Tensor of shape (C x H x W). """ def __call__(self, pic): @@ -215,31 +209,40 @@ def __repr__(self): return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std) -class Resize(object): - """Resize the input PIL Image to the given size. +class Resize(torch.nn.Module): + """Resize the input image to the given size. + The image can be a PIL Image or a torch Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Args: size (sequence or int): Desired output size. If size is a sequence like (h, w), output size will be matched to this. If size is an int, smaller edge of the image will be matched to this number. i.e, if height > width, then image will be rescaled to - (size * height / width, size) - interpolation (int, optional): Desired interpolation. Default is - ``PIL.Image.BILINEAR`` + (size * height / width, size). + In torchscript mode padding as single int is not supported, use a tuple or + list of length 1: ``[size, ]``. + interpolation (int, optional): Desired interpolation enum defined by `filters`_. + Default is ``PIL.Image.BILINEAR``. If input is Tensor, only ``PIL.Image.NEAREST``, ``PIL.Image.BILINEAR`` + and ``PIL.Image.BICUBIC`` are supported. 
""" def __init__(self, size, interpolation=Image.BILINEAR): - assert isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2) + super().__init__() + if not isinstance(size, (int, Sequence)): + raise TypeError("Size should be int or sequence. Got {}".format(type(size))) + if isinstance(size, Sequence) and len(size) not in (1, 2): + raise ValueError("If size is a sequence, it should have 1 or 2 values") self.size = size self.interpolation = interpolation - def __call__(self, img): + def forward(self, img): """ Args: - img (PIL Image): Image to be scaled. + img (PIL Image or Tensor): Image to be scaled. Returns: - PIL Image: Rescaled image. + PIL Image or Tensor: Rescaled image. """ return F.resize(img, self.size, self.interpolation) @@ -258,28 +261,36 @@ def __init__(self, *args, **kwargs): super(Scale, self).__init__(*args, **kwargs) -class CenterCrop(object): - """Crops the given PIL Image at the center. +class CenterCrop(torch.nn.Module): + """Crops the given image at the center. + The image can be a PIL Image or a torch Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Args: size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is - made. + made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). """ def __init__(self, size): + super().__init__() if isinstance(size, numbers.Number): self.size = (int(size), int(size)) + elif isinstance(size, Sequence) and len(size) == 1: + self.size = (size[0], size[0]) else: + if len(size) != 2: + raise ValueError("Please provide only two dimensions (h, w) for size.") + self.size = size - def __call__(self, img): + def forward(self, img): """ Args: - img (PIL Image): Image to be cropped. + img (PIL Image or Tensor): Image to be cropped. Returns: - PIL Image: Cropped image. + PIL Image or Tensor: Cropped image. """ return F.center_crop(img, self.size) @@ -287,20 +298,23 @@ def __repr__(self): return self.__class__.__name__ + '(size={0})'.format(self.size) -class Pad(object): - """Pad the given PIL Image on all sides with the given "pad" value. +class Pad(torch.nn.Module): + """Pad the given image on all sides with the given "pad" value. + The image can be a PIL Image or a torch Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions Args: - padding (int or tuple): Padding on each border. If a single int is provided this + padding (int or tuple or list): Padding on each border. If a single int is provided this is used to pad all borders. If tuple of length 2 is provided this is the padding on left/right and top/bottom respectively. If a tuple of length 4 is provided - this is the padding for the left, top, right and bottom borders - respectively. + this is the padding for the left, top, right and bottom borders respectively. + In torchscript mode padding as single int is not supported, use a tuple or + list of length 1: ``[padding, ]``. fill (int or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of length 3, it is used to fill R, G, B channels respectively. This value is only used when the padding_mode is constant padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. - Default is constant. + Default is constant. Mode symmetric is not yet supported for Tensor inputs. 
- constant: pads with a constant value, this value is specified with fill @@ -317,25 +331,32 @@ class Pad(object): will result in [2, 1, 1, 2, 3, 4, 4, 3] """ - def __init__(self, padding, fill=0, padding_mode='constant'): - assert isinstance(padding, (numbers.Number, tuple)) - assert isinstance(fill, (numbers.Number, str, tuple)) - assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'] - if isinstance(padding, Sequence) and len(padding) not in [2, 4]: - raise ValueError("Padding must be an int or a 2, or 4 element tuple, not a " + + def __init__(self, padding, fill=0, padding_mode="constant"): + super().__init__() + if not isinstance(padding, (numbers.Number, tuple, list)): + raise TypeError("Got inappropriate padding arg") + + if not isinstance(fill, (numbers.Number, str, tuple)): + raise TypeError("Got inappropriate fill arg") + + if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: + raise ValueError("Padding mode should be either constant, edge, reflect or symmetric") + + if isinstance(padding, Sequence) and len(padding) not in [1, 2, 4]: + raise ValueError("Padding must be an int or a 1, 2, or 4 element tuple, not a " + "{} element tuple".format(len(padding))) self.padding = padding self.fill = fill self.padding_mode = padding_mode - def __call__(self, img): + def forward(self, img): """ Args: - img (PIL Image): Image to be padded. + img (PIL Image or Tensor): Image to be padded. Returns: - PIL Image: Padded image. + PIL Image or Tensor: Padded image. """ return F.pad(img, self.padding, self.fill, self.padding_mode) @@ -433,25 +454,31 @@ def __call__(self, img): return t(img) -class RandomCrop(object): - """Crop the given PIL Image at a random location. +class RandomCrop(torch.nn.Module): + """Crop the given image at a random location. + The image can be a PIL Image or a Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading + dimensions Args: size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is - made. + made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). padding (int or sequence, optional): Optional padding on each border - of the image. Default is None, i.e no padding. If a sequence of length - 4 is provided, it is used to pad left, top, right, bottom borders - respectively. If a sequence of length 2 is provided, it is used to - pad left/right, top/bottom borders, respectively. + of the image. Default is None. If a single int is provided this + is used to pad all borders. If tuple of length 2 is provided this is the padding + on left/right and top/bottom respectively. If a tuple of length 4 is provided + this is the padding for the left, top, right and bottom borders respectively. + In torchscript mode padding as single int is not supported, use a tuple or + list of length 1: ``[padding, ]``. pad_if_needed (boolean): It will pad the image if smaller than the desired size to avoid raising an exception. Since cropping is done after padding, the padding seems to be done at a random offset. - fill: Pixel fill value for constant fill. Default is 0. If a tuple of + fill (int or tuple): Pixel fill value for constant fill. Default is 0. If a tuple of length 3, it is used to fill R, G, B channels respectively. This value is only used when the padding_mode is constant - padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. 
+ padding_mode (str): Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. + Mode symmetric is not yet supported for Tensor inputs. - constant: pads with a constant value, this value is specified with fill @@ -469,60 +496,70 @@ class RandomCrop(object): """ - def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode='constant'): - if isinstance(size, numbers.Number): - self.size = (int(size), int(size)) - else: - self.size = size - self.padding = padding - self.pad_if_needed = pad_if_needed - self.fill = fill - self.padding_mode = padding_mode - @staticmethod - def get_params(img, output_size): + def get_params(img: Tensor, output_size: Tuple[int, int]) -> Tuple[int, int, int, int]: """Get parameters for ``crop`` for a random crop. Args: - img (PIL Image): Image to be cropped. + img (PIL Image or Tensor): Image to be cropped. output_size (tuple): Expected output size of the crop. Returns: tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. """ - w, h = _get_image_size(img) + w, h = F._get_image_size(img) th, tw = output_size if w == tw and h == th: return 0, 0, h, w - i = random.randint(0, h - th) - j = random.randint(0, w - tw) + i = torch.randint(0, h - th + 1, size=(1, )).item() + j = torch.randint(0, w - tw + 1, size=(1, )).item() return i, j, th, tw - def __call__(self, img): + def __init__(self, size, padding=None, pad_if_needed=False, fill=0, padding_mode="constant"): + super().__init__() + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + elif isinstance(size, Sequence) and len(size) == 1: + self.size = (size[0], size[0]) + else: + if len(size) != 2: + raise ValueError("Please provide only two dimensions (h, w) for size.") + + # cast to tuple for torchscript + self.size = tuple(size) + self.padding = padding + self.pad_if_needed = pad_if_needed + self.fill = fill + self.padding_mode = padding_mode + + def forward(self, img): """ Args: - img (PIL Image): Image to be cropped. + img (PIL Image or Tensor): Image to be cropped. Returns: - PIL Image: Cropped image. + PIL Image or Tensor: Cropped image. """ if self.padding is not None: img = F.pad(img, self.padding, self.fill, self.padding_mode) + width, height = F._get_image_size(img) # pad the width if needed - if self.pad_if_needed and img.size[0] < self.size[1]: - img = F.pad(img, (self.size[1] - img.size[0], 0), self.fill, self.padding_mode) + if self.pad_if_needed and width < self.size[1]: + padding = [self.size[1] - width, 0] + img = F.pad(img, padding, self.fill, self.padding_mode) # pad the height if needed - if self.pad_if_needed and img.size[1] < self.size[0]: - img = F.pad(img, (0, self.size[0] - img.size[1]), self.fill, self.padding_mode) + if self.pad_if_needed and height < self.size[0]: + padding = [0, self.size[0] - height] + img = F.pad(img, padding, self.fill, self.padding_mode) i, j, h, w = self.get_params(img, self.size) return F.crop(img, i, j, h, w) def __repr__(self): - return self.__class__.__name__ + '(size={0}, padding={1})'.format(self.size, self.padding) + return self.__class__.__name__ + "(size={0}, padding={1})".format(self.size, self.padding) class RandomHorizontalFlip(torch.nn.Module): @@ -556,7 +593,7 @@ def __repr__(self): class RandomVerticalFlip(torch.nn.Module): - """Vertically flip the given PIL Image randomly with a given probability. + """Vertically flip the given image randomly with a given probability. 
The image can be a PIL Image or a torch Tensor, in which case it is expected to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions @@ -652,8 +689,10 @@ def __repr__(self): return self.__class__.__name__ + '(p={})'.format(self.p) -class RandomResizedCrop(object): - """Crop the given PIL Image to random size and aspect ratio. +class RandomResizedCrop(torch.nn.Module): + """Crop the given image to random size and aspect ratio. + The image can be a PIL Image or a Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions A crop of random size (default: of 0.08 to 1.0) of the original size and a random aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop @@ -661,59 +700,77 @@ class RandomResizedCrop(object): This is popularly used to train the Inception networks. Args: - size: expected output size of each edge - scale: range of size of the origin size cropped - ratio: range of aspect ratio of the origin aspect ratio cropped - interpolation: Default: PIL.Image.BILINEAR + size (int or sequence): expected output size of each edge. If size is an + int instead of sequence like (h, w), a square output size ``(size, size)`` is + made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). + scale (tuple of float): range of size of the origin size cropped + ratio (tuple of float): range of aspect ratio of the origin aspect ratio cropped. + interpolation (int): Desired interpolation enum defined by `filters`_. + Default is ``PIL.Image.BILINEAR``. If input is Tensor, only ``PIL.Image.NEAREST``, ``PIL.Image.BILINEAR`` + and ``PIL.Image.BICUBIC`` are supported. """ def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR): - if isinstance(size, (tuple, list)): - self.size = size + super().__init__() + if isinstance(size, numbers.Number): + self.size = (int(size), int(size)) + elif isinstance(size, Sequence) and len(size) == 1: + self.size = (size[0], size[0]) else: - self.size = (size, size) + if len(size) != 2: + raise ValueError("Please provide only two dimensions (h, w) for size.") + self.size = size + + if not isinstance(scale, (tuple, list)): + raise TypeError("Scale should be a sequence") + if not isinstance(ratio, (tuple, list)): + raise TypeError("Ratio should be a sequence") if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): - warnings.warn("range should be of kind (min, max)") + warnings.warn("Scale and ratio should be of kind (min, max)") self.interpolation = interpolation self.scale = scale self.ratio = ratio @staticmethod - def get_params(img, scale, ratio): + def get_params( + img: Tensor, scale: Tuple[float, float], ratio: Tuple[float, float] + ) -> Tuple[int, int, int, int]: """Get parameters for ``crop`` for a random sized crop. Args: - img (PIL Image): Image to be cropped. - scale (tuple): range of size of the origin size cropped + img (PIL Image or Tensor): Input image. + scale (tuple): range of scale of the origin size cropped ratio (tuple): range of aspect ratio of the origin aspect ratio cropped Returns: tuple: params (i, j, h, w) to be passed to ``crop`` for a random sized crop. 
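
        Example (illustrative sketch; ``img`` may be a PIL Image or a Tensor):
            >>> import torch
            >>> img = torch.rand(3, 224, 224)
            >>> i, j, h, w = RandomResizedCrop.get_params(img, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.))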
""" - width, height = _get_image_size(img) + width, height = F._get_image_size(img) area = height * width for _ in range(10): - target_area = random.uniform(*scale) * area - log_ratio = (math.log(ratio[0]), math.log(ratio[1])) - aspect_ratio = math.exp(random.uniform(*log_ratio)) + target_area = area * torch.empty(1).uniform_(*scale).item() + log_ratio = torch.log(torch.tensor(ratio)) + aspect_ratio = torch.exp( + torch.empty(1).uniform_(log_ratio[0], log_ratio[1]) + ).item() w = int(round(math.sqrt(target_area * aspect_ratio))) h = int(round(math.sqrt(target_area / aspect_ratio))) if 0 < w <= width and 0 < h <= height: - i = random.randint(0, height - h) - j = random.randint(0, width - w) + i = torch.randint(0, height - h + 1, size=(1,)).item() + j = torch.randint(0, width - w + 1, size=(1,)).item() return i, j, h, w # Fallback to central crop in_ratio = float(width) / float(height) - if (in_ratio < min(ratio)): + if in_ratio < min(ratio): w = width h = int(round(w / min(ratio))) - elif (in_ratio > max(ratio)): + elif in_ratio > max(ratio): h = height w = int(round(h * max(ratio))) else: # whole image @@ -723,13 +780,13 @@ def get_params(img, scale, ratio): j = (width - w) // 2 return i, j, h, w - def __call__(self, img): + def forward(self, img): """ Args: - img (PIL Image): Image to be cropped and resized. + img (PIL Image or Tensor): Image to be cropped and resized. Returns: - PIL Image: Randomly cropped and resized image. + PIL Image or Tensor: Randomly cropped and resized image. """ i, j, h, w = self.get_params(img, self.scale, self.ratio) return F.resized_crop(img, i, j, h, w, self.size, self.interpolation) @@ -753,8 +810,11 @@ def __init__(self, *args, **kwargs): super(RandomSizedCrop, self).__init__(*args, **kwargs) -class FiveCrop(object): - """Crop the given PIL Image into four corners and the central crop +class FiveCrop(torch.nn.Module): + """Crop the given image into four corners and the central crop. + The image can be a PIL Image or a Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading + dimensions .. Note:: This transform returns a tuple of images and there may be a mismatch in the number of @@ -764,6 +824,7 @@ class FiveCrop(object): Args: size (sequence or int): Desired output size of the crop. If size is an ``int`` instead of sequence like (h, w), a square crop of size (size, size) is made. + If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). Example: >>> transform = Compose([ @@ -778,23 +839,37 @@ class FiveCrop(object): """ def __init__(self, size): - self.size = size + super().__init__() if isinstance(size, numbers.Number): self.size = (int(size), int(size)) + elif isinstance(size, Sequence) and len(size) == 1: + self.size = (size[0], size[0]) else: - assert len(size) == 2, "Please provide only two dimensions (h, w) for size." + if len(size) != 2: + raise ValueError("Please provide only two dimensions (h, w) for size.") + self.size = size - def __call__(self, img): + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be cropped. + + Returns: + tuple of 5 images. 
Image can be PIL Image or Tensor + """ return F.five_crop(img, self.size) def __repr__(self): return self.__class__.__name__ + '(size={0})'.format(self.size) -class TenCrop(object): - """Crop the given PIL Image into four corners and the central crop plus the flipped version of - these (horizontal flipping is used by default) +class TenCrop(torch.nn.Module): + """Crop the given image into four corners and the central crop plus the flipped version of + these (horizontal flipping is used by default). + The image can be a PIL Image or a Tensor, in which case it is expected + to have [..., H, W] shape, where ... means an arbitrary number of leading + dimensions .. Note:: This transform returns a tuple of images and there may be a mismatch in the number of @@ -804,7 +879,7 @@ class TenCrop(object): Args: size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is - made. + made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]). vertical_flip (bool): Use vertical flipping instead of horizontal Example: @@ -820,15 +895,26 @@ class TenCrop(object): """ def __init__(self, size, vertical_flip=False): - self.size = size + super().__init__() if isinstance(size, numbers.Number): self.size = (int(size), int(size)) + elif isinstance(size, Sequence) and len(size) == 1: + self.size = (size[0], size[0]) else: - assert len(size) == 2, "Please provide only two dimensions (h, w) for size." + if len(size) != 2: + raise ValueError("Please provide only two dimensions (h, w) for size.") + self.size = size self.vertical_flip = vertical_flip - def __call__(self, img): + def forward(self, img): + """ + Args: + img (PIL Image or Tensor): Image to be cropped. + + Returns: + tuple of 10 images. Image can be PIL Image or Tensor + """ return F.ten_crop(img, self.size, self.vertical_flip) def __repr__(self): @@ -1081,6 +1167,8 @@ def __repr__(self): format_string += ', expand={0}'.format(self.expand) if self.center is not None: format_string += ', center={0}'.format(self.center) + if self.fill is not None: + format_string += ', fill={0}'.format(self.fill) format_string += ')' return format_string @@ -1284,7 +1372,7 @@ def __repr__(self): return self.__class__.__name__ + '(p={0})'.format(self.p) -class RandomErasing(object): +class RandomErasing(torch.nn.Module): """ Randomly selects a rectangle region in an image and erases its pixels. 'Random Erasing Data Augmentation' by Zhong et al. 
See https://arxiv.org/pdf/1708.04896.pdf @@ -1311,13 +1399,21 @@ class RandomErasing(object): """ def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace=False): - assert isinstance(value, (numbers.Number, str, tuple, list)) + super().__init__() + if not isinstance(value, (numbers.Number, str, tuple, list)): + raise TypeError("Argument value should be either a number or str or a sequence") + if isinstance(value, str) and value != "random": + raise ValueError("If value is str, it should be 'random'") + if not isinstance(scale, (tuple, list)): + raise TypeError("Scale should be a sequence") + if not isinstance(ratio, (tuple, list)): + raise TypeError("Ratio should be a sequence") if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): - warnings.warn("range should be of kind (min, max)") + warnings.warn("Scale and ratio should be of kind (min, max)") if scale[0] < 0 or scale[1] > 1: - raise ValueError("range of scale should be between 0 and 1") + raise ValueError("Scale should be between 0 and 1") if p < 0 or p > 1: - raise ValueError("range of random erasing probability should be between 0 and 1") + raise ValueError("Random erasing probability should be between 0 and 1") self.p = p self.scale = scale @@ -1326,13 +1422,18 @@ def __init__(self, p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0, inplace self.inplace = inplace @staticmethod - def get_params(img, scale, ratio, value=0): + def get_params( + img: Tensor, scale: Tuple[float, float], ratio: Tuple[float, float], value: Optional[List[float]] = None + ) -> Tuple[int, int, int, int, Tensor]: """Get parameters for ``erase`` for a random erasing. Args: img (Tensor): Tensor image of size (C, H, W) to be erased. - scale: range of proportion of erased area against input image. - ratio: range of aspect ratio of erased area. + scale (tuple or list): range of proportion of erased area against input image. + ratio (tuple or list): range of aspect ratio of erased area. + value (list, optional): erasing value. If None, it is interpreted as "random" + (erasing each pixel with random values). If ``len(value)`` is 1, it is interpreted as a number, + i.e. ``value[0]``. Returns: tuple: params (i, j, h, w, v) to be passed to ``erase`` for random erasing. 
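# A minimal standalone sketch (not part of the patch) of the torchscript-friendly
# sampling pattern adopted in the get_params rewrite below: calls into the `random`
# module are replaced with torch RNG ops, which TorchScript can compile and which
# respect torch.manual_seed. Shapes and value ranges are illustrative assumptions.
import torch

scale, ratio = (0.02, 0.33), (0.3, 3.3)
img_h, img_w = 224, 224
area = img_h * img_w

erase_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
aspect_ratio = torch.empty(1).uniform_(ratio[0], ratio[1]).item()
# the real code retries when the box does not fit; here we simply clamp for brevity
h = min(int(round((erase_area * aspect_ratio) ** 0.5)), img_h - 1)
w = min(int(round((erase_area / aspect_ratio) ** 0.5)), img_w - 1)
# top-left corner sampled with torch.randint instead of random.randint
i = torch.randint(0, img_h - h + 1, size=(1,)).item()
j = torch.randint(0, img_w - w + 1, size=(1,)).item()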
@@ -1341,27 +1442,27 @@ def get_params(img, scale, ratio, value=0): area = img_h * img_w for _ in range(10): - erase_area = random.uniform(scale[0], scale[1]) * area - aspect_ratio = random.uniform(ratio[0], ratio[1]) + erase_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item() + aspect_ratio = torch.empty(1).uniform_(ratio[0], ratio[1]).item() h = int(round(math.sqrt(erase_area * aspect_ratio))) w = int(round(math.sqrt(erase_area / aspect_ratio))) + if not (h < img_h and w < img_w): + continue - if h < img_h and w < img_w: - i = random.randint(0, img_h - h) - j = random.randint(0, img_w - w) - if isinstance(value, numbers.Number): - v = value - elif isinstance(value, torch._six.string_classes): - v = torch.empty([img_c, h, w], dtype=torch.float32).normal_() - elif isinstance(value, (list, tuple)): - v = torch.tensor(value, dtype=torch.float32).view(-1, 1, 1).expand(-1, h, w) - return i, j, h, w, v + if value is None: + v = torch.empty([img_c, h, w], dtype=torch.float32).normal_() + else: + v = torch.tensor(value)[:, None, None] + + i = torch.randint(0, img_h - h + 1, size=(1, )).item() + j = torch.randint(0, img_w - w + 1, size=(1, )).item() + return i, j, h, w, v # Return original image return 0, 0, img_h, img_w, img - def __call__(self, img): + def forward(self, img): """ Args: img (Tensor): Tensor image of size (C, H, W) to be erased. @@ -1369,7 +1470,24 @@ def __call__(self, img): Returns: img (Tensor): Erased Tensor image. """ - if random.uniform(0, 1) < self.p: - x, y, h, w, v = self.get_params(img, scale=self.scale, ratio=self.ratio, value=self.value) + if torch.rand(1) < self.p: + + # cast self.value to script acceptable type + if isinstance(self.value, (int, float)): + value = [self.value, ] + elif isinstance(self.value, str): + value = None + elif isinstance(self.value, tuple): + value = list(self.value) + else: + value = self.value + + if value is not None and not (len(value) in (1, img.shape[-3])): + raise ValueError( + "If value is a sequence, it should have either a single value or " + "{} (number of input channels)".format(img.shape[-3]) + ) + + x, y, h, w, v = self.get_params(img, scale=self.scale, ratio=self.ratio, value=value) return F.erase(img, x, y, h, w, v, self.inplace) return img
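# Illustrative end-to-end sketch (assumptions about typical usage, not code from this
# patch): with the transforms rewritten as torch.nn.Module subclasses, they can be
# composed with nn.Sequential, scripted with torch.jit.script, and applied directly
# to tensor images as well as PIL Images.
import torch
import torchvision.transforms as T

pipeline = torch.nn.Sequential(
    T.Resize([256, ]),           # length-1 list instead of a bare int for torchscript compatibility
    T.CenterCrop(224),
    T.RandomHorizontalFlip(p=0.5),
)
scripted = torch.jit.script(pipeline)   # assumes every composed transform is scriptable

img = torch.randint(0, 256, (3, 480, 640), dtype=torch.uint8)
out = scripted(img)                     # a [C, H, W] uint8 tensor in, tensor out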