From 0127d8e29bd96e439fb80049f2173015aea05724 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 15 May 2023 07:04:54 -0700 Subject: [PATCH] [fbsync] enable Windows GPU CI on GHA (#7498) Reviewed By: vmoens Differential Revision: D45522826 fbshipit-source-id: fb832f29c09ade1d7a796ebf31d35d74ed600aff --- .circleci/config.yml | 72 ------------------------------ .circleci/config.yml.in | 51 --------------------- .circleci/regenerate.py | 31 ------------- .github/workflows/test-windows.yml | 12 ++--- 4 files changed, 6 insertions(+), 160 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index ea5500723be..a37820bfcf8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -433,53 +433,6 @@ jobs: docker push ${image_name}:${CIRCLE_WORKFLOW_ID} docker push ${image_name}:latest - unittest_windows_gpu: - <<: *binary_common - executor: - name: windows-gpu - environment: - CUDA_VERSION: "11.7" - PYTHON_VERSION: << parameters.python_version >> - steps: - - checkout - - designate_upload_channel - - run: - name: Generate cache key - # This will refresh cache on Sundays, nightly build should generate new cache. - command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - - keys: - - env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - - run: - name: Setup - command: .circleci/unittest/windows/scripts/setup_env.sh - - save_cache: - - key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - - paths: - - conda - - env - - run: - name: Install CUDA - command: packaging/windows/internal/cuda_install.bat - - run: - name: Update CUDA driver - command: packaging/windows/internal/driver_update.bat - - run: - name: Install torchvision - command: .circleci/unittest/windows/scripts/install.sh - - run: - name: Run tests - command: .circleci/unittest/windows/scripts/run_test.sh - - run: - name: Post process - command: .circleci/unittest/windows/scripts/post_process.sh - - store_test_results: - path: test-results - cmake_linux_cpu: <<: *binary_common docker: @@ -853,31 +806,6 @@ workflows: build_environment: binary-libtorchvision_ops-android name: binary_libtorchvision_ops_android - unittest: - jobs: - - unittest_windows_gpu: - cu_version: cu117 - name: unittest_windows_gpu_py3.8 - python_version: '3.8' - - unittest_windows_gpu: - cu_version: cu117 - filters: - branches: - only: - - main - - nightly - name: unittest_windows_gpu_py3.9 - python_version: '3.9' - - unittest_windows_gpu: - cu_version: cu117 - filters: - branches: - only: - - main - - nightly - name: unittest_windows_gpu_py3.10 - python_version: '3.10' - cmake: jobs: - cmake_linux_cpu: diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index 0a6679938a9..d7ccd400c96 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -433,53 +433,6 @@ jobs: docker push ${image_name}:${CIRCLE_WORKFLOW_ID} docker push ${image_name}:latest - unittest_windows_gpu: - <<: *binary_common - executor: - name: windows-gpu - environment: - CUDA_VERSION: "11.7" - PYTHON_VERSION: << parameters.python_version >> - steps: - - checkout - - designate_upload_channel - - run: - name: Generate cache key - # This will refresh cache on Sundays, nightly build should generate new cache. - command: echo "$(date +"%Y-%U")" > .circleci-weekly - - restore_cache: - {% raw %} - keys: - - env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - {% endraw %} - - run: - name: Setup - command: .circleci/unittest/windows/scripts/setup_env.sh - - save_cache: - {% raw %} - key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} - {% endraw %} - paths: - - conda - - env - - run: - name: Install CUDA - command: packaging/windows/internal/cuda_install.bat - - run: - name: Update CUDA driver - command: packaging/windows/internal/driver_update.bat - - run: - name: Install torchvision - command: .circleci/unittest/windows/scripts/install.sh - - run: - name: Run tests - command: .circleci/unittest/windows/scripts/run_test.sh - - run: - name: Post process - command: .circleci/unittest/windows/scripts/post_process.sh - - store_test_results: - path: test-results - cmake_linux_cpu: <<: *binary_common docker: @@ -575,10 +528,6 @@ workflows: {{ ios_workflows() }} {{ android_workflows() }} - unittest: - jobs: - {{ unittest_workflows() }} - cmake: jobs: {{ cmake_workflows() }} diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py index 6e69dde4833..8db3637020f 100755 --- a/.circleci/regenerate.py +++ b/.circleci/regenerate.py @@ -217,36 +217,6 @@ def indent(indentation, data_list): return ("\n" + " " * indentation).join(yaml.dump(data_list, default_flow_style=False).splitlines()) -def unittest_workflows(indentation=6): - jobs = [] - for os_type in ["windows"]: - for device_type in ["gpu"]: - if os_type == "macos" and device_type == "gpu": - continue - - for i, python_version in enumerate(PYTHON_VERSIONS): - - # Turn off unit tests for 3.11, unit test are not setup properly - if python_version == "3.11": - continue - - job = { - "name": f"unittest_{os_type}_{device_type}_py{python_version}", - "python_version": python_version, - } - - if device_type == "gpu": - if python_version != "3.8": - job["filters"] = gen_filter_branch_tree("main", "nightly") - job["cu_version"] = "cu117" - else: - job["cu_version"] = "cpu" - - jobs.append({f"unittest_{os_type}_{device_type}": job}) - - return indent(indentation, jobs) - - def cmake_workflows(indentation=6): jobs = [] python_version = "3.8" @@ -331,7 +301,6 @@ def android_workflows(indentation=6, nightly=False): f.write( env.get_template("config.yml.in").render( build_workflows=build_workflows, - unittest_workflows=unittest_workflows, cmake_workflows=cmake_workflows, ios_workflows=ios_workflows, android_workflows=android_workflows, diff --git a/.github/workflows/test-windows.yml b/.github/workflows/test-windows.yml index dd663d9e939..40c97def8fc 100644 --- a/.github/workflows/test-windows.yml +++ b/.github/workflows/test-windows.yml @@ -20,12 +20,11 @@ jobs: - "3.11" runner: ["windows.4xlarge"] gpu-arch-type: ["cpu"] - # FIXME: enable this as soon as nvjpeg is available on the Windows runner -# include: -# - python-version: "3.8" -# runner: windows.8xlarge.nvidia.gpu -# gpu-arch-type: cuda -# gpu-arch-version: "11.7" + include: + - python-version: "3.8" + runner: windows.g5.4xlarge.nvidia.gpu + gpu-arch-type: cuda + gpu-arch-version: "11.7" fail-fast: false uses: pytorch/test-infra/.github/workflows/windows_job.yml@main with: @@ -46,6 +45,7 @@ jobs: # TODO: This should be handled by the generic Windows job the same as its done by the generic Linux job export CUDA_HOME="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v${{ matrix.gpu-arch-version }}" export CUDA_PATH="${CUDA_HOME}" + export PATH="${CUDA_PATH}/bin:${PATH}" fi ./.github/scripts/unittest.sh