
Commit cca59ce

anton-l and pcuenca authored
Add Apple M1 tests (open-mmlab#796)
* [CI] Add Apple M1 tests
* setup-python
* python build
* conda install
* remove branch
* only 3.8 is built for osx-arm
* try fetching prebuilt tokenizers
* use user cache
* update shells
* Reports and cleanup
* -> MPS
* Disable parallel tests
* Better naming
* investigate worker crash
* return xdist
* restart
* num_workers=2
* still crashing?
* faulthandler for segfaults
* faulthandler for segfaults
* remove restarts, stop on segfault
* torch version
* change installation order
* Use pre-RC version of PyTorch. To be updated when it is released.
* Skip crashing test on MPS, add new one that works.
* Skip cuda tests in mps device.
* Actually use generator in test. I think this was a typo.
* make style

Co-authored-by: Pedro Cuenca <pedro@huggingface.co>
1 parent 627ad6e commit cca59ce

7 files changed (+235 −12 lines)

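The first file below adds a reusable composite action, .github/actions/setup-miniconda/action.yml. Its cache keys embed the current UTC date, so every cache rolls over once a day without manual invalidation. The idea behind the key, as a small Python sketch (the helper name is illustrative, not part of the commit):

```python
from datetime import datetime, timezone

def daily_cache_key(os_name: str, arch: str, python_version: str) -> str:
    # Embedding today's UTC date makes the key change once per day, so the
    # cache is rebuilt daily without any manual invalidation step.
    today = datetime.now(timezone.utc).strftime("%Y%m%d")
    return f"miniconda-{os_name}-{arch}-{python_version}-{today}"

print(daily_cache_key("macOS", "ARM64", "3.9"))
# e.g. miniconda-macOS-ARM64-3.9-20220922
```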
.github/actions/setup-miniconda/action.yml (new file, +146 lines)

```yaml
name: Set up conda environment for testing

description: Sets up miniconda in your ${RUNNER_TEMP} environment and gives you the ${CONDA_RUN} environment variable so you don't have to worry about polluting non-ephemeral runners anymore

inputs:
  python-version:
    description: Python version to install in the conda environment
    required: false
    type: string
    default: "3.9"
  miniconda-version:
    description: Miniconda version to install
    required: false
    type: string
    default: "4.12.0"
  environment-file:
    description: Environment file to install dependencies from
    required: false
    type: string
    default: ""

runs:
  using: composite
  steps:
    # Use the same trick from https://github.com/marketplace/actions/setup-miniconda
    # to refresh the cache daily. This is kind of optional though
    - name: Get date
      id: get-date
      shell: bash
      run: echo "::set-output name=today::$(/bin/date -u '+%Y%m%d')d"
    - name: Setup miniconda cache
      id: miniconda-cache
      uses: actions/cache@v2
      with:
        path: ${{ runner.temp }}/miniconda
        key: miniconda-${{ runner.os }}-${{ runner.arch }}-${{ inputs.python-version }}-${{ steps.get-date.outputs.today }}
    - name: Install miniconda (${{ inputs.miniconda-version }})
      if: steps.miniconda-cache.outputs.cache-hit != 'true'
      env:
        MINICONDA_VERSION: ${{ inputs.miniconda-version }}
      shell: bash -l {0}
      run: |
        MINICONDA_INSTALL_PATH="${RUNNER_TEMP}/miniconda"
        mkdir -p "${MINICONDA_INSTALL_PATH}"
        case ${RUNNER_OS}-${RUNNER_ARCH} in
          Linux-X64)
            MINICONDA_ARCH="Linux-x86_64"
            ;;
          macOS-ARM64)
            MINICONDA_ARCH="MacOSX-arm64"
            ;;
          macOS-X64)
            MINICONDA_ARCH="MacOSX-x86_64"
            ;;
          *)
            echo "::error::Platform ${RUNNER_OS}-${RUNNER_ARCH} currently unsupported using this action"
            exit 1
            ;;
        esac
        MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-py39_${MINICONDA_VERSION}-${MINICONDA_ARCH}.sh"
        curl -fsSL "${MINICONDA_URL}" -o "${MINICONDA_INSTALL_PATH}/miniconda.sh"
        bash "${MINICONDA_INSTALL_PATH}/miniconda.sh" -b -u -p "${MINICONDA_INSTALL_PATH}"
        rm -rf "${MINICONDA_INSTALL_PATH}/miniconda.sh"
    - name: Update GitHub path to include miniconda install
      shell: bash
      run: |
        MINICONDA_INSTALL_PATH="${RUNNER_TEMP}/miniconda"
        echo "${MINICONDA_INSTALL_PATH}/bin" >> $GITHUB_PATH
    - name: Setup miniconda env cache (with env file)
      id: miniconda-env-cache-env-file
      if: runner.os == 'macOS' && inputs.environment-file != ''
      uses: actions/cache@v2
      with:
        path: ${{ runner.temp }}/conda-python-${{ inputs.python-version }}
        key: miniconda-env-${{ runner.os }}-${{ runner.arch }}-${{ inputs.python-version }}-${{ steps.get-date.outputs.today }}-${{ hashFiles(inputs.environment-file) }}
    - name: Setup miniconda env cache (without env file)
      id: miniconda-env-cache
      if: runner.os == 'macOS' && inputs.environment-file == ''
      uses: actions/cache@v2
      with:
        path: ${{ runner.temp }}/conda-python-${{ inputs.python-version }}
        key: miniconda-env-${{ runner.os }}-${{ runner.arch }}-${{ inputs.python-version }}-${{ steps.get-date.outputs.today }}
    - name: Setup conda environment with python (v${{ inputs.python-version }})
      if: steps.miniconda-env-cache-env-file.outputs.cache-hit != 'true' && steps.miniconda-env-cache.outputs.cache-hit != 'true'
      shell: bash
      env:
        PYTHON_VERSION: ${{ inputs.python-version }}
        ENV_FILE: ${{ inputs.environment-file }}
      run: |
        CONDA_BASE_ENV="${RUNNER_TEMP}/conda-python-${PYTHON_VERSION}"
        ENV_FILE_FLAG=""
        if [[ -f "${ENV_FILE}" ]]; then
          ENV_FILE_FLAG="--file ${ENV_FILE}"
        elif [[ -n "${ENV_FILE}" ]]; then
          echo "::warning::Specified env file (${ENV_FILE}) not found, not going to include it"
        fi
        conda create \
          --yes \
          --prefix "${CONDA_BASE_ENV}" \
          "python=${PYTHON_VERSION}" \
          ${ENV_FILE_FLAG} \
          cmake=3.22 \
          conda-build=3.21 \
          ninja=1.10 \
          pkg-config=0.29 \
          wheel=0.37
    - name: Clone the base conda environment and update GitHub env
      shell: bash
      env:
        PYTHON_VERSION: ${{ inputs.python-version }}
        CONDA_BASE_ENV: ${{ runner.temp }}/conda-python-${{ inputs.python-version }}
      run: |
        CONDA_ENV="${RUNNER_TEMP}/conda_environment_${GITHUB_RUN_ID}"
        conda create \
          --yes \
          --prefix "${CONDA_ENV}" \
          --clone "${CONDA_BASE_ENV}"
        # TODO: conda-build could not be cloned because it hardcodes the path, so it
        # could not be cached
        conda install --yes -p ${CONDA_ENV} conda-build=3.21
        echo "CONDA_ENV=${CONDA_ENV}" >> "${GITHUB_ENV}"
        echo "CONDA_RUN=conda run -p ${CONDA_ENV} --no-capture-output" >> "${GITHUB_ENV}"
        echo "CONDA_BUILD=conda run -p ${CONDA_ENV} conda-build" >> "${GITHUB_ENV}"
        echo "CONDA_INSTALL=conda install -p ${CONDA_ENV}" >> "${GITHUB_ENV}"
    - name: Get disk space usage and throw an error for low disk space
      shell: bash
      run: |
        echo "Print the available disk space for manual inspection"
        df -h
        # Set the minimum requirement space to 4GB
        MINIMUM_AVAILABLE_SPACE_IN_GB=4
        MINIMUM_AVAILABLE_SPACE_IN_KB=$(($MINIMUM_AVAILABLE_SPACE_IN_GB * 1024 * 1024))
        # Use KB to avoid floating point warning like 3.1GB
        df -k | tr -s ' ' | cut -d' ' -f 4,9 | while read -r LINE;
        do
          AVAIL=$(echo $LINE | cut -f1 -d' ')
          MOUNT=$(echo $LINE | cut -f2 -d' ')
          if [ "$MOUNT" = "/" ]; then
            if [ "$AVAIL" -lt "$MINIMUM_AVAILABLE_SPACE_IN_KB" ]; then
              echo "There is only ${AVAIL}KB free space left in $MOUNT, which is less than the minimum requirement of ${MINIMUM_AVAILABLE_SPACE_IN_KB}KB. Please create an issue for PyTorch Release Engineering via https://github.com/pytorch/test-infra/issues and provide the link to the workflow run."
              exit 1;
            else
              echo "There is ${AVAIL}KB free space left in $MOUNT, continue"
            fi
          fi
        done
```
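The final step's df/tr/cut pipeline is dense. As a rough Python equivalent of the same guard, here is a sketch using shutil.disk_usage (illustrative only, not part of the commit):

```python
import shutil
import sys

MINIMUM_AVAILABLE_SPACE_IN_GB = 4

def check_root_disk_space() -> None:
    # shutil.disk_usage reports total/used/free bytes for the given mount.
    free_gb = shutil.disk_usage("/").free / (1024 ** 3)
    if free_gb < MINIMUM_AVAILABLE_SPACE_IN_GB:
        # Mirrors the workflow step: report the shortfall and fail the job.
        print(f"Only {free_gb:.1f}GB free on /, below the {MINIMUM_AVAILABLE_SPACE_IN_GB}GB minimum.")
        sys.exit(1)
    print(f"{free_gb:.1f}GB free on /, continuing.")

if __name__ == "__main__":
    check_root_disk_space()
```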

.github/workflows/pr_tests.yml (+53 −5)

```diff
@@ -1,4 +1,4 @@
-name: Run non-slow tests
+name: Run fast tests
 
 on:
   pull_request:
@@ -10,14 +10,14 @@ concurrency:
   cancel-in-progress: true
 
 env:
-  HF_HOME: /mnt/cache
   OMP_NUM_THREADS: 8
   MKL_NUM_THREADS: 8
   PYTEST_TIMEOUT: 60
+  MPS_TORCH_VERSION: 1.13.0
 
 jobs:
   run_tests_cpu:
-    name: Diffusers tests
+    name: CPU tests on Ubuntu
     runs-on: [ self-hosted, docker-gpu ]
     container:
       image: python:3.7
@@ -39,7 +39,7 @@ jobs:
         run: |
           python utils/print_env.py
 
-      - name: Run all non-slow selected tests on CPU
+      - name: Run all fast tests on CPU
        run: |
          python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=tests_torch_cpu tests/
 
@@ -51,5 +51,53 @@
         if: ${{ always() }}
         uses: actions/upload-artifact@v2
         with:
-          name: pr_torch_test_reports
+          name: pr_torch_cpu_test_reports
+          path: reports
+
+  run_tests_apple_m1:
+    name: MPS tests on Apple M1
+    runs-on: [ self-hosted, apple-m1 ]
+
+    steps:
+      - name: Checkout diffusers
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 2
+
+      - name: Clean checkout
+        shell: arch -arch arm64 bash {0}
+        run: |
+          git clean -fxd
+
+      - name: Setup miniconda
+        uses: ./.github/actions/setup-miniconda
+        with:
+          python-version: 3.9
+
+      - name: Install dependencies
+        shell: arch -arch arm64 bash {0}
+        run: |
+          ${CONDA_RUN} python -m pip install --upgrade pip
+          ${CONDA_RUN} python -m pip install -e .[quality,test]
+          ${CONDA_RUN} python -m pip install --pre torch==${MPS_TORCH_VERSION} --extra-index-url https://download.pytorch.org/whl/test/cpu
+
+      - name: Environment
+        shell: arch -arch arm64 bash {0}
+        run: |
+          ${CONDA_RUN} python utils/print_env.py
+
+      - name: Run all fast tests on MPS
+        shell: arch -arch arm64 bash {0}
+        run: |
+          ${CONDA_RUN} python -m pytest -n 4 --max-worker-restart=0 --dist=loadfile -s -v --make-reports=tests_torch_mps tests/
+
+      - name: Failure short reports
+        if: ${{ failure() }}
+        run: cat reports/tests_torch_mps_failures_short.txt
+
+      - name: Test suite reports artifacts
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: pr_torch_mps_test_reports
           path: reports
```
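All of the test changes below branch on a torch_device string. diffusers resolves it in its test utilities; the logic amounts to roughly this sketch (the real helper may differ in detail):

```python
import torch

def resolve_torch_device() -> str:
    # Prefer CUDA, then Apple's MPS backend (available in PyTorch >= 1.12),
    # and fall back to CPU. getattr guards builds compiled without MPS.
    if torch.cuda.is_available():
        return "cuda"
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return "mps"
    return "cpu"

torch_device = resolve_torch_device()
```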

tests/test_layers_utils.py (+27)

```diff
@@ -221,6 +221,9 @@ def test_downsample_with_conv_out_dim(self):
 
 
 class AttentionBlockTests(unittest.TestCase):
+    @unittest.skipIf(
+        torch_device == "mps", "Matmul crashes on MPS, see https://github.com/pytorch/pytorch/issues/84039"
+    )
     def test_attention_block_default(self):
         torch.manual_seed(0)
         if torch.cuda.is_available():
@@ -245,6 +248,30 @@ def test_attention_block_default(self):
         )
         assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)
 
+    def test_attention_block_sd(self):
+        # This version uses SD params and is compatible with mps
+        torch.manual_seed(0)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed_all(0)
+
+        sample = torch.randn(1, 512, 64, 64).to(torch_device)
+        attentionBlock = AttentionBlock(
+            channels=512,
+            rescale_output_factor=1.0,
+            eps=1e-6,
+            num_groups=32,
+        ).to(torch_device)
+        with torch.no_grad():
+            attention_scores = attentionBlock(sample)
+
+        assert attention_scores.shape == (1, 512, 64, 64)
+        output_slice = attention_scores[0, -1, -3:, -3:]
+
+        expected_slice = torch.tensor(
+            [-0.6621, -0.0156, -3.2766, 0.8025, -0.8609, 0.2820, 0.0905, -1.1179, -3.2126], device=torch_device
+        )
+        assert torch.allclose(output_slice.flatten(), expected_slice, atol=1e-3)
+
 
 class SpatialTransformerTests(unittest.TestCase):
     def test_spatial_transformer_default(self):
```
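Pinned tensors like expected_slice are typically captured once on a reference device and then asserted with a loose atol. A minimal sketch of regenerating such values (the import path matches this era of diffusers and may have moved since):

```python
import torch
from diffusers.models.attention import AttentionBlock  # path as of this commit

torch.manual_seed(0)
block = AttentionBlock(channels=512, rescale_output_factor=1.0, eps=1e-6, num_groups=32)
sample = torch.randn(1, 512, 64, 64)
with torch.no_grad():
    out = block(sample)

# Print the slice the test pins down; paste the values into expected_slice.
print(out[0, -1, -3:, -3:].flatten())
```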

tests/test_modeling_common.py (+1)

```diff
@@ -247,6 +247,7 @@ def recursive_check(tuple_object, dict_object):
 
         recursive_check(outputs_tuple, outputs_dict)
 
+    @unittest.skipIf(torch_device == "mps", "Gradient checkpointing skipped on MPS")
     def test_enable_disable_gradient_checkpointing(self):
         if not self.model_class._supports_gradient_checkpointing:
             return  # Skip test if model does not support gradient checkpointing
```
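For context, the skipped test flips a gradient_checkpointing flag on the model and verifies gradients still match. The underlying mechanism is PyTorch activation checkpointing; here is a minimal sketch of that pattern, not the diffusers implementation itself:

```python
import torch
from torch.utils.checkpoint import checkpoint

class Block(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Linear(8, 8), torch.nn.ReLU(), torch.nn.Linear(8, 8)
        )
        self.gradient_checkpointing = False

    def forward(self, x):
        if self.gradient_checkpointing and self.training:
            # Drop intermediate activations and recompute them in backward,
            # trading compute for memory.
            return checkpoint(self.net, x)
        return self.net(x)

block = Block()
block.gradient_checkpointing = True
block.train()
out = block(torch.randn(2, 8, requires_grad=True))
out.sum().backward()
```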

tests/test_models_unet.py (+4 −3)

```diff
@@ -135,7 +135,7 @@ def test_from_pretrained_hub(self):
 
         assert image is not None, "Make sure output is not None"
 
-    @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU")
+    @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU")
     def test_from_pretrained_accelerate(self):
         model, _ = UNet2DModel.from_pretrained(
             "fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto"
@@ -145,7 +145,7 @@ def test_from_pretrained_accelerate(self):
 
         assert image is not None, "Make sure output is not None"
 
-    @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU")
+    @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU")
     def test_from_pretrained_accelerate_wont_change_results(self):
         model_accelerate, _ = UNet2DModel.from_pretrained(
             "fusing/unet-ldm-dummy-update", output_loading_info=True, device_map="auto"
@@ -177,7 +177,7 @@ def test_from_pretrained_accelerate_wont_change_results(self):
 
         assert torch.allclose(arr_accelerate, arr_normal_load, rtol=1e-3)
 
-    @unittest.skipIf(torch_device == "cpu", "This test is supposed to run on GPU")
+    @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU")
     def test_memory_footprint_gets_reduced(self):
         torch.cuda.empty_cache()
         gc.collect()
@@ -267,6 +267,7 @@ def prepare_init_args_and_inputs_for_common(self):
         inputs_dict = self.dummy_input
         return init_dict, inputs_dict
 
+    @unittest.skipIf(torch_device == "mps", "Gradient checkpointing skipped on MPS")
     def test_gradient_checkpointing(self):
         # enable deterministic behavior for gradient checkpointing
         init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()
```
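The == "cpu" to != "cuda" flips above are the key correctness fix: now that an MPS runner exists, torch_device can be "mps", so "not cpu" no longer implies CUDA. A small sketch of helpers that restate the decorators' intent (hypothetical, not in the codebase):

```python
import unittest

torch_device = "cpu"  # stand-in; the tests use the resolved device (see sketch above)

def require_cuda(test_fn):
    # "not cpu" is no longer enough: torch_device may be "mps" on the M1 runner.
    return unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU")(test_fn)

def skip_mps(reason: str):
    # Decorator factory, used as @skip_mps("Gradient checkpointing skipped on MPS").
    return unittest.skipIf(torch_device == "mps", reason)
```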

tests/test_models_vae.py (+1 −1)

```diff
@@ -96,7 +96,7 @@ def test_output_pretrained(self):
             model.config.in_channels,
             model.config.sample_size,
             model.config.sample_size,
-            generator=torch.manual_seed(0),
+            generator=generator,
         )
         image = image.to(torch_device)
         with torch.no_grad():
```
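This one-line fix matters because torch.manual_seed(0) reseeds and returns the default generator rather than the generator object the test had already prepared; the commit message calls it a typo. A short illustration of why the two are not interchangeable:

```python
import torch

gen = torch.manual_seed(0)           # seeds and returns the *default* generator
a = torch.randn(3, generator=gen)

gen = torch.manual_seed(0)
_ = torch.randn(3)                   # any other draw advances that same default state
b = torch.randn(3, generator=gen)

print(torch.equal(a, b))  # False: the inline reseed hides this shared-state coupling
```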

tests/test_pipelines.py (+3 −3)

```diff
@@ -1196,7 +1196,7 @@ def test_stable_diffusion_inpaint_num_images_per_prompt(self):
 
         assert images.shape == (batch_size * num_images_per_prompt, 32, 32, 3)
 
-    @unittest.skipIf(torch_device == "cpu", "This test requires a GPU")
+    @unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
     def test_stable_diffusion_fp16(self):
         """Test that stable diffusion works with fp16"""
         unet = self.dummy_cond_unet
@@ -1229,7 +1229,7 @@ def test_stable_diffusion_fp16(self):
 
         assert image.shape == (1, 128, 128, 3)
 
-    @unittest.skipIf(torch_device == "cpu", "This test requires a GPU")
+    @unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
     def test_stable_diffusion_img2img_fp16(self):
         """Test that stable diffusion img2img works with fp16"""
         unet = self.dummy_cond_unet
@@ -1270,7 +1270,7 @@ def test_stable_diffusion_img2img_fp16(self):
 
         assert image.shape == (1, 32, 32, 3)
 
-    @unittest.skipIf(torch_device == "cpu", "This test requires a GPU")
+    @unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
     def test_stable_diffusion_inpaint_fp16(self):
         """Test that stable diffusion inpaint works with fp16"""
         unet = self.dummy_cond_unet
```
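These fp16 pipeline tests now skip on anything that is not CUDA: CPU lacks many half-precision kernels, and MPS fp16 support was still incomplete when this landed. The guarded-conversion pattern, as a small sketch (the helper name is illustrative):

```python
import torch

def to_half_if_cuda(module: torch.nn.Module, device: str) -> torch.nn.Module:
    # fp16 inference is only exercised on CUDA in these tests; elsewhere we
    # stay in fp32 so the module keeps working.
    module = module.to(device)
    if device == "cuda":
        module = module.half()
    return module

unet = to_half_if_cuda(torch.nn.Linear(4, 4), "cpu")
print(next(unet.parameters()).dtype)  # torch.float32 on cpu, torch.float16 on cuda
```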
