Enable MPS CI runners (#252)
Use DISTRIBUTED=1 for MPS CI runners (#292)
Remove unnecessary CI files (#327)
Enable test modules on MPS and CI runners (#305) (#324)
Enable 13.3 in CI (#359)

Remove periodic file (running between PRs) (#336)
DenisVieriu97 authored and skotapati committed Apr 7, 2023
1 parent 57ea855 commit 2931687
Showing 6 changed files with 112 additions and 65 deletions.
10 changes: 0 additions & 10 deletions .ci/pytorch/macos-build.sh
@@ -40,16 +40,6 @@ cross_compile_arm64() {
USE_DISTRIBUTED=1 CMAKE_OSX_ARCHITECTURES=arm64 MACOSX_DEPLOYMENT_TARGET=11.0 USE_OPENMP=OFF USE_MKLDNN=OFF USE_QNNPACK=OFF WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
}

compile_arm64() {
# Compilation for arm64
# TODO: Compile with OpenMP support (but this causes CI regressions as cross-compilation was done with OpenMP disabled)
USE_DISTRIBUTED=0 USE_OPENMP=0 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
}

compile_x86_64() {
USE_DISTRIBUTED=0 WERROR=1 python setup.py bdist_wheel
}

compile_x86_64() {
USE_DISTRIBUTED=0 WERROR=1 python setup.py bdist_wheel
}
14 changes: 14 additions & 0 deletions .github/workflows/_mac-test-mps.yml
@@ -137,6 +137,20 @@ jobs:
${CONDA_RUN} python3 test/test_modules.py -k mps --verbose
- name: Run MPS Test Modules
id: test_2
env:
ENV_NAME: conda-test-env-${{ github.run_id }}
shell: arch -arch arm64 bash {0}
# During bring-up of test_modules, don't show this as an error.
continue-on-error: true
run: |
# shellcheck disable=SC1090
set -ex
# TODO(https://github.com/pytorch/pytorch/issues/79293)
${CONDA_RUN} python3 test/test_modules.py -k mps --verbose
- name: Print remaining test logs
shell: bash
if: always() && steps.test.conclusion
6 changes: 6 additions & 0 deletions .github/workflows/_mac-test.yml
@@ -196,6 +196,12 @@ jobs:
run: |
cat test/**/*.log || true
- name: Print remaining test logs
shell: bash
if: always()
run: |
cat test/**/*.log || true
- name: Get workflow job id
id: get-job-id
uses: ./.github/actions/get-workflow-job-id
19 changes: 19 additions & 0 deletions .github/workflows/mac-mps.yml
@@ -54,6 +54,25 @@ jobs:
MACOS_SCCACHE_S3_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }}
MACOS_SCCACHE_S3_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }}

macos-py38-arm64-build:
name: macos-py38-arm64
uses: ./.github/workflows/_mac-build.yml
with:
sync-tag: macos-py38-arm64-build
build-environment: macos-py38-arm64
xcode-version: "13.3.1"
runner-type: macos-builder
build-generates-artifacts: true
# To match the one pre-installed in the m1 runners
python_version: 3.8
# We need to set the environment file here instead of trying to detect it automatically because
# macOS arm64 is cross-compiled from x86-64. Specifically, an arm64 conda environment
# is needed when building PyTorch macOS arm64 from x86-64
environment-file: .github/requirements/conda-env-macOS-ARM64
secrets:
MACOS_SCCACHE_S3_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }}
MACOS_SCCACHE_S3_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }}

macos-12_5-py3-arm64-mps-test:
name: macos-12.5-py3-arm64-mps
uses: ./.github/workflows/_mac-test-mps.yml
28 changes: 24 additions & 4 deletions test/test_modules.py
@@ -56,7 +56,11 @@ def _check_module(items, name, device=device, dtype=dtype):

@modules(module_db)
def test_forward(self, device, dtype, module_info, training):
msg = _get_mps_error_msg(device, dtype, module_info, [])
MPS_BLOCKLIST = [
"nn.LSTM" # segfault
]

msg = _get_mps_error_msg(device, dtype, module_info, MPS_BLOCKLIST)
if msg is not None:
self.skipTest(msg)

@@ -239,7 +243,11 @@ def test_repr(self, device, dtype, module_info, training):
def test_pickle(self, device, dtype, module_info, training):
# Test that module can be pickled and unpickled.

msg = _get_mps_error_msg(device, dtype, module_info, [])
MPS_BLOCKLIST = [
"nn.LSTM" # hard crash
]

msg = _get_mps_error_msg(device, dtype, module_info, MPS_BLOCKLIST)
if msg is not None:
self.skipTest(msg)

@@ -279,7 +287,11 @@ def test_check_inplace(self, device, dtype, module_info, training):
# Check if the inplace variant of the module gives the same result as the out of place
# variant.

msg = _get_mps_error_msg(device, dtype, module_info, [])
MPS_BLOCKLIST = [
"nn.ELU" # hard crash
]

msg = _get_mps_error_msg(device, dtype, module_info, MPS_BLOCKLIST)
if msg is not None:
self.skipTest(msg)

@@ -364,7 +376,14 @@ def inner_zero_grad(obj):
@skipIfTorchInductor("to be fixed")
def test_non_contiguous_tensors(self, device, dtype, module_info, training):
# Check modules work with non-contiguous tensors
msg = _get_mps_error_msg(device, dtype, module_info, [])
MPS_BLOCKLIST = [
# hard crashes
"nn.GRU",
"nn.LSTM",
"nn.RNN"
]

msg = _get_mps_error_msg(device, dtype, module_info, MPS_BLOCKLIST)
if msg is not None:
self.skipTest(msg)

@@ -627,6 +646,7 @@ def check_backward(cpu_output, gpu_output):
def test_memory_format(self, device, dtype, module_info, training):
MPS_BLOCKLIST = [
"nn.BatchNorm3d", # failed assert
"nn.LSTM", # segfault
]

msg = _get_mps_error_msg(device, dtype, module_info, MPS_BLOCKLIST)
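For context, each of the test_modules.py hunks above replaces an empty blocklist with a per-test MPS_BLOCKLIST that is passed to the existing _get_mps_error_msg helper. That helper is defined elsewhere in test/test_modules.py and is not part of this diff; the sketch below is only a hedged illustration of how such a blocklist-driven skip might behave, and the dtype check and message text are assumptions rather than the actual implementation.

```python
import torch

# Hypothetical sketch of the skip helper used above; the real _get_mps_error_msg
# in test/test_modules.py may differ. It returns a skip reason, or None to run.
def _get_mps_error_msg(device, dtype, module_info, blocklist):
    if not device.startswith("mps"):
        return None                      # only the MPS device is filtered
    if module_info.name in blocklist:
        return f"{module_info.name} is blocklisted on MPS (known hard crash)"
    if dtype not in (torch.float32, torch.float16):
        return f"MPS does not support {dtype} for {module_info.name}"
    return None

# Usage mirrors the tests above:
#   msg = _get_mps_error_msg(device, dtype, module_info, MPS_BLOCKLIST)
#   if msg is not None:
#       self.skipTest(msg)
```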
100 changes: 49 additions & 51 deletions test/test_mps.py
@@ -6013,7 +6013,12 @@ def helper(shape, beta=1, threshold=20):
for threshold in [0.5, 20, 30, 40, 50]:
helper(shape, beta, threshold)

# Test silu
# # Test empty shape too
# for shape in [(), (2, 3), (10, 10), (2, 3, 4, 5)]:
# for beta in [0.5, 1, 2, 3, 4]:
# for threshold in [0.5, 20, 30, 40, 50]:
# helper(shape, beta, threshold)

def test_silu(self):
def helper(shape):
cpu_x = torch.randn(shape, device='cpu', dtype=torch.float, requires_grad=True)
@@ -11326,16 +11331,32 @@ class TestConsistency(TestCaseMPS):
# All the entries in this list should be removed
BLOCKLIST = {
# Functions that hard crash
'nn.functional.softplus': [torch.float32],
'median': [torch.float32, torch.int16, torch.int32, torch.uint8],
'sgn': [torch.bool],
'linalg.inv': [torch.float32],
'linalg.inv_ex': [torch.float32],
'linalg.matrix_power': [torch.float32],
'nn.functional.interpolate': [torch.float32],
'resize_': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'nn.functional.interpolatearea': [torch.float32],
'resize_as_': [torch.float16, torch.float32],
'topk': [torch.int16, torch.int32, torch.int64, torch.uint8],

# Functions with correctness issues
'multinomial': [torch.float32],

# cpu result off, showing random values
'unique': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'divfloor_rounding': [torch.int16, torch.int32, torch.int64],
'divtrunc_rounding': [torch.float16],
'norm': [torch.float16],
'nn.functional.feature_alpha_dropoutwith_train': [torch.float32],
'cumulative_trapezoid': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'addr': [torch.float16],
'as_stridedpartial_views': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
# cpu result off, showing inf values
'trace': [torch.int64],
'normalnumber_mean': [torch.float16, torch.float32],
'new_empty_strided': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'multinomial': [torch.float32],
'floor_divide': [torch.int16, torch.int32, torch.int64],
'dist': [torch.float16],

# failure due to issue: atan2() may generate NAN in output with
@@ -11345,13 +11366,12 @@ class TestConsistency(TestCaseMPS):
'grid_sampler_2d': [torch.float32],
'nn.functional.grid_sample': [torch.float32],

# failures due to issue #102048039: powerWithPrimaryTensor() with integer input may return wrong results
'pow': [torch.int16, torch.int32, torch.int64, torch.uint8],
'__rpow__': [torch.uint8],

# failures before macOS 13.3
'nn.functional.conv_transpose2d': [torch.float32],
'nn.functional.pairwise_distance': [torch.float16],
# failures due to issue #103039644: Wrong results from avgPooling2DWithSourceTensor()
# when both ceilMode and includeZeroPadToAverage are True
'nn.functional.avg_pool1d': [torch.float32, torch.int64],
'nn.functional.avg_pool2d': [torch.float32, torch.int64],
'nn.functional.adaptive_avg_pool1d': [torch.float32],
'nn.functional.adaptive_avg_pool2d': [torch.float32],
}

UNIMPLEMENTED_OPS = {
@@ -11449,6 +11469,7 @@ class TestConsistency(TestCaseMPS):
'lu_unpack': [torch.float32],
'masked.cumprod': [torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'masked.median': [torch.float32],
'masked_scatter': [torch.bool, torch.float32, torch.float16, torch.int16, torch.int32, torch.int64, torch.uint8],
'matrix_exp': [torch.float32],
'mode': [torch.bool, torch.float32, torch.float16, torch.int16, torch.int32, torch.int64, torch.uint8],
'msort': [torch.bool, torch.float32, torch.float16, torch.int16, torch.int32, torch.int64, torch.uint8],
@@ -11468,7 +11489,6 @@ class TestConsistency(TestCaseMPS):
'nn.functional.fractional_max_pool3d': [torch.float32],
'nn.functional.adaptive_avg_pool3d': [torch.float16, torch.float32],
'nn.functional.adaptive_max_pool3d': [torch.float32],
'nn.functional.interpolatearea': [torch.float32],
'nn.functional.interpolatebicubic': [torch.float32],
'nn.functional.interpolatelinear': [torch.float32],
'nn.functional.interpolatetrilinear': [torch.float32],
@@ -11478,6 +11498,7 @@ class TestConsistency(TestCaseMPS):
'nn.functional.avg_pool3d': [torch.float32, torch.int64],
'nn.functional.ctc_loss': [torch.float32],
'nn.functional.embedding_bag': [torch.float16, torch.float32],
'nn.functional.max_pool2d': [torch.float32],
'nn.functional.hardshrink': [torch.float32],
'nn.functional.hardsigmoid': [torch.float32],
'nn.functional.logsigmoid': [torch.float32],
@@ -11506,6 +11527,7 @@ class TestConsistency(TestCaseMPS):
'polygammapolygamma_n_4': [torch.bool, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'qr': [torch.float32],
'quantile': [torch.float32],
'remainder': [torch.bool, torch.int16, torch.int32, torch.int64, torch.uint8],
'renorm': [torch.float16, torch.float32],
'roll': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'rsub': [torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
@@ -11579,7 +11601,6 @@ class TestConsistency(TestCaseMPS):
'symeig': [torch.float32],
'take': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'to_sparse': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'unique': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'var_mean': [torch.float16, torch.float32],
'var_meanunbiased': [torch.float16, torch.float32],
'vdot': [torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
@@ -11591,8 +11612,6 @@ class TestConsistency(TestCaseMPS):
# Failures due to unsupported data types on MPS backend
'bfloat16': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'chalf': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
# Byte tests are failing
'byte': [torch.float16, torch.float32],
'nn.functional.conv1d': [torch.int64],
'nn.functional.conv2d': [torch.int64],
'nn.functional.conv_transpose1d': [torch.int64],
@@ -11609,7 +11628,6 @@ class TestConsistency(TestCaseMPS):
'addmmdecomposed': [torch.int16, torch.int32, torch.int64, torch.uint8],
'addbmm': [torch.int16, torch.int32, torch.int64, torch.uint8],
'addmm': [torch.int16, torch.int32, torch.int64, torch.uint8],
'addr': [torch.int16, torch.int32, torch.int64, torch.uint8],
'addmv': [torch.int16, torch.int32, torch.int64, torch.uint8],
'baddbmm': [torch.int16, torch.int32, torch.int64, torch.uint8],
'bmm': [torch.int16, torch.int32, torch.int64, torch.uint8],
@@ -11670,22 +11688,21 @@ class TestConsistency(TestCaseMPS):
'tensordot': [torch.int16, torch.int32, torch.int64, torch.uint8],
'zeros_like': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'bincount': [torch.int16, torch.int32, torch.int64, torch.uint8],

# failures due to issue #102048039: powerWithPrimaryTensor() with integer input may return wrong results
'pow': [torch.int16, torch.int32, torch.int64, torch.uint8],
'__rpow__': [torch.int16, torch.int32],
}

UNDEFINED_BEHAVIOUR = {
# Failures due to random output generated by the Philox engine,
# causing a mismatch with the CPU results
'uniform': [torch.float16, torch.float32],
'randn': [torch.float16, torch.float32],
'rand_like': [torch.float16, torch.float32],
'randint_like': [torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'randn_like': [torch.float16, torch.float32],
'bernoulli': [torch.float32],
'nn.functional.feature_alpha_dropoutwith_train': [torch.float32],
'normal': [torch.float16, torch.float32],
'normal_': [torch.float16, torch.float32],
'normalin_place': [torch.float16, torch.float32],
'normalnumber_mean': [torch.float16, torch.float32],
'nn.functional.alpha_dropout': [torch.float32],
'nn.functional.dropout': [torch.float32],
'nn.functional.dropout2d': [torch.float32],
Expand All @@ -11694,24 +11711,10 @@ class TestConsistency(TestCaseMPS):
'new_empty': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'empty_like': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'empty': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'new_empty_strided': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
# problem 103190467, as_strided_scatter has non-deterministic behavior when the update indices are not unique
'as_strided_scatter': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
# duplicate indices are used in the testcase - undefined behaviour
'index_put': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
# problem 104760543, zero to negative integer powers are undefined
'__rpow__': [torch.int16, torch.int32, torch.int64],
}

FAST_MATH_PRECISION_ISSUES = {
# Failures due to precision issues
'tan': [torch.float32],
'pow': [torch.float32],
'masked.softmin': [torch.float32],
'masked.softmax': [torch.float32],
'masked.log_softmax': [torch.float32],
'cdist': [torch.float32],
'__rpow__': [torch.float32]
}

FP16_LOW_PRECISION_LIST = {
@@ -11730,18 +11733,13 @@ class TestConsistency(TestCaseMPS):
'mul',
}

BLOCKLIST_MACOS_12 = {
# expected failures
'nn.functional.interpolatenearest': [torch.float32],
'nn.functional.upsample_nearest': [torch.float32],
'nn.functional.conv_transpose2d': [torch.float32]
}

ALLOWLIST_MACOS_13_3 = {
'pow': [torch.int16, torch.int32, torch.int64, torch.uint8],
'__rpow__': [torch.uint8],
'nn.functional.conv_transpose2d': [torch.float32],
}
dirname = os.path.dirname(__file__)
filename = os.path.join(dirname, "cuda_results.yaml")
with open(filename) as f:
data = yaml.safe_load(f)
CUDA_RESULT = dict()
for key, value in data.items():
CUDA_RESULT[key] = torch.as_tensor(value)

MPS_SKIP_LIST = reduce(lambda x, y: dict(x, **y), (
FAST_MATH_PRECISION_ISSUES, BLOCKLIST, UNDEFINED_BEHAVIOUR, EXPECTED_FAILURES, UNIMPLEMENTED_OPS))
@@ -11914,10 +11912,10 @@ def get_samples():
self.assertEqual(cpu_out, mps_out, atol=atol, rtol=rtol)

except Exception as e:
if any(s in str(e).lower() for s in ["int64", "macos 13", "adaptive pool mps"]):
self.skipTest(f"Expected Runtime Error: {str(e)}")
if any(s in str(e).lower() for s in ["int64", "macos 13"]):
self.skipTest(f"{str(e)}")

if op.name in CUDA_RESULT and self.compare_with_CUDA(op, mps_out, atol=atol, rtol=rtol):
if op.name in self.CUDA_RESULT and self.compare_with_CUDA(op, mps_out, atol=atol, rtol=rtol):
continue

if not generate_new_truth:
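One implementation detail worth noting in the test_mps.py changes above: MPS_SKIP_LIST is built by folding the category dictionaries together with reduce(lambda x, y: dict(x, **y), ...). The sketch below uses made-up stand-in entries, not the real blocklists, and only illustrates the merge semantics under that assumption: when the same op name appears in more than one category dict, the dtypes from the last dict that defines it win.

```python
from functools import reduce
import torch

# Stand-in category dicts for illustration only; not the real blocklists above.
FAST_MATH_PRECISION_ISSUES = {'tan': [torch.float32]}
BLOCKLIST = {'median': [torch.float32], 'tan': [torch.float16]}

# dict(x, **y) copies x and overlays y, so later dicts overwrite duplicate keys.
MPS_SKIP_LIST = reduce(lambda x, y: dict(x, **y),
                       (FAST_MATH_PRECISION_ISSUES, BLOCKLIST))

print(MPS_SKIP_LIST['tan'])     # [torch.float16] -- the float32 entry was overwritten
print(MPS_SKIP_LIST['median'])  # [torch.float32]
```

If an op such as 'pow' or '__rpow__' is listed in several categories with different dtypes, this overwrite behavior determines which dtypes are ultimately skipped.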
