Enable MPS CI runners (#252)
Use DISTRIBUTED=1 for MPS CI runners (#292)
Remove unnecessary CI files (#327)
Enable test modules on MPS and CI runners (#305) (#324)
Enable 13.3 in CI (#359)

Remove periodic file (running between PRs) (#336)
DenisVieriu97 authored and skotapati committed Apr 7, 2023
1 parent 57ea855 commit 2931687
Showing 6 changed files with 112 additions and 65 deletions.
10 changes: 0 additions & 10 deletions .ci/pytorch/macos-build.sh
@@ -40,16 +40,6 @@ cross_compile_arm64() {
USE_DISTRIBUTED=1 CMAKE_OSX_ARCHITECTURES=arm64 MACOSX_DEPLOYMENT_TARGET=11.0 USE_OPENMP=OFF USE_MKLDNN=OFF USE_QNNPACK=OFF WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
}

compile_arm64() {
# Compilation for arm64
# TODO: Compile with OpenMP support (but this causes CI regressions as cross-compilation was done with OpenMP disabled)
USE_DISTRIBUTED=0 USE_OPENMP=0 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
}

compile_x86_64() {
USE_DISTRIBUTED=0 WERROR=1 python setup.py bdist_wheel
}

compile_x86_64() {
USE_DISTRIBUTED=0 WERROR=1 python setup.py bdist_wheel
}
14 changes: 14 additions & 0 deletions .github/workflows/_mac-test-mps.yml
@@ -137,6 +137,20 @@ jobs:
${CONDA_RUN} python3 test/test_modules.py -k mps --verbose
- name: Run MPS Test Modules
id: test_2
env:
ENV_NAME: conda-test-env-${{ github.run_id }}
shell: arch -arch arm64 bash {0}
# During bring-up of test_modules, don't show this as an error.
continue-on-error: true
run: |
# shellcheck disable=SC1090
set -ex
# TODO(https://github.com/pytorch/pytorch/issues/79293)
${CONDA_RUN} python3 test/test_modules.py -k mps --verbose
- name: Print remaining test logs
shell: bash
if: always() && steps.test.conclusion
6 changes: 6 additions & 0 deletions .github/workflows/_mac-test.yml
@@ -196,6 +196,12 @@ jobs:
run: |
cat test/**/*.log || true
- name: Print remaining test logs
shell: bash
if: always()
run: |
cat test/**/*.log || true
- name: Get workflow job id
id: get-job-id
uses: ./.github/actions/get-workflow-job-id
19 changes: 19 additions & 0 deletions .github/workflows/mac-mps.yml
@@ -54,6 +54,25 @@ jobs:
MACOS_SCCACHE_S3_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }}
MACOS_SCCACHE_S3_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }}

macos-py38-arm64-build:
name: macos-py38-arm64
uses: ./.github/workflows/_mac-build.yml
with:
sync-tag: macos-py38-arm64-build
build-environment: macos-py38-arm64
xcode-version: "13.3.1"
runner-type: macos-builder
build-generates-artifacts: true
# To match the one pre-installed in the m1 runners
python_version: 3.8
# We need to set the environment file here instead of trying to detect it automatically because
# macOS arm64 is cross-compiled from x86-64. Specifically, an arm64 conda environment
# is needed when building PyTorch macOS arm64 from x86-64
environment-file: .github/requirements/conda-env-macOS-ARM64
secrets:
MACOS_SCCACHE_S3_ACCESS_KEY_ID: ${{ secrets.MACOS_SCCACHE_S3_ACCESS_KEY_ID }}
MACOS_SCCACHE_S3_SECRET_ACCESS_KEY: ${{ secrets.MACOS_SCCACHE_S3_SECRET_ACCESS_KEY }}

macos-12_5-py3-arm64-mps-test:
name: macos-12.5-py3-arm64-mps
uses: ./.github/workflows/_mac-test-mps.yml
28 changes: 24 additions & 4 deletions test/test_modules.py
@@ -56,7 +56,11 @@ def _check_module(items, name, device=device, dtype=dtype):

@modules(module_db)
def test_forward(self, device, dtype, module_info, training):
msg = _get_mps_error_msg(device, dtype, module_info, [])
MPS_BLOCKLIST = [
"nn.LSTM" # segfault
]

msg = _get_mps_error_msg(device, dtype, module_info, MPS_BLOCKLIST)
if msg is not None:
self.skipTest(msg)

@@ -239,7 +243,11 @@ def test_repr(self, device, dtype, module_info, training):
def test_pickle(self, device, dtype, module_info, training):
# Test that module can be pickled and unpickled.

msg = _get_mps_error_msg(device, dtype, module_info, [])
MPS_BLOCKLIST = [
"nn.LSTM" # hard crash
]

msg = _get_mps_error_msg(device, dtype, module_info, MPS_BLOCKLIST)
if msg is not None:
self.skipTest(msg)

@@ -279,7 +287,11 @@ def test_check_inplace(self, device, dtype, module_info, training):
# Check if the inplace variant of the module gives the same result as the out of place
# variant.

msg = _get_mps_error_msg(device, dtype, module_info, [])
MPS_BLOCKLIST = [
"nn.ELU" # hard crash
]

msg = _get_mps_error_msg(device, dtype, module_info, MPS_BLOCKLIST)
if msg is not None:
self.skipTest(msg)

@@ -364,7 +376,14 @@ def inner_zero_grad(obj):
@skipIfTorchInductor("to be fixed")
def test_non_contiguous_tensors(self, device, dtype, module_info, training):
# Check modules work with non-contiguous tensors
msg = _get_mps_error_msg(device, dtype, module_info, [])
MPS_BLOCKLIST = [
# hard crashes
"nn.GRU",
"nn.LSTM",
"nn.RNN"
]

msg = _get_mps_error_msg(device, dtype, module_info, MPS_BLOCKLIST)
if msg is not None:
self.skipTest(msg)

@@ -627,6 +646,7 @@ def check_backward(cpu_output, gpu_output):
def test_memory_format(self, device, dtype, module_info, training):
MPS_BLOCKLIST = [
"nn.BatchNorm3d", # failed assert
"nn.LSTM", # segfault
]

msg = _get_mps_error_msg(device, dtype, module_info, MPS_BLOCKLIST)
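For context, each of the test_modules.py hunks above replaces an empty blocklist with a per-test MPS_BLOCKLIST that is passed to the existing _get_mps_error_msg helper. That helper is defined elsewhere in test/test_modules.py and is not part of this diff; the sketch below is only a hedged illustration of how such a blocklist-driven skip might behave, and the dtype check and message text are assumptions rather than the actual implementation.

```python
import torch

# Hypothetical sketch of the skip helper used above; the real _get_mps_error_msg
# in test/test_modules.py may differ. It returns a skip reason, or None to run.
def _get_mps_error_msg(device, dtype, module_info, blocklist):
    if not device.startswith("mps"):
        return None                      # only the MPS device is filtered
    if module_info.name in blocklist:
        return f"{module_info.name} is blocklisted on MPS (known hard crash)"
    if dtype not in (torch.float32, torch.float16):
        return f"MPS does not support {dtype} for {module_info.name}"
    return None

# Usage mirrors the tests above:
#   msg = _get_mps_error_msg(device, dtype, module_info, MPS_BLOCKLIST)
#   if msg is not None:
#       self.skipTest(msg)
```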
100 changes: 49 additions & 51 deletions test/test_mps.py
@@ -6013,7 +6013,12 @@ def helper(shape, beta=1, threshold=20):
for threshold in [0.5, 20, 30, 40, 50]:
helper(shape, beta, threshold)

# Test silu
# # Test empty shape too
# for shape in [(), (2, 3), (10, 10), (2, 3, 4, 5)]:
# for beta in [0.5, 1, 2, 3, 4]:
# for threshold in [0.5, 20, 30, 40, 50]:
# helper(shape, beta, threshold)

def test_silu(self):
def helper(shape):
cpu_x = torch.randn(shape, device='cpu', dtype=torch.float, requires_grad=True)
@@ -11326,16 +11331,32 @@ class TestConsistency(TestCaseMPS):
# All the entries in this list should be removed
BLOCKLIST = {
# Functions that hard crash
'nn.functional.softplus': [torch.float32],
'median': [torch.float32, torch.int16, torch.int32, torch.uint8],
'sgn': [torch.bool],
'linalg.inv': [torch.float32],
'linalg.inv_ex': [torch.float32],
'linalg.matrix_power': [torch.float32],
'nn.functional.interpolate': [torch.float32],
'resize_': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'nn.functional.interpolatearea': [torch.float32],
'resize_as_': [torch.float16, torch.float32],
'topk': [torch.int16, torch.int32, torch.int64, torch.uint8],

# Functions with correctness issues
'multinomial': [torch.float32],

# cpu result off, showing random values
'unique': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'divfloor_rounding': [torch.int16, torch.int32, torch.int64],
'divtrunc_rounding': [torch.float16],
'norm': [torch.float16],
'nn.functional.feature_alpha_dropoutwith_train': [torch.float32],
'cumulative_trapezoid': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'addr': [torch.float16],
'as_stridedpartial_views': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
# cpu result off, showing inf values
'trace': [torch.int64],
'normalnumber_mean': [torch.float16, torch.float32],
'new_empty_strided': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'multinomial': [torch.float32],
'floor_divide': [torch.int16, torch.int32, torch.int64],
'dist': [torch.float16],

# failure due to issue: atan2() may generate NAN in output with
@@ -11345,13 +11366,12 @@ class TestConsistency(TestCaseMPS):
'grid_sampler_2d': [torch.float32],
'nn.functional.grid_sample': [torch.float32],

# failures due to issue #102048039: powerWithPrimaryTensor() with integer input may return wrong results
'pow': [torch.int16, torch.int32, torch.int64, torch.uint8],
'__rpow__': [torch.uint8],

# failures before macOS 13.3
'nn.functional.conv_transpose2d': [torch.float32],
'nn.functional.pairwise_distance': [torch.float16],
# failures due to issue #103039644: Wrong results from avgPooling2DWithSourceTensor()
# when both ceilMode and includeZeroPadToAverage are True
'nn.functional.avg_pool1d': [torch.float32, torch.int64],
'nn.functional.avg_pool2d': [torch.float32, torch.int64],
'nn.functional.adaptive_avg_pool1d': [torch.float32],
'nn.functional.adaptive_avg_pool2d': [torch.float32],
}

UNIMPLEMENTED_OPS = {
@@ -11449,6 +11469,7 @@ class TestConsistency(TestCaseMPS):
'lu_unpack': [torch.float32],
'masked.cumprod': [torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'masked.median': [torch.float32],
'masked_scatter': [torch.bool, torch.float32, torch.float16, torch.int16, torch.int32, torch.int64, torch.uint8],
'matrix_exp': [torch.float32],
'mode': [torch.bool, torch.float32, torch.float16, torch.int16, torch.int32, torch.int64, torch.uint8],
'msort': [torch.bool, torch.float32, torch.float16, torch.int16, torch.int32, torch.int64, torch.uint8],
@@ -11468,7 +11489,6 @@ class TestConsistency(TestCaseMPS):
'nn.functional.fractional_max_pool3d': [torch.float32],
'nn.functional.adaptive_avg_pool3d': [torch.float16, torch.float32],
'nn.functional.adaptive_max_pool3d': [torch.float32],
'nn.functional.interpolatearea': [torch.float32],
'nn.functional.interpolatebicubic': [torch.float32],
'nn.functional.interpolatelinear': [torch.float32],
'nn.functional.interpolatetrilinear': [torch.float32],
@@ -11478,6 +11498,7 @@ class TestConsistency(TestCaseMPS):
'nn.functional.avg_pool3d': [torch.float32, torch.int64],
'nn.functional.ctc_loss': [torch.float32],
'nn.functional.embedding_bag': [torch.float16, torch.float32],
'nn.functional.max_pool2d': [torch.float32],
'nn.functional.hardshrink': [torch.float32],
'nn.functional.hardsigmoid': [torch.float32],
'nn.functional.logsigmoid': [torch.float32],
@@ -11506,6 +11527,7 @@ class TestConsistency(TestCaseMPS):
'polygammapolygamma_n_4': [torch.bool, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'qr': [torch.float32],
'quantile': [torch.float32],
'remainder': [torch.bool, torch.int16, torch.int32, torch.int64, torch.uint8],
'renorm': [torch.float16, torch.float32],
'roll': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'rsub': [torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
@@ -11579,7 +11601,6 @@ class TestConsistency(TestCaseMPS):
'symeig': [torch.float32],
'take': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'to_sparse': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'unique': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'var_mean': [torch.float16, torch.float32],
'var_meanunbiased': [torch.float16, torch.float32],
'vdot': [torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
@@ -11591,8 +11612,6 @@ class TestConsistency(TestCaseMPS):
# Failures due to unsupported data types on MPS backend
'bfloat16': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'chalf': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
# Byte tests are failing
'byte': [torch.float16, torch.float32],
'nn.functional.conv1d': [torch.int64],
'nn.functional.conv2d': [torch.int64],
'nn.functional.conv_transpose1d': [torch.int64],
@@ -11609,7 +11628,6 @@ class TestConsistency(TestCaseMPS):
'addmmdecomposed': [torch.int16, torch.int32, torch.int64, torch.uint8],
'addbmm': [torch.int16, torch.int32, torch.int64, torch.uint8],
'addmm': [torch.int16, torch.int32, torch.int64, torch.uint8],
'addr': [torch.int16, torch.int32, torch.int64, torch.uint8],
'addmv': [torch.int16, torch.int32, torch.int64, torch.uint8],
'baddbmm': [torch.int16, torch.int32, torch.int64, torch.uint8],
'bmm': [torch.int16, torch.int32, torch.int64, torch.uint8],
@@ -11670,22 +11688,21 @@ class TestConsistency(TestCaseMPS):
'tensordot': [torch.int16, torch.int32, torch.int64, torch.uint8],
'zeros_like': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'bincount': [torch.int16, torch.int32, torch.int64, torch.uint8],

# failures due to issue #102048039: powerWithPrimaryTensor() with integer input may return wrong results
'pow': [torch.int16, torch.int32, torch.int64, torch.uint8],
'__rpow__': [torch.int16, torch.int32],
}

UNDEFINED_BEHAVIOUR = {
# Failures due to random output generated by the Philox engine,
# causing a mismatch with the CPU results
'uniform': [torch.float16, torch.float32],
'randn': [torch.float16, torch.float32],
'rand_like': [torch.float16, torch.float32],
'randint_like': [torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'randn_like': [torch.float16, torch.float32],
'bernoulli': [torch.float32],
'nn.functional.feature_alpha_dropoutwith_train': [torch.float32],
'normal': [torch.float16, torch.float32],
'normal_': [torch.float16, torch.float32],
'normalin_place': [torch.float16, torch.float32],
'normalnumber_mean': [torch.float16, torch.float32],
'nn.functional.alpha_dropout': [torch.float32],
'nn.functional.dropout': [torch.float32],
'nn.functional.dropout2d': [torch.float32],
Expand All @@ -11694,24 +11711,10 @@ class TestConsistency(TestCaseMPS):
'new_empty': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'empty_like': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'empty': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
'new_empty_strided': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
# problem 103190467, as_strided_scatter has non-deterministic behavior when the update indices are not unique
'as_strided_scatter': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
# duplicate indices are used in the testcase - undefined behaviour
'index_put': [torch.bool, torch.float16, torch.float32, torch.int16, torch.int32, torch.int64, torch.uint8],
# problem 104760543, zero to negative integer powers are undefined
'__rpow__': [torch.int16, torch.int32, torch.int64],
}

FAST_MATH_PRECISION_ISSUES = {
# Failures due to precision issues
'tan': [torch.float32],
'pow': [torch.float32],
'masked.softmin': [torch.float32],
'masked.softmax': [torch.float32],
'masked.log_softmax': [torch.float32],
'cdist': [torch.float32],
'__rpow__': [torch.float32]
}

FP16_LOW_PRECISION_LIST = {
@@ -11730,18 +11733,13 @@ class TestConsistency(TestCaseMPS):
'mul',
}

BLOCKLIST_MACOS_12 = {
# expected failures
'nn.functional.interpolatenearest': [torch.float32],
'nn.functional.upsample_nearest': [torch.float32],
'nn.functional.conv_transpose2d': [torch.float32]
}

ALLOWLIST_MACOS_13_3 = {
'pow': [torch.int16, torch.int32, torch.int64, torch.uint8],
'__rpow__': [torch.uint8],
'nn.functional.conv_transpose2d': [torch.float32],
}
dirname = os.path.dirname(__file__)
filename = os.path.join(dirname, "cuda_results.yaml")
with open(filename) as f:
data = yaml.safe_load(f)
CUDA_RESULT = dict()
for key, value in data.items():
CUDA_RESULT[key] = torch.as_tensor(value)

MPS_SKIP_LIST = reduce(lambda x, y: dict(x, **y), (
FAST_MATH_PRECISION_ISSUES, BLOCKLIST, UNDEFINED_BEHAVIOUR, EXPECTED_FAILURES, UNIMPLEMENTED_OPS))
@@ -11914,10 +11912,10 @@ def get_samples():
self.assertEqual(cpu_out, mps_out, atol=atol, rtol=rtol)

except Exception as e:
if any(s in str(e).lower() for s in ["int64", "macos 13", "adaptive pool mps"]):
self.skipTest(f"Expected Runtime Error: {str(e)}")
if any(s in str(e).lower() for s in ["int64", "macos 13"]):
self.skipTest(f"{str(e)}")

if op.name in CUDA_RESULT and self.compare_with_CUDA(op, mps_out, atol=atol, rtol=rtol):
if op.name in self.CUDA_RESULT and self.compare_with_CUDA(op, mps_out, atol=atol, rtol=rtol):
continue

if not generate_new_truth:
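One implementation detail worth noting in the test_mps.py changes above: MPS_SKIP_LIST is built by folding the category dictionaries together with reduce(lambda x, y: dict(x, **y), ...). The sketch below uses made-up stand-in entries, not the real blocklists, and only illustrates the merge semantics under that assumption: when the same op name appears in more than one category dict, the dtypes from the last dict that defines it win.

```python
from functools import reduce
import torch

# Stand-in category dicts for illustration only; not the real blocklists above.
FAST_MATH_PRECISION_ISSUES = {'tan': [torch.float32]}
BLOCKLIST = {'median': [torch.float32], 'tan': [torch.float16]}

# dict(x, **y) copies x and overlays y, so later dicts overwrite duplicate keys.
MPS_SKIP_LIST = reduce(lambda x, y: dict(x, **y),
                       (FAST_MATH_PRECISION_ISSUES, BLOCKLIST))

print(MPS_SKIP_LIST['tan'])     # [torch.float16] -- the float32 entry was overwritten
print(MPS_SKIP_LIST['median'])  # [torch.float32]
```

If an op such as 'pow' or '__rpow__' is listed in several categories with different dtypes, this overwrite behavior determines which dtypes are ultimately skipped.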
