diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 0799d87..a0b6c9b 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -32,7 +32,7 @@ jobs: build_wheels: - name: Wheels on ${{ matrix.platform_id }} - ${{ matrix.os }} + name: Wheels - ${{ matrix.cibw_archs }} - ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -43,17 +43,30 @@ jobs: - os: windows-latest python-version: "3.8" platform_id: win_amd64 + cibw_archs: "auto" # Linux 64 bit manylinux2014 - os: ubuntu-latest python-version: "3.8" platform_id: manylinux_x86_64 manylinux_image: manylinux2014 + cibw_archs: "native" - # Use x86 macOS runner to build both x86 and ARM. GitHub does not offer M1/M2 yet (only self-hosted). + # Linux 64 bit manylinux2014 for aarch64 + # Separate runner because this requires emulation (only x86 runners are available) and is very slow. + - os: ubuntu-latest + python-version: "3.8" + platform_id: manylinux_aarch64 + manylinux_image: manylinux2014 + cibw_archs: "aarch64" + + # Use x86 macOS runner to build both x86 and ARM. + # GitHub does not offer Apple Silicon yet (only for self-hosted). + # See https://github.com/github/roadmap/issues/528 - os: macos-latest python-version: "3.8" platform_id: macosx_x86_64 + cibw_archs: "x86_64 arm64" steps: - uses: actions/checkout@v3 @@ -64,16 +77,31 @@ jobs: with: python-version: ${{ matrix.python-version }} -# - name: Install tools (macOS) -# if: contains(matrix.os, 'macos') -# # Install coreutils which includes `nproc` used by `make -j` in suitesparse.sh -# # -# # GitHub actions comes with libomp already installed, but for its native arch only. Must build universal one -# # manually so that both x86 and arm builds can be built. 
-# run: | -# brew install coreutils -# brew install libomp -# sh add_arm_to_libomp_dylib.sh + # aarch64 Linux builds are cross-compiled on x86 runners using emulation + # see https://cibuildwheel.readthedocs.io/en/stable/faq/#emulation + - name: Setup QEMU (for aarch64) + if: matrix.cibw_archs == 'aarch64' + uses: docker/setup-qemu-action@v2 + with: + platforms: arm64 + + - name: Setup env (for aarch64) + if: matrix.cibw_archs == 'aarch64' + # Ask suitesparse.sh to compile faster by optimizing fewer types. Otherwise, the build takes too long to finish + # within the 6 hour limit. + run: | + echo "SUITESPARSE_FAST_BUILD=1" >> "$GITHUB_ENV" + + - name: Install tools (macOS) + if: contains(matrix.os, 'macos') + # Install coreutils which includes `nproc` used by `make -j` in suitesparse.sh + # + # GitHub actions comes with libomp already installed, but for its native arch only. Must build universal one + # manually so that both x86 and arm builds can be built. + run: | + brew install coreutils + brew install libomp + sh add_arm_to_libomp_dylib.sh - name: Build Wheels env: @@ -83,6 +111,8 @@ jobs: # Build SuiteSparse CIBW_BEFORE_ALL: bash suitesparse.sh ${{ github.ref }} + CIBW_ENVIRONMENT_LINUX: SUITESPARSE_FAST_BUILD=${{ env.SUITESPARSE_FAST_BUILD }} + # CMAKE_GNUtoMS=ON asks suitesparse.sh to build libraries in MSVC style on Windows. CIBW_ENVIRONMENT_WINDOWS: CMAKE_GNUtoMS=ON GRAPHBLAS_PREFIX="C:/GraphBLAS" @@ -92,11 +122,11 @@ jobs: # Uncomment to only build CPython wheels # CIBW_BUILD: "cp*" - # macOS: build x86_64 and arm64 - #CIBW_ARCHS_MACOS: "x86_64 arm64" + # Architectures to build specified in matrix + CIBW_ARCHS: ${{ matrix.cibw_archs }} - # No 32-bit builds - CIBW_SKIP: "*-win32 *_i686 *musl*" + # No 32-bit builds, no musllinux, no PyPy aarch64 (only due to build speed, numpy does not ship aarch64 pypy wheels) + CIBW_SKIP: "*-win32 *_i686 *musl* pp*aarch64" # Use delvewheel on Windows. # This copies graphblas.dll into the wheel. 
"repair" in cibuildwheel parlance includes copying any shared diff --git a/add_arm_to_libomp_dylib.sh b/add_arm_to_libomp_dylib.sh index 8492c7c..5109f98 100755 --- a/add_arm_to_libomp_dylib.sh +++ b/add_arm_to_libomp_dylib.sh @@ -1,4 +1,23 @@ #!/bin/sh +# Construct a universal2 version of homebrew's libomp. +# +# Homebrew's libomp works well to patch Apple clang's missing OpenMP support. The problem is a combination of: +# - Brew installs libomp built for x86 *or* ARM, matching the architecture of the machine it is running on. +# - GitHub Actions only has x86 runners as of now. Check back in Q4 2023. https://github.com/github/roadmap/issues/528 +# - The linker will select the first found libomp, and if that version does not include the expected architecture then +# linking will fail. +# +# One solution is to build a universal2 version of libomp that includes both architectures. That's what this script +# does. It adds the ARM version of libomp to the x86 version. +# +# This script assumes it is running on x86 with x86 libomp already installed. + +if [ "$(arch)" != "x86_64" ] && [ "$(arch)" != "i386" ]; then + echo "Not running on x86 as expected. Running on:" + arch + echo "If the above says arm64 then this hack is no longer necessary. Remove this script from the build." + exit 1; +fi #mkdir x86lib mkdir armlib @@ -11,8 +30,21 @@ brew fetch --force --bottle-tag=arm64_big_sur libomp #tar -xzf $(brew --cache --bottle-tag=x86_64_monterey libomp) --strip-components 2 -C x86lib tar -xzf $(brew --cache --bottle-tag=arm64_big_sur libomp) --strip-components 2 -C armlib -# merge +# ARM and x86 dylibs have different install names due to different brew install directories. +# The x86 install name will be expected so make the ARM install name match. 
+X86_INSTALL_NAME="$(otool -X -D $(brew --prefix libomp)/lib/libomp.dylib)" +install_name_tool -id "${X86_INSTALL_NAME}" armlib/lib/libomp.dylib +codesign --force -s - armlib/lib/libomp.dylib + +# merge the downloaded (arm) libomp with the already installed (x86) libomp to create a universal libomp lipo armlib/lib/libomp.dylib $(brew --prefix libomp)/lib/libomp.dylib -output libomp.dylib -create + +# print contents of universal library for reference +otool -arch all -L libomp.dylib + +# replace the x86-only libomp with the newly-created universal one cp -f libomp.dylib $(brew --prefix libomp)/lib + +# clean up rm libomp.dylib rm -rf armlib diff --git a/suitesparse.sh b/suitesparse.sh index 26b29fb..5d930dc 100755 --- a/suitesparse.sh +++ b/suitesparse.sh @@ -25,9 +25,8 @@ if [ -n "${BREW_LIBOMP}" ]; then cmake_params+=(-DOpenMP_libomp_LIBRARY="omp") export LDFLAGS="-L$(brew --prefix libomp)/lib" - export CFLAGS="-arch x86_64" -# # build both x86 and ARM -# export CFLAGS="-arch x86_64 -arch arm64" + # build both x86 and ARM + export CFLAGS="-arch x86_64 -arch arm64" fi if [ -n "${CMAKE_GNUtoMS}" ]; then @@ -63,6 +62,26 @@ echo "#define GxB_NO_UINT32 1" >> ../Source/GB_control.h # echo "#define GxB_NO_UINT64 1" >> ../Source/GB_control.h # echo "#define GxB_NO_UINT8 1" >> ../Source/GB_control.h +if [ -n "${SUITESPARSE_FAST_BUILD}" ]; then + echo "suitesparse.sh: Fast build requested." + # Disable optimizing even more types. This is for builds that don't finish in runner resource limits, + # such as emulated aarch64. 
+ + echo "#define GxB_NO_BOOL 1" >> ../Source/GB_control.h +# echo "#define GxB_NO_FP32 1" >> ../Source/GB_control.h +# echo "#define GxB_NO_FP64 1" >> ../Source/GB_control.h + echo "#define GxB_NO_FC32 1" >> ../Source/GB_control.h + echo "#define GxB_NO_FC64 1" >> ../Source/GB_control.h + echo "#define GxB_NO_INT16 1" >> ../Source/GB_control.h + echo "#define GxB_NO_INT32 1" >> ../Source/GB_control.h +# echo "#define GxB_NO_INT64 1" >> ../Source/GB_control.h +# echo "#define GxB_NO_INT8 1" >> ../Source/GB_control.h + echo "#define GxB_NO_UINT16 1" >> ../Source/GB_control.h + echo "#define GxB_NO_UINT32 1" >> ../Source/GB_control.h + echo "#define GxB_NO_UINT64 1" >> ../Source/GB_control.h + echo "#define GxB_NO_UINT8 1" >> ../Source/GB_control.h +fi + # Disable all Source/Generated2 kernels. For workflow development only. #cmake_params+=(-DCMAKE_CUDA_DEV=1)