59 changes: 37 additions & 22 deletions .github/workflows/release.yml
@@ -42,25 +42,40 @@ jobs:

wheel:
name: Build Wheel
runs-on: ${{ matrix.os }}
needs: release

runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: ['ubuntu-20.04']
python-version: ['3.9', '3.10', '3.11', '3.12', '3.13']
pytorch-version: ['2.2.2', '2.3.1', '2.4.0', '2.5.1', '2.6.0']
cuda-version: ['12.4.0']
exclude:
# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
# Pytorch < 2.5 does not support Python 3.13
- pytorch-version: '2.2.2'
python-version: '3.13'
- pytorch-version: '2.3.1'
python-version: '3.13'
- pytorch-version: '2.4.0'
python-version: '3.13'
# Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
# manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
os: [ ubuntu-22.04, ubuntu-22.04-arm ]
python-version: [ '3.9', '3.10', '3.11', '3.12', '3.13' ]
pytorch-version: [ '2.4.0', '2.5.1', '2.6.0', '2.7.0' ]
cuda-version: [ '12.4.1', '12.8.1' ]
# We need separate wheels that either use the C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
# Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
# Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
# when building without C++11 ABI and using it on nvcr images.
cxx11_abi: [ 'FALSE', 'TRUE' ]
exclude:
# see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
# PyTorch < 2.5 does not support Python 3.13
- pytorch-version: '2.4.0'
python-version: '3.13'

# PyTorch 2.7.0 must only use CUDA 12.8.1
- pytorch-version: '2.7.0'
cuda-version: '12.4.1'

# All other PyTorch (< 2.7.0) must only use CUDA 12.4.1
- pytorch-version: '2.4.0'
cuda-version: '12.8.1'
- pytorch-version: '2.5.1'
cuda-version: '12.8.1'
- pytorch-version: '2.6.0'
cuda-version: '12.8.1'
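
The cxx11_abi axis above is needed because an extension built against one ABI setting cannot be loaded by a PyTorch build that uses the other, which is exactly the undefined-symbol error mentioned in the comment. A quick way to check which flavor a given environment needs (a minimal sketch, not part of the workflow, assuming PyTorch is already installed):

```python
import torch

# True when PyTorch was compiled with -D_GLIBCXX_USE_CXX11_ABI=1 (e.g. the nvcr
# images mentioned above), False when it was built without the C++11 ABI.
print(torch.compiled_with_cxx11_abi())
```

Picking the wheel whose cxx11_abi tag matches this value avoids the import error described in the comment.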

steps:
- name: Checkout
@@ -90,7 +105,7 @@ jobs:

- name: Install CUDA ${{ matrix.cuda-version }}
if: ${{ matrix.cuda-version != 'cpu' }}
uses: Jimver/cuda-toolkit@v0.2.19
uses: Jimver/cuda-toolkit@v0.2.23
id: cuda-toolkit
with:
cuda: ${{ matrix.cuda-version }}
@@ -142,10 +157,10 @@ jobs:
strategy:
fail-fast: false
matrix:
os: ['ubuntu-20.04']
python-version: ['3.10']
pytorch-version: ['2.3.0'] # Must be the most recent version that meets requirements-cuda.txt.
cuda-version: ['12.2.2']
os: ['ubuntu-latest']
python-version: ['3.12']
pytorch-version: ['2.7.0'] # Must be the most recent version that meets requirements-cuda.txt.
cuda-version: [ '12.4.1' ]

steps:
- name: Checkout
@@ -163,7 +178,7 @@ jobs:
bash -x .github/workflows/scripts/env.sh

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

@@ -175,7 +190,7 @@

- name: Install CUDA ${{ matrix.cuda-version }}
if: ${{ matrix.cuda-version != 'cpu' }}
uses: Jimver/cuda-toolkit@v0.2.14
uses: Jimver/cuda-toolkit@v0.2.23
id: cuda-toolkit
with:
cuda: ${{ matrix.cuda-version }}
15 changes: 12 additions & 3 deletions .github/workflows/scripts/build.sh
@@ -3,6 +3,17 @@
python_executable=python$1
cuda_home=/usr/local/cuda-$2

# Select the target GPU architectures based on the CUDA version
if [ "$2" = "12.8.1" ]; then
    echo "CUDA version is 12.8.1, including Blackwell (10.0, 12.0) in the release wheels."
    # Make sure release wheels are built for the following architectures
    export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX"
else
    echo "CUDA version is $2, building release wheels up to Hopper (9.0)."
    # Make sure release wheels are built for the following architectures
    export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
fi
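
For orientation, the same selection expressed in Python, with the GPU generation each compute capability corresponds to (a sketch for illustration only; nothing in the repository defines this mapping):

```python
# Compute capability -> GPU generation (NVIDIA's usual naming).
GENERATIONS = {
    "7.0": "Volta", "7.5": "Turing",
    "8.0": "Ampere", "8.6": "Ampere", "8.9": "Ada Lovelace",
    "9.0": "Hopper", "10.0": "Blackwell", "12.0": "Blackwell",
}

def arch_list(cuda_version: str) -> str:
    # CUDA 12.8 is the first toolkit release that can target Blackwell (10.0, 12.0).
    if cuda_version == "12.8.1":
        return "7.0 7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX"
    return "7.0 7.5 8.0 8.6 8.9 9.0+PTX"

print(arch_list("12.4.1"))  # 7.0 7.5 8.0 8.6 8.9 9.0+PTX
```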

# Update paths
PATH=${cuda_home}/bin:$PATH
LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH
@@ -13,8 +24,6 @@ $python_executable -m pip install flash_attn triton

# Limit the number of parallel jobs to avoid OOM
export MAX_JOBS=1
# Make sure release wheels are built for the following architectures
export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
# Build
if [ "$3" = sdist ];
then
@@ -24,4 +33,4 @@ MINFERENCE_FORCE_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist
tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}
wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
fi
fi
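
The sed expression above turns the CUDA/torch combination into a local version tag by rewriting only the second '-' in the wheel's basename. The same transformation in Python, with a hypothetical wheel name for illustration:

```python
tmpname = "cu124torch2.6"  # cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}
basename = "minference-0.1.6-cp310-cp310-linux_x86_64.whl"  # hypothetical dist/ output

# Equivalent of `sed "s/-/+$tmpname-/2"`: replace only the 2nd "-".
parts = basename.split("-")
renamed = f"{parts[0]}-{parts[1]}+{tmpname}-" + "-".join(parts[2:])
print(renamed)  # minference-0.1.6+cu124torch2.6-cp310-cp310-linux_x86_64.whl
```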
31 changes: 30 additions & 1 deletion .github/workflows/scripts/cuda-install.sh
@@ -5,8 +5,37 @@ cuda_version=$(echo $1 | tr "." "-")
# Removes '-' and '.' ex: ubuntu-20.04 -> ubuntu2004
OS=$(echo $2 | tr -d ".\-")

ARCH=$(uname -m)
ARCH_TYPE=$ARCH

# Detect whether this is a Tegra (Jetson) device
if [[ "$ARCH" == "aarch64" ]]; then
if uname -a | grep -qi tegra; then
ARCH_TYPE="tegra-aarch64"
fi
fi

echo "Detected architecture: ${ARCH_TYPE}"

# Installs CUDA
wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-keyring_1.1-1_all.deb
if [[ "$ARCH_TYPE" == "tegra-aarch64" ]]; then
# Jetson (Tegra)
wget -nv \
https://developer.download.nvidia.com/compute/cuda/repos/${OS}/arm64/cuda-${DISTRO}.pin \
-O /etc/apt/preferences.d/cuda-repository-pin-600

elif [[ "$ARCH_TYPE" == "tegra-aarch64" ]]; then
# Jetson (Tegra)
wget -nv \
https://developer.download.nvidia.com/compute/cuda/repos/${OS}/arm64/cuda-${DISTRO}.pin \
-O /etc/apt/preferences.d/cuda-repository-pin-600
else
# ARM64 SBSA (Grace)
wget -nv \
https://developer.download.nvidia.com/compute/cuda/repos/${OS}/sbsa/cuda-${DISTRO}.pin \
-O /etc/apt/preferences.d/cuda-repository-pin-600
fi

sudo dpkg -i cuda-keyring_1.1-1_all.deb
rm cuda-keyring_1.1-1_all.deb
sudo apt -qq update
4 changes: 2 additions & 2 deletions .github/workflows/scripts/pytorch-install.sh
@@ -20,8 +20,8 @@ pip install typing-extensions==4.12.2
echo $MATRIX_CUDA_VERSION
echo $MATRIX_TORCH_VERSION
export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
minv = {'2.2': 118, '2.3': 118, '2.4': 118, '2.5': 118, '2.6': 118}[env['MATRIX_TORCH_VERSION']]; \
maxv = {'2.2': 121, '2.3': 121, '2.4': 124, '2.5': 124, '2.6': 124}[env['MATRIX_TORCH_VERSION']]; \
minv = {'2.4': 118, '2.5': 118, '2.6': 118, '2.7': 118}[env['MATRIX_TORCH_VERSION']]; \
maxv = {'2.4': 124, '2.5': 124, '2.6': 126, '2.7': 128}[env['MATRIX_TORCH_VERSION']]; \
print(max(min(int(env['MATRIX_CUDA_VERSION']), maxv), minv))" \
)
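
The TORCH_CUDA_VERSION one-liner above clamps the matrix CUDA version into the range of CUDA builds that the chosen PyTorch release is expected to ship. Spelled out with a couple of illustrative inputs (the tables mirror the script; the example values are hypothetical matrix entries):

```python
def torch_cuda_version(matrix_torch: str, matrix_cuda: int) -> int:
    minv = {"2.4": 118, "2.5": 118, "2.6": 118, "2.7": 118}[matrix_torch]
    maxv = {"2.4": 124, "2.5": 124, "2.6": 126, "2.7": 128}[matrix_torch]
    return max(min(matrix_cuda, maxv), minv)

print(torch_cuda_version("2.7", 128))  # 128: within torch 2.7's supported range
print(torch_cuda_version("2.4", 128))  # 124: clamped, torch 2.4 tops out at cu124 here
```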
if [[ ${pytorch_version} == *"dev"* ]]; then
4 changes: 2 additions & 2 deletions .github/workflows/unittest.yml
@@ -20,8 +20,8 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-2019]
python-version: ["3.9", "3.10", "3.11"]
os: [ubuntu-latest, ubuntu-24.04-arm, macos-latest, windows-latest, windows-11-arm]
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
exclude:
- os: macos-latest
python-version: '3.9'
33 changes: 29 additions & 4 deletions setup.py
@@ -119,12 +119,15 @@ def get_minference_version() -> str:
return str(version)


def get_platform():
def get_arch():
"""
Returns the platform name as used in wheel filenames.
Returns the machine architecture for the current system.
"""
if sys.platform.startswith("linux"):
return f"linux_{platform.uname().machine}"
if platform.machine() == "x86_64":
return "x86_64"
if platform.machine() == "arm64" or platform.machine() == "aarch64":
return "aarch64"
elif sys.platform == "darwin":
mac_version = ".".join(platform.mac_ver()[0].split(".")[:2])
return f"macosx_{mac_version}_x86_64"
@@ -134,6 +137,28 @@ def get_platform():
raise ValueError("Unsupported platform: {}".format(sys.platform))


def get_system() -> str:
"""
Returns the system name as used in wheel filenames.
"""
if platform.system() == "Windows":
return "win"
elif platform.system() == "Darwin":
mac_version = ".".join(platform.mac_ver()[0].split(".")[:1])
return f"macos_{mac_version}"
elif platform.system() == "Linux":
return "linux"
else:
raise ValueError("Unsupported system: {}".format(platform.system()))


def get_platform() -> str:
"""
Returns the platform name as used in wheel filenames.
"""
return f"{get_system()}_{get_arch()}"
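
Together, get_system() and get_arch() produce the platform tag used in the prebuilt wheel filenames. A standalone sketch of what that tag evaluates to on the Linux CI runners (illustrative only, mirroring the helpers above rather than importing them):

```python
import platform

def expected_platform_tag() -> str:
    """Mirror of get_platform() for a Linux host (sketch only)."""
    machine = platform.machine()
    if machine == "x86_64":
        arch = "x86_64"
    elif machine in ("arm64", "aarch64"):
        arch = "aarch64"
    else:
        raise ValueError(f"Unsupported architecture: {machine}")
    return f"linux_{arch}"

print(expected_platform_tag())  # linux_x86_64 on ubuntu-22.04, linux_aarch64 on ubuntu-22.04-arm
```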


def get_wheel_url():
# Determine the version numbers that will be used to determine the correct wheel
# We're using the CUDA version used to build torch, not the one currently installed
@@ -265,4 +290,4 @@ def __init__(self, *args, **kwargs) -> None:
else {
"bdist_wheel": CachedWheelsCommand,
},
)
)