Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding horovod #24472

Merged
merged 25 commits into from
Nov 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions recipes/horovod/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash
# Build script for the Horovod conda package.
#
# Steps:
#   1. Regenerate the FlatBuffers C++ headers with the flatc from the build
#      environment.
#   2. Select the frameworks, controller and GPU backend through HOROVOD_*
#      environment variables.
#   3. Install the package with pip.

# -e: abort on the first failing command; -x: echo commands into the build log.
set -ex

# Regenerate the checked-in FlatBuffers headers (flatc -c emits C++ code).
# See https://github.com/horovod/horovod/issues/3956
flatc -c -o horovod/common/wire horovod/common/wire/message.fbs
flatc -c -I . --include-prefix ../common/wire -o horovod/tensorflow horovod/tensorflow/custom_call_config.fbs

# An unset or empty cuda_compiler_version is treated as "None" so that a
# CPU-only environment never enables the NCCL/GPU code path by accident.
if [[ "${cuda_compiler_version:-None}" != "None" ]]; then
    export HOROVOD_GPU_OPERATIONS=NCCL
    export HOROVOD_NCCL_LINK=SHARED
    # NOTE(review): hard-coded CUDA location; confirm it matches the toolkit
    # path in the build image for every CUDA variant that is built.
    export HOROVOD_CUDA_HOME=/usr/local/cuda
fi
export HOROVOD_WITH_TENSORFLOW=1
export HOROVOD_WITH_PYTORCH=1
# mxnet is not available on conda-forge
# https://github.com/conda-forge/staged-recipes/issues/4447
export HOROVOD_WITHOUT_MXNET=1
export HOROVOD_WITH_MPI=1
# gloo is not available on conda-forge
export HOROVOD_WITHOUT_GLOO=1
if [[ "${target_platform}" == osx-* ]]; then
    # https://conda-forge.org/docs/maintainer/knowledge_base.html#newer-c-features-with-old-sdk
    export CXXFLAGS="${CXXFLAGS} -D_LIBCPP_DISABLE_AVAILABILITY"
fi

# default is -j8
# Fall back to a single job when CPU_COUNT is unset: a bare "-j" would let
# make start an unlimited number of parallel jobs.
export MAKEFLAGS="-j${CPU_COUNT:-1}"
# --no-deps / --no-build-isolation: build only against the packages provided
# by the conda host environment; never let pip download anything itself.
python -m pip install . -vv --no-deps --no-build-isolation
3 changes: 3 additions & 0 deletions recipes/horovod/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Build variants for this recipe: the MPI implementations to build against.
# meta.yaml reads the selected value through its mpi variable (used in the
# build string and in the host/run requirements).
mpi:
- mpich
- openmpi
105 changes: 105 additions & 0 deletions recipes/horovod/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Conda recipe for Horovod.
# proc_type is "cuda" when cuda_compiler_version is anything other than "None",
# and "cpu" otherwise; it is reused below in the build string and in the
# tensorflow/pytorch build-string constraints.
{% set name = "horovod" %}
{% set version = "0.28.1" %}
{% set proc_type = "cuda" if cuda_compiler_version != "None" else "cpu" %}

package:
name: {{ name|lower }}
version: {{ version }}

# Source tarball of the tagged release on GitHub, verified by its sha256.
source:
url: https://github.com/horovod/horovod/archive/refs/tags/v{{ version }}.tar.gz
sha256: 88fbcd2815083607a5d27962f323eae5752e3eb96511aaac98258f280cac0f8e

build:
number: 0
skip: true # [win]
# The build string encodes the processor type and the MPI implementation,
# followed by the package hash and the build number.
string: {{ proc_type }}_mpi_{{ mpi }}_h{{ PKG_HASH }}_{{ PKG_BUILDNUM }}
# Run exports of these host packages are not propagated to the run requirements.
ignore_run_exports_from:
- flatbuffers
- tensorflow
- pytorch
# Horovod supports multiple frameworks, and this package is built against both TensorFlow and PyTorch.
# A user, however, typically uses only one of them at a time (by importing either horovod.tensorflow or horovod.torch).
# TensorFlow and PyTorch are therefore listed in the `run_constrained` section rather than the `run` section,
# so users can install Horovod together with just the one framework they need.
# The TensorFlow and PyTorch libraries are listed in `missing_dso_whitelist`, and a test is added to ensure that the
# Horovod libraries can be loaded correctly when TensorFlow and PyTorch are installed.
# (horovod.tensorflow loads the Horovod TensorFlow library and horovod.torch loads the Horovod PyTorch library.)
missing_dso_whitelist:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment here why they are listed.

Copy link
Member Author

@njzjz njzjz Nov 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added. Thanks for the review.

# Glob patterns for the TensorFlow and PyTorch shared libraries.
# The trailing selectors pick the .so patterns on Linux and the .dylib patterns
# on macOS; the _pywrap_tensorflow_internal.so entry carries no selector and is
# therefore kept on every platform.
- "*/libtensorflow_framework.so.*" # [linux]
- "*/libtensorflow_cc.so.*" # [linux]
- "*/torch/lib/*.so" # [linux]
- "*/libtensorflow_framework.*.dylib" # [osx]
- "*/libtensorflow_cc.*.dylib" # [osx]
- "*/_pywrap_tensorflow_internal.so"
- "*/torch/lib/*.dylib" # [osx]

# Dependencies of the package, split into build, host, run and run_constrained.
requirements:
# Tools that run on the build machine.
build:
- {{ compiler('cxx') }}
# The CUDA compiler is only requested for CUDA variants.
- {{ compiler('cuda') }} # [cuda_compiler_version != 'None']
- cmake >=3.13
- make # [unix]
# The next two entries are only used when cross-compiling
# (build platform differs from target platform).
- python # [build_platform != target_platform]
- cross-python_{{ target_platform }} # [build_platform != target_platform]
# build.sh invokes the flatc executable.
- flatbuffers
# Packages the extension is compiled and linked against.
host:
- python
- pip
- wheel
- setuptools
- pyyaml
- cloudpickle
- packaging
- psutil
# NCCL is only needed for CUDA variants.
- nccl # [cuda_compiler_version != 'None']
# MPI implementation selected by the mpi variant.
- {{ mpi }}
- mpi4py
# Each framework is listed twice: once by name only, and once with a
# build-string constraint that selects the build matching proc_type.
- tensorflow
- tensorflow * {{ proc_type }}*
- pytorch
- pytorch * {{ proc_type }}*
# for flatc
- flatbuffers
- libboost-headers
- lbfgspp

# Runtime dependencies. TensorFlow and PyTorch are deliberately absent here;
# they are only constrained in run_constrained below.
run:
- python
- pyyaml
- cloudpickle
- packaging
- psutil
- nccl # [cuda_compiler_version != 'None']
- {{ mpi }}
- mpi4py

# Optional packages: not installed automatically, but if present they must be
# compatible (pinned to major.minor) with the versions used at build time and
# must have a build string matching proc_type.
run_constrained:
- {{ pin_compatible('tensorflow', max_pin='x.x') }} {{ proc_type }}*
- {{ pin_compatible('pytorch', max_pin='x.x') }} {{ proc_type }}*

# Package tests. TensorFlow and PyTorch are installed explicitly in the test
# environment so that both framework-specific submodules can be imported.
test:
requires:
- pip
- tensorflow
- pytorch
# Modules that must import successfully.
imports:
- horovod
- horovod.tensorflow
- horovod.torch
# pip check validates the installed Python dependency metadata;
# horovodrun -h checks that the command-line entry point starts.
commands:
- pip check
- horovodrun -h

# Project metadata: homepage, license and links to documentation and sources.
about:
home: https://horovod.ai/
license: Apache-2.0
license_family: Apache
# Path of the license file inside the source tree.
license_file: LICENSE
summary: 'Distributed training framework for TensorFlow, Keras, PyTorch, and Apache MXNet.'
doc_url: https://horovod.readthedocs.io/
dev_url: https://github.com/horovod/horovod

# GitHub handles of the recipe maintainers.
extra:
recipe-maintainers:
- njzjz