-
Notifications
You must be signed in to change notification settings - Fork 817
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for inter-node communication using sockets and InfiniBand/RoCE. Improve latency. Add support for aggregation. Improve LL/regular tuning. Remove tests as those are now at github.com/nvidia/nccl-tests.
- Loading branch information
Showing 132 changed files with 12,422 additions and 9,413 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,4 @@ | ||
# Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved. | ||
/build | ||
*.gcov | ||
/coverage/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,236 +1,30 @@ | ||
# | ||
# Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved. | ||
# Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved. | ||
# | ||
# See LICENCE.txt for license information | ||
# See LICENSE.txt for license information | ||
# | ||
|
||
CUDA_HOME ?= /usr/local/cuda | ||
PREFIX ?= /usr/local | ||
VERBOSE ?= 0 | ||
KEEP ?= 0 | ||
DEBUG ?= 0 | ||
PROFAPI ?= 0 | ||
BUILDDIR ?= build | ||
BUILDDIR := $(abspath $(BUILDDIR)) | ||
|
||
CUDA_LIB ?= $(CUDA_HOME)/lib64 | ||
CUDA_INC ?= $(CUDA_HOME)/include | ||
NVCC ?= $(CUDA_HOME)/bin/nvcc | ||
|
||
NVCC_GENCODE ?= -gencode=arch=compute_35,code=sm_35 \ | ||
-gencode=arch=compute_50,code=sm_50 \ | ||
-gencode=arch=compute_52,code=sm_52 \ | ||
-gencode=arch=compute_60,code=sm_60\ | ||
-gencode=arch=compute_61,code=sm_61 \ | ||
-gencode=arch=compute_60,code=compute_60 | ||
|
||
CXXFLAGS := -I$(CUDA_INC) -fPIC -fvisibility=hidden | ||
NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -lineinfo -std=c++11 -maxrregcount 96 | ||
# Use addprefix so that we can specify more than one path | ||
LDFLAGS := $(addprefix -L,${CUDA_LIB}) -lcudart -lrt | ||
|
||
ifeq ($(DEBUG), 0) | ||
NVCUFLAGS += -O3 | ||
CXXFLAGS += -O3 | ||
else | ||
NVCUFLAGS += -O0 -G | ||
CXXFLAGS += -O0 -g -ggdb3 | ||
endif | ||
|
||
ifneq ($(VERBOSE), 0) | ||
NVCUFLAGS += -Xptxas -v -Xcompiler -Wall,-Wextra | ||
CXXFLAGS += -Wall -Wextra | ||
else | ||
.SILENT: | ||
endif | ||
|
||
ifneq ($(KEEP), 0) | ||
NVCUFLAGS += -keep | ||
endif | ||
|
||
ifneq ($(PROFAPI), 0) | ||
CXXFLAGS += -DPROFAPI | ||
endif | ||
|
||
NCCL_MAJOR := 1 | ||
NCCL_MINOR := 3 | ||
NCCL_PATCH := 5 | ||
CXXFLAGS += -DNCCL_MAJOR=$(NCCL_MAJOR) -DNCCL_MINOR=$(NCCL_MINOR) -DNCCL_PATCH=$(NCCL_PATCH) | ||
|
||
CUDA_VERSION ?= $(shell ls $(CUDA_LIB)/libcudart.so.* | head -1 | rev | cut -d "." -f -2 | rev) | ||
CUDA_MAJOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 1) | ||
CUDA_MINOR = $(shell echo $(CUDA_VERSION) | cut -d "." -f 2) | ||
CXXFLAGS += -DCUDA_MAJOR=$(CUDA_MAJOR) -DCUDA_MINOR=$(CUDA_MINOR) | ||
|
||
.PHONY : all lib staticlib clean test mpitest install deb debian debclean forlib fortest forclean | ||
.DEFAULT : all | ||
|
||
INCEXPORTS := nccl.h | ||
LIBSRCFILES := libwrap.cu core.cu all_gather.cu all_reduce.cu broadcast.cu reduce.cu reduce_scatter.cu | ||
LIBNAME := libnccl.so | ||
STATICLIBNAME := libnccl_static.a | ||
|
||
INCDIR := $(BUILDDIR)/include | ||
LIBDIR := $(BUILDDIR)/lib | ||
OBJDIR := $(BUILDDIR)/obj | ||
|
||
INCTARGETS := $(patsubst %, $(INCDIR)/%, $(INCEXPORTS)) | ||
LIBSONAME := $(patsubst %,%.$(NCCL_MAJOR),$(LIBNAME)) | ||
LIBTARGET := $(patsubst %,%.$(NCCL_MAJOR).$(NCCL_MINOR).$(NCCL_PATCH),$(LIBNAME)) | ||
STATICLIBTARGET := $(STATICLIBNAME) | ||
LIBLINK := $(patsubst lib%.so, -l%, $(LIBNAME)) | ||
LIBOBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(filter %.cu, $(LIBSRCFILES))) | ||
DEPFILES := $(patsubst %.o, %.d, $(LIBOBJ)) $(patsubst %, %.d, $(TESTBINS)) $(patsubst %, %.d, $(MPITESTBINS)) | ||
|
||
all : lib staticlib | ||
|
||
lib : $(INCTARGETS) $(LIBDIR)/$(LIBTARGET) | ||
|
||
staticlib : $(INCTARGETS) $(LIBDIR)/$(STATICLIBTARGET) | ||
|
||
-include $(DEPFILES) | ||
|
||
$(LIBDIR)/$(LIBTARGET) : $(LIBOBJ) | ||
@printf "Linking %-35s > %s\n" $(LIBTARGET) $@ | ||
mkdir -p $(LIBDIR) | ||
$(CXX) $(CXXFLAGS) -shared -Wl,--no-as-needed -Wl,-soname,$(LIBSONAME) -o $@ $(LDFLAGS) $(LIBOBJ) | ||
ln -sf $(LIBSONAME) $(LIBDIR)/$(LIBNAME) | ||
ln -sf $(LIBTARGET) $(LIBDIR)/$(LIBSONAME) | ||
|
||
$(LIBDIR)/$(STATICLIBTARGET) : $(LIBOBJ) | ||
@printf "Archiving %-35s > %s\n" $(STATICLIBTARGET) $@ | ||
mkdir -p $(LIBDIR) | ||
ar cr $@ $(LIBOBJ) | ||
|
||
$(INCDIR)/%.h : src/%.h | ||
@printf "Grabbing %-35s > %s\n" $< $@ | ||
mkdir -p $(INCDIR) | ||
cp -f $< $@ | ||
|
||
$(OBJDIR)/%.o : src/%.cu | ||
@printf "Compiling %-35s > %s\n" $< $@ | ||
mkdir -p $(OBJDIR) | ||
$(NVCC) -c $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< -o $@ | ||
@$(NVCC) -M $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< > $(@:%.o=%.d.tmp) | ||
@sed "0,/^.*:/s//$(subst /,\/,$@):/" $(@:%.o=%.d.tmp) > $(@:%.o=%.d) | ||
@sed -e 's/.*://' -e 's/\\$$//' < $(@:%.o=%.d.tmp) | fmt -1 | \ | ||
sed -e 's/^ *//' -e 's/$$/:/' >> $(@:%.o=%.d) | ||
@rm -f $(@:%.o=%.d.tmp) | ||
|
||
clean : | ||
rm -rf $(BUILDDIR) | ||
|
||
install : lib | ||
mkdir -p $(PREFIX)/lib | ||
mkdir -p $(PREFIX)/include | ||
cp -P -v $(BUILDDIR)/lib/* $(PREFIX)/lib/ | ||
cp -v $(BUILDDIR)/include/* $(PREFIX)/include/ | ||
|
||
|
||
#### TESTS #### | ||
|
||
TEST_ONLY ?= 0 | ||
|
||
# Tests depend on lib, except in TEST_ONLY mode. | ||
ifeq ($(TEST_ONLY), 0) | ||
TSTDEP = $(INCTARGETS) $(LIBDIR)/$(LIBTARGET) | ||
endif | ||
|
||
NCCL_LIB ?= $(LIBDIR) | ||
NCCL_INC ?= $(INCDIR) | ||
|
||
MPI_HOME ?= /usr | ||
MPI_INC ?= $(MPI_HOME)/include | ||
MPI_LIB ?= $(MPI_HOME)/lib | ||
MPIFLAGS := -I$(MPI_INC) -L$(MPI_LIB) -lmpi | ||
|
||
TESTS := all_gather_test all_gather_scan \ | ||
all_reduce_test all_reduce_scan \ | ||
broadcast_test broadcast_scan \ | ||
reduce_test reduce_scan \ | ||
reduce_scatter_test reduce_scatter_scan | ||
MPITESTS := mpi_test | ||
|
||
TSTINC := -I$(NCCL_INC) -Itest/include | ||
TSTLIB := -L$(NCCL_LIB) $(LIBLINK) $(LDFLAGS) | ||
TSTDIR := $(BUILDDIR)/test/single | ||
MPITSTDIR := $(BUILDDIR)/test/mpi | ||
TESTBINS := $(patsubst %, $(TSTDIR)/%, $(TESTS)) | ||
MPITESTBINS:= $(patsubst %, $(MPITSTDIR)/%, $(MPITESTS)) | ||
|
||
test : $(TESTBINS) | ||
|
||
$(TSTDIR)/% : test/single/%.cu test/include/*.h $(TSTDEP) | ||
@printf "Building %-35s > %s\n" $< $@ | ||
mkdir -p $(TSTDIR) | ||
$(NVCC) $(TSTINC) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< $(TSTLIB) -lcuda -lcurand -lnvToolsExt | ||
@$(NVCC) -M $(TSTINC) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< $(TSTLIB) -lcuda -lcurand -lnvToolsExt > $(@:%=%.d.tmp) | ||
@sed "0,/^.*:/s//$(subst /,\/,$@):/" $(@:%=%.d.tmp) > $(@:%=%.d) | ||
@sed -e 's/.*://' -e 's/\\$$//' < $(@:%=%.d.tmp) | fmt -1 | \ | ||
sed -e 's/^ *//' -e 's/$$/:/' >> $(@:%=%.d) | ||
@rm -f $(@:%=%.d.tmp) | ||
|
||
mpitest : $(MPITESTBINS) | ||
|
||
$(MPITSTDIR)/% : test/mpi/%.cu $(TSTDEP) | ||
@printf "Building %-35s > %s\n" $< $@ | ||
mkdir -p $(MPITSTDIR) | ||
$(NVCC) $(MPIFLAGS) $(TSTINC) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" -o $@ $< $(TSTLIB) -lcurand | ||
@$(NVCC) $(MPIFLAGS) -M $(TSTINC) $(NVCUFLAGS) --compiler-options "$(CXXFLAGS)" $< $(TSTLIB) -lcurand > $(@:%=%.d.tmp) | ||
@sed "0,/^.*:/s//$(subst /,\/,$@):/" $(@:%=%.d.tmp) > $(@:%=%.d) | ||
@sed -e 's/.*://' -e 's/\\$$//' < $(@:%=%.d.tmp) | fmt -1 | \ | ||
sed -e 's/^ *//' -e 's/$$/:/' >> $(@:%=%.d) | ||
@rm -f $(@:%=%.d.tmp) | ||
|
||
#### PACKAGING #### | ||
|
||
DEBIANDIR := $(BUILDDIR)/debian | ||
|
||
DEBGEN_IN := $(shell (cd debian ; ls *.in)) | ||
DEBGEN := $(DEBGEN_IN:.in=) | ||
DEBFILES := compat copyright libnccl-dev.install libnccl-dev.manpages nccl.7 rules $(DEBGEN) | ||
DEBTARGETS := $(patsubst %, $(DEBIANDIR)/%, $(DEBFILES)) | ||
|
||
DEB_REVISION ?= 1 | ||
DEB_TIMESTAMP := $(shell date -R) | ||
DEB_ARCH ?= amd64 | ||
|
||
debian : $(DEBTARGETS) | ||
|
||
deb : lib debian | ||
@printf "Building Debian package\n" | ||
(cd $(BUILDDIR); debuild -eLD_LIBRARY_PATH -uc -us -d -b) | ||
mkdir -p $(BUILDDIR)/deb/ | ||
mv $(BUILDDIR)/../libnccl*.deb $(BUILDDIR)/deb/ | ||
|
||
debclean : | ||
rm -Rf $(DEBIANDIR) | ||
|
||
$(DEBIANDIR)/% : debian/%.in | ||
@printf "Generating %-35s > %s\n" $< $@ | ||
sed -e "s/\$${nccl:Major}/$(NCCL_MAJOR)/g" \ | ||
-e "s/\$${nccl:Minor}/$(NCCL_MINOR)/g" \ | ||
-e "s/\$${nccl:Patch}/$(NCCL_PATCH)/g" \ | ||
-e "s/\$${cuda:Major}/$(CUDA_MAJOR)/g" \ | ||
-e "s/\$${cuda:Minor}/$(CUDA_MINOR)/g" \ | ||
-e "s/\$${deb:Revision}/$(DEB_REVISION)/g" \ | ||
-e "s/\$${deb:Timestamp}/$(DEB_TIMESTAMP)/g" \ | ||
-e "s/\$${deb:Arch}/$(DEB_ARCH)/g" \ | ||
$< > $@ | ||
|
||
$(DEBIANDIR)/% : debian/% | ||
@printf "Grabbing %-35s > %s\n" $< $@ | ||
mkdir -p $(DEBIANDIR) | ||
cp -f $< $@ | ||
|
||
#### FORTRAN BINDINGS #### | ||
|
||
export NCCL_MAJOR NCCL_MINOR NCCL_PATCH CUDA_MAJOR CUDA_MINOR LIBLINK CUDA_LIB BUILDDIR | ||
|
||
forlib : lib | ||
$(MAKE) -C fortran lib | ||
fortest : forlib | ||
$(MAKE) -C fortran test | ||
forclean : | ||
$(MAKE) -C fortran clean | ||
|
||
.PHONY : all clean | ||
|
||
default : src.build | ||
BUILDDIR ?= $(abspath ./build) | ||
ABSBUILDDIR := $(abspath $(BUILDDIR)) | ||
TARGETS := src pkg | ||
clean: ${TARGETS:%=%.clean} | ||
test.build: src.build | ||
LICENSE_FILES := LICENSE.txt | ||
LICENSE_TARGETS := $(LICENSE_FILES:%=$(BUILDDIR)/%) | ||
lic: $(LICENSE_TARGETS) | ||
|
||
${BUILDDIR}/%.txt: %.txt | ||
@printf "Copying %-35s > %s\n" $< $@ | ||
mkdir -p ${BUILDDIR} | ||
cp $< $@ | ||
|
||
src.%: | ||
${MAKE} -C src $* BUILDDIR=${ABSBUILDDIR} | ||
|
||
pkg.%: | ||
${MAKE} -C pkg $* BUILDDIR=${ABSBUILDDIR} | ||
|
||
pkg.debian.prep: lic | ||
pkg.txz.prep: lic |
Oops, something went wrong.