diff --git a/contrib/pgo-lto/.gitignore b/contrib/pgo-lto/.gitignore
new file mode 100644
index 0000000000000..978d8f2ca86dd
--- /dev/null
+++ b/contrib/pgo-lto/.gitignore
@@ -0,0 +1,4 @@
+profiles
+stage0*
+stage1*
+stage2*
diff --git a/contrib/pgo-lto/Makefile b/contrib/pgo-lto/Makefile
new file mode 100644
index 0000000000000..a73825e182561
--- /dev/null
+++ b/contrib/pgo-lto/Makefile
@@ -0,0 +1,84 @@
+# Driver for a profile-guided (PGO) + ThinLTO build of Julia in three separate
+# build directories: stage0 provides the clang/LLVM toolchain, stage1 is built
+# with -fprofile-generate, and stage2 is rebuilt with -fprofile-use.
+
+.PHONY: top clean clean-profiles install
+
+STAGE0_BUILD:=$(CURDIR)/stage0.build
+STAGE1_BUILD:=$(CURDIR)/stage1.build
+STAGE2_BUILD:=$(CURDIR)/stage2.build
+
+STAGE0_TOOLS:=$(STAGE0_BUILD)/usr/tools/
+
+PROFILE_DIR:=$(CURDIR)/profiles
+PROFILE_FILE:=$(PROFILE_DIR)/merged.prof
+PROFRAW_FILES:=$(wildcard $(PROFILE_DIR)/*.profraw)
+JULIA_ROOT:=$(CURDIR)/../..
+
+LLVM_CXXFILT:=$(STAGE0_TOOLS)llvm-cxxfilt
+LLVM_PROFDATA:=$(STAGE0_TOOLS)llvm-profdata
+LLVM_OBJCOPY:=$(STAGE0_TOOLS)llvm-objcopy
+
+# When building a single libLLVM.so we need to increase -vp-counters-per-site
+# significantly
+COUNTERS_PER_SITE:=6
+
+AFTER_STAGE1_MESSAGE:='Run `make clean-profiles` to start with a clean slate. $\
+	Then run Julia to collect realistic profile data, for example: `$(STAGE1_BUILD)/julia -O3 -e $\
+	'\''using Pkg; Pkg.add("LoopVectorization"); Pkg.test("LoopVectorization")'\''`. This $\
+	should produce about 15MB of data in $(PROFILE_DIR). Note that running extensive $\
+	scripts may result in counter overflows, which can be detected by running $\
+	`make top`. Afterwards run `make stage2`.'
+
+TOOLCHAIN_FLAGS = $\
+	"CC=$(STAGE0_TOOLS)clang" $\
+	"CXX=$(STAGE0_TOOLS)clang++" $\
+	"LD=$(STAGE0_TOOLS)ld.lld" $\
+	"AR=$(STAGE0_TOOLS)llvm-ar" $\
+	"RANLIB=$(STAGE0_TOOLS)llvm-ranlib" $\
+	"CFLAGS+=$(PGO_CFLAGS)" $\
+	"CXXFLAGS+=$(PGO_CXXFLAGS)" $\
+	"LDFLAGS+=$(PGO_LDFLAGS)"
+
+$(STAGE0_BUILD) $(STAGE1_BUILD) $(STAGE2_BUILD):  # configure one build directory per stage
+	$(MAKE) -C $(JULIA_ROOT) O=$@ configure
+
+stage0: export USE_BINARYBUILDER_LLVM=1
+stage0: | $(STAGE0_BUILD)  # order-only, so $< is empty here; the recipe names the directory explicitly
+	# Turn [cd]tors into init/fini_array sections in libclang_rt, since lld
+	# doesn't do that, and otherwise the profile constructor is not executed
+	$(MAKE) -C $(STAGE0_BUILD)/deps install-clang install-llvm install-lld install-llvm-tools && \
+	find $(STAGE0_BUILD) -name 'libclang_rt.profile-*.a' -exec $(LLVM_OBJCOPY) --rename-section .ctors=.init_array --rename-section .dtors=.fini_array {} + && \
+	touch $@
+
+$(STAGE1_BUILD): stage0
+stage1: PGO_CFLAGS:=-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
+stage1: PGO_CXXFLAGS:=-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
+stage1: PGO_LDFLAGS:=-fuse-ld=lld -flto=thin -fprofile-generate=$(PROFILE_DIR)
+stage1: export USE_BINARYBUILDER_LLVM=0
+stage1: | $(STAGE1_BUILD)  # instrumented build; the "stage1" stamp file marks completion
+	$(MAKE) -C $(STAGE1_BUILD) $(TOOLCHAIN_FLAGS) && touch $@
+	@echo $(AFTER_STAGE1_MESSAGE)
+
+stage2: PGO_CFLAGS:=-fprofile-use=$(PROFILE_FILE)
+stage2: PGO_CXXFLAGS:=-fprofile-use=$(PROFILE_FILE)
+stage2: PGO_LDFLAGS:=-fuse-ld=lld -flto=thin -fprofile-use=$(PROFILE_FILE) -Wl,--icf=safe
+stage2: export USE_BINARYBUILDER_LLVM=0
+stage2: $(PROFILE_FILE) | $(STAGE2_BUILD)  # rebuild using the merged profile
+	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) && touch $@
+
+install: stage2
+	$(MAKE) -C $(STAGE2_BUILD) USE_BINARYBUILDER_LLVM=0 install
+
+$(PROFILE_FILE): stage1 $(PROFRAW_FILES)  # merge the raw profiles found when this Makefile was read
+	$(LLVM_PROFDATA) merge -output=$@ $(PROFRAW_FILES)
+
+# show top 50 functions
+top: $(PROFILE_FILE)
+	$(LLVM_PROFDATA) show --topn=50 $< | $(LLVM_CXXFILT)
+
+clean-profiles:  # delete the whole profile directory, raw and merged data alike
+	rm -r -f $(PROFILE_DIR)
+
+clean:  # drop the stage stamp files and the merged profile; build directories are kept
+	rm -f $(PROFILE_FILE) stage2 stage1 stage0