diff --git a/.gitignore b/.gitignore index d9c4a7972f076d5..ef7642b09bc5d2a 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,10 @@ *.gc?? *.profclang? *.profraw +# Copies of binaries before BOLT optimizations. +*.prebolt +# BOLT profile data. +*.fdata *.dyn .gdb_history .purify @@ -124,6 +128,7 @@ Tools/unicode/data/ /platform /profile-clean-stamp /profile-run-stamp +/profile-bolt-stamp /Python/deepfreeze/*.c /pybuilddir.txt /pyconfig.h diff --git a/Makefile.pre.in b/Makefile.pre.in index da3a8f6c13f90b3..eb79c9c4ca18016 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -672,21 +672,55 @@ profile-opt: profile-run-stamp -rm -f profile-clean-stamp $(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_USE_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST)" -.PHONY: bolt-opt -bolt-opt: @PREBOLT_RULE@ +# List of binaries that BOLT runs on. +BOLT_BINARIES := @BOLT_BINARIES@ + +BOLT_INSTRUMENT_FLAGS := @BOLT_INSTRUMENT_FLAGS@ +BOLT_APPLY_FLAGS := @BOLT_APPLY_FLAGS@ + +.PHONY: clean-bolt +clean-bolt: + # Profile data. rm -f *.fdata - @if $(READELF) -p .note.bolt_info $(BUILDPYTHON) | grep BOLT > /dev/null; then\ - echo "skip: $(BUILDPYTHON) is already BOLTed."; \ - else \ - @LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst; \ - ./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true; \ - @MERGE_FDATA@ $(BUILDPYTHON).*.fdata > $(BUILDPYTHON).fdata; \ - @LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata -update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=none -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot; \ - rm -f *.fdata; \ - rm -f $(BUILDPYTHON).bolt_inst; \ - mv $(BUILDPYTHON).bolt $(BUILDPYTHON); \ - fi + # Pristine binaries before BOLT optimization. + rm -f *.prebolt + # BOLT instrumented binaries. + rm -f *.bolt_inst + +profile-bolt-stamp: $(BUILDPYTHON) + # Ensure a pristine, pre-BOLT copy of the binary and no profile data from last run. + for bin in $(BOLT_BINARIES); do \ + prebolt="$${bin}.prebolt"; \ + if [ -e "$${prebolt}" ]; then \ + echo "Restoring pre-BOLT binary $${prebolt}"; \ + mv "$${bin}.prebolt" "$${bin}"; \ + fi; \ + cp "$${bin}" "$${prebolt}"; \ + rm -f $${bin}.bolt.*.fdata $${bin}.fdata; \ + done + # Instrument each binary. + for bin in $(BOLT_BINARIES); do \ + @LLVM_BOLT@ "$${bin}" -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $${bin}.bolt) -o $${bin}.bolt_inst $(BOLT_INSTRUMENT_FLAGS); \ + mv "$${bin}.bolt_inst" "$${bin}"; \ + done + # Run instrumented binaries to collect data. + $(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true + # Merge all the data files together. + for bin in $(BOLT_BINARIES); do \ + @MERGE_FDATA@ $${bin}.*.fdata > "$${bin}.fdata"; \ + rm -f $${bin}.*.fdata; \ + done + # Run bolt against the merged data to produce an optimized binary. + for bin in $(BOLT_BINARIES); do \ + @LLVM_BOLT@ "$${bin}.prebolt" -o "$${bin}.bolt" -data="$${bin}.fdata" $(BOLT_APPLY_FLAGS); \ + mv "$${bin}.bolt" "$${bin}"; \ + done + touch $@ +.PHONY: bolt-opt +bolt-opt: + $(MAKE) @PREBOLT_RULE@ + $(MAKE) profile-bolt-stamp # Compile and run with gcov .PHONY: coverage @@ -2623,10 +2657,11 @@ profile-removal: rm -f $(COVERAGE_INFO) rm -rf $(COVERAGE_REPORT) rm -f profile-run-stamp + rm -f profile-bolt-stamp .PHONY: clean -clean: clean-retain-profile - @if test @DEF_MAKE_ALL_RULE@ = profile-opt; then \ +clean: clean-retain-profile clean-bolt + @if test @DEF_MAKE_ALL_RULE@ = profile-opt -o @DEF_MAKE_ALL_RULE@ = bolt-opt; then \ rm -f profile-gen-stamp profile-clean-stamp; \ $(MAKE) profile-removal; \ fi diff --git a/Misc/NEWS.d/next/Build/2023-05-20-16-09-59.gh-issue-101282.FvRARb.rst b/Misc/NEWS.d/next/Build/2023-05-20-16-09-59.gh-issue-101282.FvRARb.rst new file mode 100644 index 000000000000000..cc70d47c6c16dd9 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2023-05-20-16-09-59.gh-issue-101282.FvRARb.rst @@ -0,0 +1,4 @@ +BOLT optimization is now applied to the libpython shared library if building +a shared library. BOLT instrumentation and application settings can now be +influenced via the ``BOLT_INSTRUMENT_FLAGS`` and ``BOLT_APPLY_FLAGS`` +configure variables. diff --git a/configure b/configure index 7aad4fe89e3cbf2..790a92856da8629 100755 --- a/configure +++ b/configure @@ -883,10 +883,11 @@ CFLAGS_NODIST BASECFLAGS CFLAGS_ALIASING OPT +BOLT_APPLY_FLAGS +BOLT_INSTRUMENT_FLAGS +BOLT_BINARIES MERGE_FDATA LLVM_BOLT -ac_ct_READELF -READELF PREBOLT_RULE LLVM_PROF_FOUND LLVM_PROFDATA @@ -1105,6 +1106,8 @@ CPPFLAGS CPP HOSTRUNNER PROFILE_TASK +BOLT_INSTRUMENT_FLAGS +BOLT_APPLY_FLAGS LIBUUID_CFLAGS LIBUUID_LIBS LIBFFI_CFLAGS @@ -1916,6 +1919,10 @@ Some influential environment variables: HOSTRUNNER Program to run CPython for the host platform PROFILE_TASK Python args for PGO generation task + BOLT_INSTRUMENT_FLAGS + Arguments to llvm-bolt when instrumenting binaries + BOLT_APPLY_FLAGS + Arguments to llvm-bolt when creating a BOLT optimized binary LIBUUID_CFLAGS C compiler flags for LIBUUID, overriding pkg-config LIBUUID_LIBS @@ -8106,112 +8113,6 @@ if test "$Py_BOLT" = 'true' ; then DEF_MAKE_ALL_RULE="bolt-opt" DEF_MAKE_RULE="build_all" - - if test -n "$ac_tool_prefix"; then - for ac_prog in readelf - do - # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. -set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_READELF+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$READELF"; then - ac_cv_prog_READELF="$READELF" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_READELF="$ac_tool_prefix$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -READELF=$ac_cv_prog_READELF -if test -n "$READELF"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $READELF" >&5 -$as_echo "$READELF" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$READELF" && break - done -fi -if test -z "$READELF"; then - ac_ct_READELF=$READELF - for ac_prog in readelf -do - # Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_READELF+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_READELF"; then - ac_cv_prog_ac_ct_READELF="$ac_ct_READELF" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_READELF="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -ac_ct_READELF=$ac_cv_prog_ac_ct_READELF -if test -n "$ac_ct_READELF"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_READELF" >&5 -$as_echo "$ac_ct_READELF" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - - - test -n "$ac_ct_READELF" && break -done - - if test "x$ac_ct_READELF" = x; then - READELF=""notfound"" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - READELF=$ac_ct_READELF - fi -fi - - if test "$READELF" == "notfound" - then - as_fn_error $? "readelf is required for a --enable-bolt build but could not be found." "$LINENO" 5 - fi - # -fno-reorder-blocks-and-partition is required for bolt to work. # Possibly GCC only. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -fno-reorder-blocks-and-partition" >&5 @@ -8474,6 +8375,34 @@ $as_echo "\"Found merge-fdata\"" >&6; } fi fi +# Enable BOLT of libpython if built. + +BOLT_BINARIES='$(BUILDPYTHON)' +if test "${enable_shared}" = "yes" +then + BOLT_BINARIES="${BOLT_BINARIES} \$(INSTSONAME)" +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking BOLT_INSTRUMENT_FLAGS" >&5 +$as_echo_n "checking BOLT_INSTRUMENT_FLAGS... " >&6; } +if test -z "${BOLT_INSTRUMENT_FLAGS}" +then + BOLT_INSTRUMENT_FLAGS= +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $BOLT_INSTRUMENT_FLAGS" >&5 +$as_echo "$BOLT_INSTRUMENT_FLAGS" >&6; } + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking BOLT_APPLY_FLAGS" >&5 +$as_echo_n "checking BOLT_APPLY_FLAGS... " >&6; } +if test -z "${BOLT_APPLY_FLAGS}" +then + BOLT_APPLY_FLAGS="-update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=none -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $BOLT_APPLY_FLAGS" >&5 +$as_echo "$BOLT_APPLY_FLAGS" >&6; } + # XXX Shouldn't the code above that fiddles with BASECFLAGS and OPT be # merged with this chunk of code? diff --git a/configure.ac b/configure.ac index 115998e0753b264..2ef3bbae388d389 100644 --- a/configure.ac +++ b/configure.ac @@ -2028,13 +2028,6 @@ if test "$Py_BOLT" = 'true' ; then DEF_MAKE_ALL_RULE="bolt-opt" DEF_MAKE_RULE="build_all" - AC_SUBST(READELF) - AC_CHECK_TOOLS(READELF, [readelf], "notfound") - if test "$READELF" == "notfound" - then - AC_MSG_ERROR([readelf is required for a --enable-bolt build but could not be found.]) - fi - # -fno-reorder-blocks-and-partition is required for bolt to work. # Possibly GCC only. AX_CHECK_COMPILE_FLAG([-fno-reorder-blocks-and-partition],[ @@ -2067,6 +2060,30 @@ if test "$Py_BOLT" = 'true' ; then fi fi +# Enable BOLT of libpython if built. +AC_SUBST(BOLT_BINARIES) +BOLT_BINARIES='$(BUILDPYTHON)' +if test "${enable_shared}" = "yes" +then + BOLT_BINARIES="${BOLT_BINARIES} \$(INSTSONAME)" +fi + +AC_ARG_VAR(BOLT_INSTRUMENT_FLAGS, Arguments to llvm-bolt when instrumenting binaries) +AC_MSG_CHECKING(BOLT_INSTRUMENT_FLAGS) +if test -z "${BOLT_INSTRUMENT_FLAGS}" +then + BOLT_INSTRUMENT_FLAGS= +fi +AC_MSG_RESULT($BOLT_INSTRUMENT_FLAGS) + +AC_ARG_VAR(BOLT_APPLY_FLAGS, Arguments to llvm-bolt when creating a BOLT optimized binary) +AC_MSG_CHECKING(BOLT_APPLY_FLAGS) +if test -z "${BOLT_APPLY_FLAGS}" +then + BOLT_APPLY_FLAGS="-update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=none -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot" +fi +AC_MSG_RESULT($BOLT_APPLY_FLAGS) + # XXX Shouldn't the code above that fiddles with BASECFLAGS and OPT be # merged with this chunk of code?