diff --git a/.gitignore b/.gitignore index d9c4a7972f076d..547e6746e1d91b 100644 --- a/.gitignore +++ b/.gitignore @@ -122,8 +122,7 @@ Tools/unicode/data/ # hendrikmuhs/ccache-action@v1 /.ccache /platform -/profile-clean-stamp -/profile-run-stamp +/profile-*-stamp /Python/deepfreeze/*.c /pybuilddir.txt /pyconfig.h diff --git a/Makefile.pre.in b/Makefile.pre.in index 7c44b7be5dbe67..3d2386a62e0092 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -601,13 +601,27 @@ LIBHACL_SHA2_HEADERS= \ ######################################################################### # Rules -# Default target -all: @DEF_MAKE_ALL_RULE@ +# Default target. +# Likely either `build-plain` or `build-optimized`. +all: @MAKE_TARGET_ALL@ # First target in Makefile is implicit default. So .PHONY needs to come after # all. .PHONY: all +# Build without any optimizations or instrumented binaries. +.PHONY: build-plain +build-plain: @MAKE_TARGET_BUILD_PLAIN@ + +# Build with optimizations (PGO, BOLT, etc). +.PHONY: build-optimized +build-optimized: @MAKE_TARGET_BUILD_OPTIMIZED@ + +.PHONY: build-optimized-not-enabled +build-optimized-not-enabled: + @echo "build-optimized requires --enable-optimizations in configure; aborting" + @exit 1 + .PHONY: build_all build_all: check-clean-src $(BUILDPYTHON) platform sharedmods \ gdbhooks Programs/_testembed scripts checksharedmods rundsymutil @@ -629,81 +643,154 @@ check-clean-src: exit 1; \ fi -# Profile generation build must start from a clean tree. +# Profile-based optimization. +# +# PGO and BOLT profile-based optimization is supported. For each optimization, +# roughly the following steps are done: +# +# 1. "Instrument" binaries with run-time data collection (e.g. build or modify +# a variant of the binary.) +# 2. "Run" instrumented binaries (via subset of test suite) to collect data. +# 3. "Analyze" / collect / merge data files from previous step. +# 4. "Apply" collected data from above. (e.g. rebuild or modify a binary). 
+# +# 0, 1, or multiple profile based optimizations can be enabled. +# +# We track the progress of profile-based optimization using various "stamp" +# files. An empty stamp file tracks the stage of optimization we're in. +# Each *-stamp rule that follows is defined in execution / dependency order. + +# Remove files produced by or used for tracking profile-guided optimization. +.PHONY: profile-remove +profile-remove: clean-bolt + find . -name '*.gc??' -exec rm -f {} ';' + find . -name '*.profclang?' -exec rm -f {} ';' + find . -name '*.dyn' -exec rm -f {} ';' + rm -f $(COVERAGE_INFO) + rm -rf $(COVERAGE_REPORT) + # Remove all progress tracking stamps to ensure a clean slate. + rm -f profile-*-stamp + +# Profile-based optimization requires a fresh build environment. profile-clean-stamp: - $(MAKE) clean + $(MAKE) clean profile-remove touch $@ -# Compile with profile generation enabled. -profile-gen-stamp: profile-clean-stamp +# Build with PGO instrumentation enabled. +profile-pgo-instrument-stamp: profile-clean-stamp @if [ $(LLVM_PROF_ERR) = yes ]; then \ echo "Error: Cannot perform PGO build because llvm-profdata was not found in PATH" ;\ echo "Please add it to PATH and run ./configure again" ;\ exit 1;\ fi @echo "Building with support for profile generation:" - $(MAKE) build_all_generate_profile + $(MAKE) @MAKE_TARGET_BUILD_PLAIN@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LIBS="$(LIBS)" touch $@ -# Run task with profile generation build to create profile information. -profile-run-stamp: +# Run PGO instrumented binaries and collect profile data. +profile-pgo-run-stamp: profile-pgo-instrument-stamp @echo "Running code to generate profile data (this can take a while):" - # First, we need to create a clean build with profile generation - # enabled. - $(MAKE) profile-gen-stamp - # Next, run the profile task to generate the profile information. 
- $(MAKE) run_profile_task - $(MAKE) build_all_merge_profile - # Remove profile generation binary since we are done with it. - $(MAKE) clean-retain-profile - # This is an expensive target to build and it does not have proper - # makefile dependency information. So, we create a "stamp" file - # to record its completion and avoid re-running it. - touch $@ - -.PHONY: build_all_generate_profile -build_all_generate_profile: - $(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST) $(PGO_PROF_GEN_FLAG)" LIBS="$(LIBS)" - -.PHONY: run_profile_task -run_profile_task: @ # FIXME: can't run for a cross build $(LLVM_PROF_FILE) $(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true + touch $@ -.PHONY: build_all_merge_profile -build_all_merge_profile: +# Collect data files produced by running PGO instrumented binaries. +profile-pgo-analyze-stamp: profile-pgo-run-stamp $(LLVM_PROF_MERGER) + # Remove profile generation binary since we are done with it. + $(MAKE) clean-retain-profile + touch $@ -# Compile Python binary with profile guided optimization. -# To force re-running of the profile task, remove the profile-run-stamp file. -.PHONY: profile-opt -profile-opt: profile-run-stamp +# Use collected PGO data to influence rebuild of binaries. 
+profile-pgo-apply-stamp: profile-pgo-analyze-stamp @echo "Rebuilding with profile guided optimizations:" - -rm -f profile-clean-stamp - $(MAKE) @DEF_MAKE_RULE@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_USE_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST)" - -.PHONY: bolt-opt -bolt-opt: @PREBOLT_RULE@ - rm -f *.fdata - @if $(READELF) -p .note.bolt_info $(BUILDPYTHON) | grep BOLT > /dev/null; then\ - echo "skip: $(BUILDPYTHON) is already BOLTed."; \ - else \ - @LLVM_BOLT@ ./$(BUILDPYTHON) -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $(BUILDPYTHON).bolt) -o $(BUILDPYTHON).bolt_inst; \ - ./$(BUILDPYTHON).bolt_inst $(PROFILE_TASK) || true; \ - @MERGE_FDATA@ $(BUILDPYTHON).*.fdata > $(BUILDPYTHON).fdata; \ - @LLVM_BOLT@ ./$(BUILDPYTHON) -o $(BUILDPYTHON).bolt -data=$(BUILDPYTHON).fdata -update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=none -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot; \ - rm -f *.fdata; \ - rm -f $(BUILDPYTHON).bolt_inst; \ - mv $(BUILDPYTHON).bolt $(BUILDPYTHON); \ + # Need to purge PGO instrumented build to force a rebuild. + $(MAKE) clean-retain-profile + $(MAKE) @MAKE_TARGET_BUILD_PLAIN@ CFLAGS_NODIST="$(CFLAGS_NODIST) $(PGO_PROF_USE_FLAG)" LDFLAGS_NODIST="$(LDFLAGS_NODIST)" + touch $@ + +# BOLT supports instrumenting and applying changes to standalone binaries +# without having to recompile. +# +# BOLT can run independently or in addition to PGO. If running with PGO, +# it always runs after PGO. Care needs to be taken to preserve PGO state +# when running BOLT so make doesn't re-apply PGO. +# +# BOLT also can't instrument binaries that have already had BOLT applied +# to them. So we make an attempt to preserve and re-use the pristine +# pre-BOLT binaries so developers can iterate on just BOLT optimization +# passes. 
+ +# List of binaries that BOLT runs on. +BOLT_BINARIES = @BOLT_BINARIES@ + +BOLT_INSTRUMENT_FLAGS ?= @BOLT_INSTRUMENT_FLAGS@ +BOLT_APPLY_FLAGS ?= @BOLT_APPLY_FLAGS@ + +# Remove traces of bolt. +.PHONY: clean-bolt +clean-bolt: + # Instrumented binaries. + find . -name '*.bolt_inst' -exec rm -f {} ';' + # The data files they produce. + find . -name '*.fdata' -exec rm -f {} ';' + +# BOLTs dependencies are a bit wonky. +# +# If PGO is enabled, we can take a native rule dependency on a stamp file. +# If PGO isn't enabled, we don't have a stamp to key off of and the phony +# target (e.g. build_all) will always force rebuilds. So we call out to +# make externally to sidestep the dependency. +# +# We can simplify this hack if we ever get stamp files for plain builds. +profile-bolt-prebuild-stamp: @MAKE_BOLT_NATIVE_DEPENDENCY@ + if [ -n "@MAKE_BOLT_MAKE_DEPENDENCY@" ]; then \ + $(MAKE) @MAKE_BOLT_MAKE_DEPENDENCY@; \ fi + touch $@ +profile-bolt-instrument-stamp: profile-bolt-prebuild-stamp + for bin in $(BOLT_BINARIES); do \ + prebolt="$${bin}.prebolt"; \ + if [ -e "$${prebolt}" ]; then \ + echo "Restoring pre-BOLT binary $${prebolt}"; \ + mv "$${bin}.prebolt" "$${bin}"; \ + fi; \ + cp "$${bin}" "$${prebolt}"; \ + done + # Ensure prior BOLT state is purged. 
+ $(MAKE) clean-bolt + for bin in $(BOLT_BINARIES); do \ + @LLVM_BOLT@ $${bin} -instrument -instrumentation-file-append-pid -instrumentation-file=$(abspath $${bin}.bolt) -o $${bin}.bolt_inst $(BOLT_INSTRUMENT_FLAGS) || exit 1; \ + mv "$${bin}.bolt_inst" "$${bin}" || exit 1; \ + done + touch $@ + +profile-bolt-run-stamp: profile-bolt-instrument-stamp + $(RUNSHARED) ./$(BUILDPYTHON) $(PROFILE_TASK) || true + touch $@ + +profile-bolt-analyze-stamp: profile-bolt-run-stamp + for bin in $(BOLT_BINARIES); do \ + @MERGE_FDATA@ $${bin}.*.fdata > $${bin}.fdata || exit 1; \ + done + touch $@ + +profile-bolt-apply-stamp: profile-bolt-analyze-stamp + for bin in $(BOLT_BINARIES); do \ + @LLVM_BOLT@ "$${bin}.prebolt" -o "$${bin}.bolt" -data="$${bin}.fdata" $(BOLT_APPLY_FLAGS) || exit 1; \ + mv "$${bin}.bolt" "$${bin}" || exit 1; \ + done + touch $@ + +# End of profile-based optimization rules. # Compile and run with gcov .PHONY: coverage coverage: @echo "Building with support for coverage checking:" $(MAKE) clean - $(MAKE) @DEF_MAKE_RULE@ CFLAGS="$(CFLAGS) -O0 -pg --coverage" LDFLAGS="$(LDFLAGS) --coverage" + $(MAKE) @MAKE_TARGET_BUILD_PLAIN@ CFLAGS="$(CFLAGS) -O0 -pg --coverage" LDFLAGS="$(LDFLAGS) --coverage" .PHONY: coverage-lcov coverage-lcov: @@ -2622,23 +2709,9 @@ clean-retain-profile: pycremoval -rm -f Python/frozen_modules/MANIFEST -find build -type f -a ! -name '*.gc??' -exec rm -f {} ';' -rm -f Include/pydtrace_probes.h - -rm -f profile-gen-stamp - -.PHONY: profile-removal -profile-removal: - find . -name '*.gc??' -exec rm -f {} ';' - find . -name '*.profclang?' -exec rm -f {} ';' - find .
-name '*.dyn' -exec rm -f {} ';' - rm -f $(COVERAGE_INFO) - rm -rf $(COVERAGE_REPORT) - rm -f profile-run-stamp .PHONY: clean clean: clean-retain-profile - @if test @DEF_MAKE_ALL_RULE@ = profile-opt; then \ - rm -f profile-gen-stamp profile-clean-stamp; \ - $(MAKE) profile-removal; \ - fi .PHONY: clobber clobber: clean diff --git a/configure b/configure index 7aad4fe89e3cbf..b2cd8c24a4f0c4 100755 --- a/configure +++ b/configure @@ -883,11 +883,15 @@ CFLAGS_NODIST BASECFLAGS CFLAGS_ALIASING OPT +BOLT_APPLY_FLAGS +BOLT_INSTRUMENT_FLAGS +BOLT_BINARIES MERGE_FDATA LLVM_BOLT ac_ct_READELF READELF -PREBOLT_RULE +MAKE_BOLT_MAKE_DEPENDENCY +MAKE_BOLT_NATIVE_DEPENDENCY LLVM_PROF_FOUND LLVM_PROFDATA LLVM_PROF_ERR @@ -898,8 +902,9 @@ PGO_PROF_GEN_FLAG LLVM_AR_FOUND LLVM_AR PROFILE_TASK -DEF_MAKE_RULE -DEF_MAKE_ALL_RULE +MAKE_TARGET_BUILD_OPTIMIZED +MAKE_TARGET_BUILD_PLAIN +MAKE_TARGET_ALL ABIFLAGS LN MKDIR_P @@ -1056,6 +1061,7 @@ with_assertions enable_optimizations with_lto enable_bolt +with_bolt_libpython with_strict_overflow with_dsymutil with_address_sanitizer @@ -1097,6 +1103,7 @@ PKG_CONFIG PKG_CONFIG_PATH PKG_CONFIG_LIBDIR MACHDEP +_PYTHON_HOST_PLATFORM CC CFLAGS LDFLAGS @@ -1105,6 +1112,8 @@ CPPFLAGS CPP HOSTRUNNER PROFILE_TASK +BOLT_INSTRUMENT_FLAGS +BOLT_APPLY_FLAGS LIBUUID_CFLAGS LIBUUID_LIBS LIBFFI_CFLAGS @@ -1828,6 +1837,8 @@ Optional Packages: --with-lto=[full|thin|no|yes] enable Link-Time-Optimization in any build (default is no) + --with-bolt-libpython enable BOLT optimization of libpython (WARNING: + known to crash BOLT) --with-strict-overflow if 'yes', add -fstrict-overflow to CFLAGS, else add -fno-strict-overflow (default is no) --with-dsymutil link debug information into final executable with @@ -1905,6 +1916,14 @@ Some influential environment variables: PKG_CONFIG_LIBDIR path overriding pkg-config's built-in search path MACHDEP name for machine-dependent library files + _PYTHON_HOST_PLATFORM + Forces a platform tag value for use in sysconfig data. 
This will + be calculated automatically in non-cross builds by running + sysconfig code in the bootstrapped interpreter. In cross builds, + an attempt will be made to derive an appropriate value in + configure. But some targets may derive incorrect values. This + variable can be set to force a value. Example values: + linux-x86_64, macosx-10.9-universal2, win-amd64 CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a @@ -1916,6 +1935,10 @@ Some influential environment variables: HOSTRUNNER Program to run CPython for the host platform PROFILE_TASK Python args for PGO generation task + BOLT_INSTRUMENT_FLAGS + Arguments to llvm-bolt when instrumenting binaries + BOLT_APPLY_FLAGS + Arguments to llvm-bolt when creating a BOLT optimized binary LIBUUID_CFLAGS C compiler flags for LIBUUID, overriding pkg-config LIBUUID_LIBS @@ -3914,34 +3937,42 @@ fi $as_echo "\"$MACHDEP\"" >&6; } -if test "$cross_compiling" = yes; then - case "$host" in - *-*-linux*) - case "$host_cpu" in - arm*) - _host_cpu=arm - ;; - *) - _host_cpu=$host_cpu - esac - ;; - *-*-cygwin*) - _host_cpu= - ;; - *-*-vxworks*) - _host_cpu=$host_cpu - ;; - wasm32-*-* | wasm64-*-*) - _host_cpu=$host_cpu - ;; - *) - # for now, limit cross builds to known configurations - MACHDEP="unknown" - as_fn_error $? "cross build not supported for $host" "$LINENO" 5 - esac - _PYTHON_HOST_PLATFORM="$MACHDEP${_host_cpu:+-$_host_cpu}" +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking _PYTHON_HOST_PLATFORM" >&5 +$as_echo_n "checking _PYTHON_HOST_PLATFORM... 
" >&6; } + +if test -z "${_PYTHON_HOST_PLATFORM}"; then + if test "$cross_compiling" = yes; then + case "$host" in + *-*-linux*) + case "$host_cpu" in + arm*) + _host_cpu=arm + ;; + *) + _host_cpu=$host_cpu + esac + ;; + *-*-cygwin*) + _host_cpu= + ;; + *-*-vxworks*) + _host_cpu=$host_cpu + ;; + wasm32-*-* | wasm64-*-*) + _host_cpu=$host_cpu + ;; + *) + # for now, limit cross builds to known configurations + MACHDEP="unknown" + as_fn_error $? "cross build not supported for $host" "$LINENO" 5 + esac + _PYTHON_HOST_PLATFORM="$MACHDEP${_host_cpu:+-$_host_cpu}" + fi fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $_PYTHON_HOST_PLATFORM" >&5 +$as_echo "$_PYTHON_HOST_PLATFORM" >&6; } + # Some systems cannot stand _XOPEN_SOURCE being defined at all; they # disable features if it is defined, without any means to access these # features as extensions. For these systems, we skip the definition of @@ -7448,8 +7479,18 @@ $as_echo "no" >&6; } fi # Enable optimization flags +# Which target `all` (the default make target) depends on. + +# Which target to evaluate for non-optimized builds. + +# Do a non-optimized generic build by default. Build configurations below +# can override as appropriate. +MAKE_TARGET_ALL="build-plain" +MAKE_TARGET_BUILD_PLAIN="build_all" +MAKE_TARGET_BUILD_OPTIMIZED="build-optimized-not-enabled" + Py_OPT='false' { $as_echo "$as_me:${as_lineno-$LINENO}: checking for --enable-optimizations" >&5 $as_echo_n "checking for --enable-optimizations... " >&6; } @@ -7473,13 +7514,15 @@ fi if test "$Py_OPT" = 'true' ; then + # PGO is implied by optimizations mode. + PGO_ENABLED=1 + MAKE_TARGET_ALL="build-optimized" + MAKE_TARGET_BUILD_OPTIMIZED="profile-pgo-apply-stamp" + # Intentionally not forcing Py_LTO='true' here. Too many toolchains do not # compile working code using it and both test_distutils and test_gdb are # broken when you do manage to get a toolchain that works with it. People # who want LTO need to use --with-lto themselves. 
- DEF_MAKE_ALL_RULE="profile-opt" - REQUIRE_PGO="yes" - DEF_MAKE_RULE="build_all" case $CC in *gcc*) { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -fno-semantic-interposition" >&5 @@ -7523,13 +7566,7 @@ fi ;; esac elif test "$ac_sys_system" = "Emscripten" -o "$ac_sys_system" = "WASI"; then - DEF_MAKE_ALL_RULE="build_wasm" - REQUIRE_PGO="no" - DEF_MAKE_RULE="all" -else - DEF_MAKE_ALL_RULE="build_all" - REQUIRE_PGO="no" - DEF_MAKE_RULE="all" + MAKE_TARGET_BUILD_PLAIN="build_wasm" fi @@ -8039,7 +8076,8 @@ case $CC in if test $LLVM_PROF_FOUND = not-found then LLVM_PROF_ERR=yes - if test "${REQUIRE_PGO}" = "yes" + + if test -n "${PGO_ENABLED}" then as_fn_error $? "llvm-profdata is required for a --enable-optimizations build but could not be found." "$LINENO" 5 fi @@ -8055,10 +8093,10 @@ case $CC in if test "${LLVM_PROF_FOUND}" = "not-found" then LLVM_PROF_ERR=yes - if test "${REQUIRE_PGO}" = "yes" - then - as_fn_error $? "llvm-profdata is required for a --enable-optimizations build but could not be found." "$LINENO" 5 - fi + if test -n "${PGO_ENABLED}" + then + as_fn_error $? "llvm-profdata is required for a --enable-optimizations build but could not be found." "$LINENO" 5 + fi fi ;; *) @@ -8101,10 +8139,19 @@ fi + + if test "$Py_BOLT" = 'true' ; then - PREBOLT_RULE="${DEF_MAKE_ALL_RULE}" - DEF_MAKE_ALL_RULE="bolt-opt" - DEF_MAKE_RULE="build_all" + MAKE_TARGET_BUILD_OPTIMIZED="profile-bolt-apply-stamp" + + # Hook up make dependencies differently depending on whether PGO is + # enabled. See inline comment in Makefile.pre.in for how this works. + if test -n "${PGO_ENABLED}" + then + MAKE_BOLT_NATIVE_DEPENDENCY="profile-pgo-apply-stamp" + else + MAKE_BOLT_MAKE_DEPENDENCY="${MAKE_TARGET_BUILD_PLAIN}" + fi if test -n "$ac_tool_prefix"; then @@ -8474,6 +8521,49 @@ $as_echo "\"Found merge-fdata\"" >&6; } fi fi +# Enable BOLT optimizations of libpython. Optional for now due to known +# crashes on LLVM 15. Seems to be fixed in LLVM 16.
+ +BOLT_BINARIES='$(BUILDPYTHON)' + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for --with-bolt-libpython" >&5 +$as_echo_n "checking for --with-bolt-libpython... " >&6; } + +# Check whether --with-bolt_libpython was given. +if test "${with_bolt_libpython+set}" = set; then : + withval=$with_bolt_libpython; test "x$withval" = xno || with_bolt_libpython="yes" +else + with_bolt_libpython="no" +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_bolt_libpython" >&5 +$as_echo "$with_bolt_libpython" >&6; } + +if test "${enable_shared}" = "yes" -a "${with_bolt_libpython}" = "yes" +then + BOLT_BINARIES="${BOLT_BINARIES} \$(INSTSONAME)" +fi + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking BOLT_INSTRUMENT_FLAGS" >&5 +$as_echo_n "checking BOLT_INSTRUMENT_FLAGS... " >&6; } +if test -z "${BOLT_INSTRUMENT_FLAGS}" +then + BOLT_INSTRUMENT_FLAGS= +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $BOLT_INSTRUMENT_FLAGS" >&5 +$as_echo "$BOLT_INSTRUMENT_FLAGS" >&6; } + + +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking BOLT_APPLY_FLAGS" >&5 +$as_echo_n "checking BOLT_APPLY_FLAGS... " >&6; } +if test -z "${BOLT_APPLY_FLAGS}" +then + BOLT_APPLY_FLAGS="-update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions=3 -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=all -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot" +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $BOLT_APPLY_FLAGS" >&5 +$as_echo "$BOLT_APPLY_FLAGS" >&6; } + # XXX Shouldn't the code above that fiddles with BASECFLAGS and OPT be # merged with this chunk of code?
diff --git a/configure.ac b/configure.ac index 115998e0753b26..e9f927c6de4ea4 100644 --- a/configure.ac +++ b/configure.ac @@ -584,35 +584,47 @@ then fi AC_MSG_RESULT("$MACHDEP") -AC_SUBST(_PYTHON_HOST_PLATFORM) -if test "$cross_compiling" = yes; then - case "$host" in - *-*-linux*) - case "$host_cpu" in - arm*) - _host_cpu=arm - ;; - *) - _host_cpu=$host_cpu - esac - ;; - *-*-cygwin*) - _host_cpu= - ;; - *-*-vxworks*) - _host_cpu=$host_cpu - ;; - wasm32-*-* | wasm64-*-*) - _host_cpu=$host_cpu - ;; - *) - # for now, limit cross builds to known configurations - MACHDEP="unknown" - AC_MSG_ERROR([cross build not supported for $host]) - esac - _PYTHON_HOST_PLATFORM="$MACHDEP${_host_cpu:+-$_host_cpu}" +AC_ARG_VAR(_PYTHON_HOST_PLATFORM, [ + Forces a platform tag value for use in sysconfig data. This will be calculated + automatically in non-cross builds by running sysconfig code in the + bootstrapped interpreter. In cross builds, an attempt will be made to + derive an appropriate value in configure. But some targets may derive + incorrect values. This variable can be set to force a value. Example + values: linux-x86_64, macosx-10.9-universal2, win-amd64]) +AC_MSG_CHECKING(_PYTHON_HOST_PLATFORM) + +if test -z "${_PYTHON_HOST_PLATFORM}"; then + if test "$cross_compiling" = yes; then + case "$host" in + *-*-linux*) + case "$host_cpu" in + arm*) + _host_cpu=arm + ;; + *) + _host_cpu=$host_cpu + esac + ;; + *-*-cygwin*) + _host_cpu= + ;; + *-*-vxworks*) + _host_cpu=$host_cpu + ;; + wasm32-*-* | wasm64-*-*) + _host_cpu=$host_cpu + ;; + *) + # for now, limit cross builds to known configurations + MACHDEP="unknown" + AC_MSG_ERROR([cross build not supported for $host]) + esac + _PYTHON_HOST_PLATFORM="$MACHDEP${_host_cpu:+-$_host_cpu}" + fi fi +AC_MSG_RESULT([$_PYTHON_HOST_PLATFORM]) + # Some systems cannot stand _XOPEN_SOURCE being defined at all; they # disable features if it is defined, without any means to access these # features as extensions. 
For these systems, we skip the definition of @@ -1722,8 +1734,18 @@ else fi # Enable optimization flags -AC_SUBST(DEF_MAKE_ALL_RULE) -AC_SUBST(DEF_MAKE_RULE) +# Which target `all` (the default make target) depends on. +AC_SUBST(MAKE_TARGET_ALL) +# Which target to evaluate for non-optimized builds. +AC_SUBST(MAKE_TARGET_BUILD_PLAIN) +AC_SUBST(MAKE_TARGET_BUILD_OPTIMIZED) + +# Do a non-optimized generic build by default. Build configurations below +# can override as appropriate. +MAKE_TARGET_ALL="build-plain" +MAKE_TARGET_BUILD_PLAIN="build_all" +MAKE_TARGET_BUILD_OPTIMIZED="build-optimized-not-enabled" + Py_OPT='false' AC_MSG_CHECKING(for --enable-optimizations) AC_ARG_ENABLE(optimizations, AS_HELP_STRING( @@ -1741,13 +1763,15 @@ fi], [AC_MSG_RESULT(no)]) if test "$Py_OPT" = 'true' ; then + # PGO is implied by optimizations mode. + PGO_ENABLED=1 + MAKE_TARGET_ALL="build-optimized" + MAKE_TARGET_BUILD_OPTIMIZED="profile-pgo-apply-stamp" + # Intentionally not forcing Py_LTO='true' here. Too many toolchains do not # compile working code using it and both test_distutils and test_gdb are # broken when you do manage to get a toolchain that works with it. People # who want LTO need to use --with-lto themselves. - DEF_MAKE_ALL_RULE="profile-opt" - REQUIRE_PGO="yes" - DEF_MAKE_RULE="build_all" case $CC in *gcc*) AX_CHECK_COMPILE_FLAG([-fno-semantic-interposition],[ @@ -1759,13 +1783,7 @@ if test "$Py_OPT" = 'true' ; then elif test "$ac_sys_system" = "Emscripten" -o "$ac_sys_system" = "WASI"; then dnl Emscripten does not support shared extensions yet. Build dnl "python.[js,wasm]", "pybuilddir.txt", and "platform" files. 
- DEF_MAKE_ALL_RULE="build_wasm" - REQUIRE_PGO="no" - DEF_MAKE_RULE="all" -else - DEF_MAKE_ALL_RULE="build_all" - REQUIRE_PGO="no" - DEF_MAKE_RULE="all" + MAKE_TARGET_BUILD_PLAIN="build_wasm" fi AC_ARG_VAR(PROFILE_TASK, Python args for PGO generation task) @@ -1967,7 +1985,8 @@ case $CC in if test $LLVM_PROF_FOUND = not-found then LLVM_PROF_ERR=yes - if test "${REQUIRE_PGO}" = "yes" + + if test -n "${PGO_ENABLED}" then AC_MSG_ERROR([llvm-profdata is required for a --enable-optimizations build but could not be found.]) fi @@ -1983,10 +2002,10 @@ case $CC in if test "${LLVM_PROF_FOUND}" = "not-found" then LLVM_PROF_ERR=yes - if test "${REQUIRE_PGO}" = "yes" - then - AC_MSG_ERROR([llvm-profdata is required for a --enable-optimizations build but could not be found.]) - fi + if test -n "${PGO_ENABLED}" + then + AC_MSG_ERROR([llvm-profdata is required for a --enable-optimizations build but could not be found.]) + fi fi ;; *) @@ -2022,11 +2041,20 @@ else fi], [AC_MSG_RESULT(no)]) -AC_SUBST(PREBOLT_RULE) +AC_SUBST(MAKE_BOLT_NATIVE_DEPENDENCY) +AC_SUBST(MAKE_BOLT_MAKE_DEPENDENCY) + if test "$Py_BOLT" = 'true' ; then - PREBOLT_RULE="${DEF_MAKE_ALL_RULE}" - DEF_MAKE_ALL_RULE="bolt-opt" - DEF_MAKE_RULE="build_all" + MAKE_TARGET_BUILD_OPTIMIZED="profile-bolt-apply-stamp" + + # Hook up make dependencies differently depending on whether PGO is + # enabled. See inline comment in Makefile.pre.in for how this works. + if test -n "${PGO_ENABLED}" + then + MAKE_BOLT_NATIVE_DEPENDENCY="profile-pgo-apply-stamp" + else + MAKE_BOLT_MAKE_DEPENDENCY="${MAKE_TARGET_BUILD_PLAIN}" + fi AC_SUBST(READELF) AC_CHECK_TOOLS(READELF, [readelf], "notfound") @@ -2067,6 +2095,39 @@ if test "$Py_BOLT" = 'true' ; then fi fi +# Enable BOLT optimizations of libpython. Optional for now due to known +# crashes on LLVM 15. Seems to be fixed in LLVM 16.
+AC_SUBST(BOLT_BINARIES) +BOLT_BINARIES='$(BUILDPYTHON)' + +AC_MSG_CHECKING(for --with-bolt-libpython) +AC_ARG_WITH(bolt_libpython, + AS_HELP_STRING([--with-bolt-libpython], [enable BOLT optimization of libpython (WARNING: known to crash BOLT)]), + [test "x$withval" = xno || with_bolt_libpython="yes"], + [with_bolt_libpython="no"]) +AC_MSG_RESULT($with_bolt_libpython) + +if test "${enable_shared}" = "yes" -a "${with_bolt_libpython}" = "yes" +then + BOLT_BINARIES="${BOLT_BINARIES} \$(INSTSONAME)" +fi + +AC_ARG_VAR(BOLT_INSTRUMENT_FLAGS, Arguments to llvm-bolt when instrumenting binaries) +AC_MSG_CHECKING(BOLT_INSTRUMENT_FLAGS) +if test -z "${BOLT_INSTRUMENT_FLAGS}" +then + BOLT_INSTRUMENT_FLAGS= +fi +AC_MSG_RESULT($BOLT_INSTRUMENT_FLAGS) + +AC_ARG_VAR(BOLT_APPLY_FLAGS, Arguments to llvm-bolt when creating a BOLT optimized binary) +AC_MSG_CHECKING(BOLT_APPLY_FLAGS) +if test -z "${BOLT_APPLY_FLAGS}" +then + BOLT_APPLY_FLAGS="-update-debug-sections -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions=3 -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=all -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot" +fi +AC_MSG_RESULT($BOLT_APPLY_FLAGS) + # XXX Shouldn't the code above that fiddles with BASECFLAGS and OPT be # merged with this chunk of code?