Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 7.1] Revert compiler generated Fused Multiply Addition optimized routines #2328

Merged
merged 1 commit into from
Aug 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 0 additions & 21 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -127,27 +127,6 @@ AC_DEFINE_UNQUOTED(HAVE_GCC_WARNING_ZERO_AS_NULL_POINTER_CONSTANT, 1,
fi
AC_LANG_POP([C++])

dnl ---------------------------------------------------------------------------
dnl Check if __attribute__((target_clones("fma","default"))) works
dnl This is needed for example on Alpine Linux where for some reason, building
dnl such tagged functions fails with 'error: the call requires 'ifunc', which is not supported by this target'
dnl ---------------------------------------------------------------------------

dnl Start with an empty value; it is only filled in when the probe below compiles.
TARGET_CLONES_FMA_FLAGS=""
AC_MSG_CHECKING([if target_clones_fma works])
dnl Add -Werror to CFLAGS for the probe only; the saved value is restored afterwards.
SAVED_CFLAGS="$CFLAGS"
CFLAGS="$CFLAGS -Werror"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
[[
__attribute__((target_clones("fma","default"))) void foo() {}
]])],
[AC_MSG_RESULT([yes])]
[TARGET_CLONES_FMA_FLAGS="-DTARGET_CLONES_FMA_ALLOWED"],
[AC_MSG_RESULT([no])])
CFLAGS="$SAVED_CFLAGS"
dnl Export the result (empty, or -DTARGET_CLONES_FMA_ALLOWED) as a substitution variable.
AC_SUBST(TARGET_CLONES_FMA_FLAGS,$TARGET_CLONES_FMA_FLAGS)


dnl ---------------------------------------------------------------------------
dnl Check for --enable-lto
dnl ---------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ check_PROGRAMS = geodtest

AM_CPPFLAGS = -DPROJ_LIB=\"$(pkgdatadir)\" \
-DMUTEX_@MUTEX_SETTING@ -I$(top_srcdir)/include @SQLITE3_CFLAGS@ @TIFF_CFLAGS@ @TIFF_ENABLED_FLAGS@ @CURL_CFLAGS@ @CURL_ENABLED_FLAGS@
AM_CXXFLAGS = @CXX_WFLAGS@ @FLTO_FLAG@ @TARGET_CLONES_FMA_FLAGS@
AM_CXXFLAGS = @CXX_WFLAGS@ @FLTO_FLAG@

include_HEADERS = proj.h proj_experimental.h proj_constants.h proj_api.h geodesic.h \
proj_symbol_rename.h
Expand Down
13 changes: 0 additions & 13 deletions src/lib_proj.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -303,19 +303,6 @@ source_group("CMake Files" FILES CMakeLists.txt)
# Embed PROJ_LIB data files location
add_definitions(-DPROJ_LIB="${CMAKE_INSTALL_PREFIX}/${DATADIR}")

# The gcc "target_clones" function attribute relies on an extension
# to the ELF standard. It must not be used on MinGW.
# Probe whether the C++ compiler accepts a function tagged with
# target_clones("fma","default"); the outcome is stored in
# TARGET_CLONES_FMA_ALLOWED.
include(CheckCXXSourceCompiles)
set(CMAKE_REQUIRED_QUIET TRUE)
check_cxx_source_compiles([[
__attribute__((target_clones("fma","default")))
int clonable() { return 0; }
int main() { return clonable(); }
]] TARGET_CLONES_FMA_ALLOWED)
# Pass the macro to the sources only when the probe succeeded.
if(TARGET_CLONES_FMA_ALLOWED)
add_definitions(-DTARGET_CLONES_FMA_ALLOWED)
endif()

#################################################
## targets: libproj and proj_config.h
#################################################
Expand Down
28 changes: 2 additions & 26 deletions src/projections/tmerc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,26 +66,14 @@ struct tmerc_data {
/* Constant for "exact" transverse mercator */
#define PROJ_ETMERC_ORDER 6

// Determine whether we should try to provide optimized versions that use the Fused Multiply-Add (FMA)
// Intel instruction set. We use the GCC 6 __attribute__((target_clones("fma","default")))
// mechanism for that: the compiler builds a default version and one that
// uses FMA, and at runtime it automatically figures out which version can be used
// by the current CPU. This makes it possible to create general-purpose binaries.
#if defined(TARGET_CLONES_FMA_ALLOWED) && defined(__GNUC__) && __GNUC__ >= 6 && defined(__x86_64__) && !defined(__FMA__)
#define BUILD_FMA_OPTIMIZED_VERSION
#endif

/*****************************************************************************/
//
// Approximate Transverse Mercator functions
//
/*****************************************************************************/


#ifdef BUILD_FMA_OPTIMIZED_VERSION
__attribute__((target_clones("fma","default")))
#endif
inline static PJ_XY approx_e_fwd_internal (PJ_LP lp, PJ *P)
static PJ_XY approx_e_fwd (PJ_LP lp, PJ *P)
{
PJ_XY xy = {0.0, 0.0};
const auto *Q = &(static_cast<struct tmerc_data*>(P->opaque)->approx);
Expand Down Expand Up @@ -127,11 +115,6 @@ inline static PJ_XY approx_e_fwd_internal (PJ_LP lp, PJ *P)
return (xy);
}

// Thin wrapper: hands lp and P to approx_e_fwd_internal() and returns
// whatever that call produces, unchanged.
static PJ_XY approx_e_fwd (PJ_LP lp, PJ *P)
{
    PJ_XY projected = approx_e_fwd_internal(lp, P);
    return projected;
}

static PJ_XY approx_s_fwd (PJ_LP lp, PJ *P) {
PJ_XY xy = {0.0,0.0};
double b, cosphi;
Expand Down Expand Up @@ -177,10 +160,7 @@ static PJ_XY approx_s_fwd (PJ_LP lp, PJ *P) {
return xy;
}

#ifdef BUILD_FMA_OPTIMIZED_VERSION
__attribute__((target_clones("fma","default")))
#endif
inline static PJ_LP approx_e_inv_internal (PJ_XY xy, PJ *P) {
static PJ_LP approx_e_inv (PJ_XY xy, PJ *P) {
PJ_LP lp = {0.0,0.0};
const auto *Q = &(static_cast<struct tmerc_data*>(P->opaque)->approx);

Expand Down Expand Up @@ -212,10 +192,6 @@ inline static PJ_LP approx_e_inv_internal (PJ_XY xy, PJ *P) {
return lp;
}

// Thin wrapper: hands xy and P to approx_e_inv_internal() and returns
// whatever that call produces, unchanged.
static PJ_LP approx_e_inv (PJ_XY xy, PJ *P) {
    PJ_LP unprojected = approx_e_inv_internal(xy, P);
    return unprojected;
}
static PJ_LP approx_s_inv (PJ_XY xy, PJ *P) {
PJ_LP lp = {0.0, 0.0};
double h, g;
Expand Down