Skip to content
This repository has been archived by the owner on Mar 27, 2019. It is now read-only.

Commit

Permalink
Merge pull request #2 from OpenGamma/MAT-287-switch-off-ffast-math
Browse files Browse the repository at this point in the history
Mat 287 multilibalise lapack
  • Loading branch information
gmarkall committed Nov 8, 2013
2 parents e9dfd5a + 118e1fc commit 9ad68be
Show file tree
Hide file tree
Showing 12 changed files with 454 additions and 195 deletions.
4 changes: 0 additions & 4 deletions BLAS/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,3 @@ if(BUILD_TESTING)
add_subdirectory(TESTING)
endif(BUILD_TESTING)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/blas.pc)
install(FILES
${CMAKE_CURRENT_BINARY_DIR}/blas.pc
DESTINATION ${PKG_CONFIG_DIR}
)
34 changes: 22 additions & 12 deletions BLAS/SRC/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,17 +135,27 @@ if(BLAS_COMPLEX16)
${ALLBLAS} ${ZBLAS2} ${ZBLAS3})
endif()

set(XERBLA xerbla.f)

add_library(ogxerbla SHARED ${XERBLA})
set_target_properties(ogxerbla PROPERTIES SOVERSION ${LAPACK_VERSION_MAJOR} VERSION ${LAPACK_VERSION})
lapack_install_library(ogxerbla)

add_library(ogblas SHARED ${ALLOBJ})
set_target_properties(ogblas PROPERTIES SOVERSION ${LAPACK_VERSION_MAJOR} VERSION ${LAPACK_VERSION})
target_link_libraries(ogblas ogxerbla)
set(XERBLA xerbla.f)

add_multitarget_library(ogxerbla
VERSION ${LAPACK_VERSION}
SOVERSION ${LAPACK_VERSION_MAJOR}
SOURCES ${XERBLA}
TARGETS ${TARGET_TYPES}
)

set(LINK_MULTILIBRARIES ogblas ogxerbla)
if(UNIX)
target_link_libraries(ogblas m)
set(LINK_LIBRARIES m)
else()
set(LINK_LIBRARIES "")
endif()
target_link_libraries(ogblas)
lapack_install_library(ogblas)

add_multitarget_library(ogblas
VERSION ${LAPACK_VERSION}
SOVERSION ${LAPACK_VERSION_MAJOR}
SOURCES ${ALLOBJ}
TARGETS ${TARGET_TYPES}
LINK_MULTILIBRARIES ${LINK_MULTILIBRARIES}
LINK_LIBRARIES ${LINK_LIBRARIES}
)
50 changes: 50 additions & 0 deletions CMAKE/CheckCPUID.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#
# Copyright (C) 2013 - present by OpenGamma Inc. and the OpenGamma group of companies
#
# Please see distribution for license.
#

# This module defines the CheckCPUID() macro, which records the targets
# supported by the machine on which CMake is running. SUPPORT_${TARGET} is set
# to TRUE or FALSE for each target in dbg, std, sse41, sse42, avx1 and avx2.
# std and dbg require no special CPU support and are always set to TRUE.

# The macro caches the fact it has run by setting CPUID_CACHED = TRUE, so the
# probe program is only compiled and executed once per build tree.

macro( CheckCPUID )
  if(NOT DEFINED CPUID_CACHED)
    set(CPUID_CACHED TRUE CACHE INTERNAL "Whether CPUID has run")
    # CPUID_FLAG receives the exit status of the probe program,
    # CPUID_COMPILE_RESULT whether the probe could be compiled at all.
    try_run(CPUID_FLAG CPUID_COMPILE_RESULT
      ${CMAKE_BINARY_DIR}
      ${CMAKE_SOURCE_DIR}/CMAKE/cmcpuid.c)

    set(SUPPORT_dbg TRUE CACHE INTERNAL "Support for debug build")
    set(SUPPORT_std TRUE CACHE INTERNAL "Support for standard build")

    # Only trust the probe when it compiled and exited with one of the values
    # of the enum in cmcpuid.c (1..5). Anything else (empty result after a
    # compile failure, FAILED_TO_RUN, or the 255 produced by exit(-1)) is
    # treated as "no extended instruction sets".
    set(_CPUID_LEVEL 0)
    if(CPUID_COMPILE_RESULT AND "${CPUID_FLAG}" MATCHES "^[1-5]$")
      set(_CPUID_LEVEL ${CPUID_FLAG})
    else()
      message(WARNING
        "CPUID probe did not produce a usable result ('${CPUID_FLAG}'); "
        "assuming no SSE4/AVX support")
    endif()

    # NOTE: These flags must match the numbers in cmcpuid.c
    if(_CPUID_LEVEL GREATER 1)
      set(SUPPORT_sse41 TRUE CACHE INTERNAL "Local CPU support for SSE4.1")
      message(STATUS "CPU supports SSE4.1")
    else()
      set(SUPPORT_sse41 FALSE CACHE INTERNAL "Local CPU support for SSE4.1")
      message(STATUS "CPU does not support SSE4.1")
    endif()
    if(_CPUID_LEVEL GREATER 2)
      set(SUPPORT_sse42 TRUE CACHE INTERNAL "Local CPU support for SSE4.2")
      message(STATUS "CPU supports SSE4.2")
    else()
      set(SUPPORT_sse42 FALSE CACHE INTERNAL "Local CPU support for SSE4.2")
      message(STATUS "CPU does not support SSE4.2")
    endif()
    if(_CPUID_LEVEL GREATER 3)
      set(SUPPORT_avx1 TRUE CACHE INTERNAL "Local CPU support for AVX1")
      message(STATUS "CPU supports AVX1")
    else()
      set(SUPPORT_avx1 FALSE CACHE INTERNAL "Local CPU support for AVX1")
      message(STATUS "CPU does not support AVX1")
    endif()
    if(_CPUID_LEVEL GREATER 4)
      set(SUPPORT_avx2 TRUE CACHE INTERNAL "Local CPU support for AVX2")
      message(STATUS "CPU supports AVX2")
    else()
      set(SUPPORT_avx2 FALSE CACHE INTERNAL "Local CPU support for AVX2")
      message(STATUS "CPU does not support AVX2")
    endif()
  endif()
endmacro()
75 changes: 75 additions & 0 deletions CMAKE/MultiInstructionTarget.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#
# Copyright (C) 2013 - present by OpenGamma Inc. and the OpenGamma group of companies
#
# Please see distribution for license.
#

include(CMakeParseArguments)

include( CheckCPUID )
CheckCPUID()

# Create a library for multiple target instruction sets. Note that this is
# specialised to its use in Nyqwk2 and does not fully provide all of the
# functionality that can be provided by add_library and set_target_properties.
# One instance of the library is created for each of the TARGETS.
#
#   add_multitarget_library(<name>
#     [VERSION <version>] [SOVERSION <soversion>]
#     SOURCES <source>...
#     TARGETS <target>...
#     [LINK_MULTILIBRARIES <library>...]
#     [LINK_LIBRARIES <library>...]
#     [COMPILE_DEFINITIONS <definition>...]
#     [MULTI_DEPENDS <dependency>...])
#
# For every <target> a shared library named <name>_<target> is built from
# SOURCES with the compile flags held in OPT_FLAGS_<target>.
#   LINK_MULTILIBRARIES  linked as <library>_<target>
#   LINK_LIBRARIES       linked under the name given
#   COMPILE_DEFINITIONS  all appended to the library's COMPILE_DEFINITIONS
#   MULTI_DEPENDS        added as build dependencies named <dependency>_<target>

function(add_multitarget_library _TARGET_NAME)
  cmake_parse_arguments(MTLIB
    ""
    "VERSION;SOVERSION"
    "SOURCES;TARGETS;LINK_MULTILIBRARIES;LINK_LIBRARIES;COMPILE_DEFINITIONS;MULTI_DEPENDS"
    ${ARGN})
  foreach(TARGET ${MTLIB_TARGETS})
    set(LIBNAME ${_TARGET_NAME}_${TARGET})
    add_library(${LIBNAME} SHARED ${MTLIB_SOURCES})
    # Quoted so that an empty or undefined OPT_FLAGS_<target> still yields a
    # well-formed property/value pair.
    set_target_properties(${LIBNAME} PROPERTIES
      COMPILE_FLAGS "${OPT_FLAGS_${TARGET}}")
    # VERSION and SOVERSION are optional; only set them when supplied.
    if(NOT "${MTLIB_VERSION}" STREQUAL "")
      set_target_properties(${LIBNAME} PROPERTIES VERSION "${MTLIB_VERSION}")
    endif()
    if(NOT "${MTLIB_SOVERSION}" STREQUAL "")
      set_target_properties(${LIBNAME} PROPERTIES SOVERSION "${MTLIB_SOVERSION}")
    endif()
    foreach(LINK_LIB ${MTLIB_LINK_MULTILIBRARIES})
      target_link_libraries(${LIBNAME} ${LINK_LIB}_${TARGET})
    endforeach()
    foreach(LINK_LIB ${MTLIB_LINK_LIBRARIES})
      target_link_libraries(${LIBNAME} ${LINK_LIB})
    endforeach()
    # APPEND keeps every definition; setting the property once per definition
    # would leave only the last one in effect.
    if(MTLIB_COMPILE_DEFINITIONS)
      set_property(TARGET ${LIBNAME} APPEND PROPERTY
        COMPILE_DEFINITIONS ${MTLIB_COMPILE_DEFINITIONS})
    endif()
    foreach(DEPENDENCY ${MTLIB_MULTI_DEPENDS})
      add_dependencies(${LIBNAME} ${DEPENDENCY}_${TARGET})
    endforeach()
  endforeach()
endfunction()

# Create a binary for multiple target instruction sets. Note that this is
# specialised to its use in Nyqwk2 and does not fully provide all of the
# functionality that can be provided by add_executable and
# set_target_properties. One instance of the binary is created for each of the
# TARGETS.
#
#   add_multitarget_binary(<name>
#     SOURCES <source>...
#     TARGETS <target>...
#     [LINK_MULTILIBRARIES <library>...]
#     [LINK_LIBRARIES <library>...]
#     [COMPILE_DEFINITIONS <definition>...]
#     [MULTI_DEPENDS <dependency>...])
#
# For every <target> an executable named <name>_<target> is built from SOURCES
# with the compile flags held in OPT_FLAGS_<target>.
#   LINK_MULTILIBRARIES  linked as <library>_<target>
#   LINK_LIBRARIES       linked under the name given
#   COMPILE_DEFINITIONS  all appended to the binary's COMPILE_DEFINITIONS
#   MULTI_DEPENDS        added as build dependencies named <dependency>_<target>
# VERSION and SOVERSION are accepted for symmetry with
# add_multitarget_library but are not applied to executables.

function(add_multitarget_binary _TARGET_NAME)
  cmake_parse_arguments(MTBIN
    ""
    "VERSION;SOVERSION"
    "SOURCES;TARGETS;LINK_MULTILIBRARIES;LINK_LIBRARIES;COMPILE_DEFINITIONS;MULTI_DEPENDS"
    ${ARGN})
  foreach(TARGET ${MTBIN_TARGETS})
    set(BINNAME ${_TARGET_NAME}_${TARGET})
    add_executable(${BINNAME} ${MTBIN_SOURCES})
    # Quoted so that an empty or undefined OPT_FLAGS_<target> still yields a
    # well-formed property/value pair.
    set_target_properties(${BINNAME} PROPERTIES
      COMPILE_FLAGS "${OPT_FLAGS_${TARGET}}")
    foreach(LINK_LIB ${MTBIN_LINK_MULTILIBRARIES})
      target_link_libraries(${BINNAME} ${LINK_LIB}_${TARGET})
    endforeach()
    foreach(LINK_LIB ${MTBIN_LINK_LIBRARIES})
      target_link_libraries(${BINNAME} ${LINK_LIB})
    endforeach()
    # APPEND keeps every definition; setting the property once per definition
    # would leave only the last one in effect.
    if(MTBIN_COMPILE_DEFINITIONS)
      set_property(TARGET ${BINNAME} APPEND PROPERTY
        COMPILE_DEFINITIONS ${MTBIN_COMPILE_DEFINITIONS})
    endif()
    foreach(DEPENDENCY ${MTBIN_MULTI_DEPENDS})
      add_dependencies(${BINNAME} ${DEPENDENCY}_${TARGET})
    endforeach()
  endforeach()
endfunction()

113 changes: 113 additions & 0 deletions CMAKE/cmcpuid.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/**
* Copyright (C) 2013 - present by OpenGamma Inc. and the OpenGamma group of companies
*
* Please see distribution for license.
*/

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>


/**
* The supported instruction set on this machine for use in other functions.
*
* These must match up with the numbers used in MultiLib.cmake. The value is
* also returned from main() as the process exit status, and
* CMAKE/CheckCPUID.cmake compares that status against the thresholds 1..4
* with GREATER, so the enumerators must stay in ascending order of capability.
*/
typedef enum instructions_available_e
{
supports_STANDARD = 1, /* no extended instruction set detected */
supports_SSE41 = 2, /* SSE4.1 */
supports_SSE42 = 3, /* SSE4.2 */
supports_AVX1 = 4, /* AVX1 */
supports_AVX2 = 5 /* AVX2 */
} instructions_available;


instructions_available getSupportedInstructionSet() {

// probes of cpuid with %eax=0000_0001h
enum instructions_available_eax0000_0001h_e
{
probe01_SSE_3 = 1<<0,
probe01_SSE_4_1 = 1<<19,
probe01_SSE_4_2 = 1<<20,
probe01_AVX1 = 1<<28
};

// probes of cpuid with %eax=0000_0007h
enum instructions_available_eax0000_0007h_e
{
probe07_AVX2 = 1<<5
};

// the eax register
int32_t EAX;
// contends of the returned register
int32_t supported;

// Call cpuid with eax=0x00000001 and get ecx
EAX = 0x00000001;
__asm__("cpuid"
:"=c"(supported) // %ecx contains large feature flag set
:"0"(EAX) // call with 0x1
:"%eax","%ebx","%edx"); // clobbered

if(supported & probe01_AVX1) // we have at least AVX1
{
EAX = 0x00000007;
__asm__("cpuid"
:"=b"(supported) // %ebx contains feature flag AVX2
:"0"(EAX) // call with 0x7
:"%eax","%ecx","%edx"); // clobbered

if(supported & probe07_AVX2) // we have at least AVX2
{
printf("AVX2 SUPPORTED\n");
return supports_AVX2;
}
printf("AVX1 SUPPORTED\n");
return supports_AVX1;
}
else if(supported & probe01_SSE_4_1) // we have at least SSE4.1
{
printf("SSE4.2 SUPPORTED\n");
return supports_SSE42;
}
else // we have nothing specifically useful!
{
printf("STANDARD SUPPORTED\n");
return supports_STANDARD;
}
}


/**
 * Print the detected instruction set and return it as the process exit
 * status, which is how CMAKE/CheckCPUID.cmake reads the result.
 *
 * @return a value of instructions_available (1..5). If detection yields an
 * unknown value the baseline supports_STANDARD is returned, so that the
 * caller never mistakes a failure status for support of every instruction
 * set (exit(-1) would be seen as status 255).
 */
int main(void)
{
  instructions_available ia;
  ia = getSupportedInstructionSet();

  switch(ia)
  {
  case supports_AVX2:
    printf("AVX2\n");
    break;
  case supports_AVX1:
    printf("AVX1\n");
    break;
  case supports_SSE42:
    printf("SSE42\n");
    break;
  case supports_SSE41:
    printf("SSE41\n");
    break;
  case supports_STANDARD:
    printf("STANDARD\n");
    break;
  default:
    printf("Failed to find supported instruction set, this is an error!\n");
    // Fall back to the baseline rather than a large failure status
    return supports_STANDARD;
  }
  return ia;
}
Loading

0 comments on commit 9ad68be

Please sign in to comment.