This repository has been archived by the owner on Mar 27, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2 from OpenGamma/MAT-287-switch-off-ffast-math
Mat 287 multilibalise lapack
- Loading branch information
Showing
12 changed files
with
454 additions
and
195 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# | ||
# Copyright (C) 2013 - present by OpenGamma Inc. and the OpenGamma group of companies | ||
# | ||
# Please see distribution for license. | ||
# | ||
|
||
# Inclusion of this module sets the targets supported by the machine on which | ||
# CMake is running. SUPPORT_${TARGET} is set to TRUE or FALSE for each target in | ||
# dbg, std, sse41, sse42, avx1 and avx2. std and dbg require no special CPU | ||
# support and are always set. | ||
|
||
# this module caches the fact it has run by setting CPUID_CACHED = TRUE | ||
|
||
# CheckCPUID()
#
# Compiles and runs cmcpuid.c once per build tree and records the outcome in
# the cache:
#   CPUID_CACHED                  - TRUE once the probe has been attempted
#   SUPPORT_dbg, SUPPORT_std      - always TRUE
#   SUPPORT_sse41, SUPPORT_sse42,
#   SUPPORT_avx1, SUPPORT_avx2    - TRUE or FALSE
#
# The exit code of the probe is interpreted as a support level; the thresholds
# tested below must match the numbers in cmcpuid.c.
macro( CheckCPUID )
  if(NOT DEFINED CPUID_CACHED)
    set(CPUID_CACHED TRUE CACHE INTERNAL "Whether CPUID has run")
    try_run(CPUID_FLAG CPUID_COMPILE_RESULT
            ${CMAKE_BINARY_DIR}
            ${CMAKE_SOURCE_DIR}/CMAKE/cmcpuid.c)

    set(SUPPORT_dbg TRUE CACHE INTERNAL "Support for debug build")
    set(SUPPORT_std TRUE CACHE INTERNAL "Support for standard build")

    # try_run may leave CPUID_FLAG unset (compile failure) or non-numeric
    # (e.g. FAILED_TO_RUN). Expanding that unquoted inside if() is a hard
    # configure error, so normalise it to a plain integer level first.
    if(CPUID_COMPILE_RESULT AND "${CPUID_FLAG}" MATCHES "^[0-9]+$")
      set(CPUID_LEVEL "${CPUID_FLAG}")
    else()
      message(WARNING "CPUID probe could not be compiled or run; assuming no extended instruction set support")
      set(CPUID_LEVEL 0)
    endif()

    # NOTE: These flags must match the numbers in cmcpuid.c
    if(CPUID_LEVEL GREATER 1)
      set(SUPPORT_sse41 TRUE CACHE INTERNAL "Local CPU support for SSE4.1")
      message(STATUS "CPU supports SSE4.1")
    else()
      set(SUPPORT_sse41 FALSE CACHE INTERNAL "Local CPU support for SSE4.1")
      message(STATUS "CPU does not support SSE4.1")
    endif()
    if(CPUID_LEVEL GREATER 2)
      set(SUPPORT_sse42 TRUE CACHE INTERNAL "Local CPU support for SSE4.2")
      message(STATUS "CPU supports SSE4.2")
    else()
      set(SUPPORT_sse42 FALSE CACHE INTERNAL "Local CPU support for SSE4.2")
      message(STATUS "CPU does not support SSE4.2")
    endif()
    if(CPUID_LEVEL GREATER 3)
      set(SUPPORT_avx1 TRUE CACHE INTERNAL "Local CPU support for AVX1")
      message(STATUS "CPU supports AVX1")
    else()
      set(SUPPORT_avx1 FALSE CACHE INTERNAL "Local CPU support for AVX1")
      message(STATUS "CPU does not support AVX1")
    endif()
    if(CPUID_LEVEL GREATER 4)
      set(SUPPORT_avx2 TRUE CACHE INTERNAL "Local CPU support for AVX2")
      message(STATUS "CPU supports AVX2")
    else()
      set(SUPPORT_avx2 FALSE CACHE INTERNAL "Local CPU support for AVX2")
      message(STATUS "CPU does not support AVX2")
    endif()

    # A macro shares the caller's scope, so do not leak the helper variable.
    unset(CPUID_LEVEL)
  endif()
endmacro()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# | ||
# Copyright (C) 2013 - present by OpenGamma Inc. and the OpenGamma group of companies | ||
# | ||
# Please see distribution for license. | ||
# | ||
|
||
include(CMakeParseArguments) | ||
|
||
include( CheckCPUID ) | ||
CheckCPUID() | ||
|
||
# Create a library for multiple target instruction sets. Note that this is | ||
# specialised to its use in Nyqwk2 and does not fully provide all of the | ||
# functionality that can be provided by add_library and set_target_properties. | ||
# One instance of the library is created for each of the TARGETS. | ||
|
||
# add_multitarget_library(<name>
#                         [VERSION <version>] [SOVERSION <soversion>]
#                         SOURCES <file>...
#                         TARGETS <target>...
#                         [LINK_MULTILIBRARIES <lib>...]
#                         [LINK_LIBRARIES <lib>...]
#                         [COMPILE_DEFINITIONS <definition>...]
#                         [MULTI_DEPENDS <dependency>...])
#
# For every <target> in TARGETS a shared library <name>_<target> is created
# from SOURCES and compiled with the flags held in OPT_FLAGS_<target>.
#   LINK_MULTILIBRARIES - linked as <lib>_<target>
#   LINK_LIBRARIES      - linked as given
#   COMPILE_DEFINITIONS - all of them are added to each library
#   MULTI_DEPENDS       - added as build-order dependencies <dependency>_<target>
function(add_multitarget_library _TARGET_NAME)
  cmake_parse_arguments(MTLIB
                        ""
                        "VERSION;SOVERSION"
                        "SOURCES;TARGETS;LINK_MULTILIBRARIES;LINK_LIBRARIES;COMPILE_DEFINITIONS;MULTI_DEPENDS"
                        ${ARGN})
  foreach(TARGET ${MTLIB_TARGETS})
    set(LIBNAME ${_TARGET_NAME}_${TARGET})
    add_library(${LIBNAME} SHARED ${MTLIB_SOURCES})

    # Quoted so that an empty OPT_FLAGS_<target> still forms a valid
    # property/value pair.
    set_target_properties(${LIBNAME} PROPERTIES
                          COMPILE_FLAGS "${OPT_FLAGS_${TARGET}}")
    # VERSION and SOVERSION are optional; only apply the ones that were given,
    # otherwise the property/value pairs would be misaligned.
    if(NOT "${MTLIB_VERSION}" STREQUAL "")
      set_target_properties(${LIBNAME} PROPERTIES VERSION "${MTLIB_VERSION}")
    endif()
    if(NOT "${MTLIB_SOVERSION}" STREQUAL "")
      set_target_properties(${LIBNAME} PROPERTIES SOVERSION "${MTLIB_SOVERSION}")
    endif()

    # The plain (keyword-less) signature is kept so that callers using the
    # plain signature on these targets elsewhere continue to work.
    foreach(LINK_LIB ${MTLIB_LINK_MULTILIBRARIES})
      target_link_libraries(${LIBNAME} ${LINK_LIB}_${TARGET})
    endforeach()
    foreach(LINK_LIB ${MTLIB_LINK_LIBRARIES})
      target_link_libraries(${LIBNAME} ${LINK_LIB})
    endforeach()

    # APPEND is required: setting the property in a loop would overwrite it
    # each time and keep only the last definition.
    foreach(COMPILE_DEFINITION ${MTLIB_COMPILE_DEFINITIONS})
      set_property(TARGET ${LIBNAME} APPEND PROPERTY
                   COMPILE_DEFINITIONS ${COMPILE_DEFINITION})
    endforeach()

    foreach(DEPENDENCY ${MTLIB_MULTI_DEPENDS})
      add_dependencies(${LIBNAME} ${DEPENDENCY}_${TARGET})
    endforeach()
  endforeach()
endfunction()
|
||
# Create a binary for multiple target instruction sets. Note that this is | ||
# specialised to its use in Nyqwk2 and does not fully provide all of the | ||
# functionality that can be provided by add_executable and set_target_properties. | ||
# One instance of the binary is created for each of the TARGETS. | ||
|
||
# add_multitarget_binary(<name>
#                        SOURCES <file>...
#                        TARGETS <target>...
#                        [LINK_MULTILIBRARIES <lib>...]
#                        [LINK_LIBRARIES <lib>...]
#                        [COMPILE_DEFINITIONS <definition>...]
#                        [MULTI_DEPENDS <dependency>...])
#
# For every <target> in TARGETS an executable <name>_<target> is created from
# SOURCES and compiled with the flags held in OPT_FLAGS_<target>.
#   LINK_MULTILIBRARIES - linked as <lib>_<target>
#   LINK_LIBRARIES      - linked as given
#   COMPILE_DEFINITIONS - all of them are added to each executable
#   MULTI_DEPENDS       - added as build-order dependencies <dependency>_<target>
# VERSION and SOVERSION are accepted by the parser but are not applied to the
# executables.
function(add_multitarget_binary _TARGET_NAME)
  cmake_parse_arguments(MTBIN
                        ""
                        "VERSION;SOVERSION"
                        "SOURCES;TARGETS;LINK_MULTILIBRARIES;LINK_LIBRARIES;COMPILE_DEFINITIONS;MULTI_DEPENDS"
                        ${ARGN})
  foreach(TARGET ${MTBIN_TARGETS})
    set(BINNAME ${_TARGET_NAME}_${TARGET})
    add_executable(${BINNAME} ${MTBIN_SOURCES})

    # Quoted so that an empty OPT_FLAGS_<target> still forms a valid
    # property/value pair.
    set_target_properties(${BINNAME} PROPERTIES
                          COMPILE_FLAGS "${OPT_FLAGS_${TARGET}}")

    # The plain (keyword-less) signature is kept so that callers using the
    # plain signature on these targets elsewhere continue to work.
    foreach(LINK_LIB ${MTBIN_LINK_MULTILIBRARIES})
      target_link_libraries(${BINNAME} ${LINK_LIB}_${TARGET})
    endforeach()
    foreach(LINK_LIB ${MTBIN_LINK_LIBRARIES})
      target_link_libraries(${BINNAME} ${LINK_LIB})
    endforeach()

    # APPEND is required: setting the property in a loop would overwrite it
    # each time and keep only the last definition.
    foreach(COMPILE_DEFINITION ${MTBIN_COMPILE_DEFINITIONS})
      set_property(TARGET ${BINNAME} APPEND PROPERTY
                   COMPILE_DEFINITIONS ${COMPILE_DEFINITION})
    endforeach()

    foreach(DEPENDENCY ${MTBIN_MULTI_DEPENDS})
      add_dependencies(${BINNAME} ${DEPENDENCY}_${TARGET})
    endforeach()
  endforeach()
endfunction()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
/** | ||
* Copyright (C) 2013 - present by OpenGamma Inc. and the OpenGamma group of companies | ||
* | ||
* Please see distribution for license. | ||
*/ | ||
|
||
#include <stdio.h> | ||
#include <stdlib.h> | ||
#include <stdint.h> | ||
#include <stdio.h> | ||
|
||
|
||
/**
 * The supported instruction set on this machine for use in other functions.
 *
 * The value is returned from main() as the process exit code, so these numbers
 * must match the thresholds tested by the CheckCPUID CMake macro.
 */
typedef enum instructions_available_e
{
  supports_STANDARD = 1,
  supports_SSE41 = 2,
  supports_SSE42 = 3,
  supports_AVX1 = 4,
  supports_AVX2 = 5
} instructions_available;

/**
 * Executes the cpuid instruction.
 *
 * @param leaf value loaded into %eax before the instruction
 * @param subleaf value loaded into %ecx before the instruction
 * @param regs receives %eax, %ebx, %ecx, %edx (in that order) afterwards
 */
static void cpuidQuery(uint32_t leaf, uint32_t subleaf, uint32_t regs[4])
{
  // Every register is bound explicitly: inputs go to eax/ecx and all four
  // outputs are declared, so the compiler neither guesses nor reuses them.
  __asm__ __volatile__("cpuid"
                       : "=a"(regs[0]), "=b"(regs[1]), "=c"(regs[2]), "=d"(regs[3])
                       : "a"(leaf), "c"(subleaf));
}

/**
 * Determines the most capable instruction set advertised by the CPU.
 *
 * Prints a line naming the detected level to stdout and returns the matching
 * instructions_available value. Levels are tested from most to least capable:
 * AVX2, AVX1, SSE4.2, SSE4.1, then STANDARD as the fallback.
 *
 * NOTE(review): only the CPUID feature bits are inspected; whether the
 * operating system has enabled AVX register state (OSXSAVE/XGETBV) is not
 * verified here.
 */
instructions_available getSupportedInstructionSet(void) {

  // probes of cpuid with %eax=0000_0001h (bits of %ecx)
  enum instructions_available_eax0000_0001h_e
  {
    probe01_SSE_3   = 1<<0,
    probe01_SSE_4_1 = 1<<19,
    probe01_SSE_4_2 = 1<<20,
    probe01_AVX1    = 1<<28
  };

  // probes of cpuid with %eax=0000_0007h, %ecx=0 (bits of %ebx)
  enum instructions_available_eax0000_0007h_e
  {
    probe07_AVX2 = 1<<5
  };

  // contents of the registers returned by cpuid: eax, ebx, ecx, edx
  uint32_t regs[4];
  // highest basic leaf the CPU implements
  uint32_t maxLeaf;
  // %ecx feature flags from leaf 1
  uint32_t features;

  // Leaf 0 reports the highest supported basic leaf in eax.
  cpuidQuery(0x00000000, 0, regs);
  maxLeaf = regs[0];
  if(maxLeaf < 0x00000001) // no feature leaf at all
  {
    printf("STANDARD SUPPORTED\n");
    return supports_STANDARD;
  }

  // Call cpuid with eax=0x00000001 and get ecx (large feature flag set)
  cpuidQuery(0x00000001, 0, regs);
  features = regs[2];

  if(features & probe01_AVX1) // we have at least AVX1
  {
    // Leaf 7 is only meaningful when the CPU implements it; subleaf must be 0.
    if(maxLeaf >= 0x00000007)
    {
      cpuidQuery(0x00000007, 0, regs);
      if(regs[1] & probe07_AVX2) // we have at least AVX2
      {
        printf("AVX2 SUPPORTED\n");
        return supports_AVX2;
      }
    }
    printf("AVX1 SUPPORTED\n");
    return supports_AVX1;
  }
  else if(features & probe01_SSE_4_2) // we have at least SSE4.2
  {
    printf("SSE4.2 SUPPORTED\n");
    return supports_SSE42;
  }
  else if(features & probe01_SSE_4_1) // we have at least SSE4.1
  {
    printf("SSE4.1 SUPPORTED\n");
    return supports_SSE41;
  }
  else // we have nothing specifically useful!
  {
    printf("STANDARD SUPPORTED\n");
    return supports_STANDARD;
  }
}
|
||
|
||
int main(void) | ||
{ | ||
instructions_available ia; | ||
ia = getSupportedInstructionSet(); | ||
|
||
switch(ia) | ||
{ | ||
case supports_AVX2: | ||
printf("AVX2\n"); | ||
break; | ||
case supports_AVX1: | ||
printf("AVX1\n"); | ||
break; | ||
case supports_SSE42: | ||
printf("SSE42\n"); | ||
break; | ||
case supports_SSE41: | ||
printf("SSE41\n"); | ||
break; | ||
case supports_STANDARD: | ||
printf("STANDARD\n"); | ||
break; | ||
default: | ||
printf("Failed to find supported instruction set, this is an error!\n"); | ||
exit(-1); | ||
} | ||
return ia; | ||
} |
Oops, something went wrong.