Skip to content

Commit

Permalink
add comments, test=develop
Browse files Browse the repository at this point in the history
  • Loading branch information
Shixiaowei02 committed Dec 26, 2020
1 parent 1f69360 commit 6e7ae0d
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 4 deletions.
3 changes: 3 additions & 0 deletions cmake/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ if(WITH_AVX AND AVX_FOUND)
add_definitions(-DPADDLE_WITH_AVX)
elseif(SSE3_FOUND)
set(SIMD_FLAG ${SSE3_FLAG})
endif()

if (SSE3_FOUND)
# TODO: SSE3 support should be detected at runtime instead of being fixed here at configure time.
add_definitions(-DPADDLE_WITH_SSE3)
endif()
Expand Down
15 changes: 11 additions & 4 deletions paddle/fluid/platform/denormal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include <tuple>
#include <utility>

// Refer to https://github.com/tensorflow/tensorflow/pull/17141

// If we're on gcc 4.8 or older, there's a known bug that prevents the use of
// intrinsics when the architecture is not defined in the flags. See
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57202
Expand All @@ -25,10 +27,11 @@
#define GCC_WITHOUT_INTRINSICS
#endif

// Only try to use SSE3 intrinsics if we're not on a known bad gcc version
// (see GCC_WITHOUT_INTRINSICS above). The intrinsic calls themselves are
// additionally guarded by PADDLE_WITH_SSE3.
#if !defined(GCC_WITHOUT_INTRINSICS)
#define DENORM_USE_INTRINSICS
#endif

#ifdef DENORM_USE_INTRINSICS
#include <pmmintrin.h>
#endif

Expand All @@ -38,7 +41,12 @@ namespace platform {
static void SetDenormalState(bool flush_zero_mode, bool denormals_zero_mode) {
#ifdef DENORM_USE_INTRINSICS
#ifdef PADDLE_WITH_SSE3
// Restore flags
// Intel's C and Fortran compilers enable the denormals-are-zero (DAZ) and
// flush-to-zero (FTZ) flags for SSE by default at optimization levels above
// -O0.
// The AArch32 NEON (SIMD) FPU always operates in flush-to-zero mode.
// Refer to https://en.wikipedia.org/wiki/Denormal_number
// and https://software.intel.com/sites/landingpage/IntrinsicsGuide/
_MM_SET_FLUSH_ZERO_MODE(flush_zero_mode ? _MM_FLUSH_ZERO_ON
: _MM_FLUSH_ZERO_OFF);
_MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode ? _MM_DENORMALS_ZERO_ON
Expand All @@ -50,7 +58,6 @@ static void SetDenormalState(bool flush_zero_mode, bool denormals_zero_mode) {
static std::pair<bool, bool> GetDenormalState() {
#ifdef DENORM_USE_INTRINSICS
#ifdef PADDLE_WITH_SSE3
// Read the current flush-to-zero and denormals-are-zero flags.
bool flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON;
bool denormals_zero_mode =
_MM_GET_DENORMALS_ZERO_MODE() == _MM_DENORMALS_ZERO_ON;
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/platform/denormal.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
namespace paddle {
namespace platform {

// Used to restore the initial flush-denormal state at the end of the scope.
class ScopedRestoreFlushDenormalState {
public:
ScopedRestoreFlushDenormalState();
Expand Down

0 comments on commit 6e7ae0d

Please sign in to comment.