Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix compilation with GCC and Clang #587

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions cmake/cpu/BuildFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ if(MSVC)
else(MSVC)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-field-initializers")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-type-limits")
Expand All @@ -58,9 +59,14 @@ else(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pedantic")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=redundant-decls")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=old-style-cast")
# Eigen fails to build with some versions, so convert this to a warning
# Details at http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1459
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-ignored-attributes")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-pessimizing-move")

if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-vla-cxx-extension")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-builtins")
Comment on lines +62 to +67
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you comment on the build errors you found without these?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clang-only, for -Wno-vla-cxx-extension, tons of warnings in all places:

/src/intel-extension-for-pytorch/csrc/cpu/aten/kernels/MergedEmbeddingBagKrnl.cpp:880:33: warning: variable length arrays in C++ are a Clang extension [-Wvla-cxx-extension]
  880 |               scalar_t* val_ptr[world_size];
      |                                 ^~~~~~~~~~
/usr/include/ATen/Dispatch.h:797:46: note: expanded from macro 'AT_DISPATCH_INDEX_TYPES'
  797 |               at::ScalarType::Long, index_t, __VA_ARGS__))
      |                                              ^~~~~~~~~~~
/usr/include/ATen/Dispatch.h:70:12: note: expanded from macro 'AT_PRIVATE_CASE_TYPE_USING_HINT'
   70 |     return __VA_ARGS__();                                               \
      |            ^~~~~~~~~~~
/usr/include/ATen/Dispatch.h:221:7: note: expanded from macro 'AT_DISPATCH_SWITCH'
  221 |       __VA_ARGS__                                                           \
      |       ^~~~~~~~~~~
note: (skipping 2 expansions in backtrace; use -fmacro-backtrace-limit=0 to see all)

Clang-only, for -Wno-deprecated-builtins all related to ROBIN_HOOD_IS_TRIVIALLY_COPYABLE (few warnings)

/src/intel-extension-for-pytorch/csrc/cpu/utils/robin_hood.h:1545:29: warning: builtin __has_trivial_copy is deprecated; use __is_trivially_copyable instead [-Wdeprecated-builtins]
 1545 |     Cloner<Table, IsFlat && ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(Node)>()(o, *this);
      |                             ^
/src/intel-extension-for-pytorch/csrc/cpu/utils/robin_hood.h:218:47: note: expanded from macro 'ROBIN_HOOD_IS_TRIVIALLY_COPYABLE'
  218 | #define ROBIN_HOOD_IS_TRIVIALLY_COPYABLE(...) __has_trivial_copy(__VA_ARGS__)

Clang-only, for -Wno-pessimizing-move, repeated many times for inner_product/deconv/matmul/tensor.hpp/etc.

/src/intel-extension-for-pytorch/third_party/ideep/include/ideep/operators/matmul.hpp:1028:23: warning: moving a temporary object prevents copy elision [-Wpessimizing-move]
 1028 |     param.primitive = std::move(super(param.pd));
      |                       ^
/src/intel-extension-for-pytorch/third_party/ideep/include/ideep/operators/matmul.hpp:1028:23: note: remove std::move call here
 1028 |     param.primitive = std::move(super(param.pd));
      |                       ^~~~~~~~~~               ~

GCC-only, for -Wno-ignored-attributes, tons of warnings in all places:

/src/intel-extension-for-pytorch/csrc/cpu/tpp/woq/vec.h:43:21: warning: ignoring attributes on template argument ‘__m512’ [-Wignored-attributes]
   43 | struct VecOps<__m512> {
      |                     ^
/src/intel-extension-for-pytorch/csrc/cpu/tpp/woq/vec.h:228:33: warning: ignoring attributes on template argument ‘__m512’ [-Wignored-attributes]
  228 | inline std::tuple<__m512, __m512> _vec_load_bfloat16_as_two_floats(
      |                                 ^
/src/intel-extension-for-pytorch/csrc/cpu/tpp/woq/vec.h:228:33: warning: ignoring attributes on template argument ‘__m512’ [-Wignored-attributes]
/src/intel-extension-for-pytorch/csrc/cpu/tpp/woq/vec.h:284:34: warning: ignoring attributes on template argument ‘VecArray<N, c10::BFloat16>::vec_type’ {aka ‘__m512’} [-Wignored-attributes]
  284 |   using vec_ops = VecOps<vec_type>;
      |                                  ^
/src/intel-extension-for-pytorch/csrc/cpu/tpp/woq/vec.h:286:74: warning: ignoring attributes on template argument ‘VecType<float>::type’ {aka ‘__m512’} [-Wignored-attributes]
  286 |   using type = typename std::array<typename VecType<float>::type, num_vec>;
      |

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I guess we should fix most of these warnings instead of just ignoring them. @xuhancn can you take a look?

endif()

if (CMAKE_COMPILER_IS_GNUCXX AND NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.0))
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-stringop-overflow")
endif()
Expand Down
2 changes: 1 addition & 1 deletion csrc/cpu/aten/kernels/MaskedMultiHeadAttentionKrnl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ void mul_attenion_weights_and_value_of_head_half(
auto hsi = 0;
auto vec_size = 32; // 512/16
for (hsi = 0; hsi <= head_size - vec_size; hsi += vec_size) {
auto attn_w_vec = _mm512_set1_ph(attn_w);
auto attn_w_vec = _mm512_set1_ph(*(_Float16*)&attn_w);
auto v_vec = _mm512_loadu_ph(v_ptr_start + hsi);
if (accumulate) {
auto attn_out_vec = _mm512_loadu_ph(attn_out_start + hsi);
Expand Down
2 changes: 1 addition & 1 deletion csrc/cpu/jit/codegen/LlgaTensorImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ struct LlgaTensorDesc {
}

bool operator!=(const LlgaTensorDesc& desc) const {
return *this != desc;
return !(*this == desc);
}

static size_t hash(const LlgaTensorDesc& desc) {
Expand Down
21 changes: 21 additions & 0 deletions csrc/cpu/tpp/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,30 @@ typedef at::Half half;
rvalue of type X'. Keep the original code as backup:
*/
#ifdef __clang__

#ifdef __INTEL_LLVM_COMPILER
#define DECL_VLA_PTR_PT(type, name, dims, t) \
auto name = (type(*) dims)(t.data_ptr<type>())
#else
/*
Workaround for Clang crash https://github.com/llvm/llvm-project/issues/75428.
This generates 2 statements, which is unsafe for
`if (...) DECL_VLA_PTR_PT(...)` blocks.
However, due to first line it will be a compile error
(unknown type name 'vla_type_42'), not a runtime error.
*/
#define DECL_VLA_PTR_PT_MERGE_(a, b, type, name, dims, t) \
using a##b = type(*) dims; \
a##b name = reinterpret_cast<a##b>(t.data_ptr<type>())

#define DECL_VLA_PTR_PT_LABEL_(cnt, type, name, dims, t) \
DECL_VLA_PTR_PT_MERGE_(vla_type_, cnt, type, name, dims, t)

#define DECL_VLA_PTR_PT(type, name, dims, t) \
DECL_VLA_PTR_PT_LABEL_(__COUNTER__, type, name, dims, t)
#endif

#else // not clang
#define DECL_VLA_PTR_PT(type, name, dims, t) \
type(*name) dims = (type(*) dims)(t.data_ptr<type>())
#endif
Expand Down
131 changes: 67 additions & 64 deletions csrc/cpu/tpp/woq/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ struct VecOps<__m512h> {
static inline __m512h set1(ST v) {
return _mm512_set1_ph(v);
}
static inline __m512h set1(float v) {
return _mm512_set1_ph(ST(v));
}
static inline __m512h setzero() {
return _mm512_setzero_ph();
}
Expand All @@ -133,73 +136,73 @@ struct VecOps<__m512h> {
}
static inline __m512h set_0_to_15() {
return _mm512_set_ph(
15.0f,
14.0f,
13.0f,
12.0f,
11.0f,
10.0f,
9.0f,
8.0f,
7.0f,
6.0f,
5.0f,
4.0f,
3.0f,
2.0f,
1.0f,
0.0f,
15.0f,
14.0f,
13.0f,
12.0f,
11.0f,
10.0f,
9.0f,
8.0f,
7.0f,
6.0f,
5.0f,
4.0f,
3.0f,
2.0f,
1.0f,
0.0f);
static_cast<_Float16>(15.0f),
static_cast<_Float16>(14.0f),
static_cast<_Float16>(13.0f),
static_cast<_Float16>(12.0f),
static_cast<_Float16>(11.0f),
static_cast<_Float16>(10.0f),
static_cast<_Float16>(9.0f),
static_cast<_Float16>(8.0f),
static_cast<_Float16>(7.0f),
static_cast<_Float16>(6.0f),
static_cast<_Float16>(5.0f),
static_cast<_Float16>(4.0f),
static_cast<_Float16>(3.0f),
static_cast<_Float16>(2.0f),
static_cast<_Float16>(1.0f),
static_cast<_Float16>(0.0f),
static_cast<_Float16>(15.0f),
static_cast<_Float16>(14.0f),
static_cast<_Float16>(13.0f),
static_cast<_Float16>(12.0f),
static_cast<_Float16>(11.0f),
static_cast<_Float16>(10.0f),
static_cast<_Float16>(9.0f),
static_cast<_Float16>(8.0f),
static_cast<_Float16>(7.0f),
static_cast<_Float16>(6.0f),
static_cast<_Float16>(5.0f),
static_cast<_Float16>(4.0f),
static_cast<_Float16>(3.0f),
static_cast<_Float16>(2.0f),
static_cast<_Float16>(1.0f),
static_cast<_Float16>(0.0f));
}
static inline __m512h set_nf4_lut() {
return _mm512_set_ph(
1.0f,
0.7229568362236023,
0.5626170039176941,
0.44070982933044434,
0.33791524171829224,
0.24611230194568634,
0.16093020141124725,
0.07958029955625534,
0.0f,
-0.09105003625154495,
-0.18477343022823334,
-0.28444138169288635,
-0.39491748809814453,
-0.5250730514526367,
-0.6961928009986877,
-1.0f,
1.0f,
0.7229568362236023,
0.5626170039176941,
0.44070982933044434,
0.33791524171829224,
0.24611230194568634,
0.16093020141124725,
0.07958029955625534,
0.0f,
-0.09105003625154495,
-0.18477343022823334,
-0.28444138169288635,
-0.39491748809814453,
-0.5250730514526367,
-0.6961928009986877,
-1.0f);
static_cast<_Float16>(1.0f),
static_cast<_Float16>(0.7229568362236023f),
static_cast<_Float16>(0.5626170039176941f),
static_cast<_Float16>(0.44070982933044434f),
static_cast<_Float16>(0.33791524171829224f),
static_cast<_Float16>(0.24611230194568634f),
static_cast<_Float16>(0.16093020141124725f),
static_cast<_Float16>(0.07958029955625534f),
static_cast<_Float16>(0.0f),
static_cast<_Float16>(-0.09105003625154495f),
static_cast<_Float16>(-0.18477343022823334f),
static_cast<_Float16>(-0.28444138169288635f),
static_cast<_Float16>(-0.39491748809814453f),
static_cast<_Float16>(-0.5250730514526367f),
static_cast<_Float16>(-0.6961928009986877f),
static_cast<_Float16>(-1.0f),
static_cast<_Float16>(1.0f),
static_cast<_Float16>(0.7229568362236023f),
static_cast<_Float16>(0.5626170039176941f),
static_cast<_Float16>(0.44070982933044434f),
static_cast<_Float16>(0.33791524171829224f),
static_cast<_Float16>(0.24611230194568634f),
static_cast<_Float16>(0.16093020141124725f),
static_cast<_Float16>(0.07958029955625534f),
static_cast<_Float16>(0.0f),
static_cast<_Float16>(-0.09105003625154495f),
static_cast<_Float16>(-0.18477343022823334f),
static_cast<_Float16>(-0.28444138169288635f),
static_cast<_Float16>(-0.39491748809814453f),
static_cast<_Float16>(-0.5250730514526367f),
static_cast<_Float16>(-0.6961928009986877f),
static_cast<_Float16>(-1.0f));
}
};

Expand Down
3 changes: 0 additions & 3 deletions csrc/cpu/utils/library.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@

#include <c10/util/Logging.h>

// The flag is used to control the pytorch log level and defined at c10
extern int FLAGS_caffe2_log_level;

// This FLAGS_caffe2_log_level flag is used to control the log level and defined
// at Pytorch c10 library. The default is log level is warn. But it triggers the
// warn to override the kernel under a particular dispatch key. Unfortunately,
Expand Down
12 changes: 6 additions & 6 deletions csrc/cpu/vec/vec512/perf_kernel/add_softmax.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,13 +195,13 @@ inline void _dil_div_add_reduce_max_fusion_kernel_half(
const int& size,
at::Half* out,
at::Half& max) {
auto vec_ps_min = _mm512_set1_ph((at::Half)(-65504.0));
auto vec_ps_min = _mm512_set1_ph(static_cast<_Float16>(-65504.0f));
auto vec_a = vec_ps_min;
auto vec_b = vec_ps_min;
auto vec_out = vec_ps_min;

int i = 0;
auto vec_r_dim_per_head = _mm512_set1_ph((at::Half)(1.0 / dim_per_head));
auto vec_r_dim_per_head = _mm512_set1_ph(_Float16(1.0 / dim_per_head));
for (; i <= size - 32; i += 32) {
vec_a = _loadu_half(a + i);
vec_b = _loadu_half(b + i);
Expand Down Expand Up @@ -304,9 +304,9 @@ inline void _dil_exp_reduce_sum_fusion_kernel_half(
const int& size,
at::Half* out,
at::Half& val) {
static auto vec_zero = _mm512_set1_ph((at::Half)(0.0));
auto vec_max = _mm512_set1_ph(val);
auto vec_sum = _mm512_set1_ph(at::Half(0.0));
static auto vec_zero = _mm512_set1_ph(static_cast<_Float16>(0.0f));
auto vec_max = _mm512_set1_ph(*(_Float16*)&val);
auto vec_sum = _mm512_set1_ph(static_cast<_Float16>(0.0f));
__m512h vec_a = {};
__m512h vec_out = {};

Expand Down Expand Up @@ -364,7 +364,7 @@ inline void _dil_normalization_kernel_half(
const at::Half& sum,
const int& size,
at::Half* out) {
auto vec_sum = _mm512_set1_ph(sum);
auto vec_sum = _mm512_set1_ph(*(_Float16*)&sum);
__m512h vec_a = {};
__m512h vec_out = {};

Expand Down
3 changes: 2 additions & 1 deletion tests/cpu/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ if(NOT EXISTS ${TORCH_INSTALL_PREFIX})
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-Bsymbolic-functions")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-Bsymbolic-functions")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-Bsymbolic-functions")

# Set the include dir
include_directories(${PYTORCH_INSTALL_DIR}/include)
Expand Down
Loading